Build a Recommender System Using Google Cloud | by muffaddal qutbuddin | Jun, 2023


Recommendation AI expects a specific BigQuery table schema. Therefore, we have to create a table in the required format and insert the catalog data into it.

Say our catalog table in BigQuery has the following fields

  1. name: name of the product
  2. id: id of the product
  3. category: assigned category of the product.
  4. description: description about the product
  5. url: URL of the product on website
  6. image_link: publicly accessible image link of the product.
  7. city: the city in which the product is available.

Based on the above product catalog details the schema for our Recommendation AI table will be as follows

[
{
"name": "name",
"type": "STRING",
"description": "name of the product",
"mode": "NULLABLE"
},
{
"name": "id",
"type": "STRING",
"description": "id of the product",
"mode": "REQUIRED"
},
{
"name": "type",
"type": "STRING",
"description": "type of the product. Primary or Variant here",
"mode": "NULLABLE"
},
{
"name": "primaryProductId",
"type": "STRING",
"description": "product id here",
"mode": "NULLABLE"
},
{
"name": "collectionMemberIds",
"type": "STRING",
"description": "",
"mode": "REPEATED"
},
{
"name": "gtin",
"type": "STRING",
"description": "",
"mode": "NULLABLE"
},
{
"name": "categories",
"type": "STRING",
"description": "category here in > format",
"mode": "REPEATED"
},
{
"name": "title",
"type": "STRING",
"description": "title of the product here",
"mode": "REQUIRED"
},
{
"name": "brands",
"type": "STRING",
"description": "brand name here",
"mode": "REPEATED"
},
{
"name": "description",
"type": "STRING",
"description": "",
"mode": "NULLABLE"
},
{
"name": "languageCode",
"type": "STRING",
"description": "",
"mode": "NULLABLE"
},
{
"name": "attributes",
"type": "RECORD",
"description": "you can have custom attributes here",
"mode": "REPEATED",
"fields": [
{
"name": "key",
"type": "STRING",
"description": "",
"mode": "NULLABLE"
},
{
"name": "value",
"type": "RECORD",
"description": "",
"mode": "NULLABLE",
"fields": [
{
"name": "text",
"type": "STRING",
"description": "",
"mode": "REPEATED"
},
{
"name": "numbers",
"type": "FLOAT",
"description": "",
"mode": "REPEATED"
},
{
"name": "searchable",
"type": "BOOLEAN",
"description": "",
"mode": "NULLABLE"
},
{
"name": "indexable",
"type": "BOOLEAN",
"description": "",
"mode": "NULLABLE"
}
]
}
]
},
{
"name": "tags",
"type": "STRING",
"description": "tags based on the product",
"mode": "REPEATED"
},
{
"name": "priceInfo",
"type": "RECORD",
"description": "",
"mode": "NULLABLE",
"fields": [
{
"name": "currencyCode",
"type": "STRING",
"description": "",
"mode": "NULLABLE"
},
{
"name": "price",
"type": "FLOAT",
"description": "",
"mode": "NULLABLE"
},
{
"name": "originalPrice",
"type": "FLOAT",
"description": "",
"mode": "NULLABLE"
},
{
"name": "cost",
"type": "FLOAT",
"description": "",
"mode": "NULLABLE"
},
{
"name": "priceEffectiveTime",
"type": "STRING",
"description": "",
"mode": "NULLABLE"
},
{
"name": "priceExpireTime",
"type": "STRING",
"description": "",
"mode": "NULLABLE"
}
]
},
{
"name": "rating",
"type": "RECORD",
"description": "",
"mode": "NULLABLE",
"fields": [
{
"name": "ratingCount",
"type": "INTEGER",
"description": "",
"mode": "NULLABLE"
},
{
"name": "averageRating",
"type": "FLOAT",
"description": "",
"mode": "NULLABLE"
},
{
"name": "ratingHistogram",
"type": "INTEGER",
"description": "",
"mode": "REPEATED"
}
]
},
{
"name": "expireTime",
"type": "STRING",
"description": "",
"mode": "NULLABLE"
},
{
"name": "ttl",
"type": "RECORD",
"description": "",
"mode": "NULLABLE",
"fields": [
{
"name": "seconds",
"type": "INTEGER",
"description": "",
"mode": "NULLABLE"
},
{
"name": "nanos",
"type": "INTEGER",
"description": "",
"mode": "NULLABLE"
}
]
},
{
"name": "availableTime",
"type": "STRING",
"description": "",
"mode": "NULLABLE"
},
{
"name": "availability",
"type": "STRING",
"description": "",
"mode": "NULLABLE"
},
{
"name": "availableQuantity",
"type": "INTEGER",
"description": "",
"mode": "NULLABLE"
},
{
"name": "fulfillmentInfo",
"type": "RECORD",
"description": "",
"mode": "REPEATED",
"fields": [
{
"name": "type",
"type": "STRING",
"description": "",
"mode": "NULLABLE"
},
{
"name": "placeIds",
"type": "STRING",
"description": "",
"mode": "REPEATED"
}
]
},
{
"name": "uri",
"type": "STRING",
"description": "product url here",
"mode": "NULLABLE"
},
{
"name": "images",
"type": "RECORD",
"description": "",
"mode": "REPEATED",
"fields": [
{
"name": "uri",
"type": "STRING",
"description": "",
"mode": "REQUIRED"
},
{
"name": "height",
"type": "INTEGER",
"description": "",
"mode": "NULLABLE"
},
{
"name": "width",
"type": "INTEGER",
"description": "",
"mode": "NULLABLE"
}
]
},
{
"name": "audience",
"type": "RECORD",
"description": "",
"mode": "NULLABLE",
"fields": [
{
"name": "genders",
"type": "STRING",
"description": "",
"mode": "REPEATED"
},
{
"name": "ageGroups",
"type": "STRING",
"description": "",
"mode": "REPEATED"
}
]
},
{
"name": "colorInfo",
"type": "RECORD",
"description": "",
"mode": "NULLABLE",
"fields": [
{
"name": "colorFamilies",
"type": "STRING",
"description": "",
"mode": "REPEATED"
},
{
"name": "colors",
"type": "STRING",
"description": "",
"mode": "REPEATED"
}
]
},
{
"name": "sizes",
"type": "STRING",
"description": "",
"mode": "REPEATED"
},
{
"name": "materials",
"type": "STRING",
"description": "",
"mode": "REPEATED"
},
{
"name": "patterns",
"type": "STRING",
"description": "",
"mode": "REPEATED"
},
{
"name": "conditions",
"type": "STRING",
"description": "",
"mode": "REPEATED"
},
{
"name": "retrievableFields",
"type": "STRING",
"description": "",
"mode": "NULLABLE"
},
{
"name": "publishTime",
"type": "STRING",
"description": "",
"mode": "NULLABLE"
},
{
"name": "promotions",
"type": "RECORD",
"description": "",
"mode": "REPEATED",
"fields": [
{
"name": "promotionId",
"type": "STRING",
"description": "",
"mode": "NULLABLE"
}
]
}
]

There are a bunch of fields that we can set for our catalog. The more data you can provide, the better. For this demonstration, I will stick with the most common ones. Note that nullable fields are optional.

The type field is where we decide whether the product is a variant or a primary product. I will go with PRIMARY for this article. Read here for more details.

Once our table is ready we can insert the catalog data from the main table to this table using the below query.

-- Populate the Recommendation AI catalog table (`recommendersystem.product_data`)
-- from the source catalog table (`product.product_details`).
--
-- Source columns follow the field list given earlier in this article:
--   name, id, category, description, url, image_link, city.
-- NOTE(review): the original query referenced `categories`, `location` and
-- `image_url`, which do not appear in that list; confirm against the real table.
--
-- Every product is written as a PRIMARY product, so primaryProductId equals id.
-- Fields that the source catalog cannot supply are inserted as NULL.
INSERT INTO `recommendersystem.product_data` (
    name,
    id,
    type,
    primaryProductId,
    collectionMemberIds,
    gtin,
    categories,
    title,
    brands,
    description,
    languageCode,
    attributes,
    tags,
    priceInfo,
    rating,
    expireTime,
    ttl,
    availableTime,
    availability,
    availableQuantity,
    fulfillmentInfo,
    uri,
    images,
    audience,
    colorInfo,
    sizes,
    materials,
    patterns,
    conditions,
    retrievableFields,
    publishTime,
    promotions
)
SELECT
    name,
    CAST(id AS STRING) AS id,
    "PRIMARY" AS type,
    CAST(id AS STRING) AS primaryProductId,
    NULL AS collectionMemberIds,
    NULL AS gtin,
    -- NOTE(review): a NULL category would yield an array with a NULL element,
    -- which BigQuery is expected to reject on write; verify the column is populated.
    [category] AS categories,
    name AS title,
    -- The original expression for brands was lost (page-title text had been pasted
    -- over the array literal) and the documented source fields contain no brand,
    -- so an empty array is written. TODO: replace with the real brand column.
    ARRAY<STRING>[] AS brands,
    -- Fall back to the product name when no description is available.
    IFNULL(description, name) AS description,
    NULL AS languageCode,
    -- Single custom attribute exposing the product's city as searchable/indexable text.
    [
        STRUCT(
            'product_location' AS key,
            STRUCT(
                [IFNULL(city, "empty")] AS text,
                CAST(NULL AS ARRAY<FLOAT64>) AS numbers,
                TRUE AS searchable,
                TRUE AS indexable
            ) AS value
        )
    ] AS attributes,
    -- Tags: the product's city and category, with "empty" standing in for NULLs.
    ARRAY_CONCAT(
        [IFNULL(city, "empty")],
        [IFNULL(category, "empty")]
    ) AS tags,
    NULL AS priceInfo,
    NULL AS rating,
    NULL AS expireTime,
    NULL AS ttl,
    NULL AS availableTime,
    NULL AS availability,
    NULL AS availableQuantity,
    NULL AS fulfillmentInfo,
    url AS uri,
    -- One image per product; dimensions are unknown. The field aliases must sit
    -- inside STRUCT(...), which the original had closed too early.
    [
        STRUCT(
            image_link AS uri,
            CAST(NULL AS INT64) AS height,
            CAST(NULL AS INT64) AS width
        )
    ] AS images,
    NULL AS audience,
    NULL AS colorInfo,
    NULL AS sizes,
    NULL AS materials,
    NULL AS patterns,
    NULL AS conditions,
    NULL AS retrievableFields,
    NULL AS publishTime,
    NULL AS promotions
FROM `product.product_details`;

As soon as the data is available in our new table, we are ready to import it into Recommendation AI.

In the Data tab of Retail AI in the Google Cloud console, click Import at the top left to import the data.



Source link

Leave a Comment