const mql = require('@microlink/mql')
/**
 * Tells whether a JSON-LD node declares the `Product` type.
 *
 * `@type` may be a single string or an array of strings, so both shapes are
 * accepted. Non-object entries (null, strings, numbers) are never products.
 *
 * @param {*} node - One entry taken from the scraped JSON-LD list.
 * @returns {boolean} True when the node's `@type` is or includes 'Product'.
 */
const isProductNode = node => {
  if (node === null || typeof node !== 'object') return false
  const type = node['@type']
  return Array.isArray(type) ? type.includes('Product') : type === 'Product'
}

/**
 * Scrapes product information from a page through microlink.
 *
 * Three custom data rules are requested: every `application/ld+json` script
 * (`jsonLd`), every schema.org Product microdata root as-is (`rawMicrodata`),
 * and a structured extraction of name/sku/image/brand from those same roots
 * (`microdata`).
 *
 * @param {string} url - Page to scrape.
 * @param {object} [opts] - Extra mql options. They are spread last, so they
 *   override the defaults below; passing `data` replaces the whole rule set
 *   (the merge is shallow).
 * @returns {Promise<object>} The `data` payload returned by mql, with `jsonLd`
 *   always an array that contains only the Product nodes.
 */
const jsonLdFn = async (url, opts) => {
  const { data } = await mql(url, {
    // Presumably renders the page in a browser before scraping — confirm
    // against the microlink `prerender` documentation.
    prerender: true,
    // Presumably an extra wait in milliseconds after load — TODO confirm.
    waitForTimeout: 1500,
    meta: false,
    data: {
      jsonLd: {
        selectorAll: 'script[type="application/ld+json"]'
      },
      rawMicrodata: {
        selectorAll: '[itemtype="http://schema.org/Product"]'
      },
      // NOTE(review): microlink documents 'text' and 'val' as values of
      // `attr`, not of `type`. The rules below are kept exactly as they were,
      // but this mismatch may be why the selectors return unexpected results.
      microdata: {
        selectorAll: '[itemtype="http://schema.org/Product"]',
        attr: {
          name: {
            type: 'text',
            selector: '[itemprop="name"]'
          },
          sku: {
            type: 'text',
            selector: '[itemprop="sku"]'
          },
          image: {
            type: 'val',
            selector: '[itemprop="image"]',
            attr: 'src'
          },
          // Two rules in order of preference: the nested brand name first,
          // then the brand element itself.
          brand: [
            {
              type: 'text',
              selector: '[itemprop="brand"] [itemprop="name"]'
            },
            {
              type: 'text',
              selector: '[itemprop="brand"]'
            }
          ]
        }
      }
    },
    ...opts
  })

  // Normalise to an array so a single (non-array) match or a missing key
  // cannot make the filter below throw.
  const scraped = data.jsonLd
  let nodes = []
  if (Array.isArray(scraped)) {
    nodes = scraped
  } else if (scraped != null) {
    nodes = [scraped]
  }

  return { ...data, jsonLd: nodes.filter(isProductNode) }
}
const products = []

/**
 * Scrapes each sample product page in turn, collects the results in
 * `products`, and logs them once all pages have been processed.
 *
 * The awaits are wrapped in an async function because top-level `await` is
 * not available in a CommonJS script (this file loads dependencies with
 * `require`).
 *
 * @returns {Promise<void>} Resolves after the results have been logged.
 */
const main = async () => {
  const urls = [
    // Working example.
    'https://www.walmart.ca/en/ip/soozier-adjustable-upright-exercise-bike/6000201173838?rrid=richrelevance',
    // Staples exposes ld+json only after the page has rendered, and has no
    // microdata. No data is returned, likely because the page is not
    // prerendered before it is scraped.
    'https://www.staples.ca/products/2735027-en-brother-tn760-black-toner-cartridge-high-yield',
    // Kerastase exposes microdata but no ld+json. The HTML selectors do not
    // return the expected results.
    'https://www.kerastase.ca/en/collections/nutritive/3474636721832.html'
  ]

  // Requests run one after another, so results keep the order of `urls`.
  for (const url of urls) {
    products.push(await jsonLdFn(url))
  }

  console.log(products)
}

main().catch(error => {
  // Report the failure and exit non-zero instead of leaving the rejection
  // unhandled.
  console.error(error)
  process.exitCode = 1
})