// N-gram generator for an array of objects
const ngram = require('n-gram')
// Sample records; each one carries an id plus two free-text fields.
const arr = [
  { _id: 1, name: 'Java Hut', description: 'Coffee and cakes' },
  { _id: 2, name: 'Burger Buns', description: 'Gourmet hamburgers' },
  { _id: 3, name: 'Coffee Shop', description: 'Just coffee' },
  { _id: 4, name: 'Clothes Clothes Clothes', description: 'Discount clothing' },
  { _id: 5, name: 'Java Shopping', description: 'Indonesian goods' },
];

// Property names whose text is handed to generateNGrams.
const keys = ['name', 'description'];

// Intended minimum n-gram length (same value as the generateNGrams default).
const nMinimum = 2;
/**
 * Adds a `searchText` field of n-grams to every object in `arr`.
 *
 * For each element, the string values found under `keys` are lower-cased and
 * split on runs of non-word characters. Every distinct word contributes itself
 * and, when it is longer than `nMinimum`, all of its substrings of length
 * `nMinimum` through `word.length - 1` (generated by `ngram`). The distinct
 * grams are joined with single spaces and stored on `elem.searchText`.
 *
 * Elements are mutated in place and the same array instance is returned.
 *
 * @param {Object[]} arr - Objects to annotate.
 * @param {string[]} keys - Property names whose string values are indexed;
 *   missing or non-string values are skipped.
 * @param {number} [nMinimum=2] - Smallest n-gram length to generate. Words of
 *   this length or shorter are kept whole.
 * @returns {Object[]} `arr`, with `searchText` set on every element.
 */
function generateNGrams(arr, keys, nMinimum = 2) {
  // A Map instead of a plain object: words such as "constructor" or
  // "__proto__" would otherwise resolve to inherited Object.prototype members
  // and be mistaken for cached entries.
  const gramsByWord = new Map();

  for (const elem of arr) {
    const searchWords = new Set();
    for (const key of keys) {
      const value = elem[key];
      if (typeof value !== 'string') {
        continue;
      }
      for (const word of value.toLowerCase().split(/\W+/)) {
        // Leading or trailing separators still yield an empty token; drop it
        // so it cannot show up as a stray space in searchText.
        if (word !== '') {
          searchWords.add(word);
        }
      }
    }

    const gramSet = new Set();
    for (const word of searchWords) {
      if (word.length <= nMinimum) {
        // Too short to split any further: index the word as-is.
        gramSet.add(word);
        continue;
      }

      let wordGrams = gramsByWord.get(word);
      if (wordGrams === undefined) {
        const uniqueGrams = new Set();
        for (let size = nMinimum; size < word.length; size += 1) {
          for (const gram of ngram(size)(word)) {
            uniqueGrams.add(gram);
          }
        }
        // The full word goes last, after all of its shorter grams.
        uniqueGrams.add(word);
        wordGrams = [...uniqueGrams];
        gramsByWord.set(word, wordGrams);
      }

      for (const gram of wordGrams) {
        gramSet.add(gram);
      }
    }

    elem.searchText = [...gramSet].join(' ');
  }

  return arr;
}
// Annotate the sample records in place. nMinimum is passed explicitly so the
// module-level setting is the value that actually takes effect.
generateNGrams(arr, keys, nMinimum);
// no comments