N-gram generator for Array of Objects

node v10.24.1
version: 1.1.0
endpoint · share · tweet
const ngram = require('n-gram')
Initialize Input Values
// Sample documents used to exercise generateNGrams.
const arr = [
  { _id: 1.0, name: 'Java Hut', description: 'Coffee and cakes' },
  { _id: 2.0, name: 'Burger Buns', description: 'Gourmet hamburgers' },
  { _id: 3.0, name: 'Coffee Shop', description: 'Just coffee' },
  { _id: 4.0, name: 'Clothes Clothes Clothes', description: 'Discount clothing' },
  { _id: 5.0, name: 'Java Shopping', description: 'Indonesian goods' },
];

// Property names whose text is tokenized.
const keys = ['name', 'description'];

// Minimum n-gram length (same value as generateNGrams' default).
const nMinimum = 2;
Function Definition
/**
 * Adds a space-separated `searchText` string of n-grams to every object in `arr`.
 *
 * For each element, the values of the given `keys` are lower-cased and split on
 * non-word characters. Words of length <= `nMinimum` are kept as-is; for longer
 * words every n-gram of length `nMinimum` .. `word.length - 1` is emitted,
 * followed by the whole word. Duplicate grams within one element are dropped.
 *
 * @param {Object[]} arr - Objects to index. Each one is mutated in place
 *   (a `searchText` property is set or overwritten).
 * @param {string[]} keys - Property names whose string values are tokenized.
 *   Properties that are missing or not strings are skipped.
 * @param {number} [nMinimum=2] - Smallest n-gram length to generate.
 * @returns {Object[]} The same `arr` instance that was passed in.
 */
function generateNGrams(arr, keys, nMinimum = 2) {
  // A Map (not a plain object) so that words such as "constructor" or
  // "__proto__" cannot collide with members inherited from Object.prototype.
  const cache = new Map();

  for (const elem of arr) {
    const searchWords = new Set();
    for (const key of keys) {
      const value = elem[key];
      if (typeof value !== 'string') {
        continue;
      }
      for (const word of value.toLowerCase().split(/[^\w]/)) {
        // Consecutive separators produce empty tokens; they carry no search value.
        if (word !== '') {
          searchWords.add(word);
        }
      }
    }

    const gramSet = new Set();
    for (const word of searchWords) {
      if (word.length <= nMinimum) {
        gramSet.add(word);
        continue;
      }

      let wordGrams = cache.get(word);
      if (wordGrams === undefined) {
        const grams = new Set();
        for (let size = nMinimum; size < word.length; size += 1) {
          for (const gram of ngram(size)(word)) {
            grams.add(gram);
          }
        }
        // The full word is always searchable, listed after its shorter grams.
        grams.add(word);
        wordGrams = [...grams];
        cache.set(word, wordGrams);
      }

      for (const gram of wordGrams) {
        gramSet.add(gram);
      }
    }

    elem.searchText = [...gramSet].join(' ');
  }

  return arr;
}
Testing ...
// Pass nMinimum explicitly so the value configured in the input cell is actually used.
generateNGrams(arr, keys, nMinimum);
Loading…

no comments

    sign in to comment