Simple Fluid Project Resource Harvesting Example

node v8.16.0
version: master
endpointsharetweet
A basic demonstration of scraping structured resources from Fluid wiki pages whose resource listings are generated with the resources macro described at https://wiki.fluidproject.org/pages/viewpage.action?pageId=152994052
var https = require("https");
var cheerio = require("cheerio");
var _ = require("lodash");
const { ValueViewerSymbol } = require("@runkit/value-viewer");

// Wiki pages whose `.fluidWiki-resource` blocks should be harvested.
var resourcePages = [
    "https://wiki.fluidproject.org/display/fluid/Inclusive+Design+Resources"
];

/**
 * Builds a plain resource record from a single `.fluidWiki-resource` element.
 *
 * @param {Function} $ - cheerio instance loaded with the whole page.
 * @param {Object} element - the resource element handed over by `$(...).each`.
 * @returns {Object} record with `name` and `URL`, plus `creator`,
 *     `description` and `tags` (array of trimmed strings) when present.
 */
function extractResource($, element) {
    // Re-parse the element's inner markup so the selectors below can only
    // match content belonging to this one resource.
    const $resource = cheerio.load($(element).html());

    // Required fields
    const resourceInfo = {
        name: $resource(".fluidWiki-resource-name").html(),
        URL: $resource(".fluidWiki-resource-URL a").attr("href")
    };

    // Optional fields: only set when the page actually provides them
    const creator = $resource(".fluidWiki-resource-creator").html();
    const description = $resource(".fluidWiki-resource-description").html();
    const tags = $resource(".fluidWiki-resource-tags").html();

    if (creator) {
        resourceInfo.creator = creator;
    }
    if (description) {
        resourceInfo.description = description;
    }
    if (tags) {
        resourceInfo.tags = tags.split(",").map(function (tag) {
            return tag.trim();
        });
    }

    return resourceInfo;
}

/**
 * Case-insensitive comparator ordering resources by name.
 * A missing name is treated as an empty string so sorting never throws.
 *
 * @param {Object} a - first resource record.
 * @param {Object} b - second resource record.
 * @returns {number} -1, 0 or 1, as expected by `Array.prototype.sort`.
 */
function compareResourcesByName(a, b) {
    const nameA = (a.name || "").toUpperCase();
    const nameB = (b.name || "").toUpperCase();
    if (nameA < nameB) {
        return -1;
    }
    if (nameA > nameB) {
        return 1;
    }
    return 0;
}

/**
 * Renders one `<tr>` per resource. Cells follow the column order
 * Name, URL, Creator, Description, Tags; absent fields become empty cells.
 *
 * @param {Object[]} resources - records produced by `extractResource`.
 * @returns {string} concatenated table row markup.
 */
function buildTableRows(resources) {
    return resources.map(function (resource) {
        // name, creator, description and tags come from `.html()` and are
        // already markup; the URL is a raw attribute value, so escape it.
        const name = resource.name || "";
        const url = _.escape(resource.URL || "");
        const creator = resource.creator || "";
        const description = resource.description || "";
        const tags = (resource.tags || []).join(", ");
        return `
<tr>
    <td>${name}</td>
    <td>${url}</td>
    <td>${creator}</td>
    <td>${description}</td>
    <td>${tags}</td>
</tr>
`;
    }).join("");
}

/**
 * Downloads one wiki page, extracts its resources sorted by name, and logs
 * both the table rows and a RunKit value viewer holding the JSON form.
 *
 * @param {string} pageURL - address of the wiki page to harvest.
 */
function harvestPage(pageURL) {
    https.get(pageURL, function (res) {
        if (res.statusCode !== 200) {
            console.error(`Request for ${pageURL} failed with status ${res.statusCode}`);
            // Drain the response so the socket is released.
            res.resume();
            return;
        }

        // Decode as UTF-8 up front so a multi-byte character split across
        // two chunks is not corrupted by per-chunk string conversion.
        res.setEncoding("utf8");

        // Must start as an empty string; starting from undefined would
        // prefix the page with the text "undefined".
        let pageContent = "";
        res.on("data", function (chunk) {
            pageContent += chunk;
        });

        res.on("end", function () {
            const $ = cheerio.load(pageContent);
            const resources = [];

            $(".fluidWiki-resource").each(function (idx, element) {
                resources.push(extractResource($, element));
            });

            resources.sort(compareResourcesByName);

            const resourcesAsJSON = JSON.stringify(resources, null, 2);

            console.log(buildTableRows(resources));

            const resourceViewer = {
                [ValueViewerSymbol]: {
                    title: "Resource Viewer",
                    // Escape so harvested markup inside the JSON is displayed
                    // literally instead of being rendered by the viewer.
                    HTML: `<pre>${_.escape(resourcesAsJSON)}</pre>`
                },
                resourcesAsJSON: resourcesAsJSON
            };

            console.log(resourceViewer);
        });
    }).on("error", function (err) {
        console.error(`Request for ${pageURL} failed: ${err.message}`);
    });
}

resourcePages.forEach(function (pageURL) {
    harvestPage(pageURL);
});
Loading…

no comments

    sign in to comment