test web scraping with scraperjs

node v0.12.18
version: 1.0.1
endpoint | share | tweet
const scraperjs = require('scraperjs');

// Blog whose posts are scraped; also the base for the paginated search URL.
const BLOG_URL = 'http://medsouls.blogspot.com/';

// Maximum number of follow-up (older-posts) pages requested after the first
// page. Kept deliberately small so the script does not hammer the site or get
// the notebook host banned.
const MAX_FOLLOW_UP_PAGES = 1;

// Number of follow-up pages requested so far (controls the recursion).
let i = 0;

/**
 * Pull every post out of a loaded blog page.
 *
 * @param {Function} $ - jQuery-like selector function handed to the
 *   `.scrape()` callback by scraperjs.
 * @returns {Array<Object>} One `{ title, image, content, tags, date }` record
 *   per `.post-outer` element, in document order.
 */
function extractPosts($) {
  return $('.post-outer')
    // A regular function is required here: the mapped element arrives as `this`.
    .map(function () {
      const post = $(this);
      const tags = post
        .find('.post-labels')
        .text()
        .replace(/\n/g, '') // Remove all EOL
        .replace('Labels:', ''); // Drop the leading caption (first occurrence only)

      return {
        title: post.find('.post-title > a').text(),
        image: post.find("meta[itemprop='image_url']").attr('content'),
        content: post.find('.post-body').html(),
        tags,
        date: post.find(".published[itemprop='datePublished']").attr('title'),
      };
    })
    .get();
}

/**
 * Build the search URL that lists posts updated before `lastDate`.
 *
 * The date is percent-encoded because it is placed in a query string, where a
 * raw `+` (e.g. in a timezone offset such as `+07:00`) would be decoded as a
 * space. NOTE(review): the exact format of the scraped date is not visible
 * here; presumably an ISO-8601 timestamp, to be confirmed against the markup.
 *
 * @param {string} lastDate - `date` value of the oldest post on the current page.
 * @returns {string} Absolute URL of the next page of results.
 */
function buildNextUrl(lastDate) {
  return `${BLOG_URL}search?max-results=10&updated-max=${encodeURIComponent(lastDate)}`;
}

/**
 * Scrape one page of the blog, log the extracted posts, and follow the
 * pagination while the follow-up page budget allows.
 *
 * @param {string} url - Page to scrape.
 * @returns {Object} The scraperjs promise chain for this page, so callers may
 *   attach further handlers if they wish.
 */
const scraper = (url) =>
  scraperjs.StaticScraper.create(url)
    .scrape(extractPosts)
    .then((data) => {
      console.log(data);

      if (data.length === 0 || i >= MAX_FOLLOW_UP_PAGES) {
        return;
      }

      const lastDate = data[data.length - 1].date;
      if (!lastDate) {
        // Without a date there is no valid `updated-max` cursor; stop rather
        // than requesting `updated-max=undefined`.
        return;
      }

      i++;
      scraper(buildNextUrl(lastDate));
    })
    .catch((err) => {
      // Report the failure instead of leaving the rejection unhandled.
      console.error(`Failed to scrape ${url}:`, err);
    });

scraper(BLOG_URL);
Loading…

no comments

    sign in to comment