var scraperjs = require('scraperjs');
// Number of follow-up (pagination) requests issued so far. `scraper` checks
// this counter to cap the crawl, so we don't get banned by tonicdev.
var i = 0;
/**
 * Scrape one page of the blog, log the posts found on it, and then follow the
 * pagination to the next (older) page while the module-level counter `i`
 * allows it.
 *
 * Every `.post-outer` element on the page is reported as an object of the
 * form `{ title, image, content, tags, date }`.
 *
 * Failures reported by the scraper chain are logged to stderr instead of
 * being dropped silently.
 *
 * @param {string} url - Address of the page to scrape.
 * @returns {void}
 */
var scraper = (url) => {
  scraperjs.StaticScraper.create(url)
    .scrape(function ($) {
      // The callbacks below must stay `function` expressions: the current
      // element is handed over as `this`.
      return $(".post-outer").map(function () {
        // Wrap the element once and reuse it for every lookup.
        const post = $(this);
        const tags = post.find(".post-labels").text()
          .replace(/\n/g, "") // Remove all EOL
          .replace("Labels:", "");
        return {
          title: post.find(".post-title > a").text(),
          image: post.find("meta[itemprop='image_url']").attr("content"),
          content: post.find(".post-body").html(),
          tags: tags,
          date: post.find(".published[itemprop='datePublished']").attr("title"),
        };
      }).get();
    })
    .then(function (data) {
      console.log(data);

      // Stop when the page had no posts or the request budget is used up.
      if (!Array.isArray(data) || data.length === 0 || i >= 1) {
        return;
      }

      // The date of the last post on the page is the cursor for the next page.
      const lastDate = data[data.length - 1].date;
      if (!lastDate) {
        // No cursor available: asking for "updated-max=undefined" would only
        // waste a request, so end the pagination here.
        return;
      }

      // The timestamp may contain characters that are special in a query
      // string (e.g. the '+' of a UTC offset, which would be read as a space),
      // so it has to be percent-encoded.
      const nextUrl = 'http://medsouls.blogspot.com/search?max-results=10&updated-max=' +
        encodeURIComponent(lastDate);
      i++;
      scraper(nextUrl);
    })
    .catch(function (err) {
      // Surface request/parsing failures rather than failing silently.
      console.error('Failed to scrape ' + url + ':', err);
    });
};
// Entry point: start the crawl at the blog's root URL.
scraper('http://medsouls.blogspot.com/');