pacific's notebooks

  • data.go.id search - /pacific/data-go-id-search
    Last edited 7 years ago
    // Crawls the data.go.id dataset listing one page at a time. For every
    // dataset heading on a page it opens the dataset page, reads the id of the
    // first listed resource, and runs a datastore search for "jawa+timur"
    // against that resource. One status value is logged per dataset.
    var request = require('request');
    var cheerio = require('cheerio');

    var BASE_URL = 'http://data.go.id';
    var page = 1;

    runCrawler();

    /**
     * Entry point: crawls listing pages starting at `page`, logging one status
     * per dataset, and moves on to the next page only after every dataset of
     * the current page has reported back. Stops at the first page that cannot
     * be fetched or that lists no datasets.
     */
    function runCrawler() {
      function report(status) {
        console.log(status);
      }

      function pageDone(datasetCount, failure) {
        if (failure) {
          console.error(failure);
        }
        if (datasetCount === 0) {
          return;
        }
        page++;
        crawl(page, report, pageDone);
      }

      crawl(page, report, pageDone);
    }

    /**
     * Fetches one listing page and checks every dataset linked from it.
     *
     * @param {number} page - Listing page number to fetch.
     * @param {function} callback - Called once per dataset with the status
     *   produced by getID/getQuery (null on success, otherwise an Error or an
     *   HTTP status code).
     * @param {function} [done] - Optional. Called exactly once per page as
     *   done(datasetCount, failure) after every dataset has reported;
     *   datasetCount is 0 when the page failed to load or lists no datasets.
     */
    function crawl(page, callback, done) {
      var listingUrl = BASE_URL + '/dataset?page=' + page;

      function finish(count, failure) {
        if (done) {
          done(count, failure);
        }
      }

      request(listingUrl, function (error, response, body) {
        if (error) {
          return finish(0, error);
        }
        if (response.statusCode !== 200) {
          return finish(0, response.statusCode);
        }

        var $ = cheerio.load(body);
        var hrefs = [];
        $('h3.dataset-heading').each(function () {
          var href = $(this).find('a').attr('href');
          if (href) {
            hrefs.push(href);
          }
        });

        var pending = hrefs.length;
        if (pending === 0) {
          return finish(0);
        }

        hrefs.forEach(function (href) {
          getID(href, function (status) {
            callback(status);
            // Page completion is signalled separately from the per-dataset
            // callback so that a page with N datasets does not trigger N
            // follow-up page crawls.
            pending--;
            if (pending === 0) {
              finish(hrefs.length);
            }
          });
        });
      });
    }

    /**
     * Loads a dataset page, extracts the `data-id` of the first
     * `li.resource-item`, and searches that resource for "jawa+timur".
     *
     * @param {string} url - Site-relative path of the dataset page.
     * @param {function} callback - Always called exactly once: null on
     *   success, otherwise an Error or an HTTP status code.
     */
    function getID(url, callback) {
      var datasetUrl = BASE_URL + url;

      request(datasetUrl, function (error, response, body) {
        if (error) {
          return callback(error);
        }
        if (response.statusCode !== 200) {
          return callback(response.statusCode);
        }

        var $ = cheerio.load(body);
        var dataID = $('li.resource-item').attr('data-id');
        if (!dataID) {
          return callback(new Error('no resource id found at ' + datasetUrl));
        }

        getQuery(dataID, 'jawa+timur', callback);
      });
    }

    /**
     * Runs a datastore_search request for one resource.
     *
     * @param {string} id - Resource id, sent as `resource_id`.
     * @param {string} query - Search text, sent as `q`. It is inserted as-is,
     *   so it must already be URL-encoded (the caller passes "jawa+timur").
     * @param {function} callback - Always called exactly once: null when the
     *   response body parses as JSON with a truthy `success` field; otherwise
     *   the request error, the JSON parse error, or the HTTP status code.
     */
    function getQuery(id, query, callback) {
      var url = BASE_URL + '/api/action/datastore_search?resource_id=' +
        encodeURIComponent(id) + '&q=' + query;

      request(url, function (error, response, body) {
        // `response` may be undefined when `error` is set, so bail out before
        // touching it.
        if (error) {
          return callback(error);
        }

        var result;
        try {
          result = JSON.parse(body);
        } catch (parseError) {
          return callback(parseError);
        }

        if (result && result.success) {
          return callback(null);
        }
        return callback(response.statusCode);
      });
    }
  • tonic + npm: request - /pacific/request-clean-url
    Last edited 7 years ago - from: https://tonicdev.com/npm/request
    // Requests a short URL with redirect-following disabled and prints the
    // Location header of the response, i.e. the address the short link points to.
    var r = require("request");

    r("http://goo.gl/PBtXyS", {followRedirect: false}, function (error, response, body) {
      // `response` may be undefined when the request itself fails, so report
      // the error instead of dereferencing it.
      if (error) {
        console.error(error);
        return;
      }
      console.log(response.headers.location);
    });