ego's notebooks

  • endpoint - /ego/endpoint
    Last edited 7 years ago - from: https://runkit.com/npm/scrapper-x
    // var express = require('express'); // var app = express(); var sX = require('scrapper-x'); var request = require('request'); var tonicExpress = require("https://runkit.com/ego/endpoint")//("tonic/endpoint/"); var app = tonicExpress(module.exports) // app.get('/',function( req, res){ // console.log('done scraping'); var option1 = { repeatItemGroup: 'tr.match ', dataFormat: { date: { selector: 'td.date', type: 'text' }, home: { selector: '.team.team-a > a', type: 'text' }, away: { selector: '.team-b', type: 'text' }, goals: { selector: '.score', type: 'text' } } }; /* GET home page. */ app.get('/match', function(req, res, next) { request('http://int.soccerway.com/national/myanmar/national-league/2016/regular-season/r33992/matches/', function(error, response, body) { var scrappedResult = {}; if (!error && response.statusCode == 200) { scrappedResult = sX.scrape(body, option1); } // scrappedResult.push({ // "date" : date, // "home" : home, // "away" : away, // "goals" : goals // }); scrappedResult.push({date:'', home: ''}) res.status(200).json(scrappedResult); }); }); var option2 = { repeatItemGroup: 'tr.team_rank', dataFormat: { rank: { selector: '.rank', type: 'text' }, team: { selector: '.text.team.large-link > a', type: 'text' }, played: { selector: '.number.total.mp', type: 'text' }, wins: { selector: '.number.total.won.total_won', type: 'text' }, loses: { selector: '.number.lost', type: 'text' }, draws: { selector: '.number.drawn', type: 'text' }, gd: { selector: 'td.number.gd', type: 'text' }, ga: { selector: 'td.number.total.ga', type: 'text' }, goaltotal: { selector: 'td.number.total.gf', type: 'text' }, point: { selector: '.number.points', type: 'text' } } }; /* GET home page. 
*/ app.get('/standing', function(req, res, next) { request('http://int.soccerway.com/national/myanmar/national-league/2016/regular-season/r33992/tables/', function(error, response, body) { var ranking = {}; if (!error && response.statusCode == 200) { rankingResult = sX.scrape(body, option2); } // scrappedResult.push({ // "date" : date, // "home" : home, // "away" : away, // "goals" : goals // }); // scrappedResult.push({date:'', home: ''}) res.status(200).json(rankingResult); console.log(rankingResult); }); }); exports.endpoint = function(request, response) { response.end(standing); } var port = process.env.PORT || 3000; // app.listen('3000') // console.log('Magic happens on port 8081'); // exports = module.exports = app; app.listen(3000) console.log('The party is on at port ' + port); exports = module.exports = app;
  • Untitled - /ego/firebasetest
    Last edited 7 years ago
    /**
     * Searches search.pantip.com for KEYWORD, follows every link on the result
     * page, and stores each topic (plus its comments and replies) in Firebase
     * under "pantip_<KEYWORD>/<topic element id>".
     */
    var cheerio = require('cheerio');
    var request = require('request');
    var prettyjson = require('prettyjson');
    var firebase = require('firebase');

    /** SET keyword for searching **/
    var KEYWORD = "SEARCHKEYWORD";

    /** Init firebase **/
    firebase.initializeApp({
      serviceAccount: "SERVICEACCOUNT.json",
      databaseURL: "DATABASE.firebaseio.com/"
    });

    var db = firebase.database();
    var pantipRef = db.ref("pantip_" + KEYWORD);

    var baseUrl = "http://search.pantip.com/";
    var searchUrl = baseUrl + "ss?s=a&nms=1&sa=Smart+Search&q=" + KEYWORD;

    /**
     * Builds the options object for `request`, using the browser-like headers
     * that every call in this script sends.
     *
     * @param {string} url - Address to fetch.
     * @returns {Object} A fresh options object (headers are not shared between calls).
     */
    function buildOptions(url) {
      return {
        url: url,
        headers: {
          'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
          'accept': 'application/json',
          'X-Requested-With': 'XMLHttpRequest'
        }
      };
    }

    /** Completion callback for Firebase writes: reports failures instead of dropping them. */
    function logWriteError(error) {
      if (error) {
        console.error('Firebase write failed:', error);
      }
    }

    /** Maps one comment or reply from the render_comments payload to the stored record. */
    function toCommentRecord(entry) {
      return {
        "user": entry.user.name,
        "link": entry.user.link,
        "message": entry.message,
        "time": entry.data_utime
      };
    }

    /**
     * Flattens the comments of a parsed render_comments payload into one list:
     * each comment is followed immediately by its replies.
     *
     * @param {Object} payload - Parsed JSON body of the render_comments response.
     * @returns {Array<Object>} Records to store; empty when there are no comments.
     */
    function flattenComments(payload) {
      var records = [];
      var comments = (payload && payload.comments) || [];
      comments.forEach(function(comment) {
        records.push(toCommentRecord(comment));
        // `replies` may be absent; the original read `.length` on it unguarded.
        (comment.replies || []).forEach(function(reply) {
          records.push(toCommentRecord(reply));
        });
      });
      return records;
    }

    /**
     * Downloads the comments of a topic and stores them on the topic's node.
     *
     * @param {string} id - Element id of the topic wrapper; the part after the
     *   first "-" is sent as the `tid` query parameter.
     */
    function saveComments(id) {
      var commentsUrl = 'http://pantip.com/forum/topic/render_comments?tid=' + String(id).split("-")[1];
      request(buildOptions(commentsUrl), function(err, resp) {
        if (err || !resp) {
          console.error('Comment request failed for ' + id + ':', err);
          return;
        }

        // Parse once; the original re-parsed the body on every property access.
        var payload;
        try {
          payload = JSON.parse(resp.body);
        } catch (parseError) {
          console.error('Comment payload for ' + id + ' is not valid JSON:', parseError);
          return;
        }

        // NOTE(review): the original built the key as "topic-" + payload.paging.
        // The topic itself is stored under `id`, so the comments are written to
        // that same node here - confirm `paging` was not meant to be the key.
        pantipRef.child(id).update({ comments: flattenComments(payload) }, logWriteError);
      });
    }

    /**
     * Handles one fetched result page: stores the topic once per distinct title
     * and then fetches its comments.
     *
     * @param {?Error} err - Error reported by `request`.
     * @param {Object} resp - Response object from `request`.
     * @param {Array<string>} seenTitles - Titles already handled (mutated).
     */
    function saveTopic(err, resp, seenTitles) {
      if (err || !resp) {
        console.error('Topic request failed:', err);
        return;
      }

      var $ = cheerio.load(resp.body);
      var title = $('.display-post-title').text();
      if (seenTitles.indexOf(title) !== -1) {
        return;
      }
      seenTitles.push(title);

      var id = $('.display-post-wrapper').attr('id');
      if (!title || !id) {
        return;
      }

      // attr() yields undefined for a missing attribute, and Firebase rejects
      // undefined values, so those fields fall back to null.
      pantipRef.child(id).set({
        title: title,
        time: $('.display-post-timestamp').children().attr('data-utime') || null,
        story: $('.display-post-story').text(),
        author: $('.display-post-name').text(),
        author_url: $('.display-post-name').attr('href') || null
      }, logWriteError);

      saveComments(id);
    }

    request(buildOptions(searchUrl), function(err, resp) {
      if (err || !resp) {
        console.error('Search request failed:', err);
        return;
      }

      var $ = cheerio.load(resp.body);
      var seenTitles = [];
      $('a').each(function(i, link) {
        var href = $(link).attr('href');
        if (!href) {
          // Anchor without a target: nothing to follow.
          return;
        }
        request(buildOptions(baseUrl + href), function(err, resp) {
          saveTopic(err, resp, seenTitles);
        });
      });
    });
  • livescoretv - /ego/mmlivescoretv
    Last edited 7 years ago
    var request = require('request');
    var cheerio = require('cheerio');

    var SCHEDULE_URL = 'http://m.livesoccertv.com/schedules/';

    /**
     * Downloads the livesoccertv mobile schedule page and logs one record per
     * element matching `selector`.
     *
     * @param {string} selector - CSS selector of the match rows to report, e.g.
     *   '.-fiture.col-xs-12' or '.-live.col-xs-12'.
     */
    function logMatches(selector) {
      request(SCHEDULE_URL, function (error, response, html) {
        if (error || response.statusCode !== 200) {
          console.error('Schedule request failed:', error || ('HTTP ' + response.statusCode));
          return;
        }

        var $ = cheerio.load(html);
        $(selector).each(function(i, element) {
          var match = $(element);

          // League name and league logo are read from the element preceding the
          // match's parent.
          var lg = match.parent().prev().children().children();
          var llogo = lg.eq(0).find('img').attr('data-cfsrc');
          var lname = lg.find('.fll.b_league_name.b_trim_inner').text();

          // Home and away team
          var teams = match.find('.fll.b_match_teams').children();
          var home = teams.eq(0).text().trim();
          var away = teams.eq(1).text().trim();

          var mins = match.find('.fll.b_match_info').children().children().children().eq(1).text();

          // These two declarations were commented out while `yodata` still used
          // them, which raised a ReferenceError on the first matched element.
          var counts = match.find('.flr.b_match_counts').children();
          var HG = counts.eq(0).text();
          var AG = counts.eq(1).text();

          console.log(mins);

          // Output
          var yodata = { aaaleague : lname, leagueLogo: llogo, minute : mins, homeT: home, awayT: away, homeGoal: HG, awayGoal: AG };
          console.log(yodata);
        });
      });
    }

    logMatches('.-fiture.col-xs-12');
  • livescoretv - /ego/livescoretv
    Last edited 7 years ago
    var cheerio = require('cheerio'); var request = require('request'); //var url = 'http://int.soccerway.com/?ICID=TN_01'; var url = 'http://proxy.livesoccertv.com/rss/livescores.json'; request(url, function (error, response, JSON) { if (!error && response.statusCode == 200) { var $ = cheerio.load(JSON); var data = $('team').text(); //var team = data.find('team').text(); console.log(JSON); } }); exports.tonicEndpoint = function(request, response) { response.end(JSON); } /* request(url, function (error, response, body) { if (!error && response.statusCode == 200) { var $ = cheerio.load(body); var get, league, liveTime, match; console.log(body); $('.block_home_matches.real-content').each(function(i, element){ var data = $(this); var get = data.find('.matches.date_matches.grouped').find('tbody') ; var league = get.find('.group-head.live.expanded'); (league).each(function(i,element){ var league = get.find('.group-head.live.expanded').find('th').find('h3').text(); var match = get.find('.highlight.expanded.match'); var liveTime = match.find('td.minute').text(); console.log(league); console.log(liveTime); var metadata = { league : league, liveTime: liveTime, //parseInt(team), //away : away, //score : score, //id : id //title: title, //url: url, //points: parseInt(points), //username: username, //comments: parseInt(comments) }; console.log(metadata); }); }); } }); */