|
| 1 | +const http = require('https'); |
| 2 | +const JSDOM = require('jsdom').JSDOM; |
| 3 | +const url = require('url'); |
| 4 | + |
// Topic slugs to scrape; each one is appended to https://medium.com/topic/
// to form the page URL that gets downloaded.
const topics = [
  'artificial-intelligence',
  'data-science',
  'javascript',
  'programming',
  'software-engineering',
];
| 12 | + |
/**
 * Downloads a page and passes its complete body to `callback` as a string.
 *
 * Failures (non-200 status, request error, response stream error) are
 * reported on stderr and `callback` is NOT invoked for them.
 *
 * @param {string} urlToDownload - Address of the page to fetch. The request is
 *   issued through `http`, which this file binds to Node's `https` module.
 * @param {(content: string) => void} callback - Called once with the whole
 *   body after the response has been fully received.
 * @returns {void}
 */
function downloadPage(urlToDownload, callback) {
  const request = http.get(urlToDownload, (response) => {
    if (response.statusCode !== 200) {
      console.error('Error while downloading page %s.', urlToDownload);
      console.error('Response was: %s %s', response.statusCode, response.statusMessage);
      // Discard the unread body so the underlying socket is released.
      response.resume();
      return;
    }

    // Decode on the stream itself: converting each chunk separately would
    // corrupt multi-byte characters that straddle a chunk boundary.
    response.setEncoding('utf8');

    let content = '';
    response.on('data', (chunk) => {
      content += chunk;
    });
    response.on('error', (error) => {
      console.error('Error while reading page %s: %s', urlToDownload, error.message);
    });
    // 'end' only fires once the body is complete; 'close' also fires when the
    // connection is cut short, which would hand truncated content to the caller.
    response.on('end', () => callback(content));
  });

  // Without a listener, a connection-level failure (DNS, refused, reset)
  // would be thrown as an unhandled 'error' event and kill the process.
  request.on('error', (error) => {
    console.error('Error while downloading page %s: %s', urlToDownload, error.message);
  });
  // No request.end() needed: http.get() ends the request automatically.
}
| 27 | + |
/**
 * Returns the path component of `href`, or null when it cannot be parsed.
 * Relative hrefs are resolved against https://medium.com.
 */
function getLinkPath(href) {
  try {
    return new URL(href, 'https://medium.com').pathname;
  } catch (error) {
    return null;
  }
}

/**
 * Returns the text of the `p a` element found in the node that follows the
 * heading containing `el`, or '' when no such element exists.
 */
function findDescription(el) {
  const heading = el.parentNode;
  if (!heading) {
    return '';
  }

  let sibling = heading.nextSibling;
  // nextSibling may be a text or comment node, which has no querySelector;
  // fall back to the next element in that case.
  if (sibling && typeof sibling.querySelector !== 'function') {
    sibling = heading.nextElementSibling;
  }
  if (!sibling || typeof sibling.querySelector !== 'function') {
    return '';
  }

  const descriptionLink = sibling.querySelector('p a');
  return descriptionLink ? descriptionLink.text : '';
}

/**
 * Collects article summaries from the links inside `h1` and `h3` headings.
 *
 * Only links whose path has exactly two non-empty segments are kept. Links
 * that cannot be parsed are skipped, and an article whose description cannot
 * be located gets an empty description instead of aborting the whole scan.
 *
 * @param {Document} document - DOM document to search.
 * @returns {Object<string, {description: string, link: string, title: string}>}
 *   Articles keyed by title; a later link with the same title replaces an
 *   earlier one. `link` is the path component only.
 */
function findArticles(document) {
  const articles = {};

  Array.from(document.querySelectorAll('h1 a, h3 a')).forEach((el) => {
    const link = getLinkPath(el.href);
    if (link === null) {
      return;
    }

    const segments = link.split('/').filter((segment) => segment.trim() !== '');
    if (segments.length !== 2) {
      return;
    }

    articles[el.text] = {
      description: findDescription(el),
      link,
      title: el.text,
    };
  });

  return articles;
}
| 45 | + |
/**
 * Writes one article to stdout: a separator line, then the title, the
 * description and the full medium.com link, each on its own line.
 *
 * @param {{title: string, description: string, link: string}} article -
 *   Article to print; `link` is a path that gets prefixed with the site origin.
 * @returns {void}
 */
function printArticle(article) {
  // Each line is produced lazily, right before it is printed.
  const lineBuilders = [
    () => '-----',
    () => ` ${article.title}`,
    () => ` ${article.description}`,
    () => ` https://medium.com${article.link}`,
  ];

  for (const buildLine of lineBuilders) {
    console.log(buildLine());
  }
}
| 52 | + |
// Entry point: request every topic page; each page is parsed and its
// articles are printed inside that page's own download callback.
for (const topic of topics) {
  const topicUrl = `https://medium.com/topic/${topic}`;

  downloadPage(topicUrl, (content) => {
    const { document } = new JSDOM(content).window;
    const articles = findArticles(document);

    for (const article of Object.values(articles)) {
      printArticle(article);
    }
  });
}
0 commit comments