Skip to content

Commit d7e1075

Browse files
authored
Merge pull request #15 from TrainingByPackt/lesson-03-web-scraping
Lesson 03 web scraping
2 parents e7532a7 + 73e8656 commit d7e1075

15 files changed

Lines changed: 2355 additions & 5 deletions

File tree

Lesson01/data/products.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ const products = [
174174
{
175175
"price": 2.79,
176176
"unit": "lb",
177-
"name": "Smilling Cookies",
177+
"name": "Smiling Cookies",
178178
"description": "Delicious sandwich cookies with creamy chocolate filling that always smile back to you, even knowing that you will eat them.",
179179
"image": "../images/products/smiley_cookies.jpg",
180180
"tags": [

Lesson01/sample_002/sample-store-front.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ <h1 class="title">Welcome to Fresh Products Store!</h1>
248248
<div class="item">
249249
<div class="image"><img src="../images/products/smiley_cookies.jpg" /></div>
250250
<div class="content">
251-
<a class="header">Smilling Cookies</a>
251+
<a class="header">Smiling Cookies</a>
252252
<div class="meta">
253253
<span>$2.79 / lb</span>
254254
</div>

Lesson03/activity_001/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
products.csv

Lesson03/activity_001/index.js

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
const fs = require('fs');
2+
const http = require('http');
3+
const JSDOM = require('jsdom').JSDOM;
4+
5+
/**
 * Extracts product records from a parsed store-front page.
 *
 * Every element with class `item` is treated as one product. The text of its
 * first <span> is expected to look like "$2.79 / lb" (currency symbol, price,
 * a slash, then the unit) and the text of its first <a> is used as the name.
 * Items that lack either element, or whose span text has no "/" separator,
 * are skipped with a warning instead of aborting the whole run.
 *
 * @param {Document} document - Object exposing `getElementsByClassName`
 *   whose items expose `getElementsByTagName` (e.g. a jsdom document).
 * @returns {Array<{name: string, price: number, unit: string}>} Parsed products
 *   in page order.
 */
function extractProducts(document) {
  const products = [];
  console.log('Parsing product data...');

  for (const el of Array.from(document.getElementsByClassName('item'))) {
    // One dot per visited item as a lightweight progress indicator.
    process.stdout.write('.');

    const priceAndUnitElement = el.getElementsByTagName('span')[0];
    const nameElement = el.getElementsByTagName('a')[0];
    if (priceAndUnitElement == null || nameElement == null) {
      console.warn('\nSkipping item without a price or name element.');
      continue;
    }

    const [rawPrice, rawUnit] = priceAndUnitElement.textContent.split('/');
    if (rawUnit === undefined) {
      console.warn(`\nSkipping item with unexpected price text: ${priceAndUnitElement.textContent}`);
      continue;
    }

    // Drop the leading currency symbol before parsing the number.
    const price = Number.parseFloat(rawPrice.trim().slice(1));

    products.push({
      name: nameElement.textContent.trim(),
      price,
      unit: rawUnit.trim(),
    });
  }

  // Terminate the line of progress dots.
  console.log();
  console.log(`Found ${products.length} products.`);
  return products;
}
25+
26+
/**
 * Writes the given products to `products.csv` in the current working
 * directory, with a `name,price,unit` header followed by one row per product.
 *
 * Fields containing a comma, double quote or line break are quoted (with
 * embedded quotes doubled) so such values cannot corrupt the row layout.
 * The file is written in a single synchronous call, so no file descriptor is
 * left open and the data is on disk when the function returns. Any I/O error
 * is reported on stderr rather than thrown.
 *
 * @param {Array<{name: string, price: number, unit: string}>} products -
 *   Records to serialize.
 * @returns {void}
 */
function writeCSV(products) {
  const fileName = 'products.csv';

  // Quote a single field when it contains CSV-significant characters.
  const escapeField = (value) => {
    const text = String(value);
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };

  console.log(`Writing data to ${fileName}...`);

  const lines = [
    'name,price,unit',
    ...products.map(({ name, price, unit }) => [name, price, unit].map(escapeField).join(',')),
  ];

  try {
    // Every line, including the last one, is newline-terminated.
    fs.writeFileSync(fileName, `${lines.join('\n')}\n`);
  } catch (error) {
    console.error(`Can not write to file: ${fileName}`, error);
    return;
  }

  console.log('Done.');
}
47+
48+
// Entry point: download the store-front page, parse it, and export the
// products it lists to a CSV file.
const page = 'http://localhost:3000';
console.log(`Downloading ${page}...`);
const request = http.get(page, (response) => {
  if (response.statusCode !== 200) {
    console.error(`Error while fetching page ${page}: ${response.statusCode}`);
    console.error(`Status message: ${response.statusMessage}`);
    // Discard the unread body so the underlying socket can be released.
    response.resume();
    return;
  }

  // Decode as UTF-8 at the stream level so multi-byte characters that span
  // two chunks are not mangled.
  response.setEncoding('utf8');

  let content = '';
  response.on('data', (chunk) => {
    content += chunk;
  });
  response.on('error', (error) => {
    console.error(`Error while downloading page ${page}: ${error.message}`);
  });
  // 'end' fires only after the whole body has been received, so a dropped
  // connection never results in parsing a truncated page.
  response.on('end', () => {
    console.log('Download finished.');
    const document = new JSDOM(content).window.document;
    writeCSV(extractProducts(document));
  });
});

// Connection-level failures (e.g. server not running) surface here; without
// a listener they would crash the process as an unhandled 'error' event.
// Note: http.get() ends the request itself, so no explicit request.end().
request.on('error', (error) => {
  console.error(`Error while fetching page ${page}: ${error.message}`);
});

0 commit comments

Comments
 (0)