Skip to content

Commit 4db4b15

Browse files
committed
Naming scheme adjustments; additional exception.
1 parent 6ba3c74 commit 4db4b15

5 files changed

Lines changed: 20 additions & 14 deletions

File tree

aiohttp_aiofiles_tutorial/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ async def init_script():
2020
await outfile.write("title,description,primary_tag,url,published_at\n")
2121
await execute_fetcher_tasks(outfile)
2222
await outfile.close()
23-
LOGGER.success(f"Executed {__name__} in {time.perf_counter() - start_time:0.2f} seconds.")
23+
LOGGER.success(
24+
f"Executed {__name__} in {time.perf_counter() - start_time:0.2f} seconds."
25+
)
2426

2527

2628
async def execute_fetcher_tasks(outfile: AsyncIOFile):

aiohttp_aiofiles_tutorial/parser.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
"""Parse metadata from raw HTML."""
22
from bs4 import BeautifulSoup
3+
from bs4.builder import ParserRejectedMarkup
34
from logger import LOGGER
45

56

6-
async def parse_html_page_data(html: str, url: str) -> str:
7+
async def parse_html_page_metadata(html: str, url: str) -> str:
78
"""
8-
Extract page title from raw HTML of fetched URL; return a title
9+
Extract page metadata from raw HTML into a CSV row.
910
1011
:param str html: Raw HTML source of a given fetched URL.
1112
:param str url: URL associated with the extracted HTML.
@@ -33,6 +34,8 @@ async def parse_html_page_data(html: str, url: str) -> str:
3334
if primary_tag is None:
3435
primary_tag = ""
3536
return f"{title}, {description}, {primary_tag}, {url}, {published_at}"
37+
except ParserRejectedMarkup as e:
38+
LOGGER.error(f"Failed to parse invalid html for {url}: {e}")
3639
except ValueError as e:
3740
LOGGER.error(f"ValueError occurred when parsing html for {url}: {e}")
3841
except Exception as e:

aiohttp_aiofiles_tutorial/tests/test_parser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pytest_asyncio
55
from config import BASE_DIR
66

7-
from aiohttp_aiofiles_tutorial.parser import parse_html_page_data
7+
from aiohttp_aiofiles_tutorial.parser import parse_html_page_metadata
88

99

1010
@pytest_asyncio.fixture
@@ -25,6 +25,6 @@ async def test_parse_html_page_data(sample_page_metadata):
2525
async with aiofiles.open(test_file, mode="r") as file:
2626
html = await file.read()
2727
url = "https://hackersandslackers.com/intro-to-asyncio-concurrency/"
28-
metadata = await parse_html_page_data(html, url)
28+
metadata = await parse_html_page_metadata(html, url)
2929
assert metadata == sample_page_metadata
3030
await file.close()

aiohttp_aiofiles_tutorial/writer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from aiofiles.threadpool.text import AsyncTextIOWrapper as AsyncIOFile
33
from logger import LOGGER
44

5-
from .parser import parse_html_page_data
5+
from .parser import parse_html_page_metadata
66

77

88
async def write_to_outfile(
@@ -18,8 +18,8 @@ async def write_to_outfile(
1818
:param int i: Current iteration of URL out of total URLs.
1919
"""
2020
try:
21-
page_title = await parse_html_page_data(html, url)
22-
await outfile.write(f"{page_title}\n")
23-
LOGGER.info(f"Fetched URL {i} of {total_count}: {page_title}")
21+
page_metadata = await parse_html_page_metadata(html, url)
22+
await outfile.write(f"{page_metadata}\n")
23+
LOGGER.info(f"Fetched URL {i} of {total_count}: {page_metadata}")
2424
except Exception as e:
2525
LOGGER.error(f"Unexpected error while writing page title: {e}")

poetry.lock

Lines changed: 6 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)