11from unittest .mock import MagicMock , patch
2- import pytest
2+
33import httpx
4+ import pytest
45
56from scrapegraph_py import (
6- ScrapeGraphAI ,
7- ScrapeRequest ,
8- ExtractRequest ,
9- SearchRequest ,
107 CrawlRequest ,
8+ ExtractRequest ,
9+ FetchConfig ,
1110 HistoryFilter ,
12- MarkdownFormatConfig ,
1311 HtmlFormatConfig ,
14- LinksFormatConfig ,
1512 ImagesFormatConfig ,
1613 JsonFormatConfig ,
17- ScreenshotFormatConfig ,
18- FetchConfig ,
14+ LinksFormatConfig ,
15+ MarkdownFormatConfig ,
1916 MonitorCreateRequest ,
17+ ScrapeGraphAI ,
18+ ScrapeRequest ,
19+ ScreenshotFormatConfig ,
20+ SearchRequest ,
2021)
2122
2223API_KEY = "test-sgai-key"
@@ -57,17 +58,19 @@ def test_with_fetch_config_js_mode(self):
5758 }
5859 with patch .object (httpx .Client , "request" , return_value = mock_response (body )) as mock :
5960 sgai = ScrapeGraphAI (api_key = API_KEY )
60- res = sgai .scrape (ScrapeRequest (
61- url = "https://example.com" ,
62- fetch_config = FetchConfig (
63- mode = "js" ,
64- stealth = True ,
65- timeout = 45000 ,
66- wait = 2000 ,
67- scrolls = 3 ,
68- ),
69- formats = [MarkdownFormatConfig ()],
70- ))
61+ res = sgai .scrape (
62+ ScrapeRequest (
63+ url = "https://example.com" ,
64+ fetch_config = FetchConfig (
65+ mode = "js" ,
66+ stealth = True ,
67+ timeout = 45000 ,
68+ wait = 2000 ,
69+ scrolls = 3 ,
70+ ),
71+ formats = [MarkdownFormatConfig ()],
72+ )
73+ )
7174
7275 assert res .status == "success"
7376 _ , kwargs = mock .call_args
@@ -82,15 +85,17 @@ def test_with_fetch_config_headers_cookies(self):
8285 }
8386 with patch .object (httpx .Client , "request" , return_value = mock_response (body )) as mock :
8487 sgai = ScrapeGraphAI (api_key = API_KEY )
85- res = sgai .scrape (ScrapeRequest (
86- url = "https://example.com" ,
87- fetch_config = FetchConfig (
88- mode = "fast" ,
89- headers = {"X-Custom" : "test" },
90- cookies = {"session" : "abc123" },
91- ),
92- formats = [HtmlFormatConfig ()],
93- ))
88+ res = sgai .scrape (
89+ ScrapeRequest (
90+ url = "https://example.com" ,
91+ fetch_config = FetchConfig (
92+ mode = "fast" ,
93+ headers = {"X-Custom" : "test" },
94+ cookies = {"session" : "abc123" },
95+ ),
96+ formats = [HtmlFormatConfig ()],
97+ )
98+ )
9499
95100 assert res .status == "success"
96101 _ , kwargs = mock .call_args
@@ -109,15 +114,17 @@ def test_multiple_formats(self):
109114 }
110115 with patch .object (httpx .Client , "request" , return_value = mock_response (body )) as mock :
111116 sgai = ScrapeGraphAI (api_key = API_KEY )
112- res = sgai .scrape (ScrapeRequest (
113- url = "https://example.com" ,
114- formats = [
115- MarkdownFormatConfig (mode = "reader" ),
116- HtmlFormatConfig (mode = "prune" ),
117- LinksFormatConfig (),
118- ImagesFormatConfig (),
119- ],
120- ))
117+ res = sgai .scrape (
118+ ScrapeRequest (
119+ url = "https://example.com" ,
120+ formats = [
121+ MarkdownFormatConfig (mode = "reader" ),
122+ HtmlFormatConfig (mode = "prune" ),
123+ LinksFormatConfig (),
124+ ImagesFormatConfig (),
125+ ],
126+ )
127+ )
121128
122129 assert res .status == "success"
123130 assert res .data ["results" ]["markdown" ] is not None
@@ -132,15 +139,17 @@ def test_json_format_with_schema(self):
132139 }
133140 with patch .object (httpx .Client , "request" , return_value = mock_response (body )) as mock :
134141 sgai = ScrapeGraphAI (api_key = API_KEY )
135- res = sgai .scrape (ScrapeRequest (
136- url = "https://example.com" ,
137- formats = [
138- JsonFormatConfig (
139- prompt = "Extract product info" ,
140- schema = {"type" : "object" , "properties" : {"title" : {"type" : "string" }}},
141- ),
142- ],
143- ))
142+ res = sgai .scrape (
143+ ScrapeRequest (
144+ url = "https://example.com" ,
145+ formats = [
146+ JsonFormatConfig (
147+ prompt = "Extract product info" ,
148+ schema = {"type" : "object" , "properties" : {"title" : {"type" : "string" }}},
149+ ),
150+ ],
151+ )
152+ )
144153
145154 assert res .status == "success"
146155 _ , kwargs = mock .call_args
@@ -154,18 +163,22 @@ def test_screenshot_format(self):
154163 }
155164 with patch .object (httpx .Client , "request" , return_value = mock_response (body )) as mock :
156165 sgai = ScrapeGraphAI (api_key = API_KEY )
157- res = sgai .scrape (ScrapeRequest (
158- url = "https://example.com" ,
159- formats = [ScreenshotFormatConfig (full_page = True , width = 1920 , height = 1080 )],
160- ))
166+ res = sgai .scrape (
167+ ScrapeRequest (
168+ url = "https://example.com" ,
169+ formats = [ScreenshotFormatConfig (full_page = True , width = 1920 , height = 1080 )],
170+ )
171+ )
161172
162173 assert res .status == "success"
163174 _ , kwargs = mock .call_args
164175 assert kwargs ["json" ]["formats" ][0 ]["fullPage" ] is True
165176 assert kwargs ["json" ]["formats" ][0 ]["width" ] == 1920
166177
167178 def test_http_401_error (self ):
168- with patch .object (httpx .Client , "request" , return_value = mock_response ({"detail" : "Invalid key" }, 401 )):
179+ with patch .object (
180+ httpx .Client , "request" , return_value = mock_response ({"detail" : "Invalid key" }, 401 )
181+ ):
169182 sgai = ScrapeGraphAI (api_key = API_KEY )
170183 res = sgai .scrape (ScrapeRequest (url = "https://example.com" ))
171184
@@ -207,10 +220,12 @@ def test_success(self):
207220 }
208221 with patch .object (httpx .Client , "request" , return_value = mock_response (body )) as mock :
209222 sgai = ScrapeGraphAI (api_key = API_KEY )
210- res = sgai .extract (ExtractRequest (
211- url = "https://example.com" ,
212- prompt = "What is this page about?" ,
213- ))
223+ res = sgai .extract (
224+ ExtractRequest (
225+ url = "https://example.com" ,
226+ prompt = "What is this page about?" ,
227+ )
228+ )
214229
215230 assert res .status == "success"
216231 assert res .data ["json" ] == {"title" : "Example" }
@@ -219,11 +234,13 @@ def test_with_schema(self):
219234 body = {"raw" : None , "json" : {"name" : "Test" }, "usage" : {}, "metadata" : {}}
220235 with patch .object (httpx .Client , "request" , return_value = mock_response (body )) as mock :
221236 sgai = ScrapeGraphAI (api_key = API_KEY )
222- res = sgai .extract (ExtractRequest (
223- url = "https://example.com" ,
224- prompt = "Extract data" ,
225- schema = {"type" : "object" },
226- ))
237+ res = sgai .extract (
238+ ExtractRequest (
239+ url = "https://example.com" ,
240+ prompt = "Extract data" ,
241+ schema = {"type" : "object" },
242+ )
243+ )
227244
228245 assert res .status == "success"
229246 _ , kwargs = mock .call_args
@@ -251,10 +268,12 @@ def test_with_extraction(self):
251268 }
252269 with patch .object (httpx .Client , "request" , return_value = mock_response (body )) as mock :
253270 sgai = ScrapeGraphAI (api_key = API_KEY )
254- res = sgai .search (SearchRequest (
255- query = "test" ,
256- prompt = "Summarize results" ,
257- ))
271+ res = sgai .search (
272+ SearchRequest (
273+ query = "test" ,
274+ prompt = "Summarize results" ,
275+ )
276+ )
258277
259278 assert res .status == "success"
260279 _ , kwargs = mock .call_args
@@ -266,11 +285,13 @@ def test_start(self):
266285 body = {"id" : "crawl-123" , "status" : "running" , "total" : 0 , "finished" : 0 , "pages" : []}
267286 with patch .object (httpx .Client , "request" , return_value = mock_response (body )) as mock :
268287 sgai = ScrapeGraphAI (api_key = API_KEY )
269- res = sgai .crawl .start (CrawlRequest (
270- url = "https://example.com" ,
271- max_pages = 10 ,
272- max_depth = 2 ,
273- ))
288+ res = sgai .crawl .start (
289+ CrawlRequest (
290+ url = "https://example.com" ,
291+ max_pages = 10 ,
292+ max_depth = 2 ,
293+ )
294+ )
274295
275296 assert res .status == "success"
276297 assert res .data ["id" ] == "crawl-123"
@@ -313,11 +334,13 @@ def test_create(self):
313334 }
314335 with patch .object (httpx .Client , "request" , return_value = mock_response (body )) as mock :
315336 sgai = ScrapeGraphAI (api_key = API_KEY )
316- res = sgai .monitor .create (MonitorCreateRequest (
317- url = "https://example.com" ,
318- name = "Test Monitor" ,
319- interval = "0 * * * *" ,
320- ))
337+ res = sgai .monitor .create (
338+ MonitorCreateRequest (
339+ url = "https://example.com" ,
340+ name = "Test Monitor" ,
341+ interval = "0 * * * *" ,
342+ )
343+ )
321344
322345 assert res .status == "success"
323346 assert res .data ["cronId" ] == "mon-123"
@@ -397,14 +420,16 @@ class TestCamelCaseSerialization:
397420 def test_snake_to_camel (self ):
398421 with patch .object (httpx .Client , "request" , return_value = mock_response ({})) as mock :
399422 sgai = ScrapeGraphAI (api_key = API_KEY )
400- sgai .scrape (ScrapeRequest (
401- url = "https://example.com" ,
402- content_type = "application/pdf" ,
403- fetch_config = FetchConfig (
404- mode = "js" ,
405- timeout = 30000 ,
406- ),
407- ))
423+ sgai .scrape (
424+ ScrapeRequest (
425+ url = "https://example.com" ,
426+ content_type = "application/pdf" ,
427+ fetch_config = FetchConfig (
428+ mode = "js" ,
429+ timeout = 30000 ,
430+ ),
431+ )
432+ )
408433
409434 _ , kwargs = mock .call_args
410435 body = kwargs ["json" ]
0 commit comments