Skip to content

Commit 25dbdd8

Browse files
feat: add examples matching JS SDK
- scrape: basic, json extraction, pdf, multi-format, fetchconfig
- extract: basic, with schema
- search: basic, with extraction
- crawl: basic, with formats
- monitor: basic, with webhook
- utilities: credits, health, history

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 60e99b6 commit 25dbdd8

17 files changed

Lines changed: 364 additions & 0 deletions

examples/.env.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SGAI_API_KEY=your_api_key_here

examples/crawl/crawl_basic.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from scrapegraph_py import ScrapeGraphAI, CrawlRequest
2+
3+
sgai = ScrapeGraphAI()
4+
5+
start_res = sgai.crawl.start(CrawlRequest(
6+
url="https://example.com",
7+
max_pages=5,
8+
max_depth=2,
9+
))
10+
11+
if start_res.status != "success" or not start_res.data:
12+
print("Failed to start:", start_res.error)
13+
else:
14+
print("Crawl started:", start_res.data["id"])
15+
print("Status:", start_res.data["status"])
16+
17+
get_res = sgai.crawl.get(start_res.data["id"])
18+
if get_res.status == "success":
19+
print("\nProgress:", get_res.data["finished"], "/", get_res.data["total"])
20+
print("Pages:", [p["url"] for p in get_res.data.get("pages", [])])
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from scrapegraph_py import (
2+
ScrapeGraphAI,
3+
CrawlRequest,
4+
MarkdownFormatConfig,
5+
LinksFormatConfig,
6+
)
7+
8+
sgai = ScrapeGraphAI()
9+
10+
start_res = sgai.crawl.start(CrawlRequest(
11+
url="https://example.com",
12+
max_pages=3,
13+
max_depth=1,
14+
formats=[
15+
MarkdownFormatConfig(),
16+
LinksFormatConfig(),
17+
],
18+
))
19+
20+
if start_res.status != "success" or not start_res.data:
21+
print("Failed to start:", start_res.error)
22+
else:
23+
crawl_id = start_res.data["id"]
24+
print("Crawl started:", crawl_id)
25+
print("Status:", start_res.data["status"])
26+
27+
get_res = sgai.crawl.get(crawl_id)
28+
if get_res.status == "success":
29+
print("\nProgress:", get_res.data["finished"], "/", get_res.data["total"])
30+
31+
for page in get_res.data.get("pages", []):
32+
print(f"\n Page: {page['url']}")
33+
print(f" Status: {page['status']}")
34+
print(f" Depth: {page['depth']}")

examples/extract/extract_basic.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import json
2+
from scrapegraph_py import ScrapeGraphAI, ExtractRequest
3+
4+
sgai = ScrapeGraphAI()
5+
6+
res = sgai.extract(ExtractRequest(
7+
url="https://example.com",
8+
prompt="What is this page about? Extract the main heading and description.",
9+
))
10+
11+
if res.status == "success":
12+
print("Extracted:", json.dumps(res.data.get("json"), indent=2))
13+
print("\nTokens used:", res.data.get("usage"))
14+
else:
15+
print("Failed:", res.error)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import json
2+
from scrapegraph_py import ScrapeGraphAI, ExtractRequest
3+
4+
sgai = ScrapeGraphAI()
5+
6+
res = sgai.extract(ExtractRequest(
7+
url="https://example.com",
8+
prompt="Extract structured information about this page",
9+
schema={
10+
"type": "object",
11+
"properties": {
12+
"title": {"type": "string"},
13+
"description": {"type": "string"},
14+
"links": {
15+
"type": "array",
16+
"items": {"type": "string"},
17+
},
18+
},
19+
"required": ["title"],
20+
},
21+
))
22+
23+
if res.status == "success":
24+
print("Extracted:", json.dumps(res.data.get("json"), indent=2))
25+
print("\nRaw:", res.data.get("raw"))
26+
print("\nTokens used:", res.data.get("usage"))
27+
else:
28+
print("Failed:", res.error)

examples/monitor/monitor_basic.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from scrapegraph_py import ScrapeGraphAI, MonitorCreateRequest, MarkdownFormatConfig
2+
3+
sgai = ScrapeGraphAI()
4+
5+
res = sgai.monitor.create(MonitorCreateRequest(
6+
url="https://example.com",
7+
name="Example Monitor",
8+
interval="0 * * * *",
9+
formats=[MarkdownFormatConfig()],
10+
))
11+
12+
if res.status == "success":
13+
print("Monitor created:", res.data["cronId"])
14+
print("Status:", res.data["status"])
15+
print("Interval:", res.data["interval"])
16+
else:
17+
print("Failed:", res.error)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from scrapegraph_py import ScrapeGraphAI, MonitorCreateRequest, MarkdownFormatConfig
2+
3+
sgai = ScrapeGraphAI()
4+
5+
res = sgai.monitor.create(MonitorCreateRequest(
6+
url="https://example.com",
7+
name="Example Monitor with Webhook",
8+
interval="0 */6 * * *",
9+
webhook_url="https://your-webhook-endpoint.com/hook",
10+
formats=[MarkdownFormatConfig()],
11+
))
12+
13+
if res.status == "success":
14+
print("Monitor created:", res.data["cronId"])
15+
print("Status:", res.data["status"])
16+
print("Interval:", res.data["interval"])
17+
print("Webhook configured")
18+
else:
19+
print("Failed:", res.error)

examples/scrape/scrape_basic.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from scrapegraph_py import ScrapeGraphAI, ScrapeRequest, MarkdownFormatConfig
2+
3+
sgai = ScrapeGraphAI()
4+
5+
res = sgai.scrape(ScrapeRequest(
6+
url="https://example.com",
7+
formats=[MarkdownFormatConfig()],
8+
))
9+
10+
if res.status == "success":
11+
print("Markdown:", res.data["results"].get("markdown", {}).get("data"))
12+
print(f"\nTook {res.elapsed_ms}ms")
13+
else:
14+
print("Failed:", res.error)
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import json
2+
from scrapegraph_py import ScrapeGraphAI, ScrapeRequest, JsonFormatConfig
3+
4+
sgai = ScrapeGraphAI()
5+
6+
res = sgai.scrape(ScrapeRequest(
7+
url="https://example.com",
8+
formats=[
9+
JsonFormatConfig(
10+
prompt="Extract the company name, tagline, and list of features",
11+
schema={
12+
"type": "object",
13+
"properties": {
14+
"companyName": {"type": "string"},
15+
"tagline": {"type": "string"},
16+
"features": {
17+
"type": "array",
18+
"items": {"type": "string"},
19+
},
20+
},
21+
"required": ["companyName"],
22+
},
23+
),
24+
],
25+
))
26+
27+
if res.status == "success":
28+
json_result = res.data["results"].get("json", {})
29+
30+
print("=== JSON Extraction ===\n")
31+
print("Extracted data:")
32+
print(json.dumps(json_result.get("data"), indent=2))
33+
34+
chunker = json_result.get("metadata", {}).get("chunker")
35+
if chunker:
36+
chunks = chunker.get("chunks", [])
37+
print("\nChunker info:")
38+
print(" Chunks:", len(chunks))
39+
print(" Total size:", sum(c.get("size", 0) for c in chunks), "chars")
40+
else:
41+
print("Failed:", res.error)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from scrapegraph_py import (
2+
ScrapeGraphAI,
3+
ScrapeRequest,
4+
MarkdownFormatConfig,
5+
LinksFormatConfig,
6+
ScreenshotFormatConfig,
7+
)
8+
9+
sgai = ScrapeGraphAI()
10+
11+
res = sgai.scrape(ScrapeRequest(
12+
url="https://example.com",
13+
formats=[
14+
MarkdownFormatConfig(),
15+
LinksFormatConfig(),
16+
ScreenshotFormatConfig(width=1280, height=720),
17+
],
18+
))
19+
20+
if res.status == "success":
21+
results = res.data["results"]
22+
23+
print("=== Markdown ===")
24+
print(results.get("markdown", {}).get("data", [""])[0][:500], "...")
25+
26+
print("\n=== Links ===")
27+
links = results.get("links", {}).get("data", [])
28+
print(f"Found {len(links)} links")
29+
for link in links[:5]:
30+
print(f" - {link}")
31+
32+
print("\n=== Screenshot ===")
33+
screenshot = results.get("screenshot", {}).get("data", {})
34+
print(f"URL: {screenshot.get('url')}")
35+
print(f"Size: {screenshot.get('width')}x{screenshot.get('height')}")
36+
else:
37+
print("Failed:", res.error)

0 commit comments

Comments
 (0)