Skip to content

Commit 25dbdd8

Browse files
feat: add examples matching JS SDK
- scrape: basic, json extraction, pdf, multi-format, fetchconfig
- extract: basic, with schema
- search: basic, with extraction
- crawl: basic, with formats
- monitor: basic, with webhook
- utilities: credits, health, history

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 60e99b6 commit 25dbdd8

17 files changed

Lines changed: 364 additions & 0 deletions

examples/.env.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SGAI_API_KEY=your_api_key_here

examples/crawl/crawl_basic.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from scrapegraph_py import ScrapeGraphAI, CrawlRequest
2+
3+
sgai = ScrapeGraphAI()
4+
5+
start_res = sgai.crawl.start(CrawlRequest(
6+
url="https://example.com",
7+
max_pages=5,
8+
max_depth=2,
9+
))
10+
11+
if start_res.status != "success" or not start_res.data:
12+
print("Failed to start:", start_res.error)
13+
else:
14+
print("Crawl started:", start_res.data["id"])
15+
print("Status:", start_res.data["status"])
16+
17+
get_res = sgai.crawl.get(start_res.data["id"])
18+
if get_res.status == "success":
19+
print("\nProgress:", get_res.data["finished"], "/", get_res.data["total"])
20+
print("Pages:", [p["url"] for p in get_res.data.get("pages", [])])
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from scrapegraph_py import (
2+
ScrapeGraphAI,
3+
CrawlRequest,
4+
MarkdownFormatConfig,
5+
LinksFormatConfig,
6+
)
7+
8+
sgai = ScrapeGraphAI()
9+
10+
start_res = sgai.crawl.start(CrawlRequest(
11+
url="https://example.com",
12+
max_pages=3,
13+
max_depth=1,
14+
formats=[
15+
MarkdownFormatConfig(),
16+
LinksFormatConfig(),
17+
],
18+
))
19+
20+
if start_res.status != "success" or not start_res.data:
21+
print("Failed to start:", start_res.error)
22+
else:
23+
crawl_id = start_res.data["id"]
24+
print("Crawl started:", crawl_id)
25+
print("Status:", start_res.data["status"])
26+
27+
get_res = sgai.crawl.get(crawl_id)
28+
if get_res.status == "success":
29+
print("\nProgress:", get_res.data["finished"], "/", get_res.data["total"])
30+
31+
for page in get_res.data.get("pages", []):
32+
print(f"\n Page: {page['url']}")
33+
print(f" Status: {page['status']}")
34+
print(f" Depth: {page['depth']}")

examples/extract/extract_basic.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import json
2+
from scrapegraph_py import ScrapeGraphAI, ExtractRequest
3+
4+
sgai = ScrapeGraphAI()
5+
6+
res = sgai.extract(ExtractRequest(
7+
url="https://example.com",
8+
prompt="What is this page about? Extract the main heading and description.",
9+
))
10+
11+
if res.status == "success":
12+
print("Extracted:", json.dumps(res.data.get("json"), indent=2))
13+
print("\nTokens used:", res.data.get("usage"))
14+
else:
15+
print("Failed:", res.error)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import json
2+
from scrapegraph_py import ScrapeGraphAI, ExtractRequest
3+
4+
sgai = ScrapeGraphAI()
5+
6+
res = sgai.extract(ExtractRequest(
7+
url="https://example.com",
8+
prompt="Extract structured information about this page",
9+
schema={
10+
"type": "object",
11+
"properties": {
12+
"title": {"type": "string"},
13+
"description": {"type": "string"},
14+
"links": {
15+
"type": "array",
16+
"items": {"type": "string"},
17+
},
18+
},
19+
"required": ["title"],
20+
},
21+
))
22+
23+
if res.status == "success":
24+
print("Extracted:", json.dumps(res.data.get("json"), indent=2))
25+
print("\nRaw:", res.data.get("raw"))
26+
print("\nTokens used:", res.data.get("usage"))
27+
else:
28+
print("Failed:", res.error)

examples/monitor/monitor_basic.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from scrapegraph_py import ScrapeGraphAI, MonitorCreateRequest, MarkdownFormatConfig
2+
3+
sgai = ScrapeGraphAI()
4+
5+
res = sgai.monitor.create(MonitorCreateRequest(
6+
url="https://example.com",
7+
name="Example Monitor",
8+
interval="0 * * * *",
9+
formats=[MarkdownFormatConfig()],
10+
))
11+
12+
if res.status == "success":
13+
print("Monitor created:", res.data["cronId"])
14+
print("Status:", res.data["status"])
15+
print("Interval:", res.data["interval"])
16+
else:
17+
print("Failed:", res.error)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from scrapegraph_py import ScrapeGraphAI, MonitorCreateRequest, MarkdownFormatConfig
2+
3+
sgai = ScrapeGraphAI()
4+
5+
res = sgai.monitor.create(MonitorCreateRequest(
6+
url="https://example.com",
7+
name="Example Monitor with Webhook",
8+
interval="0 */6 * * *",
9+
webhook_url="https://your-webhook-endpoint.com/hook",
10+
formats=[MarkdownFormatConfig()],
11+
))
12+
13+
if res.status == "success":
14+
print("Monitor created:", res.data["cronId"])
15+
print("Status:", res.data["status"])
16+
print("Interval:", res.data["interval"])
17+
print("Webhook configured")
18+
else:
19+
print("Failed:", res.error)

examples/scrape/scrape_basic.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from scrapegraph_py import ScrapeGraphAI, ScrapeRequest, MarkdownFormatConfig
2+
3+
sgai = ScrapeGraphAI()
4+
5+
res = sgai.scrape(ScrapeRequest(
6+
url="https://example.com",
7+
formats=[MarkdownFormatConfig()],
8+
))
9+
10+
if res.status == "success":
11+
print("Markdown:", res.data["results"].get("markdown", {}).get("data"))
12+
print(f"\nTook {res.elapsed_ms}ms")
13+
else:
14+
print("Failed:", res.error)
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import json
2+
from scrapegraph_py import ScrapeGraphAI, ScrapeRequest, JsonFormatConfig
3+
4+
sgai = ScrapeGraphAI()
5+
6+
res = sgai.scrape(ScrapeRequest(
7+
url="https://example.com",
8+
formats=[
9+
JsonFormatConfig(
10+
prompt="Extract the company name, tagline, and list of features",
11+
schema={
12+
"type": "object",
13+
"properties": {
14+
"companyName": {"type": "string"},
15+
"tagline": {"type": "string"},
16+
"features": {
17+
"type": "array",
18+
"items": {"type": "string"},
19+
},
20+
},
21+
"required": ["companyName"],
22+
},
23+
),
24+
],
25+
))
26+
27+
if res.status == "success":
28+
json_result = res.data["results"].get("json", {})
29+
30+
print("=== JSON Extraction ===\n")
31+
print("Extracted data:")
32+
print(json.dumps(json_result.get("data"), indent=2))
33+
34+
chunker = json_result.get("metadata", {}).get("chunker")
35+
if chunker:
36+
chunks = chunker.get("chunks", [])
37+
print("\nChunker info:")
38+
print(" Chunks:", len(chunks))
39+
print(" Total size:", sum(c.get("size", 0) for c in chunks), "chars")
40+
else:
41+
print("Failed:", res.error)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from scrapegraph_py import (
2+
ScrapeGraphAI,
3+
ScrapeRequest,
4+
MarkdownFormatConfig,
5+
LinksFormatConfig,
6+
ScreenshotFormatConfig,
7+
)
8+
9+
sgai = ScrapeGraphAI()
10+
11+
res = sgai.scrape(ScrapeRequest(
12+
url="https://example.com",
13+
formats=[
14+
MarkdownFormatConfig(),
15+
LinksFormatConfig(),
16+
ScreenshotFormatConfig(width=1280, height=720),
17+
],
18+
))
19+
20+
if res.status == "success":
21+
results = res.data["results"]
22+
23+
print("=== Markdown ===")
24+
print(results.get("markdown", {}).get("data", [""])[0][:500], "...")
25+
26+
print("\n=== Links ===")
27+
links = results.get("links", {}).get("data", [])
28+
print(f"Found {len(links)} links")
29+
for link in links[:5]:
30+
print(f" - {link}")
31+
32+
print("\n=== Screenshot ===")
33+
screenshot = results.get("screenshot", {}).get("data", {})
34+
print(f"URL: {screenshot.get('url')}")
35+
print(f"Size: {screenshot.get('width')}x{screenshot.get('height')}")
36+
else:
37+
print("Failed:", res.error)

0 commit comments

Comments
 (0)