Skip to content

Commit ad3844f

Browse files
Merge pull request #8 from PlayerData/raw-ball-data-pull
feat: add ball raw data pulls
2 parents 157e37f + 5cd787a commit ad3844f

4 files changed

Lines changed: 421 additions & 0 deletions

File tree

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
"""
2+
Fetch ball data recordings for a session and download raw JSON for each recording
3+
that has a URL. Uses GraphQL for session/recording metadata and HTTP GET for the
4+
raw data files.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
import asyncio
10+
import json
11+
import os
12+
import sys
13+
from datetime import datetime, timezone
14+
15+
import httpx
16+
17+
from playerdatapy.constants import API_BASE_URL
18+
from playerdatapy.gqlauth import AuthenticationType, GraphqlAuth
19+
from playerdatapy.gqlclient import Client
20+
21+
# -----------------------------------------------------------------------------
# Config (env or override below)
# -----------------------------------------------------------------------------
# Both values are read from the environment and are None when unset.
CLIENT_ID = os.environ.get("CLIENT_ID")
CLUB_ID = os.environ.get("CLUB_ID")

# -----------------------------------------------------------------------------
# GraphQL
# -----------------------------------------------------------------------------
# Lists id/startTime/endTime for the sessions of one club inside a time window.
# All three variables are declared non-null, so callers must always supply them.
SESSIONS_QUERY = """
query($clubIdEq: ID!, $startTimeGteq: ISO8601DateTime!, $endTimeLteq: ISO8601DateTime!) {
  sessions(filter: { clubIdEq: $clubIdEq, startTimeGteq: $startTimeGteq, endTimeLteq: $endTimeLteq }) {
    id
    startTime
    endTime
  }
}
"""

# Fetches a single session plus its ball data recordings (requested with
# ``withData: true``), asking for each recording's JSON-format URL and the
# ball's id and serial number.
SESSION_BALL_DATA_QUERY = """
query($sessionId: ID!) {
  session(id: $sessionId) {
    id
    startTime
    endTime
    ballDataRecordings(withData: true) {
      id
      url(format: json)
      ball { id serialNumber }
    }
  }
}
"""
53+
54+
55+
def _record_count(data: list | dict) -> int:
56+
if isinstance(data, list):
57+
return len(data)
58+
return len(data.get("records", []))
59+
60+
61+
def _format_session_line(i: int, s: dict) -> str:
62+
"""One line for a session: number, start–end, id."""
63+
start = s.get("startTime", "")[:19].replace("T", " ")
64+
end = s.get("endTime", "")[:19].replace("T", " ")
65+
sid = s.get("id", "")
66+
return f" {i}. {start}{end} {sid}"
67+
68+
69+
def _choose_session(sessions: list[dict]) -> dict | None:
    """
    Print the numbered session list and return the session to use.

    When stdin is not a TTY the first entry is used without prompting.
    Otherwise the user is asked for a number: empty input selects the first
    entry, a number within range selects that entry, and anything else
    (non-numeric, out of range, EOF) falls back to the first entry with a
    notice. Returns None only when ``sessions`` is empty.
    """
    if not sessions:
        return None

    print("Sessions (most recent first):")
    for number, entry in enumerate(sessions, start=1):
        print(_format_session_line(number, entry))

    # NOTE(review): "latest" is simply the first entry; this relies on the
    # caller passing sessions most-recent-first — confirm against the API.
    latest = sessions[0]

    if not sys.stdin.isatty():
        print(f"Using latest session: {latest['id']}")
        return latest

    total = len(sessions)
    try:
        answer = input(f"Select session (1–{total}, or Enter for latest): ").strip()
        if not answer:
            return latest
        choice = int(answer)
    except (ValueError, EOFError):
        # 0 is never a valid 1-based choice, so this takes the fallback path.
        choice = 0

    if 1 <= choice <= total:
        return sessions[choice - 1]

    print("Invalid choice; using latest session.")
    return latest
98+
99+
100+
async def fetch_recordings_for_session(
    client: Client,
    session_id: str,
) -> dict | None:
    """Fetch one session, including its ball data recordings.

    Args:
        client: GraphQL client used to execute ``SESSION_BALL_DATA_QUERY``.
        session_id: Value sent as the query's ``sessionId`` variable.

    Returns:
        The ``session`` object from the response data (a dict that carries
        ``ballDataRecordings``), or ``None`` when the response holds no data
        or no session.
    """
    response = await client.execute(
        query=SESSION_BALL_DATA_QUERY,
        variables={"sessionId": session_id},
    )
    # NOTE(review): what get_data returns for an empty/error payload is not
    # visible here; a falsy result is treated as "session not found".
    payload = client.get_data(response) or {}
    return payload.get("session")
111+
112+
113+
async def download_recording(
    http_client: httpx.AsyncClient,
    recording: dict,
    out_dir: str,
) -> bool:
    """Download one recording's raw JSON into ``out_dir``.

    Args:
        http_client: Async HTTP client used for the GET request.
        recording: Recording dict; reads ``id``, ``url`` and ``ball.serialNumber``.
        out_dir: Existing directory that receives ``<recording id>.json``.

    Returns:
        True if a file was written. False if the recording was skipped: no
        URL, an HTTP error status, a request failure, a body that is not
        valid JSON, or a payload with zero records. Every skip is reported
        on stdout.
    """
    ball = recording.get("ball") or {}
    serial = ball.get("serialNumber", "?")

    url = recording.get("url")
    if not url:
        print(f" Skip {recording['id']} (Ball {serial}): no URL")
        return False
    if url.startswith("/"):
        # Server-relative URL: resolve it against the API base.
        url = f"{API_BASE_URL.rstrip('/')}{url}"

    try:
        response = await http_client.get(url)
        response.raise_for_status()
        raw = response.json()
    except httpx.HTTPStatusError as e:
        print(f" Skip {recording['id']} (Ball {serial}): {e.response.status_code}")
        return False
    except httpx.RequestError as e:
        reason = str(e).strip() or type(e).__name__
        print(f" Skip {recording['id']} (Ball {serial}): {reason}")
        return False
    except ValueError:
        # A body that is not valid JSON makes .json() raise a ValueError
        # subclass. Skip this recording instead of letting the exception
        # abort every other download running alongside it.
        print(f" Skip {recording['id']} (Ball {serial}): invalid JSON")
        return False

    count = _record_count(raw)
    if count == 0:
        print(f" Skip {recording['id']} (Ball {serial}): empty data")
        return False

    path = os.path.join(out_dir, f"{recording['id']}.json")
    with open(path, "w", encoding="utf-8") as f:
        json.dump(raw, f, indent=2)
    print(f" Ball {serial}: {count} records -> {path}")
    return True
152+
153+
154+
async def main() -> None:
    """List the club's sessions, pick one, and download its ball data.

    Authenticates, queries every session for ``CLUB_ID``, lets the user choose
    a session (or takes the first when not interactive), fetches that
    session's ball data recordings, then downloads each recording that has a
    URL into a directory named after the session ID.

    Raises:
        SystemExit: if ``CLIENT_ID`` or ``CLUB_ID`` is not configured.
    """
    # Fail early with a clear message: the sessions query declares its club
    # ID variable as non-null, so an unset value would otherwise only surface
    # as an opaque request failure.
    if not CLIENT_ID or not CLUB_ID:
        raise SystemExit(
            "CLIENT_ID and CLUB_ID must be set "
            "(environment variables or the config constants in this file)."
        )

    auth = GraphqlAuth(
        client_id=CLIENT_ID,
        type=AuthenticationType.AUTHORISATION_CODE_FLOW_PCKE,
    )
    # NOTE(review): relies on GraphqlAuth's private token accessor.
    client = Client(
        url=f"{API_BASE_URL}/api/graphql",
        headers={"Authorization": f"Bearer {auth._get_authentication_token()}"},
    )

    now = datetime.now(timezone.utc)
    # No time filter: get all sessions (use a wide range for the required query params)
    list_vars = {
        "clubIdEq": CLUB_ID,
        "startTimeGteq": datetime(2000, 1, 1, tzinfo=timezone.utc).isoformat(),
        "endTimeLteq": now.isoformat(),
    }

    resp = await client.execute(query=SESSIONS_QUERY, variables=list_vars)
    sessions = client.get_data(resp).get("sessions") or []

    if not sessions:
        print("No sessions found.")
        return
    print(f"Found {len(sessions)} session(s).")

    chosen = _choose_session(sessions)
    if not chosen:
        return

    session = await fetch_recordings_for_session(client, chosen["id"])
    if not session:
        print(f"Session {chosen['id']} not found.")
        return

    # Only recordings that expose a URL can be downloaded.
    recordings_with_url = [
        r for r in (session.get("ballDataRecordings") or []) if r.get("url")
    ]
    if not recordings_with_url:
        print(f"No ball data recordings with URLs for session {session['id']}.")
        return

    # Files are written to a directory named after the session ID, relative
    # to the current working directory.
    out_dir = session["id"]
    os.makedirs(out_dir, exist_ok=True)
    print(f"Session {session['id']} ({session['startTime']} – {session['endTime']})")
    print(f"Downloading {len(recordings_with_url)} recording(s) to {out_dir}/")

    headers = {"Authorization": f"Bearer {auth._get_authentication_token()}"}
    async with httpx.AsyncClient(headers=headers, timeout=60.0) as http_client:
        # Each download returns True/False, so the sum is the number saved.
        ok = sum(
            await asyncio.gather(
                *[
                    download_recording(http_client, r, out_dir)
                    for r in recordings_with_url
                ]
            )
        )
    print(f"Done: {ok}/{len(recordings_with_url)} saved.")


if __name__ == "__main__":
    asyncio.run(main())
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
"""
2+
Fetch ball data recordings for a session and download raw JSON for each recording
3+
that has a URL. Uses the Pydantic API (PlayerDataAPI + query builders) for
4+
session/recording metadata and HTTP GET for the raw data files.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
import asyncio
10+
import json
11+
import os
12+
import sys
13+
import httpx
14+
15+
from playerdatapy.constants import API_BASE_URL
16+
from playerdatapy.gqlauth import AuthenticationType
17+
from playerdatapy.playerdata_api import PlayerDataAPI
18+
19+
from examples.pydantic.queries.club_sessions import club_sessions
20+
from examples.pydantic.queries.session_ball_data import session_ball_data
21+
22+
# -----------------------------------------------------------------------------
# Config (env or override below)
# -----------------------------------------------------------------------------
# Each value comes from the environment variable of the same name and is None
# when that variable is unset.
CLIENT_ID = os.getenv("CLIENT_ID")
CLUB_ID = os.getenv("CLUB_ID")
27+
28+
29+
def _record_count(data: list | dict) -> int:
30+
if isinstance(data, list):
31+
return len(data)
32+
return len(data.get("records", []))
33+
34+
35+
def _format_session_line(i: int, s: dict) -> str:
36+
"""One line for a session: number, start–end, id."""
37+
start = (s.get("startTime") or "")[:19].replace("T", " ")
38+
end = (s.get("endTime") or "")[:19].replace("T", " ")
39+
sid = s.get("id", "")
40+
return f" {i}. {start}{end} {sid}"
41+
42+
43+
def _choose_session(sessions: list[dict]) -> dict | None:
    """
    Show the available sessions and decide which one to use.

    Non-interactive runs (stdin is not a TTY) take the first entry. Interactive
    runs prompt for a 1-based number; pressing Enter takes the first entry, and
    an unparsable or out-of-range answer (or EOF) also falls back to the first
    entry after printing a notice. None is returned only for an empty list.
    """
    if not sessions:
        return None

    print("Sessions (most recent first):")
    for position, item in enumerate(sessions, start=1):
        print(_format_session_line(position, item))

    # NOTE(review): the first entry is treated as the latest session; this
    # assumes the list arrives most-recent-first — confirm against the caller.
    default = sessions[0]

    interactive = sys.stdin.isatty()
    if not interactive:
        print(f"Using latest session: {default['id']}")
        return default

    count = len(sessions)
    selected = None
    try:
        reply = input(f"Select session (1–{count}, or Enter for latest): ").strip()
        if not reply:
            return default
        number = int(reply)
        if 1 <= number <= count:
            selected = sessions[number - 1]
    except (ValueError, EOFError):
        selected = None

    if selected is not None:
        return selected
    print("Invalid choice; using latest session.")
    return default
72+
73+
74+
async def download_recording(
    http_client: httpx.AsyncClient,
    recording: dict,
    out_dir: str,
) -> bool:
    """Download one recording's raw JSON into ``out_dir``.

    Args:
        http_client: Async HTTP client used for the GET request.
        recording: Recording dict; reads ``id``, ``url`` and ``ball.serialNumber``.
        out_dir: Existing directory that receives ``<recording id>.json``.

    Returns:
        True if a file was written. False if the recording was skipped: no
        URL, an HTTP error status, a request failure, a body that is not
        valid JSON, or a payload with zero records. Every skip is reported
        on stdout.
    """
    ball = recording.get("ball") or {}
    serial = ball.get("serialNumber", "?")

    url = recording.get("url")
    if not url:
        print(f" Skip {recording['id']} (Ball {serial}): no URL")
        return False
    if url.startswith("/"):
        # Server-relative URL: resolve it against the API base.
        url = f"{API_BASE_URL.rstrip('/')}{url}"

    try:
        response = await http_client.get(url)
        response.raise_for_status()
        raw = response.json()
    except httpx.HTTPStatusError as e:
        print(f" Skip {recording['id']} (Ball {serial}): {e.response.status_code}")
        return False
    except httpx.RequestError as e:
        reason = str(e).strip() or type(e).__name__
        print(f" Skip {recording['id']} (Ball {serial}): {reason}")
        return False
    except ValueError:
        # A body that is not valid JSON makes .json() raise a ValueError
        # subclass. Skip this recording instead of letting the exception
        # abort every other download running alongside it.
        print(f" Skip {recording['id']} (Ball {serial}): invalid JSON")
        return False

    count = _record_count(raw)
    if count == 0:
        print(f" Skip {recording['id']} (Ball {serial}): empty data")
        return False

    path = os.path.join(out_dir, f"{recording['id']}.json")
    with open(path, "w", encoding="utf-8") as f:
        json.dump(raw, f, indent=2)
    print(f" Ball {serial}: {count} records -> {path}")
    return True
113+
114+
115+
async def main() -> None:
    """List the club's sessions, pick one, and download its ball data.

    Uses ``PlayerDataAPI`` with the query builders to list the sessions for
    ``CLUB_ID`` and to fetch the chosen session's ball data recordings, then
    downloads each recording that has a URL into a directory named after the
    session ID.

    Raises:
        SystemExit: if ``CLIENT_ID`` or ``CLUB_ID`` is not configured.
    """
    # Fail early with a clear message rather than sending requests built from
    # missing identifiers.
    if not CLIENT_ID or not CLUB_ID:
        raise SystemExit(
            "CLIENT_ID and CLUB_ID must be set "
            "(environment variables or the config constants in this file)."
        )

    api = PlayerDataAPI(
        client_id=CLIENT_ID,
        client_secret="",
        authentication_type=AuthenticationType.AUTHORISATION_CODE_FLOW_PCKE,
    )

    sessions_response = await api.run_queries(
        "ClubSessionsQuery",
        club_sessions(club_id=CLUB_ID),
    )
    sessions = sessions_response.get("sessions") or []

    if not sessions:
        print("No sessions found.")
        return
    print(f"Found {len(sessions)} session(s).")

    chosen = _choose_session(sessions)
    if not chosen:
        return

    session_response = await api.run_queries(
        "SessionBallDataQuery",
        session_ball_data(chosen["id"]),
    )
    session = session_response.get("session")

    if not session:
        print(f"Session {chosen['id']} not found.")
        return

    # Only recordings that expose a URL can be downloaded.
    recordings_with_url = [
        r for r in (session.get("ballDataRecordings") or []) if r.get("url")
    ]
    if not recordings_with_url:
        print(f"No ball data recordings with URLs for session {session['id']}.")
        return

    # Files are written to a directory named after the session ID, relative
    # to the current working directory.
    out_dir = session["id"]
    os.makedirs(out_dir, exist_ok=True)
    print(f"Session {session['id']} ({session['startTime']} – {session['endTime']})")
    print(f"Downloading {len(recordings_with_url)} recording(s) to {out_dir}/")

    # NOTE(review): relies on the API object's private token accessor.
    headers = {"Authorization": f"Bearer {api._get_authentication_token()}"}
    async with httpx.AsyncClient(headers=headers, timeout=60.0) as http_client:
        # Each download returns True/False, so the sum is the number saved.
        ok = sum(
            await asyncio.gather(
                *[
                    download_recording(http_client, r, out_dir)
                    for r in recordings_with_url
                ]
            )
        )
    print(f"Done: {ok}/{len(recordings_with_url)} saved.")


if __name__ == "__main__":
    asyncio.run(main())
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from playerdatapy.custom_queries import Query
2+
from playerdatapy.input_types import SessionsSessionFilter
3+
from playerdatapy.custom_fields import SessionInterface
4+
5+
6+
def club_sessions(club_id: str) -> SessionInterface:
    """Build the sessions query for one club, selecting id, start time and end time."""
    # Restrict the query to sessions whose club ID equals ``club_id``.
    club_filter = SessionsSessionFilter(clubIdEq=club_id)
    selected_fields = (
        SessionInterface.id,
        SessionInterface.start_time,
        SessionInterface.end_time,
    )
    return Query.sessions(filter=club_filter).fields(*selected_fields)

0 commit comments

Comments
 (0)