Skip to content

Commit 6dc9f1f

Browse files
committed
Refactor to enable testing with script.py
1 parent a1fd4a9 commit 6dc9f1f

2 files changed

Lines changed: 962 additions & 54 deletions

File tree

terminusdb_client/scripts/scripts.py

Lines changed: 48 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,53 @@
1919
from ..woqlschema.woql_schema import WOQLSchema
2020

2121

22+
def _df_to_schema(
    class_name, df, np, embedded=None, id_col=None, na_mode=None, keys=None
):
    """Convert a pandas DataFrame to a TerminusDB schema class definition.

    Every column of ``df`` becomes one property of the class. The property
    type is derived from the column dtype, except for columns listed in
    ``embedded``, which are typed as ``class_name`` itself.

    Args:
        class_name: Name of the schema class to create (used as ``@id``).
        df: pandas DataFrame whose columns/dtypes describe the class.
        np: numpy module reference (passed in so callers control the import).
        embedded: Column names whose type is ``class_name`` instead of a
            datatype derived from the dtype.
        id_col: Column used as document ID; it is never wrapped in
            ``Optional``.
        na_mode: NA handling mode. Only ``"optional"`` affects the schema:
            non-key, non-ID columns are then wrapped in ``Optional``.
        keys: Column names used as keys; these are never wrapped in
            ``Optional``.

    Returns:
        dict: Schema class definition dictionary.
    """
    keys = [] if keys is None else keys
    embedded = [] if embedded is None else embedded

    # Exact matches: numpy scalar types that numpy aliases to a builtin name
    # (e.g. "int", "float", "bool", "object").
    np_to_builtin = {
        v: getattr(builtins, k)
        for k, v in np.sctypeDict.items()
        if k in vars(builtins)
    }
    np_to_builtin[np.datetime64] = dt.datetime

    # Fallbacks for sized variants (int32, float32, uint8, ...) that are not
    # direct aliases of a builtin name. Without these, such columns would
    # silently be declared as strings.
    abstract_fallbacks = (
        (np.bool_, bool),
        (np.integer, int),
        (np.floating, float),
    )

    def _builtin_for(scalar_type):
        """Return the builtin type for a dtype's scalar type, or ``object``."""
        exact = np_to_builtin.get(scalar_type)
        if exact is not None:
            return exact
        # timedelta64 subclasses np.signedinteger; keep it on the string
        # fallback rather than declaring it as an integer.
        if isinstance(scalar_type, type) and not issubclass(
            scalar_type, np.timedelta64
        ):
            for abstract_type, builtin_type in abstract_fallbacks:
                if issubclass(scalar_type, abstract_type):
                    return builtin_type
        return object

    class_dict = {"@type": "Class", "@id": class_name}
    for col, dtype in df.dtypes.items():
        if embedded and col in embedded:
            converted_type = class_name
        else:
            converted_type = _builtin_for(dtype.type)
            if converted_type is object:
                converted_type = str  # pandas treats all strings as objects
            converted_type = wt.to_woql_type(converted_type)

        is_id_col = bool(id_col) and col == id_col
        if not is_id_col and na_mode == "optional" and col not in keys:
            class_dict[col] = {"@type": "Optional", "@class": converted_type}
        else:
            class_dict[col] = converted_type

    return class_dict
67+
68+
2269
@click.group()
def tdbpy():
    # Click command group with an intentionally empty body. A docstring is
    # deliberately not used here because click would show it as help text.
    return None
@@ -453,40 +500,6 @@ def importcsv(
453500
# "not schema" makes has_schema always False when the schema option is given
454501
has_schema = not schema and class_name in client.get_existing_classes()
455502

456-
def _df_to_schema(class_name, df):
    """Build the schema class dictionary describing the columns of ``df``.

    Reads ``embedded``, ``id_``, ``na`` and ``keys`` from the enclosing
    scope. Raises ``KeyError`` for a column dtype that has no builtin
    counterpart in the lookup table.
    """
    scalar_to_builtin = {
        np_type: getattr(builtins, type_name)
        for type_name, np_type in np.sctypeDict.items()
        if type_name in vars(builtins)
    }
    scalar_to_builtin[np.datetime64] = dt.datetime

    schema_class = {"@type": "Class", "@id": class_name}
    for column, column_dtype in dict(df.dtypes).items():
        if embedded and column in embedded:
            property_type = class_name
        else:
            py_type = scalar_to_builtin[column_dtype.type]
            # pandas reports string columns with the object dtype
            property_type = wt.to_woql_type(str if py_type is object else py_type)

        # The ID column is checked first so it is never made Optional.
        is_id_column = bool(id_) and column == id_
        if not is_id_column and na == "optional" and column not in keys:
            schema_class[column] = {"@type": "Optional", "@class": property_type}
        else:
            schema_class[column] = property_type
    return schema_class
489-
490503
with pd.read_csv(csv_file, sep=sep, chunksize=chunksize, dtype=dtype) as reader:
491504
for df in tqdm(reader):
492505
if any(df.isna().any()) and na == "error":
@@ -499,7 +512,7 @@ def _df_to_schema(class_name, df):
499512
converted_col = col.lower().replace(" ", "_").replace(".", "_")
500513
df.rename(columns={col: converted_col}, inplace=True)
501514
if not has_schema:
502-
class_dict = _df_to_schema(class_name, df)
515+
class_dict = _df_to_schema(class_name, df, np, embedded=embedded, id_col=id_, na_mode=na, keys=keys)
503516
if message is None:
504517
schema_msg = f"Schema object insert/ update with {csv_file} by Python client."
505518
else:

0 commit comments

Comments
 (0)