1919from ..woqlschema .woql_schema import WOQLSchema
2020
2121
def _df_to_schema(class_name, df, np, embedded=None, id_col=None, na_mode=None, keys=None):
    """Convert a pandas DataFrame to a TerminusDB schema class definition.

    Every column of ``df`` becomes one property of the class. The property
    type is derived from the column dtype, except for columns listed in
    ``embedded``, which are typed as ``class_name`` itself.

    Args:
        class_name: Name used as the ``@id`` of the schema class.
        df: pandas DataFrame whose columns and dtypes define the properties.
        np: numpy module reference, supplied by the caller.
        embedded: Column names whose property type is ``class_name`` instead
            of a type derived from the dtype.
        id_col: Column name used as document ID; it is never made Optional.
        na_mode: NA handling mode. Only ``"optional"`` changes the schema:
            it wraps every column that is neither a key nor the ID column
            in an ``Optional`` type.
        keys: Column names used as keys; they are never made Optional.

    Returns:
        dict: Schema class definition with ``@type``, ``@id`` and one entry
        per DataFrame column.
    """
    keys = [] if keys is None else keys
    embedded = [] if embedded is None else embedded

    # numpy scalar type -> Python builtin, for every numpy alias that shares
    # its name with a builtin (e.g. "int", "float", "bool", "str", "object").
    np_to_builtin = {
        np_type: getattr(builtins, name)
        for name, np_type in np.sctypeDict.items()
        if name in vars(builtins)
    }
    np_to_builtin[np.datetime64] = dt.datetime

    # The table above only knows the platform-default width of each type, so
    # dtypes such as int32/float32/uint8 (or int64 where the default int is
    # 32-bit) miss it. Fall back on the dtype's kind code so those columns
    # keep a numeric/boolean/datetime type instead of degrading to a string.
    kind_to_builtin = {
        "b": bool,
        "i": int,
        "u": int,
        "f": float,
        "M": dt.datetime,
    }

    class_dict = {"@type": "Class", "@id": class_name}
    for col, dtype in df.dtypes.items():
        if col in embedded:
            converted_type = class_name
        else:
            builtin_type = np_to_builtin.get(dtype.type)
            if builtin_type is None:
                builtin_type = kind_to_builtin.get(getattr(dtype, "kind", None), str)
            if builtin_type is object:
                builtin_type = str  # pandas stores plain strings as object
            converted_type = wt.to_woql_type(builtin_type)

        is_id_col = bool(id_col) and col == id_col
        if na_mode == "optional" and not is_id_col and col not in keys:
            class_dict[col] = {"@type": "Optional", "@class": converted_type}
        else:
            class_dict[col] = converted_type

    return class_dict
67+
68+
# Root click command group for this CLI; the body is intentionally empty
# because the function only exists to be turned into the group object.
# NOTE(review): subcommands are presumably registered on this group elsewhere
# in the file — confirm. A docstring is deliberately not used here, since
# click would show it as the group's help text.
2269@click .group ()
2370def tdbpy ():
2471 pass
@@ -453,40 +500,6 @@ def importcsv(
453500 # "not schema" forces has_schema to False whenever the schema option is given
454501 has_schema = not schema and class_name in client .get_existing_classes ()
455502
456- def _df_to_schema (class_name , df ):
457- class_dict = {"@type" : "Class" , "@id" : class_name }
458- np_to_buildin = {
459- v : getattr (builtins , k )
460- for k , v in np .sctypeDict .items ()
461- if k in vars (builtins )
462- }
463- np_to_buildin [np .datetime64 ] = dt .datetime
464- for col , dtype in dict (df .dtypes ).items ():
465- if embedded and col in embedded :
466- converted_type = class_name
467- else :
468- converted_type = np_to_buildin [dtype .type ]
469- if converted_type is object :
470- converted_type = str # pandas treats all string as objects
471- converted_type = wt .to_woql_type (converted_type )
472-
473- if id_ and col == id_ :
474- class_dict [col ] = converted_type
475- elif na == "optional" and col not in keys :
476- class_dict [col ] = {"@type" : "Optional" , "@class" : converted_type }
477- else :
478- class_dict [col ] = converted_type
479- # if id_ is not None:
480- # pass # don't need key if id is specified
481- # elif keys:
482- # class_dict["@key"] = {"@type": "Random"}
483- # elif na == "optional":
484- # # have to use random key cause keys will be optional
485- # class_dict["@key"] = {"@type": "Random"}
486- # else:
487- # class_dict["@key"] = {"@type": "Random"}
488- return class_dict
489-
490503 with pd .read_csv (csv_file , sep = sep , chunksize = chunksize , dtype = dtype ) as reader :
491504 for df in tqdm (reader ):
492505 if any (df .isna ().any ()) and na == "error" :
@@ -499,7 +512,7 @@ def _df_to_schema(class_name, df):
499512 converted_col = col .lower ().replace (" " , "_" ).replace ("." , "_" )
500513 df .rename (columns = {col : converted_col }, inplace = True )
501514 if not has_schema :
502- class_dict = _df_to_schema (class_name , df )
515+ class_dict = _df_to_schema (class_name , df , np , embedded = embedded , id_col = id_ , na_mode = na , keys = keys )
503516 if message is None :
504517 schema_msg = f"Schema object insert/ update with { csv_file } by Python client."
505518 else :
0 commit comments