Skip to content

Commit 4ae9ddf

Browse files
committed
A few more helpers for datasets
1 parent e606010 commit 4ae9ddf

1 file changed

Lines changed: 27 additions & 3 deletions

File tree

dataikuapi/dss/dataset.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,7 @@ def get_object_discussions(self):
410410

411411
# Type names grouped as filesystem ("FS") types -- presumably matched against a dataset's "type" setting; confirm against callers.
FS_TYPES = ["Filesystem", "UploadedFiles", "FilesInFolder",
412412
"HDFS", "S3", "Azure", "GCS", "FTP", "SCP", "SFTP"]
413+
# "HTTP" is not listed in FS_TYPES above: HTTP is FSLike but not FS
413414

414415
SQL_TYPES = ["JDBC", "PostgreSQL", "MySQL", "Vertica", "Snowflake", "Redshift",
415416
"Greenplum", "Teradata", "Oracle", "SQLServer", "SAPHANA", "Netezza",
@@ -495,8 +496,13 @@ def __init__(self, dataset, settings):
495496
self.settings = settings
496497

497498
def get_raw(self):
499+
"""Get the raw dataset settings as a dict"""
498500
# Hands back the very object stored in self.settings (no copy is made),
# so in-place modifications by the caller are visible through this object.
return self.settings
499501

502+
def get_raw_params(self):
503+
"""Get the type-specific params, as a raw dict"""
504+
# Returns the object stored under the "params" key of self.settings, not a copy.
# Assuming self.settings is a plain dict, a missing "params" key raises KeyError.
return self.settings["params"]
505+
500506
def get_type(self):
# Return the value stored under the "type" key of self.settings.
# Assuming self.settings is a plain dict, a missing "type" key raises KeyError.
501507
return self.settings["type"]
502508

@@ -509,6 +515,9 @@ def add_discrete_partitioning_dimension(self, dim_name):
509515
def add_time_partitioning_dimension(self, dim_name, period="DAY"):
# Append a dimension of type "time" to settings["partitioning"]["dimensions"].
#   dim_name: stored as the dimension's "name".
#   period:   stored under the dimension's "params" as "period" (default "DAY");
#             accepted values are not validated or listed here.
# Only the in-memory settings are modified; nothing is returned.
510516
self.settings["partitioning"]["dimensions"].append({"name": dim_name, "type": "time", "params":{"period": period}})
511517

518+
def add_raw_schema_column(self, column):
# Append `column` unchanged to settings["schema"]["columns"].
# No validation or copy is performed; the expected shape of `column`
# is not defined here -- presumably a column-definition dict, confirm against callers.
519+
self.settings["schema"]["columns"].append(column)
520+
512521
def save(self):
513522
self.dataset.client._perform_empty(
514523
"PUT", "/projects/%s/datasets/%s" % (self.dataset.project_key, self.dataset.dataset_name),
@@ -518,13 +527,21 @@ class FSLikeDatasetSettings(DSSDatasetSettings):
518527
def __init__(self, dataset, settings):
# Adds no state of its own: forwards `dataset` and `settings` unchanged
# to the parent class initializer.
519528
super(FSLikeDatasetSettings, self).__init__(dataset, settings)
520529

521-
def set_format(format_type, format_params = None):
530+
def set_connection_and_path(self, connection, path):
# Store `connection` and `path` under the "connection" and "path" keys
# of settings["params"], overwriting any previous values.
# Values are stored as given (no validation); nothing is returned.
531+
self.settings["params"]["connection"] = connection
532+
self.settings["params"]["path"] = path
533+
534+
def get_raw_format_params(self):
535+
"""Get the raw format parameters as a dict"""
536+
# Returns the object stored under the "formatParams" key of self.settings, not a copy.
# Assuming self.settings is a plain dict, a missing "formatParams" key raises KeyError.
return self.settings["formatParams"]
537+
538+
def set_format(self, format_type, format_params = None):
# Set the "formatType" and "formatParams" entries of the settings.
#   format_type:   stored as-is under "formatType".
#   format_params: stored as-is (by reference) under "formatParams";
#                  when None, a fresh empty dict is stored instead.
522539
# None is the default rather than {} so that each call gets its own new dict.
if format_params is None:
523540
format_params = {}
524541
self.settings["formatType"] = format_type
525542
self.settings["formatParams"] = format_params
526543

527-
def set_csv_format(separator=",", style="excel", skip_rows_before=0, header_row=True, skip_rows_after=0):
544+
def set_csv_format(self, separator=",", style="excel", skip_rows_before=0, header_row=True, skip_rows_after=0):
528545
format_params = {
529546
"style" : style,
530547
"separator": separator,
@@ -541,7 +558,14 @@ class SQLDatasetSettings(DSSDatasetSettings):
541558
def __init__(self, dataset, settings):
# Adds no state of its own: forwards `dataset` and `settings` unchanged
# to the parent class initializer.
542559
super(SQLDatasetSettings, self).__init__(dataset, settings)
543560

544-
561+
def set_table(self, connection, schema, table):
562+
"""Sets this SQL dataset in 'table' mode, targeting a particular table of a connection"""
# update() overwrites only the four keys below in settings["params"];
# any other keys already present in the params are left untouched.
563+
self.settings["params"].update({
564+
"connection": connection,
565+
# "mode" is always set to the literal "table" by this helper.
"mode": "table",
566+
"schema": schema,
567+
"table": table
568+
})
545569

546570
class DSSManagedDatasetCreationHelper(object):
547571

0 commit comments

Comments
 (0)