Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
197 changes: 196 additions & 1 deletion openml/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

import argparse
import pickle
import string
import sys
from collections.abc import Callable
Expand Down Expand Up @@ -301,6 +302,119 @@ def configure_field( # noqa: PLR0913
verbose_set(field, value)


def upload_dataset(args: argparse.Namespace) -> None:
    """Upload a dataset from a CSV or ARFF file to OpenML.

    Reads the file at ``args.file_path`` into a pandas DataFrame, builds an
    OpenML dataset from it (attribute types are inferred via
    ``attributes="auto"``), and publishes it. On success, prints the new
    dataset's ID and URL to stdout.

    Exits with status 1 if the file does not exist or has an unsupported
    extension. Error diagnostics go to stderr so stdout stays clean for
    scripting/pipelines.
    """
    import pandas as pd

    file_path = Path(args.file_path)
    if not file_path.is_file():
        print(f"Error: File '{file_path}' not found.", file=sys.stderr)
        sys.exit(1)

    suffix = file_path.suffix.lower()
    if suffix == ".csv":
        data = pd.read_csv(file_path)
    elif suffix == ".arff":
        # liac-arff is an optional dependency; import lazily like pandas above.
        import arff

        with file_path.open() as fh:
            arff_data = arff.load(fh)
        # liac-arff returns rows plus (name, type) attribute pairs; use the
        # attribute names as DataFrame column labels.
        data = pd.DataFrame(
            arff_data["data"],
            columns=[attr[0] for attr in arff_data["attributes"]],
        )
    else:
        print(
            f"Error: Unsupported file format '{suffix}'. Supported formats: .csv, .arff",
            file=sys.stderr,
        )
        sys.exit(1)

    dataset = openml.datasets.create_dataset(
        name=args.name,
        description=args.description,
        creator=args.creator,
        contributor=args.contributor,
        collection_date=args.collection_date,
        language=args.language,
        licence=args.licence,
        attributes="auto",
        data=data,
        default_target_attribute=args.default_target_attribute,
        ignore_attribute=args.ignore_attribute,
        citation=args.citation or "",
        row_id_attribute=args.row_id_attribute,
        original_data_url=args.original_data_url,
        paper_url=args.paper_url,
        version_label=args.version_label,
        update_comment=args.update_comment,
    )
    dataset.publish()
    print(f"Dataset successfully uploaded. ID: {dataset.id}")
    print(f"URL: {dataset.openml_url}")


def upload_flow(args: argparse.Namespace) -> None:
    """Upload a flow from a serialized (pickled) scikit-learn model file.

    Unpickles the model at ``args.file_path``, converts it to an OpenML flow
    via the scikit-learn extension, optionally overrides the flow's name and
    description from the CLI arguments, and publishes it. On success, prints
    the flow ID and URL to stdout.

    Exits with status 1 if the file does not exist; diagnostics go to stderr.
    """
    from openml_sklearn import SklearnExtension

    file_path = Path(args.file_path)
    if not file_path.is_file():
        print(f"Error: File '{file_path}' not found.", file=sys.stderr)
        sys.exit(1)

    # SECURITY: unpickling executes arbitrary code embedded in the file.
    # This is acceptable for a CLI loading the user's own model, but never
    # point this at an untrusted file.
    with file_path.open("rb") as fh:
        model = pickle.load(fh)  # noqa: S301

    extension = SklearnExtension()
    flow = extension.model_to_flow(model)

    # Only override flow metadata when explicitly provided on the CLI.
    if args.name:
        flow.custom_name = args.name
    if args.description:
        flow.description = args.description

    flow.publish()
    print(f"Flow successfully uploaded. ID: {flow.flow_id}")
    print(f"URL: {flow.openml_url}")


def upload_run(args: argparse.Namespace) -> None:
    """Upload a run from a directory containing run files to OpenML.

    Rebuilds an :class:`openml.runs.OpenMLRun` from the files in
    ``args.file_path`` (description.xml, predictions.arff, etc.) and
    publishes it. If ``--no_model`` was given, ``model.pkl`` is not required
    to be present in the directory.

    Exits with status 1 if the directory does not exist; diagnostics go to
    stderr so stdout stays clean for scripting.
    """
    directory = Path(args.file_path)
    if not directory.is_dir():
        print(f"Error: Directory '{directory}' not found.", file=sys.stderr)
        sys.exit(1)

    expect_model = not args.no_model
    run = openml.runs.OpenMLRun.from_filesystem(directory, expect_model=expect_model)
    run.publish()
    print(f"Run successfully uploaded. ID: {run.run_id}")
    print(f"URL: {run.openml_url}")


def upload(args: argparse.Namespace) -> None:
    """Dispatch ``openml upload <resource>`` to the matching upload function.

    Requires a configured API key, since every upload is an authenticated
    write to the OpenML server. Exits with status 1 (diagnostics on stderr)
    when no key is configured or no/unknown resource was requested.
    """
    if not openml.config.apikey:
        print(
            "Error: No API key configured. Set your API key with:\n"
            "  openml configure apikey\n"
            "For more information, see: "
            "https://openml.github.io/openml-python/latest/examples/Basics/"
            "introduction_tutorial/#authentication",
            file=sys.stderr,
        )
        sys.exit(1)

    # Dispatch table keyed by the `upload_resource` subparser destination.
    upload_functions: dict[str, Callable[[argparse.Namespace], None]] = {
        "dataset": upload_dataset,
        "flow": upload_flow,
        "run": upload_run,
    }

    if args.upload_resource not in upload_functions:
        print("Please specify a resource to upload: dataset, flow, or run.", file=sys.stderr)
        sys.exit(1)

    upload_functions[args.upload_resource](args)


def configure(args: argparse.Namespace) -> None:
"""Calls the right submenu(s) to edit `args.field` in the configuration file."""
set_functions = {
Expand Down Expand Up @@ -330,7 +444,10 @@ def not_supported_yet(_: str) -> None:


def main() -> None:
subroutines = {"configure": configure}
subroutines: dict[str, Callable[[argparse.Namespace], None]] = {
"configure": configure,
"upload": upload,
}

parser = argparse.ArgumentParser()
# Add a global --version flag to display installed version and exit
Expand Down Expand Up @@ -371,6 +488,84 @@ def main() -> None:
help="The value to set the FIELD to.",
)

# --- upload subcommand ---
parser_upload = subparsers.add_parser(
"upload",
description="Upload resources (datasets, flows, or runs) to OpenML.",
)
upload_subparsers = parser_upload.add_subparsers(dest="upload_resource")

# upload dataset
parser_upload_dataset = upload_subparsers.add_parser(
"dataset",
description="Upload a dataset from a CSV or ARFF file.",
)
parser_upload_dataset.add_argument(
"file_path",
type=str,
help="Path to the dataset file (.csv or .arff).",
)
_dataset_args: list[tuple[str, str, bool]] = [
("--name", "Name of the dataset.", True),
("--description", "Description of the dataset.", True),
("--default_target_attribute", "The default target attribute.", True),
("--creator", "The person who created the dataset.", False),
Comment on lines +512 to +516
("--contributor", "People who contributed to the dataset.", False),
("--collection_date", "The date the data was originally collected.", False),
("--language", "Language in which the data is represented.", False),
("--licence", "License of the data.", False),
("--ignore_attribute", "Attributes to exclude in modelling (comma separated).", False),
("--citation", "Reference(s) that should be cited.", False),
("--row_id_attribute", "The attribute that represents the row-id column.", False),
("--original_data_url", "URL to the original dataset (for derived data).", False),
("--paper_url", "Link to a paper describing the dataset.", False),
("--version_label", "Version label (e.g. date, hash).", False),
("--update_comment", "An explanation for when the dataset is uploaded.", False),
]
for flag, help_text, required in _dataset_args:
parser_upload_dataset.add_argument(
flag,
type=str,
required=required,
default=None,
help=help_text,
)

# upload flow
parser_upload_flow = upload_subparsers.add_parser(
"flow",
description="Upload a flow from a serialized model file (.pkl).",
)
parser_upload_flow.add_argument(
"file_path",
type=str,
help="Path to the serialized model file (.pkl).",
)
parser_upload_flow.add_argument("--name", type=str, default=None, help="Custom flow name.")
parser_upload_flow.add_argument(
"--description",
type=str,
default=None,
help="Description of the flow.",
)

# upload run
parser_upload_run = upload_subparsers.add_parser(
"run",
description="Upload a run from a directory containing run files.",
)
parser_upload_run.add_argument(
"file_path",
type=str,
help="Path to directory with run files (description.xml, predictions.arff, etc.).",
)
Comment on lines +559 to +566
Comment thread
Manas-7854 marked this conversation as resolved.
parser_upload_run.add_argument(
"--no_model",
action="store_true",
default=False,
help="If set, do not require model.pkl in the run directory.",
)

args = parser.parse_args()
subroutines.get(args.subroutine, lambda _: parser.print_help())(args)

Expand Down
82 changes: 75 additions & 7 deletions tests/test_openml/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,19 @@
import shutil
import subprocess
import sys
from unittest import mock

import openml
import pytest

import openml
from openml.cli import main


def test_cli_version_prints_package_version():
# Invoke the CLI via module to avoid relying on console script installation
result = subprocess.run(
result = subprocess.run( # noqa: S603
[sys.executable, "-m", "openml.cli", "--version"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
capture_output=True,
text=True,
check=False,
)
Expand All @@ -31,14 +33,80 @@ def test_console_script_version_prints_package_version():
if console is None:
pytest.skip("'openml' console script not found in PATH")

result = subprocess.run(
result = subprocess.run( # noqa: S603
[console, "--version"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
capture_output=True,
text=True,
check=False,
)

assert result.returncode == 0
assert result.stderr == ""
assert openml.__version__ in result.stdout


def test_upload_dataset_arg_parsing():
    # The `upload dataset` subcommand must parse the positional file path,
    # the required options, one optional option, and default the rest to None.
    argv = [
        "openml",
        "upload",
        "dataset",
        "data.csv",
        "--name",
        "MyDataset",
        "--description",
        "A test dataset",
        "--default_target_attribute",
        "target",
        "--creator",
        "TestUser",
    ]
    with (
        mock.patch("sys.argv", argv),
        mock.patch("openml.cli.upload") as upload_mock,
    ):
        main()

    (parsed,) = upload_mock.call_args[0]
    assert parsed.subroutine == "upload"
    assert parsed.upload_resource == "dataset"
    assert parsed.file_path == "data.csv"
    assert parsed.name == "MyDataset"
    assert parsed.description == "A test dataset"
    assert parsed.default_target_attribute == "target"
    assert parsed.creator == "TestUser"
    assert parsed.contributor is None
    assert parsed.licence is None

def test_upload_flow_arg_parsing():
    # The `upload flow` subcommand must parse its positional path and both
    # optional metadata flags.
    argv = ["openml", "upload", "flow", "model.pkl", "--name", "MyFlow", "--description", "A flow"]
    with (
        mock.patch("sys.argv", argv),
        mock.patch("openml.cli.upload") as upload_mock,
    ):
        main()

    (parsed,) = upload_mock.call_args[0]
    assert parsed.upload_resource == "flow"
    assert parsed.file_path == "model.pkl"
    assert parsed.name == "MyFlow"
    assert parsed.description == "A flow"


def test_upload_run_arg_parsing():
    # The `upload run` subcommand must parse the positional directory path
    # and record the presence of the --no_model flag.
    argv = ["openml", "upload", "run", "/path/to/run_dir", "--no_model"]
    with (
        mock.patch("sys.argv", argv),
        mock.patch("openml.cli.upload") as upload_mock,
    ):
        main()

    (parsed,) = upload_mock.call_args[0]
    assert parsed.upload_resource == "run"
    assert parsed.file_path == "/path/to/run_dir"
    assert parsed.no_model is True


def test_upload_run_no_model_defaults_false():
    # Omitting --no_model must leave the flag at its False default.
    argv = ["openml", "upload", "run", "/path/to/run_dir"]
    with (
        mock.patch("sys.argv", argv),
        mock.patch("openml.cli.upload") as upload_mock,
    ):
        main()

    (parsed,) = upload_mock.call_args[0]
    assert parsed.no_model is False
Loading