Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
197 changes: 196 additions & 1 deletion openml/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

import argparse
import pickle
import string
import sys
from collections.abc import Callable
Expand Down Expand Up @@ -301,6 +302,119 @@ def configure_field( # noqa: PLR0913
verbose_set(field, value)


def upload_dataset(args: argparse.Namespace) -> None:
    """Upload a dataset from a CSV or ARFF file to OpenML.

    Reads the file at ``args.file_path`` into a pandas DataFrame, builds an
    OpenML dataset from it (attribute types are inferred via
    ``attributes="auto"``), and publishes it. On success, prints the new
    dataset's ID and URL to stdout.

    Exits with status 1 if the file does not exist or has an unsupported
    extension. Error diagnostics go to stderr so stdout stays clean for
    scripting/pipelines.
    """
    import pandas as pd

    file_path = Path(args.file_path)
    if not file_path.is_file():
        print(f"Error: File '{file_path}' not found.", file=sys.stderr)
        sys.exit(1)

    suffix = file_path.suffix.lower()
    if suffix == ".csv":
        data = pd.read_csv(file_path)
    elif suffix == ".arff":
        # liac-arff is an optional dependency; import lazily like pandas above.
        import arff

        with file_path.open() as fh:
            arff_data = arff.load(fh)
        # liac-arff returns rows plus (name, type) attribute pairs; use the
        # attribute names as DataFrame column labels.
        data = pd.DataFrame(
            arff_data["data"],
            columns=[attr[0] for attr in arff_data["attributes"]],
        )
    else:
        print(
            f"Error: Unsupported file format '{suffix}'. Supported formats: .csv, .arff",
            file=sys.stderr,
        )
        sys.exit(1)

    dataset = openml.datasets.create_dataset(
        name=args.name,
        description=args.description,
        creator=args.creator,
        contributor=args.contributor,
        collection_date=args.collection_date,
        language=args.language,
        licence=args.licence,
        attributes="auto",
        data=data,
        default_target_attribute=args.default_target_attribute,
        ignore_attribute=args.ignore_attribute,
        citation=args.citation or "",
        row_id_attribute=args.row_id_attribute,
        original_data_url=args.original_data_url,
        paper_url=args.paper_url,
        version_label=args.version_label,
        update_comment=args.update_comment,
    )
    dataset.publish()
    print(f"Dataset successfully uploaded. ID: {dataset.id}")
    print(f"URL: {dataset.openml_url}")


def upload_flow(args: argparse.Namespace) -> None:
    """Upload a flow from a serialized (pickled) scikit-learn model file.

    Unpickles the model at ``args.file_path``, converts it to an OpenML flow
    via the scikit-learn extension, optionally overrides the flow's name and
    description from the CLI arguments, and publishes it. On success, prints
    the flow ID and URL to stdout.

    Exits with status 1 if the file does not exist; diagnostics go to stderr.
    """
    from openml_sklearn import SklearnExtension

    file_path = Path(args.file_path)
    if not file_path.is_file():
        print(f"Error: File '{file_path}' not found.", file=sys.stderr)
        sys.exit(1)

    # SECURITY: unpickling executes arbitrary code embedded in the file.
    # This is acceptable for a CLI loading the user's own model, but never
    # point this at an untrusted file.
    with file_path.open("rb") as fh:
        model = pickle.load(fh)  # noqa: S301

    extension = SklearnExtension()
    flow = extension.model_to_flow(model)

    # Only override flow metadata when explicitly provided on the CLI.
    if args.name:
        flow.custom_name = args.name
    if args.description:
        flow.description = args.description

    flow.publish()
    print(f"Flow successfully uploaded. ID: {flow.flow_id}")
    print(f"URL: {flow.openml_url}")


def upload_run(args: argparse.Namespace) -> None:
    """Upload a run from a directory containing run files to OpenML.

    Rebuilds an :class:`openml.runs.OpenMLRun` from the files in
    ``args.file_path`` (description.xml, predictions.arff, etc.) and
    publishes it. If ``--no_model`` was given, ``model.pkl`` is not required
    to be present in the directory.

    Exits with status 1 if the directory does not exist; diagnostics go to
    stderr so stdout stays clean for scripting.
    """
    directory = Path(args.file_path)
    if not directory.is_dir():
        print(f"Error: Directory '{directory}' not found.", file=sys.stderr)
        sys.exit(1)

    expect_model = not args.no_model
    run = openml.runs.OpenMLRun.from_filesystem(directory, expect_model=expect_model)
    run.publish()
    print(f"Run successfully uploaded. ID: {run.run_id}")
    print(f"URL: {run.openml_url}")


def upload(args: argparse.Namespace) -> None:
    """Dispatch ``openml upload <resource>`` to the matching upload function.

    Requires a configured API key, since every upload is an authenticated
    write to the OpenML server. Exits with status 1 (diagnostics on stderr)
    when no key is configured or no/unknown resource was requested.
    """
    if not openml.config.apikey:
        print(
            "Error: No API key configured. Set your API key with:\n"
            "  openml configure apikey\n"
            "For more information, see: "
            "https://openml.github.io/openml-python/latest/examples/Basics/"
            "introduction_tutorial/#authentication",
            file=sys.stderr,
        )
        sys.exit(1)

    # Dispatch table keyed by the `upload_resource` subparser destination.
    upload_functions: dict[str, Callable[[argparse.Namespace], None]] = {
        "dataset": upload_dataset,
        "flow": upload_flow,
        "run": upload_run,
    }

    if args.upload_resource not in upload_functions:
        print("Please specify a resource to upload: dataset, flow, or run.", file=sys.stderr)
        sys.exit(1)

    upload_functions[args.upload_resource](args)


def configure(args: argparse.Namespace) -> None:
"""Calls the right submenu(s) to edit `args.field` in the configuration file."""
set_functions = {
Expand Down Expand Up @@ -330,7 +444,10 @@ def not_supported_yet(_: str) -> None:


def main() -> None:
subroutines = {"configure": configure}
subroutines: dict[str, Callable[[argparse.Namespace], None]] = {
"configure": configure,
"upload": upload,
}

parser = argparse.ArgumentParser()
# Add a global --version flag to display installed version and exit
Expand Down Expand Up @@ -371,6 +488,84 @@ def main() -> None:
help="The value to set the FIELD to.",
)

# --- upload subcommand ---
parser_upload = subparsers.add_parser(
"upload",
description="Upload resources (datasets, flows, or runs) to OpenML.",
)
upload_subparsers = parser_upload.add_subparsers(dest="upload_resource")

# upload dataset
parser_upload_dataset = upload_subparsers.add_parser(
"dataset",
description="Upload a dataset from a CSV or ARFF file.",
)
parser_upload_dataset.add_argument(
"file_path",
type=str,
help="Path to the dataset file (.csv or .arff).",
)
_dataset_args: list[tuple[str, str, bool]] = [
("--name", "Name of the dataset.", True),
("--description", "Description of the dataset.", True),
("--default_target_attribute", "The default target attribute.", True),
("--creator", "The person who created the dataset.", False),
Comment on lines +512 to +516
("--contributor", "People who contributed to the dataset.", False),
("--collection_date", "The date the data was originally collected.", False),
("--language", "Language in which the data is represented.", False),
("--licence", "License of the data.", False),
("--ignore_attribute", "Attributes to exclude in modelling (comma separated).", False),
("--citation", "Reference(s) that should be cited.", False),
("--row_id_attribute", "The attribute that represents the row-id column.", False),
("--original_data_url", "URL to the original dataset (for derived data).", False),
("--paper_url", "Link to a paper describing the dataset.", False),
("--version_label", "Version label (e.g. date, hash).", False),
("--update_comment", "An explanation for when the dataset is uploaded.", False),
]
for flag, help_text, required in _dataset_args:
parser_upload_dataset.add_argument(
flag,
type=str,
required=required,
default=None,
help=help_text,
)

# upload flow
parser_upload_flow = upload_subparsers.add_parser(
"flow",
description="Upload a flow from a serialized model file (.pkl).",
)
parser_upload_flow.add_argument(
"file_path",
type=str,
help="Path to the serialized model file (.pkl).",
)
parser_upload_flow.add_argument("--name", type=str, default=None, help="Custom flow name.")
parser_upload_flow.add_argument(
"--description",
type=str,
default=None,
help="Description of the flow.",
)

# upload run
parser_upload_run = upload_subparsers.add_parser(
"run",
description="Upload a run from a directory containing run files.",
)
parser_upload_run.add_argument(
"file_path",
type=str,
help="Path to directory with run files (description.xml, predictions.arff, etc.).",
)
Comment on lines +559 to +566
Comment thread
Manas-7854 marked this conversation as resolved.
parser_upload_run.add_argument(
"--no_model",
action="store_true",
default=False,
help="If set, do not require model.pkl in the run directory.",
)

args = parser.parse_args()
subroutines.get(args.subroutine, lambda _: parser.print_help())(args)

Expand Down
82 changes: 75 additions & 7 deletions tests/test_openml/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,19 @@
import shutil
import subprocess
import sys
from unittest import mock

import openml
import pytest

import openml
from openml.cli import main


def test_cli_version_prints_package_version():
# Invoke the CLI via module to avoid relying on console script installation
result = subprocess.run(
result = subprocess.run( # noqa: S603
[sys.executable, "-m", "openml.cli", "--version"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
capture_output=True,
text=True,
check=False,
)
Expand All @@ -31,14 +33,80 @@ def test_console_script_version_prints_package_version():
if console is None:
pytest.skip("'openml' console script not found in PATH")

result = subprocess.run(
result = subprocess.run( # noqa: S603
[console, "--version"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
capture_output=True,
text=True,
check=False,
)

assert result.returncode == 0
assert result.stderr == ""
assert openml.__version__ in result.stdout


def test_upload_dataset_arg_parsing():
    # The `upload dataset` subcommand must parse the positional file path,
    # the required options, one optional option, and default the rest to None.
    argv = [
        "openml",
        "upload",
        "dataset",
        "data.csv",
        "--name",
        "MyDataset",
        "--description",
        "A test dataset",
        "--default_target_attribute",
        "target",
        "--creator",
        "TestUser",
    ]
    with (
        mock.patch("sys.argv", argv),
        mock.patch("openml.cli.upload") as upload_mock,
    ):
        main()

    (parsed,) = upload_mock.call_args[0]
    assert parsed.subroutine == "upload"
    assert parsed.upload_resource == "dataset"
    assert parsed.file_path == "data.csv"
    assert parsed.name == "MyDataset"
    assert parsed.description == "A test dataset"
    assert parsed.default_target_attribute == "target"
    assert parsed.creator == "TestUser"
    assert parsed.contributor is None
    assert parsed.licence is None

def test_upload_flow_arg_parsing():
    # The `upload flow` subcommand must parse its positional path and both
    # optional metadata flags.
    argv = ["openml", "upload", "flow", "model.pkl", "--name", "MyFlow", "--description", "A flow"]
    with (
        mock.patch("sys.argv", argv),
        mock.patch("openml.cli.upload") as upload_mock,
    ):
        main()

    (parsed,) = upload_mock.call_args[0]
    assert parsed.upload_resource == "flow"
    assert parsed.file_path == "model.pkl"
    assert parsed.name == "MyFlow"
    assert parsed.description == "A flow"


def test_upload_run_arg_parsing():
    # The `upload run` subcommand must parse the positional directory path
    # and record the presence of the --no_model flag.
    argv = ["openml", "upload", "run", "/path/to/run_dir", "--no_model"]
    with (
        mock.patch("sys.argv", argv),
        mock.patch("openml.cli.upload") as upload_mock,
    ):
        main()

    (parsed,) = upload_mock.call_args[0]
    assert parsed.upload_resource == "run"
    assert parsed.file_path == "/path/to/run_dir"
    assert parsed.no_model is True


def test_upload_run_no_model_defaults_false():
    # Omitting --no_model must leave the flag at its False default.
    argv = ["openml", "upload", "run", "/path/to/run_dir"]
    with (
        mock.patch("sys.argv", argv),
        mock.patch("openml.cli.upload") as upload_mock,
    ):
        main()

    (parsed,) = upload_mock.call_args[0]
    assert parsed.no_model is False
Loading