From 4c48c3e0051f8625d575a32ed86cb3651e5dc84a Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Fri, 7 Nov 2025 17:34:37 +0000 Subject: [PATCH 01/33] Update copyright year --- CMakeLists.txt | 2 +- cmake/CompilerWarnings.cmake | 2 +- cmake/ProjectSettings.cmake | 2 +- cmake/Sanitisers.cmake | 2 +- cmake/StaticAnalysers.cmake | 2 +- setup.py | 2 +- src/CMakeLists.txt | 2 +- src/__init__.py | 2 +- src/decoders.py | 2 +- src/hashing/FileUtils.cpp | 2 +- src/hashing/FileUtils.hpp | 2 +- src/hashing/HapData.cpp | 2 +- src/hashing/HapData.hpp | 2 +- src/hashing/pybind.cpp | 2 +- src/hashing/utils.hpp | 2 +- src/inference.py | 2 +- src/scripts/__init__.py | 2 +- src/scripts/infer_args.py | 2 +- src/scripts/infer_args_advanced.py | 2 +- src/scripts/prepare_example.py | 2 +- src/simulator.py | 2 +- src/utils.py | 2 +- test/test_import.py | 2 +- 23 files changed, 23 insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b42203c..123832b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/cmake/CompilerWarnings.cmake b/cmake/CompilerWarnings.cmake index 53f2bce..16c37c7 100644 --- a/cmake/CompilerWarnings.cmake +++ b/cmake/CompilerWarnings.cmake @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. 
# This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/cmake/ProjectSettings.cmake b/cmake/ProjectSettings.cmake index b73d53e..ad5289b 100644 --- a/cmake/ProjectSettings.cmake +++ b/cmake/ProjectSettings.cmake @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/cmake/Sanitisers.cmake b/cmake/Sanitisers.cmake index 8334ae8..8fbf606 100644 --- a/cmake/Sanitisers.cmake +++ b/cmake/Sanitisers.cmake @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/cmake/StaticAnalysers.cmake b/cmake/StaticAnalysers.cmake index f8bb49f..260e6f0 100644 --- a/cmake/StaticAnalysers.cmake +++ b/cmake/StaticAnalysers.cmake @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/setup.py b/setup.py index 1ba9ccb..b74c1f0 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. 
# This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5dc1c6c..e3e3a88 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/src/__init__.py b/src/__init__.py index d857496..258d285 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/src/decoders.py b/src/decoders.py index 7c13680..f80a758 100644 --- a/src/decoders.py +++ b/src/decoders.py @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/src/hashing/FileUtils.cpp b/src/hashing/FileUtils.cpp index 0699994..f521a49 100644 --- a/src/hashing/FileUtils.cpp +++ b/src/hashing/FileUtils.cpp @@ -1,7 +1,7 @@ /* This file is part of the ARG-Needle genealogical inference and analysis software suite. - Copyright (C) 2023 ARG-Needle Developers. + Copyright (C) 2023-2025 ARG-Needle Developers. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/hashing/FileUtils.hpp b/src/hashing/FileUtils.hpp index 3f1d262..cd2f070 100644 --- a/src/hashing/FileUtils.hpp +++ b/src/hashing/FileUtils.hpp @@ -1,7 +1,7 @@ /* This file is part of the ARG-Needle genealogical inference and analysis software suite. - Copyright (C) 2023 ARG-Needle Developers. + Copyright (C) 2023-2025 ARG-Needle Developers. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/hashing/HapData.cpp b/src/hashing/HapData.cpp index 4be4e90..b368605 100644 --- a/src/hashing/HapData.cpp +++ b/src/hashing/HapData.cpp @@ -1,7 +1,7 @@ /* This file is part of the ARG-Needle genealogical inference and analysis software suite. - Copyright (C) 2023 ARG-Needle Developers. + Copyright (C) 2023-2025 ARG-Needle Developers. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/hashing/HapData.hpp b/src/hashing/HapData.hpp index 17d47f4..c5c4741 100644 --- a/src/hashing/HapData.hpp +++ b/src/hashing/HapData.hpp @@ -1,7 +1,7 @@ /* This file is part of the ARG-Needle genealogical inference and analysis software suite. - Copyright (C) 2023 ARG-Needle Developers. + Copyright (C) 2023-2025 ARG-Needle Developers. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/hashing/pybind.cpp b/src/hashing/pybind.cpp index 7e77bde..9dbe58e 100644 --- a/src/hashing/pybind.cpp +++ b/src/hashing/pybind.cpp @@ -1,7 +1,7 @@ /* This file is part of the ARG-Needle genealogical inference and analysis software suite. - Copyright (C) 2023 ARG-Needle Developers. + Copyright (C) 2023-2025 ARG-Needle Developers. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/hashing/utils.hpp b/src/hashing/utils.hpp index 0fb16d7..a47df25 100644 --- a/src/hashing/utils.hpp +++ b/src/hashing/utils.hpp @@ -1,7 +1,7 @@ /* This file is part of the ARG-Needle genealogical inference and analysis software suite. - Copyright (C) 2023 ARG-Needle Developers. + Copyright (C) 2023-2025 ARG-Needle Developers. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/inference.py b/src/inference.py index ae2f7b5..abb56fe 100644 --- a/src/inference.py +++ b/src/inference.py @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/src/scripts/__init__.py b/src/scripts/__init__.py index 4783f5a..638e42b 100644 --- a/src/scripts/__init__.py +++ b/src/scripts/__init__.py @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/src/scripts/infer_args.py b/src/scripts/infer_args.py index 10908b8..6b61f0a 100644 --- a/src/scripts/infer_args.py +++ b/src/scripts/infer_args.py @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. 
# This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/src/scripts/infer_args_advanced.py b/src/scripts/infer_args_advanced.py index 6cb035b..5d73f20 100644 --- a/src/scripts/infer_args_advanced.py +++ b/src/scripts/infer_args_advanced.py @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/src/scripts/prepare_example.py b/src/scripts/prepare_example.py index bc02370..9a0ec90 100644 --- a/src/scripts/prepare_example.py +++ b/src/scripts/prepare_example.py @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/src/simulator.py b/src/simulator.py index 5c5f47f..6ff9a38 100644 --- a/src/simulator.py +++ b/src/simulator.py @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/src/utils.py b/src/utils.py index 102728c..30bd384 100644 --- a/src/utils.py +++ b/src/utils.py @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. 
# This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/test/test_import.py b/test/test_import.py index b73d831..0e07581 100644 --- a/test/test_import.py +++ b/test/test_import.py @@ -1,6 +1,6 @@ # This file is part of the ARG-Needle genealogical inference and # analysis software suite. -# Copyright (C) 2023 ARG-Needle Developers. +# Copyright (C) 2023-2025 ARG-Needle Developers. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by From 313061d468ffea52d5bb82dbb456acea354be234 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Fri, 7 Nov 2025 17:37:18 +0000 Subject: [PATCH 02/33] Migrate python to standard package dir structure --- MANIFEST.in | 1 - arg_needle | 1 - setup-pre-commit.sh | 29 ------------------- src/{ => arg_needle}/__init__.py | 0 src/{ => arg_needle}/decoders.py | 0 src/{ => arg_needle}/inference.py | 0 src/{ => arg_needle}/scripts/README.md | 0 src/{ => arg_needle}/scripts/__init__.py | 0 src/{ => arg_needle}/scripts/infer_args.py | 0 .../scripts/infer_args_advanced.py | 0 .../scripts/prepare_example.py | 0 src/{ => arg_needle}/simulator.py | 0 src/{ => arg_needle}/utils.py | 0 13 files changed, 31 deletions(-) delete mode 100644 MANIFEST.in delete mode 120000 arg_needle delete mode 100755 setup-pre-commit.sh rename src/{ => arg_needle}/__init__.py (100%) rename src/{ => arg_needle}/decoders.py (100%) rename src/{ => arg_needle}/inference.py (100%) rename src/{ => arg_needle}/scripts/README.md (100%) rename src/{ => arg_needle}/scripts/__init__.py (100%) rename src/{ => arg_needle}/scripts/infer_args.py (100%) rename src/{ => arg_needle}/scripts/infer_args_advanced.py (100%) rename src/{ => arg_needle}/scripts/prepare_example.py (100%) rename src/{ => arg_needle}/simulator.py (100%) rename src/{ => arg_needle}/utils.py (100%) diff --git a/MANIFEST.in 
b/MANIFEST.in deleted file mode 100644 index 59dffca..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1 +0,0 @@ -recursive-include src/resources * diff --git a/arg_needle b/arg_needle deleted file mode 120000 index e831038..0000000 --- a/arg_needle +++ /dev/null @@ -1 +0,0 @@ -src \ No newline at end of file diff --git a/setup-pre-commit.sh b/setup-pre-commit.sh deleted file mode 100755 index 95333fe..0000000 --- a/setup-pre-commit.sh +++ /dev/null @@ -1,29 +0,0 @@ -# Set up pre-commit hooks, in this case just clang-format checking -# -# Note: this overwrites existing .git/hooks/pre-commit -# -# See .clang-format for configuration file -# Code modified from https://github.com/KDAB/kdabtv/tree/master/Qt-Widgets-and-more/clang-format -# Based on this tutorial: https://www.youtube.com/watch?v=Cz36YveDI2E - -echo "#!/bin/sh - -python .git/hooks/pre-commit-clang-format.py" > .git/hooks/pre-commit - - -echo "import subprocess -try: - output = str(subprocess.check_output([\"git\", \"clang-format\", \"--diff\"])) -except subprocess.CalledProcessError as e: - print(e) - print(\"Error raised, try installing clang-format.\\n\") - exit(1) - -if \"clang-format did not modify any files\" not in output and \"no modified files to format\" not in output: - print(\"Run git clang-format, add the modified files, then commit.\\n\") - exit(1) -else: - exit(0)" > .git/hooks/pre-commit-clang-format.py - - -chmod +x .git/hooks/pre-commit .git/hooks/pre-commit-clang-format.py diff --git a/src/__init__.py b/src/arg_needle/__init__.py similarity index 100% rename from src/__init__.py rename to src/arg_needle/__init__.py diff --git a/src/decoders.py b/src/arg_needle/decoders.py similarity index 100% rename from src/decoders.py rename to src/arg_needle/decoders.py diff --git a/src/inference.py b/src/arg_needle/inference.py similarity index 100% rename from src/inference.py rename to src/arg_needle/inference.py diff --git a/src/scripts/README.md b/src/arg_needle/scripts/README.md similarity index 
100% rename from src/scripts/README.md rename to src/arg_needle/scripts/README.md diff --git a/src/scripts/__init__.py b/src/arg_needle/scripts/__init__.py similarity index 100% rename from src/scripts/__init__.py rename to src/arg_needle/scripts/__init__.py diff --git a/src/scripts/infer_args.py b/src/arg_needle/scripts/infer_args.py similarity index 100% rename from src/scripts/infer_args.py rename to src/arg_needle/scripts/infer_args.py diff --git a/src/scripts/infer_args_advanced.py b/src/arg_needle/scripts/infer_args_advanced.py similarity index 100% rename from src/scripts/infer_args_advanced.py rename to src/arg_needle/scripts/infer_args_advanced.py diff --git a/src/scripts/prepare_example.py b/src/arg_needle/scripts/prepare_example.py similarity index 100% rename from src/scripts/prepare_example.py rename to src/arg_needle/scripts/prepare_example.py diff --git a/src/simulator.py b/src/arg_needle/simulator.py similarity index 100% rename from src/simulator.py rename to src/arg_needle/simulator.py diff --git a/src/utils.py b/src/arg_needle/utils.py similarity index 100% rename from src/utils.py rename to src/arg_needle/utils.py From b80588ce4b22f1bb6a4b9f2a455b1f6b9c837c70 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Fri, 7 Nov 2025 17:45:36 +0000 Subject: [PATCH 03/33] First draft of modern pyproject.toml --- pyproject.toml | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..47689a1 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,70 @@ +[build-system] +requires = ["scikit-build-core>=0.11.6", "pybind11"] +build-backend = "scikit_build_core.build" + +[project] +dynamic = ["readme"] +name = "arg-needle" +version = "1.0.3" +description = "Ancestral recombination graph (ARG)" +authors = [ + { name = "ARG-Needle Developers" } +] +requires-python = ">=3.9" + +dependencies = [ + 'arg-needle-lib>=1.0.0', + 
'asmc-asmc>=1.3.1', + 'fastcluster', + 'msprime>=1.0.0', + 'numpy>=1.17.0', + 'pandas', + 'psutil', + 'tskit>=0.1.5', +] + +classifiers = [ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", +] + +#[project.optional-dependencies] +#dev = [ +# "pytest", +# "tszip>=0.2.5", +#] +#docs = [ +# "sphinx", +# "sphinx-rtd-theme", +#] + +[project.scripts] +infer_args="arg_needle.scripts.infer_args:main" +infer_args_advanced="arg_needle.scripts.infer_args_advanced:main" +prepare_example="arg_needle.scripts.prepare_example:main" + +[tool.scikit-build] +cmake.args = [ + "-DPYTHON_BINDINGS=ON", + "-DWARNINGS_AS_ERRORS=OFF", + "-DENABLE_TESTING=OFF", +] +build.verbose = true +cmake.build-type = "Release" + +metadata.readme.provider = "scikit_build_core.metadata.fancy_pypi_readme" +[tool.hatch.metadata.hooks.fancy-pypi-readme] +content-type = "text/markdown" + +[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]] +path = "PyPI_README.md" + +[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]] +path = "RELEASE_NOTES.md" From fc5a0d08443a045c568e6c98353227c0bb1f76af Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Thu, 4 Dec 2025 17:50:00 +0000 Subject: [PATCH 04/33] Reposition library, and include resources in the wheel --- pyproject.toml | 9 ++++++++- src/CMakeLists.txt | 1 + src/arg_needle/decoders.py | 6 +++--- .../resources/30-100-2000_CEU.decodingQuantities.gz | Bin src/{ => arg_needle}/resources/CEU.demo | 0 .../resources/ukb_chr2_spectrum.tsv | 0 6 files changed, 12 insertions(+), 4 deletions(-) rename src/{ => arg_needle}/resources/30-100-2000_CEU.decodingQuantities.gz (100%) rename src/{ => 
arg_needle}/resources/CEU.demo (100%) rename src/{ => arg_needle}/resources/ukb_chr2_spectrum.tsv (100%) diff --git a/pyproject.toml b/pyproject.toml index 47689a1..0a671f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["scikit-build-core>=0.11.6", "pybind11"] +requires = ["scikit-build-core>=0.11.6", "pybind11", "setuptools"] build-backend = "scikit_build_core.build" [project] @@ -68,3 +68,10 @@ path = "PyPI_README.md" [[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]] path = "RELEASE_NOTES.md" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools.package-data] +"arg_needle" = ["resources/*"] + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e3e3a88..18a68b9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -47,4 +47,5 @@ if (PYTHON_BINDINGS) set_target_properties(arg_needle_hashing PROPERTIES POSITION_INDEPENDENT_CODE TRUE) pybind11_add_module(arg_needle_hashing_pybind hashing/pybind.cpp) target_link_libraries(arg_needle_hashing_pybind PRIVATE arg_needle_hashing) + install(TARGETS arg_needle_hashing_pybind LIBRARY DESTINATION arg_needle) endif () diff --git a/src/arg_needle/decoders.py b/src/arg_needle/decoders.py index f80a758..3c1336e 100644 --- a/src/arg_needle/decoders.py +++ b/src/arg_needle/decoders.py @@ -30,7 +30,7 @@ # our packages from asmc.asmc import DecodingParams, ASMC -import arg_needle_hashing_pybind as arg_needle_hashing +from .arg_needle_hashing_pybind import HapData from .utils import btime logging.basicConfig( @@ -329,7 +329,7 @@ def make_asmc_decoder( if use_hashing: if verbose: logging.info("Making HapData object") - hasher = arg_needle_hashing.HapData( + hasher = HapData( mode, haps_file_root, hash_word_size, mapfile, fill_sites=False) logging.info("Hashing data is {} by {}".format(hasher.num_haps, hasher.num_sites)) @@ -337,7 +337,7 @@ def make_asmc_decoder( if use_hashing and backup_hash_word_size > 0: if verbose: logging.info("Making backup 
HapData object") - backup_hasher = arg_needle_hashing.HapData( + backup_hasher = HapData( mode, haps_file_root, backup_hash_word_size, map_file_path=mapfile, fill_sites=False) logging.info("Backup hashing data is {} by {}".format(hasher.num_haps, hasher.num_sites)) diff --git a/src/resources/30-100-2000_CEU.decodingQuantities.gz b/src/arg_needle/resources/30-100-2000_CEU.decodingQuantities.gz similarity index 100% rename from src/resources/30-100-2000_CEU.decodingQuantities.gz rename to src/arg_needle/resources/30-100-2000_CEU.decodingQuantities.gz diff --git a/src/resources/CEU.demo b/src/arg_needle/resources/CEU.demo similarity index 100% rename from src/resources/CEU.demo rename to src/arg_needle/resources/CEU.demo diff --git a/src/resources/ukb_chr2_spectrum.tsv b/src/arg_needle/resources/ukb_chr2_spectrum.tsv similarity index 100% rename from src/resources/ukb_chr2_spectrum.tsv rename to src/arg_needle/resources/ukb_chr2_spectrum.tsv From 9c9441554b895447675e789bcf5013e96061a73f Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Thu, 4 Dec 2025 17:50:55 +0000 Subject: [PATCH 05/33] Update deps, include pytest explicitly --- pyproject.toml | 37 +++++++++++++++++-------------------- test/test_import.py | 4 +++- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0a671f1..61da1f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,17 +12,6 @@ authors = [ ] requires-python = ">=3.9" -dependencies = [ - 'arg-needle-lib>=1.0.0', - 'asmc-asmc>=1.3.1', - 'fastcluster', - 'msprime>=1.0.0', - 'numpy>=1.17.0', - 'pandas', - 'psutil', - 'tskit>=0.1.5', -] - classifiers = [ "Development Status :: 4 - Beta", "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", @@ -35,15 +24,21 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] -#[project.optional-dependencies] -#dev = [ -# "pytest", -# "tszip>=0.2.5", -#] -#docs = [ -# "sphinx", -# "sphinx-rtd-theme", -#] +dependencies = [ + 
'arg-needle-lib>=1.2.0', + 'asmc-asmc>=1.4.0', + 'fastcluster', + 'msprime>=1.3.0', + 'numpy>=1.17.0', + 'pandas', + 'psutil', + 'tskit>=0.5.8', +] + +[project.optional-dependencies] +dev = [ + "pytest", +] [project.scripts] infer_args="arg_needle.scripts.infer_args:main" @@ -75,3 +70,5 @@ where = ["src"] [tool.setuptools.package-data] "arg_needle" = ["resources/*"] +[tool.pytest.ini_options] +testpaths = ["test"] diff --git a/test/test_import.py b/test/test_import.py index 0e07581..5c0bc44 100644 --- a/test/test_import.py +++ b/test/test_import.py @@ -20,4 +20,6 @@ add_default_arg_building_arguments, normalize_arg, trim_arg ) -print("Successfully imported all arg_needle components") +def test_import(): + assert True + print("Successfully imported all arg_needle components") From 5c15d4dbd32761e312ee4efb0628871ff8c30746 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Thu, 4 Dec 2025 17:55:16 +0000 Subject: [PATCH 06/33] Update wheel building --- .github/workflows/build-wheels.yml | 85 +++++++++++++++++++----------- 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml index 734cb1d..8b7b8ea 100644 --- a/.github/workflows/build-wheels.yml +++ b/.github/workflows/build-wheels.yml @@ -10,66 +10,87 @@ on: release: types: - published - -env: - CIBW_BUILD: cp38-* cp39-* cp310-* cp311-* - CIBW_SKIP: cp3*-musllinux_* - CIBW_ARCHS_MACOS: x86_64 - CIBW_ARCHS_LINUX: auto64 - CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 - CIBW_BEFORE_ALL_LINUX: yum -y install boost-devel zlib-devel - CIBW_BEFORE_ALL_MACOS: brew install boost zlib gsl - CIBW_BEFORE_BUILD: pip install --upgrade pip setuptools wheel ninja numpy cython - CIBW_ENVIRONMENT_MACOS: CXX="$(brew --prefix llvm@15)/bin/clang++" - MACOSX_DEPLOYMENT_TARGET: 10.15 + workflow_dispatch: jobs: - build_wheels: + build_wheels_cloud: name: Build wheels on ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - os: 
[ubuntu-22.04, macos-12] + include: + - os: ubuntu-24.04 + arch: x86_64 + py-vers-full: cp39-* cp310-* cp311-* cp312-* cp313-* cp314-* + py-vers-pr: cp311-* + before-all: dnf -y install boost-devel zlib + extra-env: "" + mdt: "" + - os: ubuntu-24.04-arm + arch: aarch64 + py-vers-full: cp39-* cp310-* cp311-* cp312-* cp313-* cp314-* + py-vers-pr: cp312-* + before-all: dnf -y install boost-devel zlib + extra-env: "" + mdt: "" + - os: macos-15-intel + arch: x86_64 + py-vers-full: cp39-* cp310-* cp311-* cp312-* cp313-* cp314-* + py-vers-pr: cp313-* + before-all: brew install boost zlib + extra-env: CC=clang CXX=clang++ + mdt: 15 + - os: macos-14 + arch: arm64 + py-vers-full: cp39-* cp310-* cp311-* cp312-* cp313-* cp314-* + py-vers-pr: cp314-* + before-all: brew install boost zlib + extra-env: CC=clang CXX=clang++ + mdt: 14 + + env: + CIBW_BUILD: ${{ github.event_name != 'pull_request' && matrix.py-vers-full || matrix.py-vers-pr }} + CIBW_SKIP: cp3*-musllinux_* + CIBW_ARCHS: ${{ matrix.arch }} + CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28 + CIBW_BEFORE_ALL: ${{ matrix.before-all }} + CIBW_BEFORE_BUILD: pip install --upgrade pip setuptools wheel ninja numpy cython + CIBW_ENVIRONMENT: ${{ matrix.extra-env }} + MACOSX_DEPLOYMENT_TARGET: ${{ matrix.mdt }} steps: - name: checkout repo & submodules - uses: actions/checkout@v3 + uses: actions/checkout@v5 with: + submodules: true fetch-depth: 0 - - name: Set up Python - uses: actions/setup-python@v3 - - - name: Install cibuildwheel - run: python -m pip install cibuildwheel==2.12.0 - - name: Build wheels - run: python -m cibuildwheel --output-dir wheelhouse + uses: pypa/cibuildwheel@v3.2.0 - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: - name: wheels + name: wheels-cloud-${{ matrix.os }} path: ./wheelhouse/*.whl retention-days: 1 upload_all: name: Upload to PyPI - needs: build_wheels + needs: [build_wheels_cloud] runs-on: ubuntu-latest - if: github.event_name == 'release' && github.event.action == 
'published' + if: ${{ github.event_name == 'release' && github.event.action == 'published' }} steps: - - uses: actions/setup-python@v3 - - - name: Download wheels - uses: actions/download-artifact@v3 + - name: Download wheels from cloud runners + uses: actions/download-artifact@v5 with: - name: wheels + pattern: wheels-cloud-* + merge-multiple: true path: wheels - - uses: pypa/gh-action-pypi-publish@v1.6.4 + - uses: pypa/gh-action-pypi-publish@v1.13.0 with: user: __token__ password: ${{ secrets.PYPI_TOKEN }} From c79a4607fb7d94fff70025b738dea64504885774 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Thu, 4 Dec 2025 18:02:46 +0000 Subject: [PATCH 07/33] Upgrade python unit testing workflow --- .github/workflows/pip-install-macos.yml | 48 ---------------------- .github/workflows/pip-install-ubuntu.yml | 49 ---------------------- .github/workflows/python_tests.yml | 52 ++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 97 deletions(-) delete mode 100644 .github/workflows/pip-install-macos.yml delete mode 100644 .github/workflows/pip-install-ubuntu.yml create mode 100644 .github/workflows/python_tests.yml diff --git a/.github/workflows/pip-install-macos.yml b/.github/workflows/pip-install-macos.yml deleted file mode 100644 index 897a57b..0000000 --- a/.github/workflows/pip-install-macos.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: pip install on macOS - -on: - push: - branches: - - main - pull_request: - branches: - - '**' - -jobs: - - pip-install-and-import: - name: pip install on macOS - runs-on: macos-11 - - steps: - - name: checkout arg_needle - uses: actions/checkout@v3 - - - uses: actions/setup-python@v3 - with: - python-version: 3.x - - - name: install system packages - run: | - brew install boost zlib gsl - - - name: install python pip dependencies - run: | - python --version - python -m pip install --upgrade pip setuptools wheel - python -m pip install cmake ninja - - - name: install Palamara python dependencies - run: | - python --version - python 
-m pip install asmc-asmc arg-needle-lib - - - name: install arg_needle - run: | - python --version - python -m pip install . - - - name: test import works as expected - run: | - python --version - python test/test_import.py diff --git a/.github/workflows/pip-install-ubuntu.yml b/.github/workflows/pip-install-ubuntu.yml deleted file mode 100644 index 1e2f27a..0000000 --- a/.github/workflows/pip-install-ubuntu.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: pip install on Ubuntu - -on: - push: - branches: - - main - pull_request: - branches: - - '**' - -jobs: - - pip-install-and-import: - name: pip install on Ubuntu - runs-on: ubuntu-22.04 - - steps: - - name: checkout arg_needle - uses: actions/checkout@v3 - - - uses: actions/setup-python@v3 - with: - python-version: 3.x - - - name: install system packages - run: | - sudo apt -y update - sudo apt -y install libboost-iostreams-dev zlib1g-dev - - - name: install python pip dependencies - run: | - python --version - python -m pip install --upgrade pip setuptools wheel - python -m pip install cmake ninja - - - name: install Palamara python dependencies - run: | - python --version - python -m pip install asmc-asmc arg-needle-lib - - - name: install arg_needle - run: | - python --version - python -m pip install . 
- - - name: test import works as expected - run: | - python --version - python test/test_import.py diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml new file mode 100644 index 0000000..19eef17 --- /dev/null +++ b/.github/workflows/python_tests.yml @@ -0,0 +1,52 @@ +name: Python unit tests + +on: + push: + branches: + - main + pull_request: + branches: + - '**' + workflow_dispatch: + +jobs: + + build-and-test: + name: Python unit tests + runs-on: ${{ matrix.os }} + timeout-minutes: 10 + + strategy: + matrix: + include: + - os: ubuntu-24.04 + py: "3.14" + sys_install: "sudo apt -y update && sudo apt -y install libboost-iostreams-dev" + - os: macos-15 + py: "3.10" + sys_install: "brew install boost" + + steps: + - name: checkout repo & submodules + uses: actions/checkout@v5 + with: + submodules: true + fetch-depth: 0 + + - name: install deps + run: | + ${{ matrix.sys_install }} + + - name: Set up Python ${{ matrix.py }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.py }} + + - name: install python bindings + run: | + python -m pip install --upgrade pip setuptools wheel + python -m pip install . 
+ + - name: python unit tests + run: | + python -m pytest From 2a792f6df4903db7d172378e6ccf569b59d52af4 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Thu, 4 Dec 2025 18:30:22 +0000 Subject: [PATCH 08/33] Upgrade building the python bindings --- .github/workflows/python_tests.yml | 1 + CMakeLists.txt | 40 ++++++++++++++++++++---------- pyproject.toml | 21 ++++++++++------ src/CMakeLists.txt | 3 +-- 4 files changed, 43 insertions(+), 22 deletions(-) diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 19eef17..6ff8573 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -17,6 +17,7 @@ jobs: timeout-minutes: 10 strategy: + fail-fast: false matrix: include: - os: ubuntu-24.04 diff --git a/CMakeLists.txt b/CMakeLists.txt index 123832b..a8ab4b2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,11 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) +option(ARG_NEEDLE_PYTHON_BINDINGS "Whether to build the python bindings" OFF) +if (ARG_NEEDLE_PYTHON_BINDINGS) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) +endif () + # Project settings including default build type include(cmake/ProjectSettings.cmake) @@ -44,21 +49,30 @@ option(BUILD_SHARED_LIBS "Enable compilation of shared libraries" OFF) set(arg_needle_testdata_dir ${CMAKE_CURRENT_SOURCE_DIR}/testdata) add_definitions(-DARG_NEEDLE_TESTDATA_DIR=\"${arg_needle_testdata_dir}\") -option(PYTHON_BINDINGS "Whether to build the python bindings" ON) -if (PYTHON_BINDINGS) - include(FetchContent) - FetchContent_Declare( - pybind11 - GIT_REPOSITORY https://github.com/pybind/pybind11 - GIT_TAG 0bd8896a4010f2d91b2340570c24fa08606ec406 # Version 2.10.3 - ) - FetchContent_GetProperties(pybind11) - if (NOT pybind11_POPULATED) - FetchContent_Populate(pybind11) - add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR}) - endif () + + + + +# Python bindings +if (ARG_NEEDLE_PYTHON_BINDINGS) + 
option(ARG_NEEDLE_BUILDING_FROM_PYPROJECT "Are we building from pyproject.toml (pip install)?" OFF) + + if(ARG_NEEDLE_BUILDING_FROM_PYPROJECT) + message(STATUS "Using pybind11 from pyproject.toml build environment") + find_package(pybind11 REQUIRED) + else() + message(STATUS "Using FetchContent to get pybind11") + include(FetchContent) + FetchContent_Declare( + pybind11 + GIT_REPOSITORY https://github.com/pybind/pybind11.git + GIT_TAG f5fbe867d2d26e4a0a9177a51f6e568868ad3dc8 # Version 3.0.1 + ) + FetchContent_MakeAvailable(pybind11) + endif() endif () + add_subdirectory(src) #add_subdirectory(example) diff --git a/pyproject.toml b/pyproject.toml index 61da1f0..ab0915c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,9 @@ [build-system] -requires = ["scikit-build-core>=0.11.6", "pybind11", "setuptools"] +requires = [ + "scikit-build-core>=0.11.6", + "pybind11==3.0.1", + "setuptools" +] build-backend = "scikit_build_core.build" [project] @@ -46,15 +50,18 @@ infer_args_advanced="arg_needle.scripts.infer_args_advanced:main" prepare_example="arg_needle.scripts.prepare_example:main" [tool.scikit-build] -cmake.args = [ - "-DPYTHON_BINDINGS=ON", - "-DWARNINGS_AS_ERRORS=OFF", - "-DENABLE_TESTING=OFF", -] +minimum-version = "build-system.requires" build.verbose = true cmake.build-type = "Release" - +build.targets = ["arg_needle_hashing_pybind"] +wheel.packages = ["src/arg_needle"] metadata.readme.provider = "scikit_build_core.metadata.fancy_pypi_readme" + +[tool.scikit-build.cmake.define] +ARG_NEEDLE_TESTING = "OFF" +ARG_NEEDLE_PYTHON_BINDINGS = "ON" +ARG_NEEDLE_BUILDING_FROM_PYPROJECT = "ON" + [tool.hatch.metadata.hooks.fancy-pypi-readme] content-type = "text/markdown" diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 18a68b9..7482dad 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -43,8 +43,7 @@ set_target_properties(arg_needle_hashing PROPERTIES PUBLIC_HEADER "${arg_needle_ target_link_libraries(arg_needle_hashing PRIVATE Boost::headers 
Boost::iostreams) target_link_libraries(arg_needle_hashing PRIVATE project_warnings) -if (PYTHON_BINDINGS) - set_target_properties(arg_needle_hashing PROPERTIES POSITION_INDEPENDENT_CODE TRUE) +if (ARG_NEEDLE_PYTHON_BINDINGS) pybind11_add_module(arg_needle_hashing_pybind hashing/pybind.cpp) target_link_libraries(arg_needle_hashing_pybind PRIVATE arg_needle_hashing) install(TARGETS arg_needle_hashing_pybind LIBRARY DESTINATION arg_needle) From d1d2dbfb177173190976d274b7b4ca05d542bd52 Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Thu, 4 Dec 2025 22:03:05 +0000 Subject: [PATCH 09/33] Remove old setup.py --- setup.py | 165 ------------------------------------------------------- 1 file changed, 165 deletions(-) delete mode 100644 setup.py diff --git a/setup.py b/setup.py deleted file mode 100644 index b74c1f0..0000000 --- a/setup.py +++ /dev/null @@ -1,165 +0,0 @@ -# This file is part of the ARG-Needle genealogical inference and -# analysis software suite. -# Copyright (C) 2023-2025 ARG-Needle Developers. - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . 
- -# Based on https://github.com/pybind/cmake_example - -import os -import sys -import subprocess - -from setuptools import setup, Extension -from setuptools.command.build_ext import build_ext - -# Convert distutils Windows platform specifiers to CMake -A arguments -PLAT_TO_CMAKE = { - "win32": "Win32", - "win-amd64": "x64", - "win-arm32": "ARM", - "win-arm64": "ARM64", -} - - -# A CMakeExtension needs a sourcedir instead of a file list. -# The name must be the _single_ output extension from the CMake build. -# If you need multiple extensions, see scikit-build. -class CMakeExtension(Extension): - def __init__(self, name, sourcedir=""): - Extension.__init__(self, name, sources=[]) - self.sourcedir = os.path.abspath(sourcedir) - - -class CMakeBuild(build_ext): - - def build_extension(self, ext): - extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) - - # required for auto-detection of auxiliary "native" libs - if not extdir.endswith(os.path.sep): - extdir += os.path.sep - - cfg = "Debug" if self.debug else "Release" - - # CMake lets you override the generator - we need to check this. - # Can be set with Conda-Build, for example. - cmake_generator = os.environ.get("CMAKE_GENERATOR", "") - - # Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON - # EXAMPLE_VERSION_INFO shows you how to pass a value into the C++ code - # from Python. - cmake_args = [ - f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}", - f"-DPYTHON_EXECUTABLE={sys.executable}", - f"-DCMAKE_BUILD_TYPE={cfg}", - f"-DWARNINGS_AS_ERRORS=OFF", - f"-DENABLE_TESTING=OFF", - f"-DMAKE_DOCS=OFF", - ] - build_args = [] - - if self.compiler.compiler_type != "msvc": - # Using Ninja-build since it a) is available as a wheel and b) - # multithreads automatically. MSVC would require all variables be - # exported for Ninja to pick it up, which is a little tricky to do. - # Users can override the generator with CMAKE_GENERATOR in CMake - # 3.15+. 
- if not cmake_generator: - cmake_args += ["-GNinja"] - - else: - - # Single config generators are handled "normally" - single_config = any(x in cmake_generator for x in {"NMake", "Ninja"}) - - # CMake allows an arch-in-generator style for backward compatibility - contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"}) - - # Specify the arch if using MSVC generator, but only if it doesn't - # contain a backward-compatibility arch spec already in the - # generator name. - if not single_config and not contains_arch: - cmake_args += ["-A", PLAT_TO_CMAKE[self.plat_name]] - - # Multi-config generators have a different way to specify configs - if not single_config: - cmake_args += [ - "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), extdir) - ] - build_args += ["--config", cfg] - - # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level - # across all generators. - if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ: - # self.parallel is a Python 3 only way to set parallel jobs by hand - # using -j in the build_ext call, not supported by pip or PyPA-build. - if hasattr(self, "parallel") and self.parallel: - # CMake 3.12+ only. 
- build_args += ["-j{}".format(self.parallel)] - - if not os.path.exists(self.build_temp): - os.makedirs(self.build_temp) - - subprocess.check_call( - ["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp - ) - subprocess.check_call( - ["cmake", "--build", "."] + build_args, cwd=self.build_temp - ) - - -with open('PyPI_README.md', encoding='utf-8') as f: - long_description = f.read() - -with open('RELEASE_NOTES.md', encoding='utf-8') as f: - release_notes = f.read() - -setup( - name='arg-needle', - version='1.0.3', - author='PalamaraLab (https://palamaralab.github.io/)', - url='https://github.com/PalamaraLab/arg_needle/', - install_requires=[ - 'arg-needle-lib>=1.0.0', - 'asmc-asmc>=1.3.1', - 'fastcluster', - 'msprime>=1.0.0', - 'numpy>=1.17.0', - 'pandas', - 'psutil', - 'tskit>=0.1.5', - ], - extras_require={ - 'dev': [ - 'pytest', - ], - }, - description='Ancestral recombination graph (ARG)', - packages=['arg_needle', 'arg_needle.scripts'], - long_description='\n'.join([long_description, release_notes]), - long_description_content_type='text/markdown', - ext_modules=[CMakeExtension('arg_needle')], - cmdclass=dict(build_ext=CMakeBuild), - entry_points = { - 'console_scripts': [ - 'arg_needle=arg_needle.scripts.infer_args:main', - 'arg_needle_multistep=arg_needle.scripts.infer_args_advanced:main', - 'arg_needle_prepare_example=arg_needle.scripts.prepare_example:main', - ], - }, - include_package_data=True, - package_data={'': ['resources/*']}, - zip_safe=False, -) From 89f70569fec6bfde38d72a3760b22b0334451d29 Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Thu, 4 Dec 2025 22:11:20 +0000 Subject: [PATCH 10/33] Bump tskit --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ab0915c..748718c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ 'numpy>=1.17.0', 'pandas', 'psutil', - 'tskit>=0.5.8', + 'tskit>=1.0.0', 
] [project.optional-dependencies] From 3655b82af37f1f36606b4d37a412ed84bed1825d Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Thu, 4 Dec 2025 22:11:29 +0000 Subject: [PATCH 11/33] Build all wheels on PR --- .github/workflows/build-wheels.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml index 8b7b8ea..39c0019 100644 --- a/.github/workflows/build-wheels.yml +++ b/.github/workflows/build-wheels.yml @@ -23,28 +23,28 @@ jobs: - os: ubuntu-24.04 arch: x86_64 py-vers-full: cp39-* cp310-* cp311-* cp312-* cp313-* cp314-* - py-vers-pr: cp311-* + py-vers-pr: cp39-* cp310-* cp311-* cp312-* cp313-* cp314-* before-all: dnf -y install boost-devel zlib extra-env: "" mdt: "" - os: ubuntu-24.04-arm arch: aarch64 py-vers-full: cp39-* cp310-* cp311-* cp312-* cp313-* cp314-* - py-vers-pr: cp312-* + py-vers-pr: cp39-* cp310-* cp311-* cp312-* cp313-* cp314-* before-all: dnf -y install boost-devel zlib extra-env: "" mdt: "" - os: macos-15-intel arch: x86_64 py-vers-full: cp39-* cp310-* cp311-* cp312-* cp313-* cp314-* - py-vers-pr: cp313-* + py-vers-pr: cp39-* cp310-* cp311-* cp312-* cp313-* cp314-* before-all: brew install boost zlib extra-env: CC=clang CXX=clang++ mdt: 15 - os: macos-14 arch: arm64 py-vers-full: cp39-* cp310-* cp311-* cp312-* cp313-* cp314-* - py-vers-pr: cp314-* + py-vers-pr: cp39-* cp310-* cp311-* cp312-* cp313-* cp314-* before-all: brew install boost zlib extra-env: CC=clang CXX=clang++ mdt: 14 From ec912b4cef6adb71d078ee68fff68aa112277fc6 Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Thu, 4 Dec 2025 22:11:49 +0000 Subject: [PATCH 12/33] Run tests with python < 3.14 because no wheels yet for msprime or tskit --- .github/workflows/python_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_tests.yml 
b/.github/workflows/python_tests.yml index 6ff8573..96f437e 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -21,7 +21,7 @@ jobs: matrix: include: - os: ubuntu-24.04 - py: "3.14" + py: "3.13" sys_install: "sudo apt -y update && sudo apt -y install libboost-iostreams-dev" - os: macos-15 py: "3.10" From 1c06483bf0fb10bc2936b824ca8e8b58e22192da Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Thu, 4 Dec 2025 22:15:33 +0000 Subject: [PATCH 13/33] Install dev deps so pytest is available --- .github/workflows/python_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 96f437e..c3d8916 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -46,7 +46,7 @@ jobs: - name: install python bindings run: | python -m pip install --upgrade pip setuptools wheel - python -m pip install . 
+ python -m pip install .[dev] - name: python unit tests run: | From bca44f53e186c6f38f7ed8e3efb8d57191e5c02e Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Thu, 4 Dec 2025 22:30:12 +0000 Subject: [PATCH 14/33] Run selection of example workflows on included data --- .github/workflows/python_examples.yml | 77 +++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 .github/workflows/python_examples.yml diff --git a/.github/workflows/python_examples.yml b/.github/workflows/python_examples.yml new file mode 100644 index 0000000..12c0b42 --- /dev/null +++ b/.github/workflows/python_examples.yml @@ -0,0 +1,77 @@ +name: Python examples + +on: + push: + branches: + - main + pull_request: + branches: + - '**' + workflow_dispatch: + +jobs: + + build-and-test: + name: Python examples + runs-on: ${{ matrix.os }} + timeout-minutes: 10 + + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-24.04 + py: "3.10" + sys_install: "sudo apt -y update && sudo apt -y install libboost-iostreams-dev" + - os: macos-15 + py: "3.13" + sys_install: "brew install boost" + + steps: + - name: checkout repo & submodules + uses: actions/checkout@v5 + with: + submodules: true + fetch-depth: 0 + + - name: install deps + run: | + ${{ matrix.sys_install }} + + - name: Set up Python ${{ matrix.py }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.py }} + + - name: install python bindings + run: | + python -m pip install --upgrade pip setuptools wheel + python -m pip install . 
+ + - name: prepare example + run: | + prepare_example + + - name: regular ARG-Needle + run: | + infer_args + + - name: ARG-Needle with ASMC-clust + run: | + infer_args --asmc_clust 1 + + - name: ARG-Needle without ARG normalization + run: | + infer_args --normalize 0 + + - name: ARG-Needle (sequence mode) + run: | + prepare_example --mode sequence --length 5e5 + infer_args --mode sequence + + - name: ARG-Needle advanced mode (multistep) + run: | + prepare_example + infer_args_advanced --step 1 --num_snp_samples 200 + infer_args_advanced --step 2 + infer_args_advanced --step 3 --trim_num_snps 0,50 From 407aec278f3209741713593b34f53ad4324c4e0e Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Thu, 4 Dec 2025 22:43:43 +0000 Subject: [PATCH 15/33] Update pypi readme and release notes --- PyPI_README.md | 10 ++++++++-- RELEASE_NOTES.md | 5 +++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/PyPI_README.md b/PyPI_README.md index a4266ef..a686b6b 100644 --- a/PyPI_README.md +++ b/PyPI_README.md @@ -1,6 +1,14 @@ # arg-needle This repository contains arg-needle, which implements the ARG inference algorithms ARG-Needle and ASMC-clust. +Prebuilt CPython wheels are available for Linux (compatible with glibc ≥ 2.28) and macOS (built on macOS 15 for x86_64 and macOS 14 for arm64). + +| Platform \ CPython | ≤3.8 | 3.9 | 3.10 | 3.11 | 3.12 | 3.13 | 3.14 | +|-----------------------------|------|-----|------|------|------|------|------| +| Linux x86_64 | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| Linux aarch64 | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| macOS Intel (x86_64) | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| macOS Apple Silicon (arm64) | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ## Quickstart @@ -12,8 +20,6 @@ The Python module can be installed with: pip install arg-needle ``` -This Python module is currently available on Linux and macOS. 
- ### Documentation Please see the [ARG-Needle manual](https://palamaralab.github.io/software/argneedle/) for all usage instructions and documentation. diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 3d4f9a5..f2dad19 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,5 +1,10 @@ # arg-needle Release Notes +## v1.1.0 (2025-12-04) + +- Public source code release: https://github.com/PalamaraLab/arg-needle +- Python wheels are now available for Linux and macOS on both x86_64 and arm64/AArch64 architectures, for CPython versions 3.9 to 3.14 inclusive. + ## v1.0.3 (2023-08-30) - ASMC decoders can now take a genetic map file with a specified non-default location. From dc7811f43f1783e72980afb84e98ff44c15da192 Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Thu, 4 Dec 2025 22:45:39 +0000 Subject: [PATCH 16/33] Bump version number in preparation for release --- CMakeLists.txt | 2 +- README.md | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a8ab4b2..ce80a6f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ cmake_minimum_required(VERSION 3.16) message(STATUS "Using CMake version ${CMAKE_VERSION}") -project(arg_needle LANGUAGES CXX VERSION 1.0.3) +project(arg_needle LANGUAGES CXX VERSION 1.1.0) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) diff --git a/README.md b/README.md index 8225c8a..46434d8 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ Please see the [ARG-Needle manual](https://palamaralab.github.io/software/argnee ## For developers: making a release -- Bump the version number in [setup.py](setup.py) and [CMakeLists.txt](CMakeLists.txt) +- Bump the version number in [pyproject.toml](pyproject.toml) and [CMakeLists.txt](CMakeLists.txt) - Update [RELEASE_NOTES.md](RELEASE_NOTES.md) - Push changes and check that all [GitHub workflows](https://github.com/PalamaraLab/arg_needle/actions) 
pass - Tag the commit in Git using syntax `vX.Y.Z` diff --git a/pyproject.toml b/pyproject.toml index 748718c..1e7adbd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ build-backend = "scikit_build_core.build" [project] dynamic = ["readme"] name = "arg-needle" -version = "1.0.3" +version = "1.1.0" description = "Ancestral recombination graph (ARG)" authors = [ { name = "ARG-Needle Developers" } From 02d1706a4e25db13ede2aab84c37c1551192e322 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Fri, 5 Dec 2025 12:19:59 +0000 Subject: [PATCH 17/33] Add C++ testing infrastructure --- .github/workflows/cpp_tests.yml | 62 +++++++++++++++++++++++++++++++++ CMakeLists.txt | 21 +++++++---- src/hashing/FileUtils.cpp | 1 + test/cpp/CMakeLists.txt | 32 +++++++++++++++++ test/cpp/test_file_utils.cpp | 59 +++++++++++++++++++++++++++++++ 5 files changed, 169 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/cpp_tests.yml create mode 100644 test/cpp/CMakeLists.txt create mode 100644 test/cpp/test_file_utils.cpp diff --git a/.github/workflows/cpp_tests.yml b/.github/workflows/cpp_tests.yml new file mode 100644 index 0000000..530f6f4 --- /dev/null +++ b/.github/workflows/cpp_tests.yml @@ -0,0 +1,62 @@ +name: C++ unit tests + +on: + push: + branches: + - main + pull_request: + branches: + - '**' + workflow_dispatch: + +jobs: + + build-and-test: + name: C++ unit tests + runs-on: ${{ matrix.os }} + timeout-minutes: 10 + + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-24.04 + cxx: "clang++-18" + sys_install: "sudo apt -y update && sudo apt -y install libboost-iostreams-dev" + - os: ubuntu-24.04-arm + cxx: "g++-12" + sys_install: "sudo apt -y update && sudo apt -y install libboost-iostreams-dev" + - os: macos-15 + cxx: "" + sys_install: "brew install boost" + - os: macos-15 + cxx: "$(brew --prefix llvm@18)/bin/clang" + sys_install: "brew install boost" + + steps: + - name: checkout repo & submodules + uses: actions/checkout@v5 + with: + 
submodules: true + fetch-depth: 0 + + - name: install deps + run: | + ${{ matrix.sys_install }} + + - name: make build dir + run: | + mkdir build + + - name: cmake configure + run: | + cmake .. + working-directory: build + + - name: cmake build + run: | + cmake --build . --target cpp_tests --parallel 4 + + - name: ctest + run: | + ctest diff --git a/CMakeLists.txt b/CMakeLists.txt index a8ab4b2..ee4bedb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,10 +74,19 @@ endif () add_subdirectory(src) -#add_subdirectory(example) -#option(ENABLE_TESTING "Enable Test Builds" ON) -#if(ENABLE_TESTING) -# enable_testing() -# add_subdirectory(test) -#endif() +option(ARG_NEEDLE_TESTING "Enable ARG Needle unit testing" ON) +if(ARG_NEEDLE_TESTING) + Include(FetchContent) + FetchContent_Declare( + Catch2 + GIT_REPOSITORY https://github.com/catchorg/Catch2.git + GIT_TAG b3fb4b9feafcd8d91c5cb510a4775143fdbef02f # Version 3.11.0 + ) + + FetchContent_MakeAvailable(Catch2) + list(APPEND CMAKE_MODULE_PATH ${catch2_SOURCE_DIR}/extras) + include(CTest) + include(Catch) + add_subdirectory(test/cpp) +endif() diff --git a/src/hashing/FileUtils.cpp b/src/hashing/FileUtils.cpp index f521a49..0e8a3f0 100644 --- a/src/hashing/FileUtils.cpp +++ b/src/hashing/FileUtils.cpp @@ -56,6 +56,7 @@ int AutoGzIfstream::lineCount(const std::string& file) { string line; while (getline(fin, line)) ctr++; + fin.close(); return ctr; } diff --git a/test/cpp/CMakeLists.txt b/test/cpp/CMakeLists.txt new file mode 100644 index 0000000..de59923 --- /dev/null +++ b/test/cpp/CMakeLists.txt @@ -0,0 +1,32 @@ +# This file is part of the ARG-Needle genealogical inference and +# analysis software suite. +# Copyright (C) 2023-2025 ARG-Needle Developers. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +set( + test_files + test_file_utils.cpp +) + +add_executable(cpp_tests ${test_files}) +target_link_libraries(cpp_tests PRIVATE arg_needle_hashing Catch2::Catch2WithMain) + +catch_discover_tests(cpp_tests) + +set(ARG_NEEDLE_TEST_DIR="${CMAKE_CURRENT_SOURCE_DIR}") +set(ARG_NEEDLE_RESOURCES_DIR "${CMAKE_SOURCE_DIR}/src/arg_needle/resources") + +add_definitions(-DARG_NEEDLE_TEST_DIR=\"${ARG_NEEDLE_TEST_DIR}\") +add_definitions(-DARG_NEEDLE_RESOURCES_DIR=\"${ARG_NEEDLE_RESOURCES_DIR}\") diff --git a/test/cpp/test_file_utils.cpp b/test/cpp/test_file_utils.cpp new file mode 100644 index 0000000..0a846e9 --- /dev/null +++ b/test/cpp/test_file_utils.cpp @@ -0,0 +1,59 @@ +/* + This file is part of the ARG-Needle genealogical inference and + analysis software suite. + Copyright (C) 2023-2025 ARG-Needle Developers. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include <catch2/catch_test_macros.hpp> + +#include "FileUtils.hpp" + + +TEST_CASE( "FileUtils::fileExists", "[test_file_utils]" ) { + REQUIRE(FileUtils::fileExists(ARG_NEEDLE_TEST_DIR "/CMakeLists.txt") == true); + REQUIRE(FileUtils::fileExists(ARG_NEEDLE_TEST_DIR "/file_that_does_not_exist") == false); +} + +TEST_CASE( "FileUtils::AutoGzIfstream", "[test_file_utils]") +{ + SECTION("open and close gz file") + { + REQUIRE(FileUtils::fileExists(ARG_NEEDLE_RESOURCES_DIR "/30-100-2000_CEU.decodingQuantities.gz") == true); + FileUtils::AutoGzIfstream gz_file; + gz_file.openOrExit(ARG_NEEDLE_RESOURCES_DIR "/30-100-2000_CEU.decodingQuantities.gz"); + gz_file.close(); + } + + SECTION("count lines in file") + { + REQUIRE(FileUtils::fileExists(ARG_NEEDLE_RESOURCES_DIR "/30-100-2000_CEU.decodingQuantities.gz") == true); + REQUIRE(FileUtils::AutoGzIfstream::lineCount(ARG_NEEDLE_RESOURCES_DIR "/30-100-2000_CEU.decodingQuantities.gz") == 35245); + } + + SECTION("extract line from a file") + { + REQUIRE(FileUtils::fileExists(ARG_NEEDLE_RESOURCES_DIR "/30-100-2000_CEU.decodingQuantities.gz") == true); + + FileUtils::AutoGzIfstream gz_file; + gz_file.openOrExit(ARG_NEEDLE_RESOURCES_DIR "/30-100-2000_CEU.decodingQuantities.gz"); + + std::string first_line; + FileUtils::getline(gz_file, first_line); + gz_file.close(); + + REQUIRE(first_line == "TransitionType"); + } +} \ No newline at end of file From cf85e307ac93583372d5b17383202d66ffea19de Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Fri, 5 Dec 2025 12:21:16 +0000 Subject: [PATCH 18/33] C++ test workflow: correct working directory --- .github/workflows/cpp_tests.yml | 2 ++ CMakeLists.txt | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cpp_tests.yml b/.github/workflows/cpp_tests.yml index 530f6f4..35b58c2 100644 --- a/.github/workflows/cpp_tests.yml +++ b/.github/workflows/cpp_tests.yml @@ -56,7 +56,9 @@ jobs: - name: cmake build run: | cmake --build . 
--target cpp_tests --parallel 4 + working-directory: build - name: ctest run: | ctest + working-directory: build diff --git a/CMakeLists.txt b/CMakeLists.txt index ee4bedb..99af2d6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,9 +50,6 @@ set(arg_needle_testdata_dir ${CMAKE_CURRENT_SOURCE_DIR}/testdata) add_definitions(-DARG_NEEDLE_TESTDATA_DIR=\"${arg_needle_testdata_dir}\") - - - # Python bindings if (ARG_NEEDLE_PYTHON_BINDINGS) option(ARG_NEEDLE_BUILDING_FROM_PYPROJECT "Are we building from pyproject.toml (pip install)?" OFF) From 210e3239eba4d503430927aba9aac67d83901080 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Fri, 5 Dec 2025 12:27:15 +0000 Subject: [PATCH 19/33] Fix defintiion --- .github/workflows/cpp_tests.yml | 2 +- test/cpp/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cpp_tests.yml b/.github/workflows/cpp_tests.yml index 35b58c2..0e03a4b 100644 --- a/.github/workflows/cpp_tests.yml +++ b/.github/workflows/cpp_tests.yml @@ -60,5 +60,5 @@ jobs: - name: ctest run: | - ctest + ctest --output-on-failure working-directory: build diff --git a/test/cpp/CMakeLists.txt b/test/cpp/CMakeLists.txt index de59923..409bcd5 100644 --- a/test/cpp/CMakeLists.txt +++ b/test/cpp/CMakeLists.txt @@ -25,7 +25,7 @@ target_link_libraries(cpp_tests PRIVATE arg_needle_hashing Catch2::Catch2WithMai catch_discover_tests(cpp_tests) -set(ARG_NEEDLE_TEST_DIR="${CMAKE_CURRENT_SOURCE_DIR}") +set(ARG_NEEDLE_TEST_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(ARG_NEEDLE_RESOURCES_DIR "${CMAKE_SOURCE_DIR}/src/arg_needle/resources") add_definitions(-DARG_NEEDLE_TEST_DIR=\"${ARG_NEEDLE_TEST_DIR}\") From 9ce80facf4d05c1568cf9a60759d83f9c2489fde Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Fri, 5 Dec 2025 14:58:08 +0000 Subject: [PATCH 20/33] Hide boost headers in implementation --- src/hashing/FileUtils.cpp | 41 ++++++++++++++++++++++++++------------- src/hashing/FileUtils.hpp | 29 ++++++++++++++------------- 2 files 
changed, 42 insertions(+), 28 deletions(-) diff --git a/src/hashing/FileUtils.cpp b/src/hashing/FileUtils.cpp index 0e8a3f0..aeb591b 100644 --- a/src/hashing/FileUtils.cpp +++ b/src/hashing/FileUtils.cpp @@ -44,12 +44,20 @@ using std::endl; using std::string; using std::vector; -bool fileExists(const std::string& name) { - std::ifstream f(name.c_str()); +struct AutoGzIfstream::Impl { + boost::iostreams::filtering_istream boost_in; + std::ifstream fin; +}; + +AutoGzIfstream::AutoGzIfstream() : pimpl(std::make_unique<Impl>()) {} +AutoGzIfstream::~AutoGzIfstream() noexcept = default; + +bool fileExists(const std::filesystem::path& file) { + std::ifstream f(file.c_str()); return f.good(); } -int AutoGzIfstream::lineCount(const std::string& file) { +int AutoGzIfstream::lineCount(const std::filesystem::path& file) { AutoGzIfstream fin; fin.openOrExit(file); int ctr = 0; @@ -60,29 +68,34 @@ int AutoGzIfstream::lineCount(const std::string& file) { return ctr; } -void AutoGzIfstream::openOrExit(const std::string& file, std::ios_base::openmode mode) { - fin.open(file.c_str(), mode); - if (!fin) { +void AutoGzIfstream::openOrExit(const std::filesystem::path& file, std::ios_base::openmode mode) { + pimpl->fin.open(file.c_str(), mode); + if (!pimpl->fin) { cerr << "ERROR: Unable to open file: " << file << endl; exit(1); } - if ((int) file.length() > 3 && file.substr(file.length() - 3) == ".gz") - boost_in.push(boost::iostreams::gzip_decompressor()); - boost_in.push(fin); + if ((int) file.string().length() > 3 && file.string().substr(file.string().length() - 3) == ".gz") + pimpl->boost_in.push(boost::iostreams::gzip_decompressor()); + pimpl->boost_in.push(pimpl->fin); } void AutoGzIfstream::close() { - fin.close(); - boost_in.reset(); + pimpl->fin.close(); + pimpl->boost_in.reset(); } -AutoGzIfstream::operator bool() const { - return !boost_in.fail(); +AutoGzIfstream::operator bool() const noexcept { + return !pimpl->boost_in.fail(); } AutoGzIfstream& getline(AutoGzIfstream& in, 
std::string& s) { - std::getline(in.boost_in, s); + std::getline(in.pimpl->boost_in, s); return in; } +AutoGzIfstream& AutoGzIfstream::operator>>(std::string& x) { + pimpl->boost_in >> x; + return *this; +} + } // namespace FileUtils diff --git a/src/hashing/FileUtils.hpp b/src/hashing/FileUtils.hpp index cd2f070..9a7e46f 100644 --- a/src/hashing/FileUtils.hpp +++ b/src/hashing/FileUtils.hpp @@ -29,30 +29,31 @@ #define FILEUTILS_HPP #include +#include +#include #include -#include - -#include namespace FileUtils { -bool fileExists(const std::string& name); +bool fileExists(const std::filesystem::path& file); class AutoGzIfstream { - boost::iostreams::filtering_istream boost_in; - std::ifstream fin; + struct Impl; + std::unique_ptr<Impl> pimpl; public: - static int lineCount(const std::string& file); + AutoGzIfstream(); + ~AutoGzIfstream() noexcept; + + [[nodiscard]] static int lineCount(const std::filesystem::path& file); - void openOrExit(const std::string& file, std::ios_base::openmode mode = std::ios::in); + void openOrExit(const std::filesystem::path& file, std::ios_base::openmode mode = std::ios::in); void close(); - template AutoGzIfstream& operator>>(T& x) { - boost_in >> x; - return *this; - } - operator bool() const; + AutoGzIfstream& operator>>(std::string& x); + + [[nodiscard]] explicit operator bool() const noexcept; + friend AutoGzIfstream& getline(AutoGzIfstream& in, std::string& s); }; @@ -60,4 +61,4 @@ AutoGzIfstream& getline(AutoGzIfstream& in, std::string& s); } // namespace FileUtils -#endif +#endif \ No newline at end of file From 7d98360b7daa0828fbce554ad3b186e695944d28 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Fri, 5 Dec 2025 17:11:34 +0000 Subject: [PATCH 21/33] Commit WIP for regression test --- pyproject.toml | 1 + test/run_regression_test.py | 96 +++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 test/run_regression_test.py diff --git a/pyproject.toml b/pyproject.toml index 748718c..af99916 
100644 
--- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ dependencies = [ [project.optional-dependencies] dev = [ "pytest", + "h5py", ] [project.scripts] diff --git a/test/run_regression_test.py b/test/run_regression_test.py new file mode 100644 index 0000000..2c58cbd --- /dev/null +++ b/test/run_regression_test.py @@ -0,0 +1,96 @@ +# This file is part of the ARG-Needle genealogical inference and +# analysis software suite. +# Copyright (C) 2023-2025 ARG-Needle Developers. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# This test assumes you have installed the dev dependencies of arg-needle. 
+# In the root of this repository, run: +# +# pip install .[dev] + +import subprocess +import tempfile +import pathlib +import h5py +import numpy as np + +DATA_DIR = pathlib.Path(__file__).resolve().parent / "data" + +def compare_attr_keys(a, b): + if set(a.keys()) != set(b.keys()): + return False + return True + +def compare_attr_vals(a, b): + + errors = False + + unchecked_keys = 'datetime_created' + deterministic_keys = ['arg_file_version', 'chromosome', 'end', 'mutations', 'node_bounds', 'offset', 'start', 'threaded_samples'] + nondeterministic_keys = ['num_edges', 'num_nodes'] + + for k in deterministic_keys: + if a[k] != b[k]: + print(f"Key {k} values ({a[k]} and {b[k]}) do not match") + errors = True + + for k in nondeterministic_keys: + if not np.isclose(a[k], b[k], rtol=0.01): + print(f"Key {k} values ({a[k]} and {b[k]}) differ by more than 1%") + errors = True + + if errors: + return False + return True + + +def check_attrs(h5file): + + return True + +def hdf5_equal(file1, file2): + with h5py.File(file1, "r") as f1, h5py.File(file2, "r") as f2: + # compare file-level attributes and root group + if not compare_attr_keys(f1.attrs, f2.attrs): + return False + if not compare_attr_vals(f1.attrs, f2.attrs): + return False + + print("hooray!") + return True + +def test_script_output(): + + with tempfile.TemporaryDirectory() as tmpdir: + tmp = pathlib.Path(tmpdir) + + # Run scripts inside temporary directory + # subprocess.run(["prepare_example"], cwd=tmp, check=True) + # subprocess.run(["infer_args", "--normalize", "0"], cwd=tmp, check=True) + # + # # Check output file exists + # outfile = tmp / "example.argn" + # assert outfile.exists() + + # Compare to frozen output + + + hdf5_equal(DATA_DIR / "example.argn", DATA_DIR / "arg_from_example.argn") + + + +if __name__ == '__main__': + test_script_output() From 455fc984ea4ab25035e2e5bf5f61cae34611715d Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Sat, 6 Dec 2025 
22:09:56 +0000 Subject: [PATCH 22/33] Finish python regression test --- .github/workflows/python_tests.yml | 4 +- test/run_regression_test.py | 96 ----------------------------- test/test_regression.py | 99 ++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 98 deletions(-) delete mode 100644 test/run_regression_test.py create mode 100644 test/test_regression.py diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index c3d8916..01668bb 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -1,4 +1,4 @@ -name: Python unit tests +name: Python tests on: push: @@ -12,7 +12,7 @@ on: jobs: build-and-test: - name: Python unit tests + name: Python tests runs-on: ${{ matrix.os }} timeout-minutes: 10 diff --git a/test/run_regression_test.py b/test/run_regression_test.py deleted file mode 100644 index 2c58cbd..0000000 --- a/test/run_regression_test.py +++ /dev/null @@ -1,96 +0,0 @@ -# This file is part of the ARG-Needle genealogical inference and -# analysis software suite. -# Copyright (C) 2023-2025 ARG-Needle Developers. - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -# This test assumes you have installed the dev dependencies of arg-needle. 
-# In the root of this repository, run: -# -# pip install .[dev] - -import subprocess -import tempfile -import pathlib -import h5py -import numpy as np - -DATA_DIR = pathlib.Path(__file__).resolve().parent / "data" - -def compare_attr_keys(a, b): - if set(a.keys()) != set(b.keys()): - return False - return True - -def compare_attr_vals(a, b): - - errors = False - - unchecked_keys = 'datetime_created' - deterministic_keys = ['arg_file_version', 'chromosome', 'end', 'mutations', 'node_bounds', 'offset', 'start', 'threaded_samples'] - nondeterministic_keys = ['num_edges', 'num_nodes'] - - for k in deterministic_keys: - if a[k] != b[k]: - print(f"Key {k} values ({a[k]} and {b[k]}) do not match") - errors = True - - for k in nondeterministic_keys: - if not np.isclose(a[k], b[k], rtol=0.01): - print(f"Key {k} values ({a[k]} and {b[k]}) differ by more than 1%") - errors = True - - if errors: - return False - return True - - -def check_attrs(h5file): - - return True - -def hdf5_equal(file1, file2): - with h5py.File(file1, "r") as f1, h5py.File(file2, "r") as f2: - # compare file-level attributes and root group - if not compare_attr_keys(f1.attrs, f2.attrs): - return False - if not compare_attr_vals(f1.attrs, f2.attrs): - return False - - print("hooray!") - return True - -def test_script_output(): - - with tempfile.TemporaryDirectory() as tmpdir: - tmp = pathlib.Path(tmpdir) - - # Run scripts inside temporary directory - # subprocess.run(["prepare_example"], cwd=tmp, check=True) - # subprocess.run(["infer_args", "--normalize", "0"], cwd=tmp, check=True) - # - # # Check output file exists - # outfile = tmp / "example.argn" - # assert outfile.exists() - - # Compare to frozen output - - - hdf5_equal(DATA_DIR / "example.argn", DATA_DIR / "arg_from_example.argn") - - - -if __name__ == '__main__': - test_script_output() diff --git a/test/test_regression.py b/test/test_regression.py new file mode 100644 index 0000000..3c8f2f8 --- /dev/null +++ b/test/test_regression.py @@ -0,0 
+1,99 @@ +# This file is part of the ARG-Needle genealogical inference and +# analysis software suite. +# Copyright (C) 2023-2025 ARG-Needle Developers. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# This test assumes you have installed the dev dependencies of arg-needle. +# In the root of this repository, run: +# +# pip install .[dev] + +import subprocess +import tempfile +import pathlib +import h5py +import numpy as np +import shutil + +DATA_DIR = pathlib.Path(__file__).resolve().parent / "data" + +ARGN_KEYS = ['arg_file_version', 'chromosome', 'datetime_created', 'end', 'mutations', 'node_bounds', 'num_edges', + 'num_mutations', 'num_nodes', 'offset', 'start', 'threaded_samples'] + +def check_attr_keys(attrs): + """ + Check if the keys are as expected in generated HDF5 file + """ + keys_in_generated_file = sorted([str(x) for x in attrs.keys()]) + + if keys_in_generated_file != ARGN_KEYS: + print(f"Expected the following keys:\n{ARGN_KEYS}\n but got:\n{keys_in_generated_file}") + return False + + return True + +def check_attr_vals(attrs): + """ + Check attr values are correct + """ + + # Deterministic values: + assert attrs["arg_file_version"] == 2 + assert attrs["chromosome"] == 1 + assert np.isclose(attrs["start"], 0.0, rtol=1e-8) + assert np.isclose(attrs["end"], 2000079.0, rtol=1e-8) + assert attrs["mutations"] == False + assert attrs["node_bounds"] == True + assert attrs["offset"] == 10001457 + 
assert attrs["threaded_samples"] == 400 + + # These values were calculated by running the example about 100 times + nodes_mean = 17203.69792 + nodes_std = 127.8394651 + edges_mean = 93654.98958 + edges_std = 591.7562353 + + # This should almost never fail + assert attrs["num_nodes"] > nodes_mean - 3.0 * nodes_std + assert attrs["num_nodes"] < nodes_mean + 3.0 * nodes_std + + assert attrs["num_edges"] > edges_mean - 3.0 * edges_std + assert attrs["num_edges"] < edges_mean + 3.0 * edges_std + + return True + + +def test_script_output(): + + with tempfile.TemporaryDirectory() as tmpdir: + tmp = pathlib.Path(tmpdir) + + # Run scripts inside temporary directory + subprocess.run(["prepare_example"], cwd=tmp, check=True) + subprocess.run(["infer_args", "--normalize", "0"], cwd=tmp, check=True) + + # Check output file exists + outfile = tmp / "example.argn" + assert outfile.exists() + + # Compare to frozen output + with h5py.File(outfile, "r") as arg_file: + assert check_attr_keys(arg_file.attrs) + assert check_attr_vals(arg_file.attrs) + + +if __name__ == '__main__': + test_script_output() From b9242b70fd2ab59bd35064e5b86dc321185ea5b3 Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Sat, 6 Dec 2025 23:25:38 +0000 Subject: [PATCH 23/33] Stop using deprecated find boost module --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7482dad..3fb0ab8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,7 +15,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-find_package(Boost COMPONENTS iostreams REQUIRED) +find_package(Boost CONFIG COMPONENTS iostreams REQUIRED) set( arg_needle_hashing_src From c5079567954ff8a7ef464227afaa5d2f9d72c809 Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Sat, 6 Dec 2025 23:26:01 +0000 Subject: [PATCH 24/33] Modernise FileUtils --- src/hashing/FileUtils.cpp | 128 ++++++++++++++++++-------------------- src/hashing/FileUtils.hpp | 122 ++++++++++++++++++++++++++++-------- 2 files changed, 157 insertions(+), 93 deletions(-) diff --git a/src/hashing/FileUtils.cpp b/src/hashing/FileUtils.cpp index aeb591b..aac8028 100644 --- a/src/hashing/FileUtils.cpp +++ b/src/hashing/FileUtils.cpp @@ -25,77 +25,73 @@ // The license file can be found at 3rd_party/Eagle/COPYING from the // root of this repository. -#include -#include -#include -#include -#include -#include - #include "FileUtils.hpp" #include #include -namespace FileUtils { +#include +#include +#include +#include -using std::cerr; -using std::endl; -using std::string; -using std::vector; - -struct AutoGzIfstream::Impl { - boost::iostreams::filtering_istream boost_in; - std::ifstream fin; -}; - -AutoGzIfstream::AutoGzIfstream() : pimpl(std::make_unique()) {} -AutoGzIfstream::~AutoGzIfstream() noexcept = default; - -bool fileExists(const std::filesystem::path& file) { - std::ifstream f(file.c_str()); - return f.good(); -} - -int AutoGzIfstream::lineCount(const std::filesystem::path& file) { - AutoGzIfstream fin; - fin.openOrExit(file); - int ctr = 0; - string line; - while (getline(fin, line)) - ctr++; - fin.close(); - return ctr; -} - -void AutoGzIfstream::openOrExit(const std::filesystem::path& file, std::ios_base::openmode mode) { - pimpl->fin.open(file.c_str(), mode); - if (!pimpl->fin) { - cerr << "ERROR: Unable to open file: " << file << endl; - exit(1); - } - if ((int) file.string().length() > 3 && file.string().substr(file.string().length() - 3) == ".gz") - 
pimpl->boost_in.push(boost::iostreams::gzip_decompressor()); - pimpl->boost_in.push(pimpl->fin); -} - -void AutoGzIfstream::close() { - pimpl->fin.close(); - pimpl->boost_in.reset(); -} - -AutoGzIfstream::operator bool() const noexcept { - return !pimpl->boost_in.fail(); -} - -AutoGzIfstream& getline(AutoGzIfstream& in, std::string& s) { - std::getline(in.pimpl->boost_in, s); - return in; -} - -AutoGzIfstream& AutoGzIfstream::operator>>(std::string& x) { - pimpl->boost_in >> x; - return *this; -} +namespace FileUtils { + struct AutoGzIfstream::Impl { + boost::iostreams::filtering_istream boost_in; + std::ifstream fin; + }; + + AutoGzIfstream::AutoGzIfstream() : pimpl(std::make_unique()) { + } + + AutoGzIfstream::~AutoGzIfstream() noexcept = default; + + bool fileExists(const std::filesystem::path &file) { + std::ifstream f(file.c_str()); + return f.good(); + } + + int AutoGzIfstream::lineCount(const std::filesystem::path &file) { + AutoGzIfstream fin; + fin.openOrExit(file); + int ctr = 0; + std::string line; + while (getline(fin, line)) { + ctr++; + } + fin.close(); + return ctr; + } + + void AutoGzIfstream::openOrExit(const std::filesystem::path &file, std::ios_base::openmode mode) { + pimpl->fin.open(file.c_str(), mode); + if (!pimpl->fin) { + std::cerr << "ERROR: Unable to open file: " << file << std::endl; + exit(1); + } + if (file.extension() == ".gz") { + pimpl->boost_in.push(boost::iostreams::gzip_decompressor()); + } + pimpl->boost_in.push(pimpl->fin); + } + + void AutoGzIfstream::close() { + pimpl->fin.close(); + pimpl->boost_in.reset(); + } + + AutoGzIfstream::operator bool() const noexcept { + return !pimpl->boost_in.fail(); + } + + AutoGzIfstream &getline(AutoGzIfstream &in, std::string &s) { + std::getline(in.pimpl->boost_in, s); + return in; + } + + AutoGzIfstream &AutoGzIfstream::operator>>(std::string &x) { + pimpl->boost_in >> x; + return *this; + } } // namespace FileUtils diff --git a/src/hashing/FileUtils.hpp b/src/hashing/FileUtils.hpp index 
9a7e46f..716fbf7 100644 --- a/src/hashing/FileUtils.hpp +++ b/src/hashing/FileUtils.hpp @@ -28,37 +28,105 @@ #ifndef FILEUTILS_HPP #define FILEUTILS_HPP -#include #include #include #include namespace FileUtils { - -bool fileExists(const std::filesystem::path& file); - -class AutoGzIfstream { - struct Impl; - std::unique_ptr pimpl; - -public: - AutoGzIfstream(); - ~AutoGzIfstream() noexcept; - - [[nodiscard]] static int lineCount(const std::filesystem::path& file); - - void openOrExit(const std::filesystem::path& file, std::ios_base::openmode mode = std::ios::in); - void close(); - - AutoGzIfstream& operator>>(std::string& x); - - [[nodiscard]] explicit operator bool() const noexcept; - - friend AutoGzIfstream& getline(AutoGzIfstream& in, std::string& s); -}; - -AutoGzIfstream& getline(AutoGzIfstream& in, std::string& s); - + /** + * @brief Check whether a given file exists on disk. + * + * @param file Path to the file to check. + * @return true if the file exists, false otherwise. + */ + bool fileExists(const std::filesystem::path &file); + + /** + * @class AutoGzIfstream + * @brief Stream wrapper that transparently reads either plain-text or gzip-compressed files. + * + * AutoGzIfstream detects whether an input file is compressed (.gz) and automatically + * opens it appropriately. It behaves similarly to std::ifstream but supports reading + * gzip-compressed streams without requiring explicit decompression by the caller. + * + * Internally uses a pimpl to hide implementation details and avoid exposing boost + * libraries at the interface level. + */ + class AutoGzIfstream { + struct Impl; + std::unique_ptr pimpl; + + public: + /** + * @brief Construct an unopened AutoGzIfstream. + */ + AutoGzIfstream(); + + /** + * @brief Destructor closes the stream if open and releases internal resources. + */ + ~AutoGzIfstream() noexcept; + + /** + * @brief Count the number of lines in a file (supports gzipped and plain files). 
+ * + * @param file Path to the file whose line count will be computed. + * @return Number of lines in the file. + */ + [[nodiscard]] static int lineCount(const std::filesystem::path &file); + + /** + * @brief Open a file for reading or exit the program if opening fails. + * + * Automatically detects gzip compression based on file contents. + * + * @param file Path to the file to open. + * @param mode Stream opening mode (defaults to std::ios::in). + */ + void openOrExit(const std::filesystem::path &file, + std::ios_base::openmode mode = std::ios::in); + + /** + * @brief Close the underlying stream. + */ + void close(); + + /** + * @brief Read whitespace-delimited input into a string via the extraction operator. + * + * @param x Output string that will receive the parsed token. + * @return Reference to this stream. + */ + AutoGzIfstream &operator>>(std::string &x); + + /** + * @brief Boolean conversion indicating whether the stream is currently valid. + * + * Allows usage in conditions such as: + * @code + * if (stream) { ... } + * @endcode + * + * @return true if the stream is open and in a good state, false otherwise. + */ + [[nodiscard]] explicit operator bool() const noexcept; + + /** + * @brief Friend declaration enabling getline(AutoGzIfstream&, ...). + */ + friend AutoGzIfstream &getline(AutoGzIfstream &in, std::string &s); + }; + + /** + * @brief Read a full line from an AutoGzIfstream into a string. + * + * Supports both compressed and uncompressed input sources. + * + * @param in Stream to read from. + * @param s Output string receiving the line (without delimiter). + * @return Reference to the stream. 
+ */ + AutoGzIfstream &getline(AutoGzIfstream &in, std::string &s); } // namespace FileUtils -#endif \ No newline at end of file +#endif From 13b78dea5f74b8599f573e473020a503284486db Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Sat, 6 Dec 2025 23:36:28 +0000 Subject: [PATCH 25/33] Update header guards --- src/hashing/FileUtils.hpp | 6 +++--- src/hashing/HapData.hpp | 6 +++--- src/hashing/utils.hpp | 5 +++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/hashing/FileUtils.hpp b/src/hashing/FileUtils.hpp index 716fbf7..de1f004 100644 --- a/src/hashing/FileUtils.hpp +++ b/src/hashing/FileUtils.hpp @@ -25,8 +25,8 @@ // The license file can be found at 3rd_party/Eagle/COPYING from the // root of this repository. -#ifndef FILEUTILS_HPP -#define FILEUTILS_HPP +#ifndef ARG_NEEDLE_FILE_UTILS_HPP +#define ARG_NEEDLE_FILE_UTILS_HPP #include #include @@ -129,4 +129,4 @@ namespace FileUtils { AutoGzIfstream &getline(AutoGzIfstream &in, std::string &s); } // namespace FileUtils -#endif +#endif // ARG_NEEDLE_FILE_UTILS_HPP diff --git a/src/hashing/HapData.hpp b/src/hashing/HapData.hpp index c5c4741..345d7f5 100644 --- a/src/hashing/HapData.hpp +++ b/src/hashing/HapData.hpp @@ -17,8 +17,8 @@ along with this program. If not, see . */ -#ifndef __HAP_DATA_HPP_ -#define __HAP_DATA_HPP_ +#ifndef ARG_NEELE_HAP_DATA_HPP +#define ARG_NEELE_HAP_DATA_HPP #include #include @@ -82,4 +82,4 @@ class HapData { friend ostream& operator<<(ostream& os, const HapData& data); }; -#endif // __HAP_DATA_HPP_ +#endif // ARG_NEELE_HAP_DATA_HPP diff --git a/src/hashing/utils.hpp b/src/hashing/utils.hpp index a47df25..880c65e 100644 --- a/src/hashing/utils.hpp +++ b/src/hashing/utils.hpp @@ -17,8 +17,8 @@ along with this program. If not, see . 
*/ -#ifndef __UTILS_HPP_ -#define __UTILS_HPP_ +#ifndef ARG_NEEDLE_UTILS_HPP +#define ARG_NEEDLE_UTILS_HPP #include @@ -28,3 +28,4 @@ using std::string; #define THROW_LINE(a) (string(__FILE__) + ":" + std::to_string(__LINE__) + ": " + a) #endif // __UTILS_HPP_ +#endif // ARG_NEEDLE_UTILS_HPP From 94fef936e5fab9680d966222a8caa38e2b2e9171 Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Sat, 6 Dec 2025 23:37:59 +0000 Subject: [PATCH 26/33] Update THROW_LINE macro to function using source_location --- CMakeLists.txt | 2 +- src/hashing/HapData.cpp | 16 ++++++++-------- src/hashing/utils.hpp | 12 +++++++----- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 23651aa..25e2ff1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,7 @@ message(STATUS "Using CMake version ${CMAKE_VERSION}") project(arg_needle LANGUAGES CXX VERSION 1.1.0) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) diff --git a/src/hashing/HapData.cpp b/src/hashing/HapData.cpp index b368605..a8acb4a 100644 --- a/src/hashing/HapData.cpp +++ b/src/hashing/HapData.cpp @@ -54,18 +54,18 @@ HapData::HapData(string mode, string file_root_path, unsigned int _word_size, st data_mode = HapDataMode::array; } else { - throw std::logic_error(THROW_LINE("Mode not recognized.")); + throw std::logic_error(make_error("Mode not recognized.")); } if (sizeof(word_type) != 8) { - throw std::logic_error(THROW_LINE("Expected word_type to be 8 bytes (64 bits).")); + throw std::logic_error(make_error("Expected word_type to be 8 bytes (64 bits).")); } if (sizeof(1ull) < 8) { throw std::logic_error( - THROW_LINE("Expected unsigned long long to be at least 8 bytes (64 bits).")); + make_error("Expected unsigned long long to be at least 8 bytes (64 bits).")); } if (word_size > 64 || word_size <= 0) { - throw std::logic_error(THROW_LINE("Out of bounds 
word size.")); + throw std::logic_error(make_error("Out of bounds word size.")); } string line; @@ -230,10 +230,10 @@ HapData::~HapData() { void HapData::add_to_hash(size_t hap_id) { if (hashed_hap_ids.find(hap_id) != hashed_hap_ids.end()) { - throw std::logic_error(THROW_LINE("This haplotype has already been hashed.")); + throw std::logic_error(make_error("This haplotype has already been hashed.")); } if (hap_id >= num_haps) { - throw std::logic_error(THROW_LINE("Haplotype ID out of bounds.")); + throw std::logic_error(make_error("Haplotype ID out of bounds.")); } if (hashes.empty()) { @@ -252,7 +252,7 @@ void HapData::add_to_hash(size_t hap_id) { void HapData::print_hap(size_t hap_id) { if (hap_id >= num_haps) { - throw std::logic_error(THROW_LINE("Haplotype ID out of bounds.")); + throw std::logic_error(make_error("Haplotype ID out of bounds.")); } cout << "Bits for hap_id = " << hap_id << endl; for (size_t site_id = 0; site_id < num_sites; ++site_id) { @@ -301,7 +301,7 @@ void HapData::print_hashes() { void HapData::print_word_match_diagram(size_t hap_id1, size_t hap_id2) { if (hap_id1 >= num_haps || hap_id2 >= num_haps) { - throw std::logic_error(THROW_LINE("Haplotype ID out of bounds.")); + throw std::logic_error(make_error("Haplotype ID out of bounds.")); } for (size_t i = 0; i < words[hap_id1].size(); ++i) { if (i != 0) { diff --git a/src/hashing/utils.hpp b/src/hashing/utils.hpp index 880c65e..541b691 100644 --- a/src/hashing/utils.hpp +++ b/src/hashing/utils.hpp @@ -21,11 +21,13 @@ #define ARG_NEEDLE_UTILS_HPP #include +#include -using std::string; +inline std::string make_error(const std::string& msg, + const std::source_location& loc = std::source_location::current()) +{ + return std::string(loc.file_name()) + ":" + + std::to_string(loc.line()) + ": " + msg; +} -// Utility for exceptions -#define THROW_LINE(a) (string(__FILE__) + ":" + std::to_string(__LINE__) + ": " + a) - -#endif // __UTILS_HPP_ #endif // ARG_NEEDLE_UTILS_HPP From 
8a71288c1e370d91b7e1f0ea16da419810b1e1dd Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Sat, 6 Dec 2025 23:43:11 +0000 Subject: [PATCH 27/33] Remove using declarations --- src/hashing/HapData.cpp | 114 ++++++++++++++++++---------------------- src/hashing/HapData.hpp | 35 +++++------- 2 files changed, 65 insertions(+), 84 deletions(-) diff --git a/src/hashing/HapData.cpp b/src/hashing/HapData.cpp index a8acb4a..3e57dd8 100644 --- a/src/hashing/HapData.cpp +++ b/src/hashing/HapData.cpp @@ -33,18 +33,8 @@ #include "HapData.hpp" #include "utils.hpp" -using std::cerr; -using std::cout; -using std::deque; -using std::endl; -using std::ostream; -using std::pair; -using std::string; -using std::tuple; -using std::unordered_map; -using std::unordered_set; - -HapData::HapData(string mode, string file_root_path, unsigned int _word_size, string map_file_path, + +HapData::HapData(std::string mode, std::string file_root_path, unsigned int _word_size, std::string map_file_path, bool fill_sites) : word_size(_word_size) { if (mode == "sequence") { @@ -68,7 +58,7 @@ HapData::HapData(string mode, string file_root_path, unsigned int _word_size, st throw std::logic_error(make_error("Out of bounds word size.")); } - string line; + std::string line; std::stringstream ss; // read in .sample[s] file @@ -80,14 +70,14 @@ HapData::HapData(string mode, string file_root_path, unsigned int _word_size, st file_samples.openOrExit(file_root_path + ".sample"); } else { - cerr << "ERROR. Could not find sample file in " + file_root_path + ".sample[s]" << endl; + std::cerr << "ERROR. 
Could not find sample file in " + file_root_path + ".sample[s]" << std::endl; exit(1); } while (getline(file_samples, line)) { - vector splitStr; + std::vector splitStr; std::istringstream iss(line); - string buf; + std::string buf; while (iss >> buf) splitStr.push_back(buf); @@ -112,7 +102,7 @@ HapData::HapData(string mode, string file_root_path, unsigned int _word_size, st // cout << "Using genetic map " << map_file_path << endl; } else { - cerr << "ERROR. Could not open map file " + map_file_path + ", no such file" << endl; + std::cerr << "ERROR. Could not open map file " + map_file_path + ", no such file" << std::endl; exit(1); } } @@ -127,13 +117,13 @@ HapData::HapData(string mode, string file_root_path, unsigned int _word_size, st // cout << "Using genetic map " << file_root_path << ".map" << endl; } else { - cerr << "ERROR. Could not find map file in " + file_root_path + ".map.gz or " + + std::cerr << "ERROR. Could not find map file in " + file_root_path + ".map.gz or " + file_root_path + ".map" - << endl; + << std::endl; exit(1); } } - string map_field[4]; + std::string map_field[4]; while (getline(file_map, line)) { ss.clear(); ss.str(line); @@ -159,17 +149,17 @@ HapData::HapData(string mode, string file_root_path, unsigned int _word_size, st file_hap.openOrExit(file_root_path + ".haps"); } else { - cerr << "ERROR. Could not find hap file in " + file_root_path + ".hap.gz, " + file_root_path + + std::cerr << "ERROR. 
Could not find hap file in " + file_root_path + ".hap.gz, " + file_root_path + ".hap, " + ".haps.gz, or " + file_root_path + ".haps" - << endl; + << std::endl; exit(1); } if (fill_sites) { - sites = vector>(num_haps, vector()); + sites = std::vector>(num_haps, std::vector()); } - words = vector>(num_haps, vector()); - string marker_id; + words = std::vector>(num_haps, std::vector()); + std::string marker_id; unsigned long int marker_pos; char al[2], inp; int site_id = 0; @@ -237,12 +227,12 @@ void HapData::add_to_hash(size_t hap_id) { } if (hashes.empty()) { - hashes = vector>>( - words[hap_id].size(), unordered_map>()); + hashes = std::vector>>( + words[hap_id].size(), std::unordered_map>()); } for (size_t i = 0; i < words[hap_id].size(); ++i) { - vector& hash_value = + std::vector& hash_value = hashes[i][words[hap_id][i]]; // creates if not present, only hashes once hash_value.push_back(hap_id); } @@ -254,21 +244,21 @@ void HapData::print_hap(size_t hap_id) { if (hap_id >= num_haps) { throw std::logic_error(make_error("Haplotype ID out of bounds.")); } - cout << "Bits for hap_id = " << hap_id << endl; + std::cout << "Bits for hap_id = " << hap_id << std::endl; for (size_t site_id = 0; site_id < num_sites; ++site_id) { - cout << sites[hap_id][site_id]; + std::cout << sites[hap_id][site_id]; if (site_id % word_size == word_size - 1) { - cout << " "; + std::cout << " "; } } - cout << endl; + std::cout << std::endl; - cout << "Words (hex) for hap_id = " << hap_id << endl; + std::cout << "Words (hex) for hap_id = " << hap_id << std::endl; std::cout << std::hex << std::showbase; for (auto const& word : words[hap_id]) { - cout << word << " "; + std::cout << word << " "; } - cout << endl; + std::cout << std::endl; std::cout << std::dec << std::noshowbase; // cout << "Words (decimal)" << endl; @@ -280,22 +270,22 @@ void HapData::print_hap(size_t hap_id) { void HapData::print_hashes() { for (size_t i = 0; i < hashes.size(); ++i) { - cout << "Hash for word " << i << " of " 
<< hashes.size() << endl; + std::cout << "Hash for word " << i << " of " << hashes.size() << std::endl; for (auto const& map_entry : hashes[i]) { unsigned int num_bits = word_size; if (i == hashes.size() - 1) { num_bits = ((num_sites - 1) % word_size) + 1; } for (size_t j = 0; j < num_bits; ++j) { - cout << ((map_entry.first >> j) & 1); + std::cout << ((map_entry.first >> j) & 1); } - cout << ":"; + std::cout << ":"; for (const size_t id : map_entry.second) { - cout << " " << id; + std::cout << " " << id; } - cout << endl; + std::cout << std::endl; } - cout << endl; + std::cout << std::endl; } } @@ -306,31 +296,31 @@ void HapData::print_word_match_diagram(size_t hap_id1, size_t hap_id2) { for (size_t i = 0; i < words[hap_id1].size(); ++i) { if (i != 0) { if (i % 100 == 0) { - cout << endl; + std::cout << std::endl; } if (i % 25 == 0) { - cout << endl; + std::cout << std::endl; } else if (i % 5 == 0) { - cout << " "; + std::cout << " "; } } if (words[hap_id1][i] == words[hap_id2][i]) { - cout << "x"; + std::cout << "x"; } else { - cout << "_"; + std::cout << "_"; } } - cout << endl; + std::cout << std::endl; } -vector>>> +std::vector>>> HapData::get_closest_cousins(size_t hap_id, unsigned int k, unsigned int tolerance, double window_size_genetic) { // find the windows - vector windows; // Window defined in HapData.hpp + std::vector windows; // Window defined in HapData.hpp size_t num_words = words[hap_id].size(); if (window_size_genetic <= 0) { // make a new window for each and every word @@ -367,7 +357,7 @@ HapData::get_closest_cousins(size_t hap_id, unsigned int k, unsigned int toleran } } - vector words_to_windows; + std::vector words_to_windows; for (size_t i = 0; i < windows.size(); ++i) { Window w = windows[i]; for (size_t j = w.start; j < w.end; ++j) { @@ -377,17 +367,17 @@ HapData::get_closest_cousins(size_t hap_id, unsigned int k, unsigned int toleran // how high each sample scores in each window // we only record samples that have matched - vector> 
window_scores( - windows.size(), unordered_map()); + std::vector> window_scores( + windows.size(), std::unordered_map()); // stretches of matching material separated by 2*k + 1 fillers, where k is the number // of mismatches, max size defined by 2*tolerance + 1 - vector>> stretches(hap_id, deque>()); + std::vector>> stretches(hap_id, std::deque>()); // size_t num_overall_matches = 0; for (size_t i = 0; i < num_words; ++i) { // in some cases, the word does not yet exist in the hashmap if (hashes[i].find(words[hap_id][i]) != hashes[i].end()) { - const vector& hash_value = hashes[i].find(words[hap_id][i])->second; + const std::vector& hash_value = hashes[i].find(words[hap_id][i])->second; // num_overall_matches += hash_value.size(); for (auto v : hash_value) { // check the end of stretches to figure out what to do @@ -395,7 +385,7 @@ HapData::get_closest_cousins(size_t hap_id, unsigned int k, unsigned int toleran stretches[v].emplace_back(i, i + 1); // end is exclusive } else { - pair& back_pair = stretches[v].back(); + std::pair& back_pair = stretches[v].back(); if (back_pair.second == i) { back_pair.second = i + 1; // end is exclusive } @@ -419,7 +409,7 @@ HapData::get_closest_cousins(size_t hap_id, unsigned int k, unsigned int toleran // pop_front to get to size 2*tolerance + 1 while (stretches[v].size() > 2 * tolerance + 1) { - pair& item = stretches[v].front(); + std::pair& item = stretches[v].front(); if (item.second != 0) { size_t range_start = item.first; // old version was buggy @@ -455,7 +445,7 @@ HapData::get_closest_cousins(size_t hap_id, unsigned int k, unsigned int toleran // go over all the stretches and pop_front for (size_t v = 0; v < hap_id; ++v) { while (stretches[v].size() > 0) { - pair item = stretches[v].front(); + std::pair item = stretches[v].front(); if (item.second != 0) { size_t range_start = item.first; // old version is buggy in general, but should work in this case @@ -487,12 +477,12 @@ HapData::get_closest_cousins(size_t hap_id, unsigned 
int k, unsigned int toleran } // take the values in window_scores and sort to find top k - vector>>> results; + std::vector>>> results; for (const Window& w : windows) { size_t window_start_site = w.start * word_size; size_t window_end_site = std::min(w.end * word_size - 1, num_sites - 1); - vector> stats; + std::vector> stats; for (const auto& map_entry : window_scores[w.index]) { size_t hap_id = map_entry.first; double score = (double) map_entry.second; @@ -501,14 +491,14 @@ HapData::get_closest_cousins(size_t hap_id, unsigned int k, unsigned int toleran size_t actual_k = std::min(k, stats.size()); // use this if we want sorted std::partial_sort( - stats.begin(), stats.begin() + actual_k, stats.end(), std::greater>()); + stats.begin(), stats.begin() + actual_k, stats.end(), std::greater>()); // use this if we don't care about sorted // std::nth_element(stats.begin(), stats.begin() + actual_k, stats.end(), // std::greater>()); // append to results results.push_back( - std::make_tuple(window_start_site, window_end_site, vector>())); + std::make_tuple(window_start_site, window_end_site, std::vector>())); for (size_t stats_idx = 0; stats_idx < actual_k; ++stats_idx) { std::get<2>(results[results.size() - 1]) .emplace_back(stats[stats_idx].second, stats[stats_idx].first); @@ -518,7 +508,7 @@ HapData::get_closest_cousins(size_t hap_id, unsigned int k, unsigned int toleran return results; } -ostream& operator<<(ostream& os, const HapData& data) { +std::ostream& operator<<(std::ostream& os, const HapData& data) { os << "HapData with " << data.num_haps << " haplotypes and " << data.num_sites; os << " sites, word size = " << data.word_size << " bits"; return os; diff --git a/src/hashing/HapData.hpp b/src/hashing/HapData.hpp index 345d7f5..2f174ad 100644 --- a/src/hashing/HapData.hpp +++ b/src/hashing/HapData.hpp @@ -20,8 +20,7 @@ #ifndef ARG_NEELE_HAP_DATA_HPP #define ARG_NEELE_HAP_DATA_HPP -#include -#include +#include #include #include #include @@ -29,14 +28,6 @@ 
#include #include -using std::ostream; -using std::pair; -using std::string; -using std::tuple; -using std::unordered_map; -using std::unordered_set; -using std::vector; - struct Window { size_t start, end, index; // end is inclusive friend bool operator<(const Window& a, const Window& b) { @@ -59,27 +50,27 @@ class HapData { unsigned int num_sites = 0; unsigned int word_size; HapDataMode data_mode; - vector physical_positions; - vector genetic_positions; - vector site_mafs; - vector sample_names; - vector> sites; - vector> words; + std::vector physical_positions; + std::vector genetic_positions; + std::vector site_mafs; + std::vector sample_names; + std::vector> sites; + std::vector> words; - vector>> hashes; - unordered_set hashed_hap_ids; + std::vector>> hashes; + std::unordered_set hashed_hap_ids; - HapData(string mode, string file_root_path, unsigned int _word_size = 64, - string map_file_path = "", bool fill_sites = true); + HapData(std::string mode, std::string file_root_path, unsigned int _word_size = 64, + std::string map_file_path = "", bool fill_sites = true); ~HapData(); void add_to_hash(size_t hap_id); - vector>>> + std::vector>>> get_closest_cousins(size_t hap_id, unsigned int k, unsigned int tolerance = 0, double window_size_genetic = 0); void print_hap(size_t hap_id); void print_hashes(); void print_word_match_diagram(size_t hap_id1, size_t hap_id2); - friend ostream& operator<<(ostream& os, const HapData& data); + friend std::ostream& operator<<(std::ostream& os, const HapData& data); }; #endif // ARG_NEELE_HAP_DATA_HPP From f1a16330faab97c57cb57f4367b1d2da5c29a2f6 Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Sun, 7 Dec 2025 00:09:11 +0000 Subject: [PATCH 28/33] Modernise some C++ --- src/hashing/HapData.cpp | 56 +++++++++++++++++------------------------ src/hashing/HapData.hpp | 6 ++--- 2 files changed, 26 insertions(+), 36 deletions(-) diff --git a/src/hashing/HapData.cpp 
b/src/hashing/HapData.cpp index 3e57dd8..f989e7d 100644 --- a/src/hashing/HapData.cpp +++ b/src/hashing/HapData.cpp @@ -95,11 +95,10 @@ HapData::HapData(std::string mode, std::string file_root_path, unsigned int _wor // Parse .map[.gz] file FileUtils::AutoGzIfstream file_map; - if (map_file_path != "") { + if (!map_file_path.empty()) { // Attempt to read in .map[.gz] file if (FileUtils::fileExists(map_file_path)) { file_map.openOrExit(map_file_path); - // cout << "Using genetic map " << map_file_path << endl; } else { std::cerr << "ERROR. Could not open map file " + map_file_path + ", no such file" << std::endl; @@ -110,11 +109,9 @@ HapData::HapData(std::string mode, std::string file_root_path, unsigned int _wor // If no map file is specified, default to file_root_path.map[.gz] if (FileUtils::fileExists(file_root_path + ".map.gz")) { file_map.openOrExit(file_root_path + ".map.gz"); - // cout << "Using genetic map " << file_root_path << ".map.gz" << endl; } else if (FileUtils::fileExists(file_root_path + ".map")) { file_map.openOrExit(file_root_path + ".map"); - // cout << "Using genetic map " << file_root_path << ".map" << endl; } else { std::cerr << "ERROR. 
Could not find map file in " + file_root_path + ".map.gz or " + @@ -162,14 +159,15 @@ HapData::HapData(std::string mode, std::string file_root_path, unsigned int _wor std::string marker_id; unsigned long int marker_pos; char al[2], inp; - int site_id = 0; + unsigned int site_id = 0u; while (getline(file_hap, line)) { // read the meta data ss.clear(); ss.str(line); ss >> map_field[0] >> marker_id >> marker_pos >> al[0] >> al[1]; - if (map_field[0] == "") + if (map_field[0].empty()) { continue; + } if (site_id % word_size == 0) { for (size_t hap_id = 0; hap_id < num_haps; ++hap_id) { @@ -202,9 +200,9 @@ HapData::HapData(std::string mode, std::string file_root_path, unsigned int _wor } } } - float maf = (float) maf_ctr / num_haps; - if (maf > 0.5) { - maf = 1 - maf; + float maf = static_cast(maf_ctr) / static_cast(num_haps); + if (maf > 0.5f) { + maf = 1.f - maf; } site_mafs.push_back(maf); ++site_id; @@ -212,12 +210,6 @@ HapData::HapData(std::string mode, std::string file_root_path, unsigned int _wor file_hap.close(); } -HapData::~HapData() { -#ifdef _DEBUG - cout << "Deleting: " << *this << endl; -#endif // _DEBUG -} - void HapData::add_to_hash(size_t hap_id) { if (hashed_hap_ids.find(hap_id) != hashed_hap_ids.end()) { throw std::logic_error(make_error("This haplotype has already been hashed.")); @@ -272,9 +264,9 @@ void HapData::print_hashes() { for (size_t i = 0; i < hashes.size(); ++i) { std::cout << "Hash for word " << i << " of " << hashes.size() << std::endl; for (auto const& map_entry : hashes[i]) { - unsigned int num_bits = word_size; + unsigned long num_bits = word_size; if (i == hashes.size() - 1) { - num_bits = ((num_sites - 1) % word_size) + 1; + num_bits = ((num_sites - 1ul) % word_size) + 1ul; } for (size_t j = 0; j < num_bits; ++j) { std::cout << ((map_entry.first >> j) & 1); @@ -325,7 +317,7 @@ HapData::get_closest_cousins(size_t hap_id, unsigned int k, unsigned int toleran if (window_size_genetic <= 0) { // make a new window for each and every word 
for (size_t j = 0; j < num_words; ++j) { - Window w; + Window w{}; w.start = j; w.end = j + 1; w.index = j; @@ -343,7 +335,7 @@ HapData::get_closest_cousins(size_t hap_id, unsigned int k, unsigned int toleran (genetic_positions[last_word_site] - start_genetic >= window_size_genetic && genetic_positions[num_sites - 1] - genetic_positions[last_word_site + 1] >= window_size_genetic)) { - Window w; + Window w{}; w.start = start_word; w.end = j + 1; w.index = window_index; @@ -377,11 +369,10 @@ HapData::get_closest_cousins(size_t hap_id, unsigned int k, unsigned int toleran for (size_t i = 0; i < num_words; ++i) { // in some cases, the word does not yet exist in the hashmap if (hashes[i].find(words[hap_id][i]) != hashes[i].end()) { - const std::vector& hash_value = hashes[i].find(words[hap_id][i])->second; - // num_overall_matches += hash_value.size(); - for (auto v : hash_value) { + const std::vector& matches = hashes[i].find(words[hap_id][i])->second; + for (auto v : matches) { // check the end of stretches to figure out what to do - if (stretches[v].size() == 0) { + if (stretches[v].empty()) { stretches[v].emplace_back(i, i + 1); // end is exclusive } else { @@ -429,10 +420,10 @@ HapData::get_closest_cousins(size_t hap_id, unsigned int k, unsigned int toleran // if our range is [6, 16), we want [5, 10) to [15, 20) inclusive for (size_t window_index = words_to_windows[range_start]; window_index <= words_to_windows[range_end - 1]; ++window_index) { - size_t& hash_value = + size_t& best_len = window_scores[window_index][v]; // creates if not present, only hashes once - if (range_size > hash_value) { - hash_value = range_size; + if (range_size > best_len) { + best_len = range_size; } } } @@ -444,7 +435,7 @@ HapData::get_closest_cousins(size_t hap_id, unsigned int k, unsigned int toleran // go over all the stretches and pop_front for (size_t v = 0; v < hap_id; ++v) { - while (stretches[v].size() > 0) { + while (!stretches[v].empty()) { std::pair item = 
stretches[v].front(); if (item.second != 0) { size_t range_start = item.first; @@ -484,21 +475,20 @@ HapData::get_closest_cousins(size_t hap_id, unsigned int k, unsigned int toleran std::vector> stats; for (const auto& map_entry : window_scores[w.index]) { - size_t hap_id = map_entry.first; - double score = (double) map_entry.second; - stats.emplace_back(score, hap_id); + size_t map_entry_hap_id = map_entry.first; + auto score = static_cast(map_entry.second); + stats.emplace_back(score, map_entry_hap_id); } size_t actual_k = std::min(k, stats.size()); // use this if we want sorted std::partial_sort( - stats.begin(), stats.begin() + actual_k, stats.end(), std::greater>()); + stats.begin(), stats.begin() + static_cast(actual_k), stats.end(), std::greater>()); // use this if we don't care about sorted // std::nth_element(stats.begin(), stats.begin() + actual_k, stats.end(), // std::greater>()); // append to results - results.push_back( - std::make_tuple(window_start_site, window_end_site, std::vector>())); + results.emplace_back(window_start_site, window_end_site, std::vector>()); for (size_t stats_idx = 0; stats_idx < actual_k; ++stats_idx) { std::get<2>(results[results.size() - 1]) .emplace_back(stats[stats_idx].second, stats[stats_idx].first); diff --git a/src/hashing/HapData.hpp b/src/hashing/HapData.hpp index 2f174ad..671bc44 100644 --- a/src/hashing/HapData.hpp +++ b/src/hashing/HapData.hpp @@ -46,8 +46,8 @@ class HapData { public: typedef uint64_t word_type; - unsigned int num_haps = 0; - unsigned int num_sites = 0; + unsigned long num_haps = 0ul; + unsigned long num_sites = 0ul; unsigned int word_size; HapDataMode data_mode; std::vector physical_positions; @@ -62,7 +62,7 @@ class HapData { HapData(std::string mode, std::string file_root_path, unsigned int _word_size = 64, std::string map_file_path = "", bool fill_sites = true); - ~HapData(); + ~HapData() = default; void add_to_hash(size_t hap_id); std::vector>>> get_closest_cousins(size_t hap_id, unsigned int 
k, unsigned int tolerance = 0, From 4253410352bc920fb023454621ab417861461301 Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Sun, 7 Dec 2025 00:35:28 +0000 Subject: [PATCH 29/33] Fix remaining THROW_LINE -> make_error conversions --- src/hashing/pybind.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hashing/pybind.cpp b/src/hashing/pybind.cpp index 9dbe58e..14070da 100644 --- a/src/hashing/pybind.cpp +++ b/src/hashing/pybind.cpp @@ -56,7 +56,7 @@ PYBIND11_MODULE(arg_needle_hashing_pybind, m) { "physical_position_at", [](const HapData& data, size_t site) { if (site >= data.num_sites) { - throw std::logic_error(THROW_LINE("Out of bounds site.")); + throw std::logic_error(make_error("Out of bounds site.")); } return data.physical_positions[site]; }, @@ -65,7 +65,7 @@ PYBIND11_MODULE(arg_needle_hashing_pybind, m) { "genetic_position_at", [](const HapData& data, size_t site) { if (site >= data.num_sites) { - throw std::logic_error(THROW_LINE("Out of bounds site.")); + throw std::logic_error(make_error("Out of bounds site.")); } return data.genetic_positions[site]; }, @@ -74,7 +74,7 @@ PYBIND11_MODULE(arg_needle_hashing_pybind, m) { "site_maf_at", [](const HapData& data, size_t site) { if (site >= data.num_sites) { - throw std::logic_error(THROW_LINE("Out of bounds site.")); + throw std::logic_error(make_error("Out of bounds site.")); } return data.site_mafs[site]; }, @@ -83,7 +83,7 @@ PYBIND11_MODULE(arg_needle_hashing_pybind, m) { "sample_name", [](const HapData& data, size_t hap_id) { if (hap_id >= data.num_haps) { - throw std::logic_error(THROW_LINE("Out of bounds hap_id.")); + throw std::logic_error(make_error("Out of bounds hap_id.")); } return data.sample_names[hap_id]; }, From e596e009564bf0a633e6d33c9ecd47bb020015e5 Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Sun, 7 Dec 2025 00:35:39 +0000 Subject: [PATCH 30/33] Add test for 
utils --- test/cpp/CMakeLists.txt | 1 + test/cpp/test_utils.cpp | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 test/cpp/test_utils.cpp diff --git a/test/cpp/CMakeLists.txt b/test/cpp/CMakeLists.txt index 409bcd5..148cf22 100644 --- a/test/cpp/CMakeLists.txt +++ b/test/cpp/CMakeLists.txt @@ -18,6 +18,7 @@ set( test_files test_file_utils.cpp + test_utils.cpp ) add_executable(cpp_tests ${test_files}) diff --git a/test/cpp/test_utils.cpp b/test/cpp/test_utils.cpp new file mode 100644 index 0000000..84473a1 --- /dev/null +++ b/test/cpp/test_utils.cpp @@ -0,0 +1,41 @@ +/* + This file is part of the ARG-Needle genealogical inference and + analysis software suite. + Copyright (C) 2023-2025 ARG-Needle Developers. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include +#include + +#include "HapData.hpp" +#include "utils.hpp" + +using Catch::Matchers::ContainsSubstring; + +void test_throw() { + throw std::logic_error(make_error("Something went wrong")); +} + + +TEST_CASE("make_error", "[utils]") { + + REQUIRE_THROWS_WITH(test_throw(), + ContainsSubstring( "test_utils.cpp:" ) && ContainsSubstring( "Something went wrong" )); + + REQUIRE_THROWS_WITH(HapData("banana", ""), + ContainsSubstring( "HapData.cpp:" ) && ContainsSubstring( "Mode not recognized" )); +} + From 38c9a730f988e0d246e34e579816f72cca1bff12 Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Sun, 7 Dec 2025 00:47:09 +0000 Subject: [PATCH 31/33] Alma linux 8 doesn't have new enough boost for config mode --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3fb0ab8..7482dad 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,7 +15,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-find_package(Boost CONFIG COMPONENTS iostreams REQUIRED) +find_package(Boost COMPONENTS iostreams REQUIRED) set( arg_needle_hashing_src From 1ceeb1b7781692591102d876538288134a4bfa10 Mon Sep 17 00:00:00 2001 From: Fergus Cooper <3770306+fcooper8472@users.noreply.github.com> Date: Sun, 7 Dec 2025 12:48:39 +0000 Subject: [PATCH 32/33] Back to C++17 to avoid nasty boost interaction on AlmaLinux8 --- CMakeLists.txt | 2 +- src/hashing/HapData.cpp | 16 ++++++++-------- src/hashing/pybind.cpp | 8 ++++---- src/hashing/utils.hpp | 10 ++++------ test/cpp/test_utils.cpp | 2 +- 5 files changed, 18 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 25e2ff1..23651aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,7 @@ message(STATUS "Using CMake version ${CMAKE_VERSION}") project(arg_needle LANGUAGES CXX VERSION 1.1.0) -set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) diff --git a/src/hashing/HapData.cpp b/src/hashing/HapData.cpp index f989e7d..ceb7a07 100644 --- a/src/hashing/HapData.cpp +++ b/src/hashing/HapData.cpp @@ -44,18 +44,18 @@ HapData::HapData(std::string mode, std::string file_root_path, unsigned int _wor data_mode = HapDataMode::array; } else { - throw std::logic_error(make_error("Mode not recognized.")); + throw std::logic_error(MAKE_ERROR("Mode not recognized.")); } if (sizeof(word_type) != 8) { - throw std::logic_error(make_error("Expected word_type to be 8 bytes (64 bits).")); + throw std::logic_error(MAKE_ERROR("Expected word_type to be 8 bytes (64 bits).")); } if (sizeof(1ull) < 8) { throw std::logic_error( - make_error("Expected unsigned long long to be at least 8 bytes (64 bits).")); + MAKE_ERROR("Expected unsigned long long to be at least 8 bytes (64 bits).")); } if (word_size > 64 || word_size <= 0) { - throw std::logic_error(make_error("Out of bounds word size.")); + throw std::logic_error(MAKE_ERROR("Out of bounds word size.")); } 
std::string line; @@ -212,10 +212,10 @@ HapData::HapData(std::string mode, std::string file_root_path, unsigned int _wor void HapData::add_to_hash(size_t hap_id) { if (hashed_hap_ids.find(hap_id) != hashed_hap_ids.end()) { - throw std::logic_error(make_error("This haplotype has already been hashed.")); + throw std::logic_error(MAKE_ERROR("This haplotype has already been hashed.")); } if (hap_id >= num_haps) { - throw std::logic_error(make_error("Haplotype ID out of bounds.")); + throw std::logic_error(MAKE_ERROR("Haplotype ID out of bounds.")); } if (hashes.empty()) { @@ -234,7 +234,7 @@ void HapData::add_to_hash(size_t hap_id) { void HapData::print_hap(size_t hap_id) { if (hap_id >= num_haps) { - throw std::logic_error(make_error("Haplotype ID out of bounds.")); + throw std::logic_error(MAKE_ERROR("Haplotype ID out of bounds.")); } std::cout << "Bits for hap_id = " << hap_id << std::endl; for (size_t site_id = 0; site_id < num_sites; ++site_id) { @@ -283,7 +283,7 @@ void HapData::print_hashes() { void HapData::print_word_match_diagram(size_t hap_id1, size_t hap_id2) { if (hap_id1 >= num_haps || hap_id2 >= num_haps) { - throw std::logic_error(make_error("Haplotype ID out of bounds.")); + throw std::logic_error(MAKE_ERROR("Haplotype ID out of bounds.")); } for (size_t i = 0; i < words[hap_id1].size(); ++i) { if (i != 0) { diff --git a/src/hashing/pybind.cpp b/src/hashing/pybind.cpp index 14070da..b8eda37 100644 --- a/src/hashing/pybind.cpp +++ b/src/hashing/pybind.cpp @@ -56,7 +56,7 @@ PYBIND11_MODULE(arg_needle_hashing_pybind, m) { "physical_position_at", [](const HapData& data, size_t site) { if (site >= data.num_sites) { - throw std::logic_error(make_error("Out of bounds site.")); + throw std::logic_error(MAKE_ERROR("Out of bounds site.")); } return data.physical_positions[site]; }, @@ -65,7 +65,7 @@ PYBIND11_MODULE(arg_needle_hashing_pybind, m) { "genetic_position_at", [](const HapData& data, size_t site) { if (site >= data.num_sites) { - throw 
std::logic_error(make_error("Out of bounds site.")); + throw std::logic_error(MAKE_ERROR("Out of bounds site.")); } return data.genetic_positions[site]; }, @@ -74,7 +74,7 @@ PYBIND11_MODULE(arg_needle_hashing_pybind, m) { "site_maf_at", [](const HapData& data, size_t site) { if (site >= data.num_sites) { - throw std::logic_error(make_error("Out of bounds site.")); + throw std::logic_error(MAKE_ERROR("Out of bounds site.")); } return data.site_mafs[site]; }, @@ -83,7 +83,7 @@ PYBIND11_MODULE(arg_needle_hashing_pybind, m) { "sample_name", [](const HapData& data, size_t hap_id) { if (hap_id >= data.num_haps) { - throw std::logic_error(make_error("Out of bounds hap_id.")); + throw std::logic_error(MAKE_ERROR("Out of bounds hap_id.")); } return data.sample_names[hap_id]; }, diff --git a/src/hashing/utils.hpp b/src/hashing/utils.hpp index 541b691..85619b5 100644 --- a/src/hashing/utils.hpp +++ b/src/hashing/utils.hpp @@ -21,13 +21,11 @@ #define ARG_NEEDLE_UTILS_HPP #include -#include -inline std::string make_error(const std::string& msg, - const std::source_location& loc = std::source_location::current()) -{ - return std::string(loc.file_name()) + ":" + - std::to_string(loc.line()) + ": " + msg; +inline std::string make_error(const std::string &msg, const char *file, const int line) noexcept { + return std::string(file) + ":" + std::to_string(line) + ": " + msg; } +#define MAKE_ERROR(msg) (make_error((msg), __FILE__, __LINE__)) + #endif // ARG_NEEDLE_UTILS_HPP diff --git a/test/cpp/test_utils.cpp b/test/cpp/test_utils.cpp index 84473a1..db78b30 100644 --- a/test/cpp/test_utils.cpp +++ b/test/cpp/test_utils.cpp @@ -26,7 +26,7 @@ using Catch::Matchers::ContainsSubstring; void test_throw() { - throw std::logic_error(make_error("Something went wrong")); + throw std::logic_error(MAKE_ERROR("Something went wrong")); } From ab72f4c6eaccdead8d0209cb18fe8f6077615459 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Thu, 18 Dec 2025 11:57:00 +0000 Subject: [PATCH 33/33] Bump date 
for release --- RELEASE_NOTES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index f2dad19..cfe98a9 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,6 +1,6 @@ # arg-needle Release Notes -## v1.1.0 (2025-12-04) +## v1.1.0 (2025-12-18) - Public source code release: https://github.com/PalamaraLab/arg-needle - Python wheels are now available for Linux and macOS on both x86_64 and arm64/AArch64 architectures, for CPython versions 3.9 to 3.14 inclusive.