From 715ff0e11f274d9072ecbd51957f3d98b41e58dd Mon Sep 17 00:00:00 2001 From: bielj Date: Thu, 18 Jun 2026 10:47:40 -0400 Subject: [PATCH 1/6] Switch to CUDA 12.6 devel build --- Dockerfile | 164 +++++++++++++++-------------------------------------- 1 file changed, 47 insertions(+), 117 deletions(-) diff --git a/Dockerfile b/Dockerfile index 96749c9..bb26f52 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,14 @@ -# ==================== BUILDER ==================== -FROM ubuntu:22.04 AS builder +# ==================== CUDA 12.6 (single-stage, devel) ==================== +# CUDA 12.6 *devel* image: nvcc + full CUDA Toolkit, kept for the final runtime so GROMACS builds +# with -DGMX_GPU=CUDA AND future CUDA-dependent tooling can compile inside the container. +# Host needs the NVIDIA driver + NVIDIA Container Toolkit at run time (e.g. docker run --gpus all). +# docker build --build-arg CUDA_IMAGE_TAG=12.6.3 -t diffuseproject/md:gpu . +ARG CUDA_IMAGE_TAG=12.6.3 +FROM nvidia/cuda:${CUDA_IMAGE_TAG}-devel-ubuntu22.04 ENV DEBIAN_FRONTEND=noninteractive +# ---------- system packages (build tools + runtime deps) ---------- RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ ca-certificates \ @@ -10,18 +16,23 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ wget \ git \ bzip2 \ + coreutils \ + rsync \ + bc \ + libgomp1 \ && rm -rf /var/lib/apt/lists/* # ---------- install micromamba ---------- +# Install envs under /opt so shebangs (#!/opt/micromamba/...) match the runtime layout. RUN curl -L micro.mamba.pm/install.sh -o /tmp/micromamba_install.sh \ && printf '\n\n\n\n' | bash /tmp/micromamba_install.sh \ - && rm /tmp/micromamba_install.sh + && rm /tmp/micromamba_install.sh \ + && mkdir -p /opt/micromamba/bin \ + && cp /root/.local/bin/micromamba /opt/micromamba/bin/micromamba -# Install envs under /opt so copied shebangs (#!/opt/micromamba/...) match the final image layout. -RUN mkdir -p /opt/micromamba ENV MAMBA_ROOT_PREFIX=/opt/micromamba -ENV MAMBA_EXE=/root/.local/bin/micromamba -ENV PATH="/root/.local/bin:${PATH}" +ENV MAMBA_EXE=/opt/micromamba/bin/micromamba +ENV PATH="/opt/micromamba/bin:${PATH}" # ---------- conda environment (inline of lunus.yaml) ---------- RUN cat > /tmp/lunus.yaml <<'YAML' @@ -69,7 +80,7 @@ ENV CONDA_PREFIX="${MAMBA_ENV}" # ---------- pip packages ---------- RUN pip install --no-cache-dir git+https://github.com/ando-lab/mdx2.git -# ---------- GROMACS (inline of install_gromacs.sh) ---------- +# ---------- GROMACS (CUDA build, targeting H100 / sm_90) ---------- RUN set -ex \ && d=$(mktemp -d) \ && cd "$d" \ @@ -79,14 +90,17 @@ RUN set -ex \ && mkdir build && cd build \ && cmake .. \ -DGMX_BUILD_OWN_FFTW=ON \ + -DGMX_GPU=CUDA \ + -DCUDAToolkit_ROOT=/usr/local/cuda \ + -DGMX_CUDA_TARGET_SM=90 \ && make -j"$(nproc)" \ && make install \ && cd / \ && rm -rf "$d" # ---------- lunus ---------- -RUN mkdir -p /root/packages \ - && cd /root/packages \ +RUN mkdir -p /opt/packages \ + && cd /opt/packages \ && git clone https://github.com/lanl/lunus \ && cd lunus \ && scons enable-openmp=True @@ -96,132 +110,48 @@ RUN $MAMBA_EXE remove -n lunus -y scons cmake \ && $MAMBA_EXE clean -afy \ && find /opt/micromamba -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null; \ find /opt/micromamba -name "*.pyc" -delete 2>/dev/null; \ - rm -rf /root/packages/lunus/.git; \ + rm -rf /opt/packages/lunus/.git; \ true -# ==================== FINAL ==================== -FROM ubuntu:22.04 - -ENV DEBIAN_FRONTEND=noninteractive - +# ---------- ChimeraX ---------- ARG CHIMERAX_URL="https://www.cgl.ucsf.edu/chimerax/cgi-bin/secure/chimerax-get.py?file=current/ubuntu-22.04/chimerax-daily.deb" -RUN apt-get update && apt-get install -y --no-install-recommends \ - ca-certificates \ - coreutils \ - rsync \ - bc \ - bzip2 \ - curl \ - libgomp1 \ +RUN apt-get update \ && curl -s -c /tmp/cx_cookies -d "choice=Accept" "${CHIMERAX_URL}" \ | grep -oP 'url=\K[^"]*' > /tmp/cx_redirect \ && curl -s -b /tmp/cx_cookies -o /tmp/chimerax.deb \ "https://www.cgl.ucsf.edu$(cat /tmp/cx_redirect)" \ && apt-get install -y /tmp/chimerax.deb \ && rm -f /tmp/chimerax.deb /tmp/cx_cookies /tmp/cx_redirect \ - && apt-get purge -y curl \ - && apt-get autoremove -y \ && rm -rf /var/lib/apt/lists/* -# ---------- bashrc (inline of bashrc_new) ---------- -RUN cat > /etc/skel/.bashrc <<'BASHRC' -# If not running interactively, don't do anything -#case $- in -# *i*) ;; -# *) return;; -#esac - -HISTCONTROL=ignoreboth -shopt -s histappend -HISTSIZE=1000 -HISTFILESIZE=2000 -shopt -s checkwinsize - -[ -x /usr/bin/lesspipe ] && eval "$(SHELL=/bin/sh lesspipe)" - -if [ -z "${debian_chroot:-}" ] && [ -r /etc/debian_chroot ]; then - debian_chroot=$(cat /etc/debian_chroot) -fi - -case "$TERM" in - xterm-color|*-256color) color_prompt=yes;; -esac - -if [ -n "$force_color_prompt" ]; then - if [ -x /usr/bin/tput ] && tput setaf 1 >&/dev/null; then - color_prompt=yes - else - color_prompt= - fi -fi - -if [ "$color_prompt" = yes ]; then - PS1='${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ ' -else - PS1='${debian_chroot:+($debian_chroot)}\u@\h:\w\$ ' -fi -unset color_prompt force_color_prompt - -case "$TERM" in -xterm*|rxvt*) - PS1="\[\e]0;${debian_chroot:+($debian_chroot)}\u@\h: \w\a\]$PS1" - ;; -*) - ;; -esac - -if [ -x /usr/bin/dircolors ]; then - test -r ~/.dircolors && eval "$(dircolors -b ~/.dircolors)" || eval "$(dircolors -b)" - alias ls='ls --color=auto' - alias grep='grep --color=auto' - alias fgrep='fgrep --color=auto' - alias egrep='egrep --color=auto' -fi - -alias ll='ls -alF' -alias la='ls -A' -alias l='ls -CF' - -if [ -f ~/.bash_aliases ]; then - . ~/.bash_aliases -fi - -if ! shopt -oq posix; then - if [ -f /usr/share/bash-completion/bash_completion ]; then - . /usr/share/bash-completion/bash_completion - elif [ -f /etc/bash_completion ]; then - . /etc/bash_completion - fi -fi - -eval "$(micromamba shell hook --shell bash)" -BASHRC - -# ---------- copy artifacts from builder ---------- -COPY --from=builder /root/.local/bin/micromamba /opt/micromamba/bin/micromamba -COPY --from=builder /opt/micromamba /opt/micromamba -COPY --from=builder /usr/local/gromacs /usr/local/gromacs -COPY --from=builder /root/packages /opt/packages +# ---------- shell init (minimal) ---------- +# Runs as root by default (see end of file); downstream images (e.g. Dockerfile.astera) own the +# general user environment. Put the micromamba hook in the *global* bashrc so `micromamba activate` +# works for root and for any UID supplied via `docker run --user ...` (skel only covers new users). +RUN printf '\n# Enable `micromamba activate` in interactive shells\neval "$(micromamba shell hook --shell bash)"\n' >> /etc/bash.bashrc +# ---------- md-workflows ---------- # Ship md-workflows in the lunus env so Hub users need not pip install / extend PATH. +# Placed late so code edits don't invalidate the expensive conda/GROMACS layers. COPY pyproject.toml /opt/md-workflows/pyproject.toml COPY md_workflows /opt/md-workflows/md_workflows RUN /opt/micromamba/envs/lunus/bin/python -m pip install --no-cache-dir /opt/md-workflows -ENV MAMBA_ROOT_PREFIX=/opt/micromamba -ENV MAMBA_EXE=/opt/micromamba/bin/micromamba ENV PATH="/opt/micromamba/bin:/opt/micromamba/envs/lunus/bin:/usr/local/gromacs/bin:${PATH}" -ENV CONDA_PREFIX=/opt/micromamba/envs/lunus -ENV HOME=/home/mduser - -ARG UID=1000 -ARG GID=1000 -RUN groupadd -g "${GID}" mduser \ - && useradd -m -u "${UID}" -g "${GID}" -s /bin/bash mduser \ - && mkdir -p /workspace \ - && chown -R mduser:mduser /home/mduser /workspace /opt/micromamba /opt/packages /opt/md-workflows -USER mduser +# ---------- GPU runtime metadata (placed late so it doesn't bust the build cache) ---------- +# Do NOT auto-claim GPUs. The base nvidia/cuda image sets NVIDIA_VISIBLE_DEVICES=all, which under +# Kubernetes (NVIDIA device plugin / GPU Operator) overrides per-pod GPU isolation and exposes every +# node GPU regardless of resource requests. Override to "void" so GPUs are granted only at run time: +# `docker run --gpus ...` and the K8s device plugin both set NVIDIA_VISIBLE_DEVICES themselves. +ENV NVIDIA_VISIBLE_DEVICES=void +# Capabilities to mount when a GPU *is* granted (harmless when none is). +ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility + +# Run as root by default. No baked non-root user: downstream images (Dockerfile.astera) run as root, +# and the standalone README workflow overrides identity with `docker run --user "$(id -u):$(id -g)"`. +# /opt artifacts stay root-owned at default perms, so they remain readable/executable by any UID. +# A dedicated user can be added later if a use case needs one. WORKDIR /workspace SHELL ["/bin/bash", "-c"] CMD ["bash"] From d9c4fb01ac9c1435a1013beb53b746d9080e75d7 Mon Sep 17 00:00:00 2001 From: bielj Date: Thu, 18 Jun 2026 11:24:10 -0400 Subject: [PATCH 2/6] Enforce AVX_512 CPU --- Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Dockerfile b/Dockerfile index bb26f52..6d24daf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -81,6 +81,9 @@ ENV CONDA_PREFIX="${MAMBA_ENV}" RUN pip install --no-cache-dir git+https://github.com/ando-lab/mdx2.git # ---------- GROMACS (CUDA build, targeting H100 / sm_90) ---------- +# GMX_SIMD is pinned to AVX_512 (Voltage Park Xeon Platinum supports it) instead of letting CMake +# auto-detect from the build host: under QEMU emulation detection falls back to SSE4.1, which would +# cripple CPU-side kernels. Pinning makes the CPU SIMD deployment-correct regardless of build host. RUN set -ex \ && d=$(mktemp -d) \ && cd "$d" \ @@ -93,6 +96,7 @@ RUN set -ex \ -DGMX_GPU=CUDA \ -DCUDAToolkit_ROOT=/usr/local/cuda \ -DGMX_CUDA_TARGET_SM=90 \ + -DGMX_SIMD=AVX_512 \ && make -j"$(nproc)" \ && make install \ && cd / \ From 7578642a282b3ce46bd55d0293459c008c9a2884 Mon Sep 17 00:00:00 2001 From: bielj Date: Thu, 18 Jun 2026 12:50:54 -0400 Subject: [PATCH 3/6] Add Dockerfile.astera for second stage build --- Dockerfile.astera | 99 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 Dockerfile.astera diff --git a/Dockerfile.astera b/Dockerfile.astera new file mode 100644 index 0000000..8748fef --- /dev/null +++ b/Dockerfile.astera @@ -0,0 +1,99 @@ +# syntax=docker/dockerfile:1 +# md-workflows - Astera ACTL overlay. +# +# The diffuseproject/md:0.0.3-rc1 base is the CUDA 12.6 / GPU-enabled GROMACS +# image (single-stage nvidia/cuda devel, GROMACS built with CUDA sm_90 + AVX_512). +# This overlay keeps that scientific stack intact and only adds the ACTL workspace +# conventions: /home/dev as the persisted home, editor/sync tools, root-oriented +# interactive pods, and global shell init for the lunus env. +# +# Build locally: +# docker buildx build --platform linux/amd64 \ +# -f Dockerfile.astera \ +# --build-arg MD_WORKFLOWS_BASE_IMAGE=docker.io/diffuseproject/md:0.0.3-rc1@sha256:731f992e324c73a44454daa077d6bad133b2e8467d1c42b9815b9e9d172a0638 \ +# -t harbor.astera.sh/library/md-workflows:0.0.3-rc1-actl-2026-06-18 \ +# . + +# Pinned to the linux/amd64 child manifest (not the multi-arch index digest), so the FROM resolves +# unambiguously to the amd64 image without platform-selecting against the index + provenance entry. +ARG MD_WORKFLOWS_BASE_IMAGE=docker.io/diffuseproject/md:0.0.3-rc1@sha256:731f992e324c73a44454daa077d6bad133b2e8467d1c42b9815b9e9d172a0638 +FROM ${MD_WORKFLOWS_BASE_IMAGE} AS astera + +USER root + +ARG ACTL_PACKAGES="bash ca-certificates curl wget rsync tini vim nano emacs-nox git zsh htop tmux ncdu iputils-ping dnsutils" + +ENV DEBIAN_FRONTEND=noninteractive \ + HOME=/home/dev \ + XDG_CONFIG_HOME=/home/dev/.config \ + XDG_CACHE_HOME=/home/dev/.cache \ + XDG_DATA_HOME=/home/dev/.local/share \ + SHELL=/bin/bash \ + MAMBA_ROOT_PREFIX=/opt/micromamba \ + MAMBA_EXE=/opt/micromamba/bin/micromamba \ + CONDA_PREFIX=/opt/micromamba/envs/lunus \ + PYTHONPATH=/home/dev/workspace:/opt/md-workflows \ + PATH="/opt/micromamba/bin:/opt/micromamba/envs/lunus/bin:/usr/local/gromacs/bin:${PATH}" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + ${ACTL_PACKAGES} \ + bc \ + bzip2 \ + coreutils \ + libgomp1 \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get clean \ + && mkdir -p /home/dev/.config /home/dev/.cache /home/dev/.local/share /home/dev/workspace /etc/zsh \ + && cat > /usr/local/share/actl-md-workflows-shell-init.sh <<'EOF' +# Keep the baked md-workflows/lunus environment active while letting ACTL's +# synced checkout at /home/dev/workspace override the baked package for edits. +if [ -d /opt/micromamba/envs/lunus/bin ]; then + case ":${PATH}:" in + *:/opt/micromamba/envs/lunus/bin:*) ;; + *) PATH="/opt/micromamba/envs/lunus/bin:${PATH}" ;; + esac + export PATH + CONDA_PREFIX=/opt/micromamba/envs/lunus + export CONDA_PREFIX +fi + +if [ -x /opt/micromamba/bin/micromamba ]; then + case "${ZSH_VERSION:+zsh}${BASH_VERSION:+bash}" in + zsh*) eval "$(/opt/micromamba/bin/micromamba shell hook --shell zsh 2>/dev/null)" || true ;; + *bash*) eval "$(/opt/micromamba/bin/micromamba shell hook --shell bash 2>/dev/null)" || true ;; + esac +fi + +if [ -d /home/dev/workspace ]; then + case ":${PYTHONPATH:-}:" in + *:/home/dev/workspace:*) ;; + *) PYTHONPATH="/home/dev/workspace${PYTHONPATH:+:${PYTHONPATH}}" ;; + esac +fi +if [ -d /opt/md-workflows ]; then + case ":${PYTHONPATH:-}:" in + *:/opt/md-workflows:*) ;; + *) PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}/opt/md-workflows" ;; + esac +fi +export PYTHONPATH + +case "$-" in + *i*) + if [ -d /home/dev/workspace ]; then + cd /home/dev/workspace || true + fi + ;; +esac +EOF +RUN chmod 0644 /usr/local/share/actl-md-workflows-shell-init.sh \ + && printf '\n# Astera md-workflows environment\n[ -r /usr/local/share/actl-md-workflows-shell-init.sh ] && . /usr/local/share/actl-md-workflows-shell-init.sh\n' >> /etc/bash.bashrc \ + && printf '\n# Astera md-workflows environment\n[ -r /usr/local/share/actl-md-workflows-shell-init.sh ] && . /usr/local/share/actl-md-workflows-shell-init.sh\n' >> /etc/zsh/zshrc \ + && for cmd in \ + md_workflows.mdmx gmx micromamba curl wget rsync tini vim nano emacs git zsh htop tmux ncdu ping dig; do \ + command -v "${cmd}" >/dev/null; \ + done + +WORKDIR /home/dev +SHELL ["/bin/bash", "-c"] +CMD ["bash"] From b7eadd528b0ad3576459be29d91e20bc3362c30a Mon Sep 17 00:00:00 2001 From: bielj Date: Tue, 23 Jun 2026 14:34:10 -0400 Subject: [PATCH 4/6] Add required directory for ChimeraX --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 6d24daf..0422aee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -126,7 +126,8 @@ RUN apt-get update \ "https://www.cgl.ucsf.edu$(cat /tmp/cx_redirect)" \ && apt-get install -y /tmp/chimerax.deb \ && rm -f /tmp/chimerax.deb /tmp/cx_cookies /tmp/cx_redirect \ - && rm -rf /var/lib/apt/lists/* + && rm -rf /var/lib/apt/lists/* \ + && mkdir -p /home/dev/.config/ChimeraX # ---------- shell init (minimal) ---------- # Runs as root by default (see end of file); downstream images (e.g. Dockerfile.astera) own the From 0557ae41f1e762fb05194d0c76c7d3783836c796 Mon Sep 17 00:00:00 2001 From: bielj Date: Thu, 25 Jun 2026 14:43:26 -0400 Subject: [PATCH 5/6] build: split Dockerfile into base+gromacs stages; add CI Isolate the non-portable GROMACS compile (GMX_* args) from an arch-neutral base, and install md-workflows last for cache correctness. Add ci.yml (ruff lint + buildx --check, no push). Pin [tool.ruff], move ruff to a dev extra, and make the tree lint-clean. The actl overlay moves to the astera branch as Dockerfile.actl. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/ci.yml | 73 ++++++++++++++ Dockerfile.astera | 99 ------------------ Dockerfile => Dockerfile.base | 65 ++++-------- Dockerfile.gromacs | 60 +++++++++++ README.md | 49 +++++++-- md_workflows/cli.py | 18 ++-- md_workflows/equilibrate.py | 77 +++++++++----- md_workflows/make_crystal.py | 28 ++++-- md_workflows/make_waterbox.py | 146 ++++++++++++++++++--------- md_workflows/minimize.py | 45 ++++++--- md_workflows/param_prot.py | 23 +++-- md_workflows/pdb_file_processing.py | 29 ++++-- md_workflows/resolvate.py | 138 ++++++++++++++++++-------- md_workflows/run_params_gaussian.py | 149 +++++++++++++++++++++------- md_workflows/solvate.py | 54 ++++++---- md_workflows/workflows/mdmx.py | 24 ++++- pyproject.toml | 16 +++ 17 files changed, 723 insertions(+), 370 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 Dockerfile.astera rename Dockerfile => Dockerfile.base (66%) create mode 100644 Dockerfile.gromacs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..78417b8 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,73 @@ +name: CI + +# Cheap, hosted-runner checks for every push/PR. Deliberately does NOT build or push images: +# the expensive GROMACS/CUDA builds run only in build-images.yml (on the Astera infra branch, +# on push/dispatch/tags — never on pull_request), so fork PRs can't trigger heavy compute. +on: + push: + branches: ['**'] + pull_request: + branches: ['**'] + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: ci-${{ github.ref }} + cancel-in-progress: true + +jobs: + lint: + name: Lint Python code + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install linters + run: | + python -m pip install --upgrade pip + pip install '.[dev]' + + - name: Run ruff lint check + run: ruff check md_workflows + + - name: Run ruff format check + run: ruff format --check md_workflows + + build-check: + name: Dockerfile build checks (no push) + runs-on: ubuntu-latest + needs: lint + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Validate Dockerfiles with buildx --check + # `--check` lints/validates each Dockerfile (syntax, build args, reserved words) without + # executing any build step or pulling real bases. Placeholder build-args satisfy the + # chained `FROM ${BASE_IMAGE}`/`${GROMACS_IMAGE}` so the graph resolves. Runs against + # whichever stages are present on the branch (base+gromacs on main; +actl on astera). + run: | + set -e + for f in Dockerfile.base Dockerfile.gromacs Dockerfile.actl; do + if [ ! -f "$f" ]; then + echo "skip $f (not present on this branch)" + continue + fi + echo "::group::buildx --check $f" + docker buildx build --check \ + --build-arg BASE_IMAGE=scratch \ + --build-arg GROMACS_IMAGE=scratch \ + -f "$f" . + echo "::endgroup::" + done diff --git a/Dockerfile.astera b/Dockerfile.astera deleted file mode 100644 index 8748fef..0000000 --- a/Dockerfile.astera +++ /dev/null @@ -1,99 +0,0 @@ -# syntax=docker/dockerfile:1 -# md-workflows - Astera ACTL overlay. -# -# The diffuseproject/md:0.0.3-rc1 base is the CUDA 12.6 / GPU-enabled GROMACS -# image (single-stage nvidia/cuda devel, GROMACS built with CUDA sm_90 + AVX_512). -# This overlay keeps that scientific stack intact and only adds the ACTL workspace -# conventions: /home/dev as the persisted home, editor/sync tools, root-oriented -# interactive pods, and global shell init for the lunus env. -# -# Build locally: -# docker buildx build --platform linux/amd64 \ -# -f Dockerfile.astera \ -# --build-arg MD_WORKFLOWS_BASE_IMAGE=docker.io/diffuseproject/md:0.0.3-rc1@sha256:731f992e324c73a44454daa077d6bad133b2e8467d1c42b9815b9e9d172a0638 \ -# -t harbor.astera.sh/library/md-workflows:0.0.3-rc1-actl-2026-06-18 \ -# . - -# Pinned to the linux/amd64 child manifest (not the multi-arch index digest), so the FROM resolves -# unambiguously to the amd64 image without platform-selecting against the index + provenance entry. -ARG MD_WORKFLOWS_BASE_IMAGE=docker.io/diffuseproject/md:0.0.3-rc1@sha256:731f992e324c73a44454daa077d6bad133b2e8467d1c42b9815b9e9d172a0638 -FROM ${MD_WORKFLOWS_BASE_IMAGE} AS astera - -USER root - -ARG ACTL_PACKAGES="bash ca-certificates curl wget rsync tini vim nano emacs-nox git zsh htop tmux ncdu iputils-ping dnsutils" - -ENV DEBIAN_FRONTEND=noninteractive \ - HOME=/home/dev \ - XDG_CONFIG_HOME=/home/dev/.config \ - XDG_CACHE_HOME=/home/dev/.cache \ - XDG_DATA_HOME=/home/dev/.local/share \ - SHELL=/bin/bash \ - MAMBA_ROOT_PREFIX=/opt/micromamba \ - MAMBA_EXE=/opt/micromamba/bin/micromamba \ - CONDA_PREFIX=/opt/micromamba/envs/lunus \ - PYTHONPATH=/home/dev/workspace:/opt/md-workflows \ - PATH="/opt/micromamba/bin:/opt/micromamba/envs/lunus/bin:/usr/local/gromacs/bin:${PATH}" - -RUN apt-get update && apt-get install -y --no-install-recommends \ - ${ACTL_PACKAGES} \ - bc \ - bzip2 \ - coreutils \ - libgomp1 \ - && rm -rf /var/lib/apt/lists/* \ - && apt-get clean \ - && mkdir -p /home/dev/.config /home/dev/.cache /home/dev/.local/share /home/dev/workspace /etc/zsh \ - && cat > /usr/local/share/actl-md-workflows-shell-init.sh <<'EOF' -# Keep the baked md-workflows/lunus environment active while letting ACTL's -# synced checkout at /home/dev/workspace override the baked package for edits. -if [ -d /opt/micromamba/envs/lunus/bin ]; then - case ":${PATH}:" in - *:/opt/micromamba/envs/lunus/bin:*) ;; - *) PATH="/opt/micromamba/envs/lunus/bin:${PATH}" ;; - esac - export PATH - CONDA_PREFIX=/opt/micromamba/envs/lunus - export CONDA_PREFIX -fi - -if [ -x /opt/micromamba/bin/micromamba ]; then - case "${ZSH_VERSION:+zsh}${BASH_VERSION:+bash}" in - zsh*) eval "$(/opt/micromamba/bin/micromamba shell hook --shell zsh 2>/dev/null)" || true ;; - *bash*) eval "$(/opt/micromamba/bin/micromamba shell hook --shell bash 2>/dev/null)" || true ;; - esac -fi - -if [ -d /home/dev/workspace ]; then - case ":${PYTHONPATH:-}:" in - *:/home/dev/workspace:*) ;; - *) PYTHONPATH="/home/dev/workspace${PYTHONPATH:+:${PYTHONPATH}}" ;; - esac -fi -if [ -d /opt/md-workflows ]; then - case ":${PYTHONPATH:-}:" in - *:/opt/md-workflows:*) ;; - *) PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}/opt/md-workflows" ;; - esac -fi -export PYTHONPATH - -case "$-" in - *i*) - if [ -d /home/dev/workspace ]; then - cd /home/dev/workspace || true - fi - ;; -esac -EOF -RUN chmod 0644 /usr/local/share/actl-md-workflows-shell-init.sh \ - && printf '\n# Astera md-workflows environment\n[ -r /usr/local/share/actl-md-workflows-shell-init.sh ] && . /usr/local/share/actl-md-workflows-shell-init.sh\n' >> /etc/bash.bashrc \ - && printf '\n# Astera md-workflows environment\n[ -r /usr/local/share/actl-md-workflows-shell-init.sh ] && . /usr/local/share/actl-md-workflows-shell-init.sh\n' >> /etc/zsh/zshrc \ - && for cmd in \ - md_workflows.mdmx gmx micromamba curl wget rsync tini vim nano emacs git zsh htop tmux ncdu ping dig; do \ - command -v "${cmd}" >/dev/null; \ - done - -WORKDIR /home/dev -SHELL ["/bin/bash", "-c"] -CMD ["bash"] diff --git a/Dockerfile b/Dockerfile.base similarity index 66% rename from Dockerfile rename to Dockerfile.base index 0422aee..a2960be 100644 --- a/Dockerfile +++ b/Dockerfile.base @@ -1,8 +1,16 @@ -# ==================== CUDA 12.6 (single-stage, devel) ==================== -# CUDA 12.6 *devel* image: nvcc + full CUDA Toolkit, kept for the final runtime so GROMACS builds -# with -DGMX_GPU=CUDA AND future CUDA-dependent tooling can compile inside the container. -# Host needs the NVIDIA driver + NVIDIA Container Toolkit at run time (e.g. docker run --gpus all). -# docker build --build-arg CUDA_IMAGE_TAG=12.6.3 -t diffuseproject/md:gpu . +# ==================== md-workflows base (stage 1 of 3) ==================== +# Architecture-neutral foundation: CUDA 12.6 *devel* toolchain + the full scientific +# conda stack (AmberTools / DIALS / cctbx / mdtraj / …), lunus, and ChimeraX. This layer +# carries everything that is portable across compute; the non-portable GROMACS compile and +# the md-workflows app are added downstream in Dockerfile.gromacs, and Astera workspace +# conventions in Dockerfile.actl. +# +# CUDA *devel* (nvcc + full Toolkit) is kept in the runtime so GROMACS can build with +# -DGMX_GPU=CUDA downstream and future CUDA tooling can compile inside the container. The +# host needs the NVIDIA driver + NVIDIA Container Toolkit at run time (docker run --gpus all). +# +# Build: +# docker build -f Dockerfile.base --build-arg CUDA_IMAGE_TAG=12.6.3 -t md-base:local . ARG CUDA_IMAGE_TAG=12.6.3 FROM nvidia/cuda:${CUDA_IMAGE_TAG}-devel-ubuntu22.04 @@ -35,6 +43,9 @@ ENV MAMBA_EXE=/opt/micromamba/bin/micromamba ENV PATH="/opt/micromamba/bin:${PATH}" # ---------- conda environment (inline of lunus.yaml) ---------- +# cmake is kept here (not removed in this stage) because Dockerfile.gromacs needs it to +# build GROMACS; it is removed at the end of that downstream stage. scons is removed below +# once lunus is built. RUN cat > /tmp/lunus.yaml <<'YAML' name: lunus channels: @@ -80,28 +91,6 @@ ENV CONDA_PREFIX="${MAMBA_ENV}" # ---------- pip packages ---------- RUN pip install --no-cache-dir git+https://github.com/ando-lab/mdx2.git -# ---------- GROMACS (CUDA build, targeting H100 / sm_90) ---------- -# GMX_SIMD is pinned to AVX_512 (Voltage Park Xeon Platinum supports it) instead of letting CMake -# auto-detect from the build host: under QEMU emulation detection falls back to SSE4.1, which would -# cripple CPU-side kernels. Pinning makes the CPU SIMD deployment-correct regardless of build host. -RUN set -ex \ - && d=$(mktemp -d) \ - && cd "$d" \ - && wget https://ftp.gromacs.org/gromacs/gromacs-2025.2.tar.gz \ - && tar xfz gromacs-2025.2.tar.gz \ - && cd gromacs-2025.2 \ - && mkdir build && cd build \ - && cmake .. \ - -DGMX_BUILD_OWN_FFTW=ON \ - -DGMX_GPU=CUDA \ - -DCUDAToolkit_ROOT=/usr/local/cuda \ - -DGMX_CUDA_TARGET_SM=90 \ - -DGMX_SIMD=AVX_512 \ - && make -j"$(nproc)" \ - && make install \ - && cd / \ - && rm -rf "$d" - # ---------- lunus ---------- RUN mkdir -p /opt/packages \ && cd /opt/packages \ @@ -109,8 +98,8 @@ RUN mkdir -p /opt/packages \ && cd lunus \ && scons enable-openmp=True -# ---------- cleanup: remove build-only packages + caches ---------- -RUN $MAMBA_EXE remove -n lunus -y scons cmake \ +# ---------- cleanup: drop the lunus-only build tool + caches (keep cmake for GROMACS) ---------- +RUN $MAMBA_EXE remove -n lunus -y scons \ && $MAMBA_EXE clean -afy \ && find /opt/micromamba -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null; \ find /opt/micromamba -name "*.pyc" -delete 2>/dev/null; \ @@ -130,20 +119,11 @@ RUN apt-get update \ && mkdir -p /home/dev/.config/ChimeraX # ---------- shell init (minimal) ---------- -# Runs as root by default (see end of file); downstream images (e.g. Dockerfile.astera) own the -# general user environment. Put the micromamba hook in the *global* bashrc so `micromamba activate` -# works for root and for any UID supplied via `docker run --user ...` (skel only covers new users). +# Runs as root by default (see end of file); downstream images (Dockerfile.actl) own the +# general user environment. Put the micromamba hook in the *global* bashrc so `micromamba +# activate` works for root and for any UID supplied via `docker run --user ...`. RUN printf '\n# Enable `micromamba activate` in interactive shells\neval "$(micromamba shell hook --shell bash)"\n' >> /etc/bash.bashrc -# ---------- md-workflows ---------- -# Ship md-workflows in the lunus env so Hub users need not pip install / extend PATH. -# Placed late so code edits don't invalidate the expensive conda/GROMACS layers. -COPY pyproject.toml /opt/md-workflows/pyproject.toml -COPY md_workflows /opt/md-workflows/md_workflows -RUN /opt/micromamba/envs/lunus/bin/python -m pip install --no-cache-dir /opt/md-workflows - -ENV PATH="/opt/micromamba/bin:/opt/micromamba/envs/lunus/bin:/usr/local/gromacs/bin:${PATH}" - # ---------- GPU runtime metadata (placed late so it doesn't bust the build cache) ---------- # Do NOT auto-claim GPUs. The base nvidia/cuda image sets NVIDIA_VISIBLE_DEVICES=all, which under # Kubernetes (NVIDIA device plugin / GPU Operator) overrides per-pod GPU isolation and exposes every @@ -153,10 +133,9 @@ ENV NVIDIA_VISIBLE_DEVICES=void # Capabilities to mount when a GPU *is* granted (harmless when none is). ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility -# Run as root by default. No baked non-root user: downstream images (Dockerfile.astera) run as root, +# Run as root by default. No baked non-root user: downstream images (Dockerfile.actl) run as root, # and the standalone README workflow overrides identity with `docker run --user "$(id -u):$(id -g)"`. # /opt artifacts stay root-owned at default perms, so they remain readable/executable by any UID. -# A dedicated user can be added later if a use case needs one. WORKDIR /workspace SHELL ["/bin/bash", "-c"] CMD ["bash"] diff --git a/Dockerfile.gromacs b/Dockerfile.gromacs new file mode 100644 index 0000000..383b08b --- /dev/null +++ b/Dockerfile.gromacs @@ -0,0 +1,60 @@ +# ==================== md-workflows GROMACS + app (stage 2 of 3) ==================== +# Builds GROMACS on top of Dockerfile.base, then installs the md-workflows package. This is +# the deliberately *non-portable* stage: the GPU/CPU-architecture choices below are tuned for +# Astera's Voltage Park nodes (H100 GPUs, AVX-512 Xeon Platinum). Forks targeting other +# hardware should only need to override the GMX_* build args (or this single file) without +# touching the base image: +# --build-arg GMX_CUDA_TARGET_SM=80 (A100) +# --build-arg GMX_GPU=OpenCL (non-NVIDIA) +# --build-arg GMX_SIMD=AVX2_256 (older / non-AVX-512 CPUs) +# +# Build (after building md-base:local from Dockerfile.base): +# docker build -f Dockerfile.gromacs --build-arg BASE_IMAGE=md-base:local -t md-gromacs:local . +ARG BASE_IMAGE=md-base:local +FROM ${BASE_IMAGE} + +# ---------- GROMACS build configuration (override these on a fork) ---------- +ARG GMX_VERSION=2025.2 +ARG GMX_GPU=CUDA +ARG GMX_CUDA_TARGET_SM=90 +ARG GMX_SIMD=AVX_512 + +# ---------- GROMACS (CUDA build, targeting H100 / sm_90) ---------- +# GMX_SIMD is pinned (default AVX_512, which Voltage Park Xeon Platinum supports) instead of +# letting CMake auto-detect from the build host: under QEMU emulation detection falls back to +# SSE4.1, which would cripple CPU-side kernels. Pinning makes the CPU SIMD deployment-correct +# regardless of build host. cmake comes from the base conda env and is removed below afterward. +RUN set -ex \ + && d=$(mktemp -d) \ + && cd "$d" \ + && wget https://ftp.gromacs.org/gromacs/gromacs-${GMX_VERSION}.tar.gz \ + && tar xfz gromacs-${GMX_VERSION}.tar.gz \ + && cd gromacs-${GMX_VERSION} \ + && mkdir build && cd build \ + && cmake .. \ + -DGMX_BUILD_OWN_FFTW=ON \ + -DGMX_GPU=${GMX_GPU} \ + -DCUDAToolkit_ROOT=/usr/local/cuda \ + -DGMX_CUDA_TARGET_SM=${GMX_CUDA_TARGET_SM} \ + -DGMX_SIMD=${GMX_SIMD} \ + && make -j"$(nproc)" \ + && make install \ + && cd / \ + && rm -rf "$d" + +# ---------- cleanup: drop the GROMACS-only build tool + caches ---------- +RUN $MAMBA_EXE remove -n lunus -y cmake \ + && $MAMBA_EXE clean -afy + +# ---------- md-workflows ---------- +# Ship md-workflows in the lunus env so users need not pip install / extend PATH. Placed last +# (downstream of the expensive GROMACS layer) so code edits don't invalidate GROMACS or base. +COPY pyproject.toml /opt/md-workflows/pyproject.toml +COPY md_workflows /opt/md-workflows/md_workflows +RUN /opt/micromamba/envs/lunus/bin/python -m pip install --no-cache-dir /opt/md-workflows + +ENV PATH="/opt/micromamba/bin:/opt/micromamba/envs/lunus/bin:/usr/local/gromacs/bin:${PATH}" + +WORKDIR /workspace +SHELL ["/bin/bash", "-c"] +CMD ["bash"] diff --git a/README.md b/README.md index 3da475a..d392d4a 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,43 @@ # md-workflows -This project includes a `Dockerfile` and a Python CLI entry point for running the full MD workflow pipeline. +A Python CLI and Docker image for running the full molecular-dynamics workflow pipeline +(GROMACS + AmberTools + DIALS/cctbx + lunus). -## 1) Build the Docker image +## Docker image layout -From the project root (where the `Dockerfile` is): +The image is built in stages so the architecture-specific GROMACS compile is isolated from +the portable scientific stack — forks targeting other hardware can swap the GROMACS stage +without rebuilding the base: + +| Stage | Dockerfile | Contents | +|-------|-----------|----------| +| base | `Dockerfile.base` | CUDA 12.6 devel toolchain, micromamba/conda `lunus` env, lunus, ChimeraX. Architecture-neutral. | +| gromacs | `Dockerfile.gromacs` | GROMACS (CUDA, tuned for H100 / AVX-512 by default) + the `md_workflows` package. The consumable image. | + +(An Astera-specific `Dockerfile.actl` overlay lives on the `astera` branch and adds workspace +conventions on top of `gromacs`.) + +## 1) Build the images + +From the project root: ```bash -docker pull diffuseproject/md:0.0.1 +# 1. Base — architecture-neutral foundation +docker build -f Dockerfile.base -t md-base:local . + +# 2. GROMACS + md-workflows (the runnable image). +# Override the GMX_* build args for non-H100 / non-AVX-512 hardware, e.g. +# --build-arg GMX_CUDA_TARGET_SM=80 --build-arg GMX_SIMD=AVX2_256 +docker build -f Dockerfile.gromacs --build-arg BASE_IMAGE=md-base:local -t md-gromacs:local . ``` +CI builds these stages and pushes versioned tags (derived from the `version` in +`pyproject.toml`) to the Astera Harbor registry; see `.github/workflows/build-images.yml` on +the `astera` branch. + ## 2) Start a container -Run the container interactively, mounting the current project directory so inputs/outputs are available on your host: +Run interactively, mounting the project directory so inputs/outputs are available on the host: ```bash docker run --rm -it \ @@ -22,11 +47,11 @@ docker run --rm -it \ -e HOME=/workspace \ -v "$(pwd):/workspace" \ -w /workspace \ - diffuseproject/md:0.0.1 \ + md-gromacs:local \ bash ``` -This registers the CLI entry points from `pyproject.toml`, including `md_workflows.run_all`. +This registers the CLI entry points from `pyproject.toml`, including `md_workflows.mdmx`. ## 3) Run the full workflow command @@ -46,3 +71,13 @@ To see all available flags: ```bash md_workflows.mdmx --help ``` + +## Development + +Lint and format with [ruff](https://docs.astral.sh/ruff/) (config in `pyproject.toml`): + +```bash +pip install '.[dev]' +ruff check md_workflows +ruff format --check md_workflows +``` diff --git a/md_workflows/cli.py b/md_workflows/cli.py index 37ac7de..6250c9b 100644 --- a/md_workflows/cli.py +++ b/md_workflows/cli.py @@ -5,14 +5,16 @@ import argparse import sys -from . import equilibrate -from . import make_crystal -from . import make_waterbox -from . import minimize -from . import param_prot -from . import resolvate -from . import run_params_gaussian -from . import solvate +from . import ( + equilibrate, + make_crystal, + make_waterbox, + minimize, + param_prot, + resolvate, + run_params_gaussian, + solvate, +) def _single_command_cli(command: str) -> None: diff --git a/md_workflows/equilibrate.py b/md_workflows/equilibrate.py index 65bbe6b..791ab98 100644 --- a/md_workflows/equilibrate.py +++ b/md_workflows/equilibrate.py @@ -7,7 +7,6 @@ import glob import os import subprocess -import tempfile from pathlib import Path @@ -18,22 +17,40 @@ def run(ntomp: int = 26): _generate_restraints(chain_files) _build_restrained_topology(chain_files) - subprocess.run([ - "gmx", "grompp", - "-f", str(artifacts_dir / "equil.mdp"), - "-c", "md_min.gro", - "-o", "md_equil.tpr", - "-p", "md_model_posre.top", - "-r", "md_model.pdb", - ], capture_output=True, text=True, check=True) - - subprocess.run([ - "gmx", "mdrun", - "-ntmpi", "1", - "-ntomp", str(ntomp), - "-deffnm", "md_equil", - "-v", - ], check=True) + subprocess.run( + [ + "gmx", + "grompp", + "-f", + str(artifacts_dir / "equil.mdp"), + "-c", + "md_min.gro", + "-o", + "md_equil.tpr", + "-p", + "md_model_posre.top", + "-r", + "md_model.pdb", + ], + capture_output=True, + text=True, + check=True, + ) + + subprocess.run( + [ + "gmx", + "mdrun", + "-ntmpi", + "1", + "-ntomp", + str(ntomp), + "-deffnm", + "md_equil", + "-v", + ], + check=True, + ) def _extract_first_copy(): @@ -41,7 +58,7 @@ def _extract_first_copy(): with open("pdb_clean.pdb") as fh: lines = fh.readlines() - lines = [l for l in lines if not l.startswith("JRNL")] + lines = [line for line in lines if not line.startswith("JRNL")] kept = [] found_gol = False @@ -60,8 +77,7 @@ def _extract_first_copy(): fh.writelines(kept) prot_lines = [ - l for l in kept - if (l.startswith(("ATOM", "HETATM", "TER")) and "GOL" not in l) + line for line in kept if (line.startswith(("ATOM", "HETATM", "TER")) and "GOL" not in line) ] with open("first_copy_prot.pdb", "w") as fh: fh.writelines(prot_lines) @@ -102,8 +118,18 @@ def _generate_restraints(chain_files: list[str]): for f in chain_files: subprocess.run(["pdb4amber", "-i", f, "-o", f"{f}_amber.pdb"], check=True) subprocess.run( - ["gmx", "genrestr", "-fc", "209.2", "209.2", "209.2", - "-f", f"{f}_amber.pdb", "-o", f"posre_{f}.itp"], + [ + "gmx", + "genrestr", + "-fc", + "209.2", + "209.2", + "209.2", + "-f", + f"{f}_amber.pdb", + "-o", + f"posre_{f}.itp", + ], input="Protein-H\nq\n", text=True, check=True, @@ -113,6 +139,7 @@ def _generate_restraints(chain_files: list[str]): def _build_restrained_topology(chain_files: list[str]): """Insert #ifdef POSRES_partXX blocks into md_model_posre.top.""" import shutil + shutil.copy("md_model.top", "md_model_posre.top") for molnum_offset, f in enumerate(chain_files): @@ -127,11 +154,7 @@ def _build_restrained_topology(chain_files: list[str]): if "moleculetype" in line.lower(): cnt += 1 if cnt == target_moltype_count: - posre_block = ( - f'#ifdef POSRES_{f}\n' - f'#include "posre_{f}.itp"\n' - f'#endif\n\n' - ) + posre_block = f'#ifdef POSRES_{f}\n#include "posre_{f}.itp"\n#endif\n\n' new_lines.append(posre_block) new_lines.append(line) diff --git a/md_workflows/make_crystal.py b/md_workflows/make_crystal.py index cc6db9f..00a9fa1 100644 --- a/md_workflows/make_crystal.py +++ b/md_workflows/make_crystal.py @@ -4,8 +4,6 @@ bash scripts/make_crystal.sh 1 """ -import os -import re import subprocess from pathlib import Path @@ -53,8 +51,9 @@ def _prepend_cryst1(source_pdb: str, target_pdb: str): lines = fh.readlines() filtered = [ - l for l in lines - if "Na+" not in l and "Cl-" not in l and not l.startswith("CRYST1") + line + for line in lines + if "Na+" not in line and "Cl-" not in line and not line.startswith("CRYST1") ] with open(target_pdb, "w") as fh: @@ -99,12 +98,25 @@ def _set_p1_spacegroup(dry_pdb: str, cell_pdb: str): def _propagate_crystal(ix: int, iy: int, iz: int): """Use PropPDB to replicate the unit cell, or just copy if 0.""" if ix > 0 or iy > 0 or iz > 0: - subprocess.run([ - "PropPDB", "-p", "prot_dry_cell.pdb", "-o", "xtal.pdb", - "-ix", str(ix), "-iy", str(iy), "-iz", str(iz), - ], check=True) + subprocess.run( + [ + "PropPDB", + "-p", + "prot_dry_cell.pdb", + "-o", + "xtal.pdb", + "-ix", + str(ix), + "-iy", + str(iy), + "-iz", + str(iz), + ], + check=True, + ) else: import shutil + shutil.copy("prot_dry_cell.pdb", "xtal.pdb") diff --git a/md_workflows/make_waterbox.py b/md_workflows/make_waterbox.py index 47c960a..222dc22 100644 --- a/md_workflows/make_waterbox.py +++ b/md_workflows/make_waterbox.py @@ -52,31 +52,49 @@ def _create_box_pdb(workdir: Path, wb_dir: Path): def _insert_water(workdir: Path, wb_dir: Path): - subprocess.run([ - "gmx", "insert-molecules", - "-f", str(wb_dir / "box.pdb"), - "-ci", str(workdir / "WAT.pdb"), - "-conc", "58.0", - "-o", str(wb_dir / "box_solv.pdb"), - ], capture_output=True, text=True, cwd=str(wb_dir), check=True) + subprocess.run( + [ + "gmx", + "insert-molecules", + "-f", + str(wb_dir / "box.pdb"), + "-ci", + str(workdir / "WAT.pdb"), + "-conc", + "58.0", + "-o", + str(wb_dir / "box_solv.pdb"), + ], + capture_output=True, + text=True, + cwd=str(wb_dir), + check=True, + ) def _expand_waterbox(workdir: Path, wb_dir: Path): - subprocess.run([ - "PropPDB", - "-p", str(wb_dir / "box_solv.pdb"), - "-o", str(wb_dir / "box_solv_expand.pdb"), - "-ix", "10", "-iy", "10", "-iz", "10", - ], check=True) + subprocess.run( + [ + "PropPDB", + "-p", + str(wb_dir / "box_solv.pdb"), + "-o", + str(wb_dir / "box_solv_expand.pdb"), + "-ix", + "10", + "-iy", + "10", + "-iz", + "10", + ], + check=True, + ) with open(wb_dir / "cryst1_xtal.pdb") as fh: cryst1 = fh.read() with open(wb_dir / "box_solv_expand.pdb") as fh: - lines = [ - l for l in fh - if not l.startswith(("CRYST1", "HEADER")) - ] + lines = [line for line in fh if not line.startswith(("CRYST1", "HEADER"))] with open(wb_dir / "box_solv_expand.pdb", "w") as fh: fh.write(cryst1) @@ -91,9 +109,7 @@ def _count_wat_molecules(pdb_path: Path) -> int: if line.startswith(("ATOM", "HETATM")) and " WAT " in line: wat_atoms += 1 if wat_atoms % 3 != 0: - raise ValueError( - f"{pdb_path}: expected multiple of 3 WAT atoms, got {wat_atoms}" - ) + raise ValueError(f"{pdb_path}: expected multiple of 3 WAT atoms, got {wat_atoms}") return wat_atoms // 3 @@ -113,35 +129,73 @@ def _write_topology(workdir: Path, wb_dir: Path, nwat: int): def _minimize_waterbox(artifacts_dir: Path, wb_dir: Path, ntomp: int): - subprocess.run([ - "gmx", "grompp", - "-f", str(artifacts_dir / "min_water.mdp"), - "-c", str(wb_dir / "box_solv_expand.pdb"), - "-o", str(wb_dir / "water_min.tpr"), - "-p", str(wb_dir / "waterbox.top"), - ], cwd=str(wb_dir), check=True) - - subprocess.run([ - "gmx", "mdrun", - "-ntmpi", "1", "-ntomp", str(ntomp), - "-deffnm", "water_min", "-v", - ], cwd=str(wb_dir), check=True) + subprocess.run( + [ + "gmx", + "grompp", + "-f", + str(artifacts_dir / "min_water.mdp"), + "-c", + str(wb_dir / "box_solv_expand.pdb"), + "-o", + str(wb_dir / "water_min.tpr"), + "-p", + str(wb_dir / "waterbox.top"), + ], + cwd=str(wb_dir), + check=True, + ) + + subprocess.run( + [ + "gmx", + "mdrun", + "-ntmpi", + "1", + "-ntomp", + str(ntomp), + "-deffnm", + "water_min", + "-v", + ], + cwd=str(wb_dir), + check=True, + ) def _equilibrate_waterbox(artifacts_dir: Path, wb_dir: Path, ntomp: int): - subprocess.run([ - "gmx", "grompp", - "-f", str(artifacts_dir / "equil_water.mdp"), - "-c", str(wb_dir / "water_min.gro"), - "-o", str(wb_dir / "water_equil.tpr"), - "-p", str(wb_dir / "waterbox.top"), - ], cwd=str(wb_dir), check=True) - - subprocess.run([ - "gmx", "mdrun", - "-ntmpi", "1", "-ntomp", str(ntomp), - "-deffnm", "water_equil", "-v", - ], cwd=str(wb_dir), check=True) + subprocess.run( + [ + "gmx", + "grompp", + "-f", + str(artifacts_dir / "equil_water.mdp"), + "-c", + str(wb_dir / "water_min.gro"), + "-o", + str(wb_dir / "water_equil.tpr"), + "-p", + str(wb_dir / "waterbox.top"), + ], + cwd=str(wb_dir), + check=True, + ) + + subprocess.run( + [ + "gmx", + "mdrun", + "-ntmpi", + "1", + "-ntomp", + str(ntomp), + "-deffnm", + "water_equil", + "-v", + ], + cwd=str(wb_dir), + check=True, + ) if __name__ == "__main__": diff --git a/md_workflows/minimize.py b/md_workflows/minimize.py index 024cd65..71ca540 100644 --- a/md_workflows/minimize.py +++ b/md_workflows/minimize.py @@ -10,21 +10,38 @@ def run(ntomp: int = 26): artifacts_dir = Path("artifacts") - subprocess.run([ - "gmx", "grompp", - "-f", str(artifacts_dir / "min.mdp"), - "-c", "md_model.pdb", - "-o", "md_min.tpr", - "-p", "md_model.top", - ], capture_output=True, text=True, check=True) + subprocess.run( + [ + "gmx", + "grompp", + "-f", + str(artifacts_dir / "min.mdp"), + "-c", + "md_model.pdb", + "-o", + "md_min.tpr", + "-p", + "md_model.top", + ], + capture_output=True, + text=True, + check=True, + ) - subprocess.run([ - "gmx", "mdrun", - "-ntmpi", "1", - "-ntomp", str(ntomp), - "-deffnm", "md_min", - "-v", - ], check=True) + subprocess.run( + [ + "gmx", + "mdrun", + "-ntmpi", + "1", + "-ntomp", + str(ntomp), + "-deffnm", + "md_min", + "-v", + ], + check=True, + ) if __name__ == "__main__": diff --git a/md_workflows/param_prot.py b/md_workflows/param_prot.py index 43ea28d..06f0fe3 100644 --- a/md_workflows/param_prot.py +++ b/md_workflows/param_prot.py @@ -7,6 +7,7 @@ import subprocess import textwrap from pathlib import Path + from .pdb_file_processing import ensure_entry_pdb_file @@ -35,9 +36,17 @@ def _clean_pdb(pdb_id: str): with open("pdb_clean.pdb", "w") as fh: fh.writelines(kept) - subprocess.run([ - "pdb4amber", "-i", "pdb_clean.pdb", "--prot", "-o", "pdb_clean_amber.pdb", - ], check=True) + subprocess.run( + [ + "pdb4amber", + "-i", + "pdb_clean.pdb", + "--prot", + "-o", + "pdb_clean_amber.pdb", + ], + check=True, + ) def _initial_solvation(): @@ -86,11 +95,11 @@ def _extract_solvent_pdbs(): with open("prot.pdb") as fh: lines = fh.readlines() - hetatm_lines = [l for l in lines if l.startswith("HETATM")] + hetatm_lines = [line for line in lines if line.startswith("HETATM")] - na_lines = [l for l in hetatm_lines if "Na+" in l] - cl_lines = [l for l in hetatm_lines if "Cl-" in l] - wat_lines = [l for l in hetatm_lines if "WAT" in l] + na_lines = [line for line in hetatm_lines if "Na+" in line] + cl_lines = [line for line in hetatm_lines if "Cl-" in line] + wat_lines = [line for line in hetatm_lines if "WAT" in line] if na_lines: with open("Na+.pdb", "w") as fh: diff --git a/md_workflows/pdb_file_processing.py b/md_workflows/pdb_file_processing.py index ce39d1e..7785c55 100644 --- a/md_workflows/pdb_file_processing.py +++ b/md_workflows/pdb_file_processing.py @@ -2,11 +2,11 @@ from __future__ import annotations +import platform import urllib.request from collections import defaultdict from pathlib import Path from typing import TypedDict -import platform class HeteroLigandHit(TypedDict): @@ -19,23 +19,36 @@ class HeteroLigandHit(TypedDict): n_atoms: int -_WATER_SOLVENT_RESNAMES = frozenset({ - "HOH", "WAT", "SOL", "H2O", "DOD", "TIP", "TIP3", "SPC", "PE4", "P7G", -}) +_WATER_SOLVENT_RESNAMES = frozenset( + { + "HOH", + "WAT", + "SOL", + "H2O", + "DOD", + "TIP", + "TIP3", + "SPC", + "PE4", + "P7G", + } +) + -#Makes a GET request to the RCSB API and saves the response to a file. +# Makes a GET request to the RCSB API and saves the response to a file. def rcsb_api_request(endpoint: str, out_path: Path) -> Path: base_url = "https://files.rcsb.org/" req = urllib.request.Request( url=f"{base_url}{endpoint}", data=None, headers={"User-Agent": f"{platform.node()} {platform.system()}"}, - method="GET" + method="GET", ) with urllib.request.urlopen(req, timeout=120) as resp: out_path.write_bytes(resp.read()) return out_path + def download_rcsb_legacy_pdb_and_find_ligands( pdb_id: str, *, @@ -115,7 +128,9 @@ def _sort_key(h: HeteroLigandHit) -> tuple[str, int, str, str]: def find_ligands_in_legacy_pdb_file( - path: Path | str, *, exclude_water_solvent: bool = True, + path: Path | str, + *, + exclude_water_solvent: bool = True, ) -> list[HeteroLigandHit]: """Read a legacy PDB file from disk and list hetero residues.""" text = Path(path).read_text(encoding="ascii", errors="replace") diff --git a/md_workflows/resolvate.py b/md_workflows/resolvate.py index 9ed052b..2654e17 100644 --- a/md_workflows/resolvate.py +++ b/md_workflows/resolvate.py @@ -20,11 +20,18 @@ def run(ntmpi: int = 8, ntomp: int = 1): def _compute_maxsol() -> int: """Do a trial solvation to figure out 25 % fill.""" result = subprocess.run( - ["gmx", "solvate", - "-cp", "md_equil.gro", - "-cs", "waterbox/water_equil.gro", - "-o", "tmp.pdb"], - capture_output=True, text=True, + [ + "gmx", + "solvate", + "-cp", + "md_equil.gro", + "-cs", + "waterbox/water_equil.gro", + "-o", + "tmp.pdb", + ], + capture_output=True, + text=True, ) log_text = result.stdout + result.stderr with open("gmx_solvate.log", "w") as fh: @@ -39,12 +46,21 @@ def _compute_maxsol() -> int: def _resolvate(maxsol: int): result = subprocess.run( - ["gmx", "solvate", - "-cp", "md_equil.gro", - "-cs", "waterbox/water_equil.gro", - "-o", "md_resolv.pdb", - "-maxsol", str(maxsol)], - capture_output=True, text=True, check=True, + [ + "gmx", + "solvate", + "-cp", + "md_equil.gro", + "-cs", + "waterbox/water_equil.gro", + "-o", + "md_resolv.pdb", + "-maxsol", + str(maxsol), + ], + capture_output=True, + text=True, + check=True, ) with open("gmx_resolvate.log", "w") as fh: fh.write(result.stdout + result.stderr) @@ -57,42 +73,78 @@ def _update_topology(maxsol: int): def _minimize(ntmpi: int, ntomp: int): artifacts_dir = Path("artifacts") - subprocess.run([ - "gmx", "grompp", - "-f", str(artifacts_dir / "min.mdp"), - "-c", "md_resolv.pdb", - "-o", "md_resolv_min.tpr", - "-p", "md_model_posre.top", - ], capture_output=True, text=True, check=True) - - subprocess.run([ - "gmx", "mdrun", - "-ntmpi", str(ntmpi), - "-ntomp", str(ntomp), - "-deffnm", "md_resolv_min", - "-v", - ], check=True) + subprocess.run( + [ + "gmx", + "grompp", + "-f", + str(artifacts_dir / "min.mdp"), + "-c", + "md_resolv.pdb", + "-o", + "md_resolv_min.tpr", + "-p", + "md_model_posre.top", + ], + capture_output=True, + text=True, + check=True, + ) + + subprocess.run( + [ + "gmx", + "mdrun", + "-ntmpi", + str(ntmpi), + "-ntomp", + str(ntomp), + "-deffnm", + "md_resolv_min", + "-v", + ], + check=True, + ) def _equilibrate(ntmpi: int, ntomp: int): artifacts_dir = Path("artifacts") - subprocess.run([ - "gmx", "grompp", - "-f", str(artifacts_dir / "equil.mdp"), - "-c", "md_resolv_min.gro", - "-o", "md_resolv_equil.tpr", - "-p", "md_model_posre.top", - "-r", "md_model.pdb", - "-maxwarn", "2", - ], capture_output=True, text=True, check=True) - - subprocess.run([ - "gmx", "mdrun", - "-ntmpi", str(ntmpi), - "-ntomp", str(ntomp), - "-deffnm", "md_resolv_equil", - "-v", - ], check=True) + subprocess.run( + [ + "gmx", + "grompp", + "-f", + str(artifacts_dir / "equil.mdp"), + "-c", + "md_resolv_min.gro", + "-o", + "md_resolv_equil.tpr", + "-p", + "md_model_posre.top", + "-r", + "md_model.pdb", + "-maxwarn", + "2", + ], + capture_output=True, + text=True, + check=True, + ) + + subprocess.run( + [ + "gmx", + "mdrun", + "-ntmpi", + str(ntmpi), + "-ntomp", + str(ntomp), + "-deffnm", + "md_resolv_equil", + "-v", + ], + check=True, + ) if __name__ == "__main__": diff --git a/md_workflows/run_params_gaussian.py b/md_workflows/run_params_gaussian.py index cfdd24e..ac72d31 100644 --- a/md_workflows/run_params_gaussian.py +++ b/md_workflows/run_params_gaussian.py @@ -13,6 +13,7 @@ import subprocess import textwrap from pathlib import Path + from .pdb_file_processing import prepare_pdb_and_resn_files @@ -94,43 +95,103 @@ def _run_gaussian_esp(resn: str, nproc: int): def _process_resp_charges(resn: str): """Derive RESP charges from Gaussian output and correct net charge.""" - subprocess.run([ - "antechamber", "-fi", "gout", - "-i", f"{resn}_resp.log", - "-cf", f"{resn}_resp.crg", - "-c", "resp", - "-o", f"{resn}_gauss.ac", "-fo", "ac", "-rn", resn, - ], check=True) - - subprocess.run([ - "antechamber", "-fi", "gout", - "-i", f"{resn}_resp.log", - "-o", f"{resn}_gauss.pdb", "-fo", "pdb", "-rn", resn, - ], check=True) + subprocess.run( + [ + "antechamber", + "-fi", + "gout", + "-i", + f"{resn}_resp.log", + "-cf", + f"{resn}_resp.crg", + "-c", + "resp", + "-o", + f"{resn}_gauss.ac", + "-fo", + "ac", + "-rn", + resn, + ], + check=True, + ) + + subprocess.run( + [ + "antechamber", + "-fi", + "gout", + "-i", + f"{resn}_resp.log", + "-o", + f"{resn}_gauss.pdb", + "-fo", + "pdb", + "-rn", + resn, + ], + check=True, + ) orig_coords = _extract_coords_from_pdb(f"{resn}.pdb") _graft_coords_to_ac(f"{resn}_gauss.ac", orig_coords, f"{resn}_resp.ac") _correct_charge(f"{resn}_resp.ac") - subprocess.run([ - "antechamber", "-fi", "ac", "-i", f"{resn}_resp.ac", - "-fo", "mol2", "-o", f"{resn}_resp.mol2", "-rn", resn, - ], check=True) - - subprocess.run([ - "atomtype", "-i", f"{resn}_resp.ac", - "-o", f"{resn}_resp_gaff.ac", "-p", "gaff", - ], check=True) - - subprocess.run([ - "prepgen", "-i", f"{resn}_resp_gaff.ac", - "-o", f"{resn}_resp_gaff.prepc", "-f", "car", - ], check=True) - - subprocess.run([ - "parmchk2", "-i", f"{resn}_resp_gaff.prepc", - "-o", f"{resn}_resp.frcmod", "-f", "prepc", - ], check=True) + subprocess.run( + [ + "antechamber", + "-fi", + "ac", + "-i", + f"{resn}_resp.ac", + "-fo", + "mol2", + "-o", + f"{resn}_resp.mol2", + "-rn", + resn, + ], + check=True, + ) + + subprocess.run( + [ + "atomtype", + "-i", + f"{resn}_resp.ac", + "-o", + f"{resn}_resp_gaff.ac", + "-p", + "gaff", + ], + check=True, + ) + + subprocess.run( + [ + "prepgen", + "-i", + f"{resn}_resp_gaff.ac", + "-o", + f"{resn}_resp_gaff.prepc", + "-f", + "car", + ], + check=True, + ) + + subprocess.run( + [ + "parmchk2", + "-i", + f"{resn}_resp_gaff.prepc", + "-o", + f"{resn}_resp.frcmod", + "-f", + "prepc", + ], + check=True, + ) def _extract_coords_from_pdb(pdb_file: str) -> list[str]: @@ -208,11 +269,24 @@ def _run_parameterization(resn: str, g16root: str, nproc: int): if os.path.exists(g16_profile): subprocess.run(["bash", "-c", f"source {g16_profile}"], check=True) - subprocess.run([ - "antechamber", "-fi", "pdb", "-fo", "gcrt", - "-i", f"{resn}.pdb", "-o", f"{resn}.gau", - "-nc", "-2", "-m", "1", - ], check=True) + subprocess.run( + [ + "antechamber", + "-fi", + "pdb", + "-fo", + "gcrt", + "-i", + f"{resn}.pdb", + "-o", + f"{resn}.gau", + "-nc", + "-2", + "-m", + "1", + ], + check=True, + ) _patch_gaussian_input(resn, nproc) _run_gaussian_opt(resn, nproc) @@ -238,6 +312,5 @@ def run( os.chdir(base_dir) - if __name__ == "__main__": run() diff --git a/md_workflows/solvate.py b/md_workflows/solvate.py index 26256e7..7fd1d65 100644 --- a/md_workflows/solvate.py +++ b/md_workflows/solvate.py @@ -6,7 +6,6 @@ import re import subprocess -from pathlib import Path def run(): @@ -24,6 +23,7 @@ def run(): _finalize_topology(ncopies, nwat, ncl, nna) import shutil + shutil.copy("xtal_solv_cl_na.pdb", "md_model.pdb") @@ -111,23 +111,41 @@ def _compute_ion_counts(nwat: int, net_ion_charge: int) -> tuple[int, int]: def _insert_ions(ncl: int, nna: int): - subprocess.run([ - "gmx", "insert-molecules", - "-f", "xtal_solv.pdb", - "-ci", "Cl-.pdb", - "-o", "xtal_solv_cl.pdb", - "-replace", "SOL", - "-nmol", str(ncl), - ], check=True) - - subprocess.run([ - "gmx", "insert-molecules", - "-f", "xtal_solv_cl.pdb", - "-ci", "Na+.pdb", - "-o", "xtal_solv_cl_na.pdb", - "-replace", "SOL", - "-nmol", str(nna), - ], check=True) + subprocess.run( + [ + "gmx", + "insert-molecules", + "-f", + "xtal_solv.pdb", + "-ci", + "Cl-.pdb", + "-o", + "xtal_solv_cl.pdb", + "-replace", + "SOL", + "-nmol", + str(ncl), + ], + check=True, + ) + + subprocess.run( + [ + "gmx", + "insert-molecules", + "-f", + "xtal_solv_cl.pdb", + "-ci", + "Na+.pdb", + "-o", + "xtal_solv_cl_na.pdb", + "-replace", + "SOL", + "-nmol", + str(nna), + ], + check=True, + ) def _count_final_water() -> int: diff --git a/md_workflows/workflows/mdmx.py b/md_workflows/workflows/mdmx.py index aa25eb4..0c162b9 100644 --- a/md_workflows/workflows/mdmx.py +++ b/md_workflows/workflows/mdmx.py @@ -2,7 +2,8 @@ Matches the current shell script: -1. ``run_params_gaussian`` (under ``ligand/``, same as ``cd ligand && bash ../run_params_gaussian.sh``) +1. ``run_params_gaussian`` (under ``ligand/``, i.e. ``cd ligand && bash + ../run_params_gaussian.sh``) 2. ``param_prot`` 3. ``make_crystal`` 4. ``make_waterbox`` @@ -61,10 +62,23 @@ def _cli() -> None: default="6B8X", help="PDB ID passed to param_prot (Coordinates file should be .pdb in cwd)", ) - parser.add_argument("--ix", type=int, default=1, help="make_crystal supercell replication (x; also y/z if omitted)") - parser.add_argument("--iy", type=int, default=None, help="make_crystal y replication (optional)") - parser.add_argument("--iz", type=int, default=None, help="make_crystal z replication (optional)") - parser.add_argument("--chimerax-exec", default="/usr/bin/chimerax-daily", help="ChimeraX executable for make_crystal") + parser.add_argument( + "--ix", + type=int, + default=1, + help="make_crystal supercell replication (x; also y/z if omitted)", + ) + parser.add_argument( + "--iy", type=int, default=None, help="make_crystal y replication (optional)" + ) + parser.add_argument( + "--iz", type=int, default=None, help="make_crystal z replication (optional)" + ) + parser.add_argument( + "--chimerax-exec", + default="/usr/bin/chimerax-daily", + help="ChimeraX executable for make_crystal", + ) parser.add_argument("--resolv-ntmpi", type=int, default=8, help="resolvate gmx mdrun -ntmpi") parser.add_argument("--resolv-ntomp", type=int, default=1, help="resolvate gmx mdrun -ntomp") diff --git a/pyproject.toml b/pyproject.toml index c9064e9..0e42fd4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,13 @@ version = "0.1.0" description = "Molecular dynamics workflow orchestration scripts" requires-python = ">=3.10" +[project.optional-dependencies] +# Dev/CI tooling only — kept out of [project.dependencies] so it is not baked into the +# runtime image. CI installs it via `pip install .[dev]` (or plain `pip install ruff`). +dev = [ + "ruff>=0.15.19", +] + [project.scripts] "md_workflows.param_prot" = "md_workflows.cli:param_prot_cli" "md_workflows.make_crystal" = "md_workflows.cli:make_crystal_cli" @@ -21,3 +28,12 @@ requires-python = ">=3.10" [tool.setuptools] packages = ["md_workflows", "md_workflows.workflows"] + +[tool.ruff] +# Pin the knobs so local runs and CI agree regardless of ruff's shifting defaults. +line-length = 100 +target-version = "py310" + +[tool.ruff.lint] +# pycodestyle errors/warnings, pyflakes, isort, pyupgrade, bugbear. +select = ["E", "F", "W", "I", "UP", "B"] From 2a48e09232f8330deb505d3f1d13bba523c1d511 Mon Sep 17 00:00:00 2001 From: bielj Date: Thu, 25 Jun 2026 14:50:25 -0400 Subject: [PATCH 6/6] ci: stop duplicate runs on PR pushes Scope push to [main, astera] and let pull_request cover feature branches, so a single push to a PR branch no longer matches both a wildcard push and pull_request trigger. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/ci.yml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 78417b8..52127c1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,13 +1,17 @@ name: CI -# Cheap, hosted-runner checks for every push/PR. Deliberately does NOT build or push images: -# the expensive GROMACS/CUDA builds run only in build-images.yml (on the Astera infra branch, -# on push/dispatch/tags — never on pull_request), so fork PRs can't trigger heavy compute. +# Cheap, hosted-runner checks. Deliberately does NOT build or push images: the expensive +# GROMACS/CUDA builds run only in build-images.yml (on the Astera infra branch, on +# push/dispatch — never on pull_request), so fork PRs can't trigger heavy compute. +# +# `push` is scoped to the long-lived branches and `pull_request` covers the rest. This avoids +# the double-run that happens when a single push to a branch with an open PR matches BOTH a +# wildcard `push` and `pull_request`: feature-branch pushes now run once (via pull_request), +# and main/astera get a post-merge run (via push). on: push: - branches: ['**'] + branches: [main, astera] pull_request: - branches: ['**'] workflow_dispatch: permissions: