diff --git a/Dockerfile b/Dockerfile index 0d3c449d..92901e90 100644 --- a/Dockerfile +++ b/Dockerfile @@ -102,7 +102,7 @@ ENV DV_BIN_PATH=/opt/deepvariant/bin # Install libraries RUN apt-get -y update && \ - apt-get install -y parallel python3-pip unzip && \ + apt-get install -y parallel python3-pip unzip libjemalloc2 && \ PATH="${HOME}/.local/bin:$PATH" python3 -m pip install absl-py==0.13.0 && \ apt-get clean autoclean && \ apt-get autoremove -y --purge && \ @@ -140,11 +140,17 @@ COPY --from=builder \ /opt/deepvariant/bin/ # Create shell wrappers for python zip files for easier use. +# +# The make_examples family is wrapped with LD_PRELOAD=libjemalloc.so.2: jemalloc +# meaningfully reduces make_examples wall-clock (its pileup/realignment work is +# allocation-heavy) while having no measurable effect on call_variants (TF +# inference), so the preload is scoped to those wrappers. The bare soname is +# arch-portable (default linker search path); a caller-set LD_PRELOAD is kept. RUN \ BASH_HEADER='#!/bin/bash' && \ printf "%s\n%s\n" \ "${BASH_HEADER}" \ - '/usr/bin/python3 /opt/deepvariant/bin/make_examples.zip "$@"' > \ + 'LD_PRELOAD="libjemalloc.so.2${LD_PRELOAD:+:$LD_PRELOAD}" /usr/bin/python3 /opt/deepvariant/bin/make_examples.zip "$@"' > \ /opt/deepvariant/bin/make_examples && \ printf "%s\n%s\n" \ "${BASH_HEADER}" \ @@ -168,7 +174,7 @@ RUN \ /opt/deepvariant/bin/runtime_by_region_vis && \ printf "%s\n%s\n" \ "${BASH_HEADER}" \ - '/usr/bin/python3 /opt/deepvariant/bin/multisample_make_examples.zip "$@"' > \ + 'LD_PRELOAD="libjemalloc.so.2${LD_PRELOAD:+:$LD_PRELOAD}" /usr/bin/python3 /opt/deepvariant/bin/multisample_make_examples.zip "$@"' > \ /opt/deepvariant/bin/multisample_make_examples && \ printf "%s\n%s\n" \ "${BASH_HEADER}" \ @@ -180,7 +186,7 @@ RUN \ /opt/deepvariant/bin/convert_to_saved_model && \ printf "%s\n%s\n" \ "${BASH_HEADER}" \ - '/usr/bin/python3 -u /opt/deepvariant/bin/make_examples_somatic.zip "$@"' > \ + 'LD_PRELOAD="libjemalloc.so.2${LD_PRELOAD:+:$LD_PRELOAD}" /usr/bin/python3 -u 
/opt/deepvariant/bin/make_examples_somatic.zip "$@"' > \ /opt/deepvariant/bin/make_examples_somatic && \ printf "%s\n%s\n" \ "${BASH_HEADER}" \ diff --git a/Dockerfile.deepsomatic b/Dockerfile.deepsomatic index 4d2f38f1..6ae7b5af 100644 --- a/Dockerfile.deepsomatic +++ b/Dockerfile.deepsomatic @@ -130,7 +130,7 @@ ENV DV_BIN_PATH=/opt/deepvariant/bin # Install libraries RUN apt-get -y update && \ - apt-get install -y parallel python3-pip unzip && \ + apt-get install -y parallel python3-pip unzip libjemalloc2 && \ PATH="${HOME}/.local/bin:$PATH" python3 -m pip install absl-py==0.13.0 && \ apt-get clean autoclean && \ apt-get autoremove -y --purge && \ @@ -164,11 +164,17 @@ COPY --from=builder \ /opt/deepvariant/bin/ # Create shell wrappers for python zip files for easier use. +# +# The make_examples family is wrapped with LD_PRELOAD=libjemalloc.so.2: jemalloc +# meaningfully reduces make_examples wall-clock (its pileup/realignment work is +# allocation-heavy) while having no measurable effect on call_variants (TF +# inference), so the preload is scoped to those wrappers. The bare soname is +# arch-portable (default linker search path); a caller-set LD_PRELOAD is kept. RUN \ BASH_HEADER='#!/bin/bash' && \ printf "%s\n%s\n" \ "${BASH_HEADER}" \ - 'python3 -u /opt/deepvariant/bin/make_examples_somatic.zip "$@"' > \ + 'LD_PRELOAD="libjemalloc.so.2${LD_PRELOAD:+:$LD_PRELOAD}" python3 -u /opt/deepvariant/bin/make_examples_somatic.zip "$@"' > \ /opt/deepvariant/bin/make_examples_somatic && \ printf "%s\n%s\n" \ "${BASH_HEADER}" \ diff --git a/Dockerfile.deeptrio b/Dockerfile.deeptrio index 051efa1e..cef15014 100644 --- a/Dockerfile.deeptrio +++ b/Dockerfile.deeptrio @@ -72,11 +72,17 @@ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTH update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 0 # Create shell wrappers for python zip files for easier use. 
+# +# The make_examples family is wrapped with LD_PRELOAD=libjemalloc.so.2: jemalloc +# meaningfully reduces make_examples wall-clock (its pileup/realignment work is +# allocation-heavy) while having no measurable effect on call_variants (TF +# inference), so the preload is scoped to those wrappers. The bare soname is +# arch-portable (default linker search path); a caller-set LD_PRELOAD is kept. RUN \ BASH_HEADER='#!/bin/bash' && \ printf "%s\n%s\n" \ "${BASH_HEADER}" \ - 'python3 /opt/deepvariant/bin/deeptrio/make_examples.zip "$@"' > \ + 'LD_PRELOAD="libjemalloc.so.2${LD_PRELOAD:+:$LD_PRELOAD}" python3 /opt/deepvariant/bin/deeptrio/make_examples.zip "$@"' > \ /opt/deepvariant/bin/deeptrio/make_examples && \ printf "%s\n%s\n" \ "${BASH_HEADER}" \ @@ -215,7 +221,7 @@ ENV PATH="${PATH}":/opt/conda/bin:/opt/conda/envs/bio/bin:/opt/deepvariant/bin/d ENV TF_USE_LEGACY_KERAS=1 RUN apt-get -y update && \ - apt-get install -y parallel python3-pip && \ + apt-get install -y parallel python3-pip libjemalloc2 && \ PATH="${HOME}/.local/bin:$PATH" python3 -m pip install absl-py==0.13.0 && \ apt-get clean autoclean && \ apt-get autoremove -y --purge && \ diff --git a/Dockerfile.pangenome_aware_deepvariant b/Dockerfile.pangenome_aware_deepvariant index 2026b069..034bab39 100644 --- a/Dockerfile.pangenome_aware_deepvariant +++ b/Dockerfile.pangenome_aware_deepvariant @@ -73,11 +73,17 @@ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTH update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 0 # Create shell wrappers for python zip files for easier use. +# +# The make_examples family is wrapped with LD_PRELOAD=libjemalloc.so.2: jemalloc +# meaningfully reduces make_examples wall-clock (its pileup/realignment work is +# allocation-heavy) while having no measurable effect on call_variants (TF +# inference), so the preload is scoped to those wrappers. 
The bare soname is +# arch-portable (default linker search path); a caller-set LD_PRELOAD is kept. RUN \ BASH_HEADER='#!/bin/bash' && \ printf "%s\n%s\n" \ "${BASH_HEADER}" \ - 'python3 -u /opt/deepvariant/bin/make_examples_pangenome_aware_dv.zip "$@"' > \ + 'LD_PRELOAD="libjemalloc.so.2${LD_PRELOAD:+:$LD_PRELOAD}" python3 -u /opt/deepvariant/bin/make_examples_pangenome_aware_dv.zip "$@"' > \ /opt/deepvariant/bin/make_examples_pangenome_aware_dv && \ printf "%s\n%s\n" \ "${BASH_HEADER}" \ @@ -140,7 +146,7 @@ ENV PATH="${PATH}":/opt/conda/bin:/opt/conda/envs/bio/bin:/opt/deepvariant/bin/p ENV TF_USE_LEGACY_KERAS=1 RUN apt-get -y update && \ - apt-get install -y parallel python3-pip && \ + apt-get install -y parallel python3-pip libjemalloc2 && \ PATH="${HOME}/.local/bin:$PATH" python3 -m pip install absl-py==0.13.0 && \ apt-get clean autoclean && \ apt-get autoremove -y --purge && \