Skip to content

Commit 1a86982

Browse files
alexlarssoncgwalters
authored andcommitted
ostree-ext: Match on diff_ids as well as layer digest on import
Layer digest can vary for a layer due to e.g. recompression, so at the start of an import we enumerate all cached images and make a diff_id to layer digest map. Then when pulling an image if the layer digest ref doesn't exist, we try to lookup in the map and use that if it exists. We also create new refs for all such reused layer commits. This fixes #2078 Assisted-by: Claude Code (Sonnet 4.5) Signed-off-by: Alexander Larsson <alexl@redhat.com> Signed-off-by: Colin Walters <walters@verbum.org>
1 parent 5d5fbe0 commit 1a86982

2 files changed

Lines changed: 261 additions & 7 deletions

File tree

crates/ostree-ext/src/container/store.rs

Lines changed: 130 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,8 @@ pub struct ImageImporter {
307307
offline: bool,
308308
/// If true, we have ostree v2024.3 or newer.
309309
ostree_v2024_3: bool,
310+
/// Mapping from diff_id to blob digest for layer deduplication
311+
diffid_to_digest: HashMap<String, String>,
310312

311313
layer_progress: Option<Sender<ImportProgress>>,
312314
layer_byte_progress: Option<tokio::sync::watch::Sender<Option<LayerProgress>>>,
@@ -641,6 +643,9 @@ impl ImageImporter {
641643
);
642644

643645
let repo = repo.clone();
646+
647+
let diffid_to_digest = Self::build_diffid_to_digest_map(&repo)?;
648+
644649
Ok(ImageImporter {
645650
repo,
646651
proxy,
@@ -651,6 +656,7 @@ impl ImageImporter {
651656
require_bootable: false,
652657
offline: false,
653658
imgref: imgref.clone(),
659+
diffid_to_digest,
654660
layer_progress: None,
655661
layer_byte_progress: None,
656662
})
@@ -749,6 +755,111 @@ impl ImageImporter {
749755
.await
750756
}
751757

758+
/// Build a mapping from diff_id to blob_digest by enumerating all stored images.
759+
/// This allows us to reuse layers with the same content even if they have different blob digests.
760+
fn build_diffid_to_digest_map(repo: &ostree::Repo) -> Result<HashMap<String, String>> {
761+
let mut map = HashMap::new();
762+
let all_images = list_images(repo)?;
763+
764+
for imgref_str in all_images {
765+
let imgref = match ImageReference::try_from(imgref_str.as_str()) {
766+
Ok(r) => r,
767+
Err(e) => {
768+
tracing::warn!("Failed to parse image reference {}: {}", imgref_str, e);
769+
continue;
770+
}
771+
};
772+
773+
let state = match query_image(repo, &imgref)? {
774+
Some(s) => s,
775+
None => continue,
776+
};
777+
778+
// Map each layer's diff_id to its blob digest
779+
for (layer_desc, diff_id) in state
780+
.manifest
781+
.layers()
782+
.iter()
783+
.zip(state.configuration.rootfs().diff_ids())
784+
{
785+
let diff_id_str = diff_id.to_string();
786+
// Only store first found
787+
map.entry(diff_id_str)
788+
.or_insert_with(|| layer_desc.digest().to_string());
789+
}
790+
}
791+
792+
Ok(map)
793+
}
794+
795+
fn find_digest_by_diffid(
796+
&self,
797+
manifest: &oci_image::ImageManifest,
798+
config: &oci_image::ImageConfiguration,
799+
layer: &oci_image::Descriptor,
800+
) -> Option<&String> {
801+
let idx = manifest
802+
.layers()
803+
.iter()
804+
.position(|l| l.digest() == layer.digest())?;
805+
let layer_diffid = config.rootfs().diff_ids().get(idx)?;
806+
self.diffid_to_digest.get(layer_diffid.as_str())
807+
}
808+
809+
/// Try to resolve a layer commit by looking up its diff_id in already-imported images.
810+
fn resolve_commit_by_diffid(
811+
&self,
812+
manifest: &oci_image::ImageManifest,
813+
config: &oci_image::ImageConfiguration,
814+
layer: &oci_image::Descriptor,
815+
) -> Result<Option<String>> {
816+
let Some(existing_digest) = self.find_digest_by_diffid(manifest, config, layer) else {
817+
return Ok(None);
818+
};
819+
let existing_ref = ref_for_blob_digest(existing_digest)?;
820+
Ok(self
821+
.repo
822+
.resolve_rev(&existing_ref, true)?
823+
.map(|s| s.to_string()))
824+
}
825+
826+
/// Query a layer by digest, falling back to diff_id lookup if the direct
827+
/// ref is not found.
828+
fn query_layer(
829+
&self,
830+
manifest: &oci_image::ImageManifest,
831+
config: &oci_image::ImageConfiguration,
832+
layer: &oci_image::Descriptor,
833+
) -> Result<ManifestLayerState> {
834+
let ostree_ref = ref_for_layer(layer)?;
835+
let commit = self
836+
.repo
837+
.resolve_rev(&ostree_ref, true)?
838+
.map(|s| s.to_string());
839+
// If no direct ref match, try to find a layer with the same diff_id
840+
// but a different blob digest (e.g. due to recompression).
841+
let commit = match commit {
842+
Some(c) => Some(c),
843+
None => self.resolve_commit_by_diffid(manifest, config, layer)?,
844+
};
845+
846+
Ok(ManifestLayerState {
847+
layer: layer.clone(),
848+
ostree_ref,
849+
commit,
850+
})
851+
}
852+
853+
/// Ensure a ref exists for a layer, creating it if needed.
854+
fn ensure_ref_for_layer(repo: &ostree::Repo, ostree_ref: &str, commit: &str) -> Result<()> {
855+
let ref_exists = repo.resolve_rev(ostree_ref, true)?.is_some();
856+
if !ref_exists {
857+
tracing::debug!("Creating ref {} for reused commit {}", ostree_ref, commit);
858+
repo.set_ref_immediate(None, ostree_ref, Some(commit), gio::Cancellable::NONE)?;
859+
}
860+
Ok(())
861+
}
862+
752863
/// Given existing metadata (manifest, config, previous image statE) generate a PreparedImport structure
753864
/// which e.g. includes a diff of the layers.
754865
fn create_prepared_import(
@@ -779,15 +890,18 @@ impl ImageImporter {
779890
let (commit_layer, component_layers, remaining_layers) =
780891
parse_manifest_layout(&manifest, &config)?;
781892

782-
let query = |l: &Descriptor| query_layer(&self.repo, l.clone());
783-
let commit_layer = commit_layer.map(query).transpose()?;
893+
let commit_layer = commit_layer
894+
.map(|layer| self.query_layer(&manifest, &config, layer))
895+
.transpose()?;
896+
784897
let component_layers = component_layers
785898
.into_iter()
786-
.map(query)
899+
.map(|l| self.query_layer(&manifest, &config, l))
787900
.collect::<Result<Vec<_>>>()?;
901+
788902
let remaining_layers = remaining_layers
789903
.into_iter()
790-
.map(query)
904+
.map(|l| self.query_layer(&manifest, &config, l))
791905
.collect::<Result<Vec<_>>>()?;
792906

793907
let previous_manifest_digest = previous_state.as_ref().map(|s| s.manifest_digest.clone());
@@ -937,7 +1051,10 @@ impl ImageImporter {
9371051
};
9381052
let des_layers = self.proxy.get_layer_info(&import.proxy_img).await?;
9391053
for layer in import.ostree_layers.iter_mut() {
940-
if layer.commit.is_some() {
1054+
if let Some(commit) = layer.commit.as_ref() {
1055+
if write_refs {
1056+
Self::ensure_ref_for_layer(&self.repo, &layer.ostree_ref, commit)?;
1057+
}
9411058
continue;
9421059
}
9431060
if let Some(p) = self.layer_progress.as_ref() {
@@ -984,7 +1101,11 @@ impl ImageImporter {
9841101
.await?;
9851102
}
9861103
}
987-
if commit_layer.commit.is_none() {
1104+
if let Some(commit) = commit_layer.commit.as_ref() {
1105+
if write_refs {
1106+
Self::ensure_ref_for_layer(&self.repo, &commit_layer.ostree_ref, commit)?;
1107+
}
1108+
} else {
9881109
if let Some(p) = self.layer_progress.as_ref() {
9891110
p.send(ImportProgress::OstreeChunkStarted(
9901111
commit_layer.layer.clone(),
@@ -1031,7 +1152,7 @@ impl ImageImporter {
10311152
))
10321153
.await?;
10331154
}
1034-
};
1155+
}
10351156
Ok(())
10361157
}
10371158

@@ -1234,6 +1355,8 @@ impl ImageImporter {
12341355
for layer in import.layers {
12351356
if let Some(c) = layer.commit {
12361357
tracing::debug!("Reusing fetched commit {}", c);
1358+
Self::ensure_ref_for_layer(&self.repo, &layer.ostree_ref, &c)?;
1359+
12371360
layer_commits.push(c.to_string());
12381361
} else {
12391362
if let Some(p) = self.layer_progress.as_ref() {

crates/ostree-ext/tests/it/main.rs

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2234,3 +2234,134 @@ fn test_manifest_diff() {
22342234
"sha256:76b83eea62b7b93200a056b5e0201ef486c67f1eeebcf2c7678ced4d614cece2"
22352235
);
22362236
}
2237+
2238+
/// Test that importing an image reuses layers from a previously imported image
2239+
/// when the layers have the same diff_id (uncompressed content digest) but
2240+
/// different blob digests due to different compression (gzip vs zstd).
2241+
///
2242+
/// This directly exercises the `build_diffid_to_digest_map` / `query_layer`
2243+
/// machinery added for <https://github.com/bootc-dev/bootc/issues/2078>.
2244+
#[tokio::test]
2245+
async fn test_diff_id_reuse_across_compression() -> Result<()> {
2246+
if !check_skopeo() {
2247+
return Ok(());
2248+
}
2249+
2250+
let fixture = Fixture::new_v1()?;
2251+
let baseimg = &fixture.export_container().await?.0;
2252+
let basepath = &match baseimg.transport {
2253+
Transport::OciDir => fixture.path.join(baseimg.name.as_str()),
2254+
_ => unreachable!(),
2255+
};
2256+
let baseimg_str = format!("oci:{basepath}");
2257+
2258+
// --- Import the original (gzip) image ---
2259+
let gzip_ref = OstreeImageReference {
2260+
sigverify: SignatureSource::ContainerPolicyAllowInsecure,
2261+
imgref: baseimg.clone(),
2262+
};
2263+
let mut imp =
2264+
store::ImageImporter::new(fixture.destrepo(), &gzip_ref, Default::default()).await?;
2265+
imp.require_bootable();
2266+
let prep = match imp.prepare().await? {
2267+
store::PrepareResult::AlreadyPresent(_) => panic!("should not be already present"),
2268+
store::PrepareResult::Ready(r) => r,
2269+
};
2270+
// All layers should need fetching the first time.
2271+
let to_fetch: Vec<_> = prep.layers_to_fetch().collect::<Result<Vec<_>>>()?;
2272+
assert!(
2273+
!to_fetch.is_empty(),
2274+
"First import should have layers to fetch"
2275+
);
2276+
let _first_import = imp.import(prep).await?;
2277+
2278+
// --- Re-compress the image to zstd using skopeo ---
2279+
let zstd_image_path = &fixture.path.join("recompressed-zstd.oci");
2280+
let st = tokio::process::Command::new("skopeo")
2281+
.args([
2282+
"copy",
2283+
"--dest-compress-format=zstd",
2284+
baseimg_str.as_str(),
2285+
&format!("oci:{zstd_image_path}"),
2286+
])
2287+
.status()
2288+
.await?;
2289+
assert!(st.success(), "skopeo copy to zstd failed");
2290+
2291+
// Sanity: verify that blob digests differ but diff_ids match.
2292+
{
2293+
let gzip_oci = ocidir::OciDir::open(Dir::open_ambient_dir(
2294+
basepath,
2295+
cap_std::ambient_authority(),
2296+
)?)?;
2297+
let zstd_oci = ocidir::OciDir::open(Dir::open_ambient_dir(
2298+
zstd_image_path,
2299+
cap_std::ambient_authority(),
2300+
)?)?;
2301+
let gzip_idx = gzip_oci.read_index()?;
2302+
let zstd_idx = zstd_oci.read_index()?;
2303+
let gzip_manifest: oci_image::ImageManifest =
2304+
gzip_oci.read_json_blob(gzip_idx.manifests().first().unwrap())?;
2305+
let zstd_manifest: oci_image::ImageManifest =
2306+
zstd_oci.read_json_blob(zstd_idx.manifests().first().unwrap())?;
2307+
2308+
// At least one layer should have a different blob digest after recompression.
2309+
let gzip_digests: Vec<_> = gzip_manifest
2310+
.layers()
2311+
.iter()
2312+
.map(|l| l.digest().to_string())
2313+
.collect();
2314+
let zstd_digests: Vec<_> = zstd_manifest
2315+
.layers()
2316+
.iter()
2317+
.map(|l| l.digest().to_string())
2318+
.collect();
2319+
assert_ne!(
2320+
gzip_digests, zstd_digests,
2321+
"Blob digests should differ after recompression"
2322+
);
2323+
2324+
// But diff_ids (uncompressed content) must be identical.
2325+
let gzip_config: oci_image::ImageConfiguration =
2326+
gzip_oci.read_json_blob(gzip_manifest.config())?;
2327+
let zstd_config: oci_image::ImageConfiguration =
2328+
zstd_oci.read_json_blob(zstd_manifest.config())?;
2329+
assert_eq!(
2330+
gzip_config.rootfs().diff_ids(),
2331+
zstd_config.rootfs().diff_ids(),
2332+
"diff_ids should be identical across compression formats"
2333+
);
2334+
}
2335+
2336+
// --- Import the zstd-recompressed image ---
2337+
let zstd_ref = OstreeImageReference {
2338+
sigverify: SignatureSource::ContainerPolicyAllowInsecure,
2339+
imgref: ImageReference {
2340+
transport: Transport::OciDir,
2341+
name: zstd_image_path.to_string(),
2342+
},
2343+
};
2344+
let mut imp2 =
2345+
store::ImageImporter::new(fixture.destrepo(), &zstd_ref, Default::default()).await?;
2346+
imp2.require_bootable();
2347+
let prep2 = match imp2.prepare().await? {
2348+
store::PrepareResult::AlreadyPresent(_) => panic!("should not be already present"),
2349+
store::PrepareResult::Ready(r) => r,
2350+
};
2351+
2352+
// The key assertion: all layers should already have commits because the
2353+
// diff_id matches the already-imported gzip layers.
2354+
let to_fetch2: Vec<_> = prep2.layers_to_fetch().collect::<Result<Vec<_>>>()?;
2355+
assert!(
2356+
to_fetch2.is_empty(),
2357+
"Second import (zstd) should reuse all layers via diff_id; layers still to fetch: {:?}",
2358+
to_fetch2
2359+
.iter()
2360+
.map(|(l, _)| l.layer.digest().to_string())
2361+
.collect::<Vec<_>>()
2362+
);
2363+
2364+
let _second_import = imp2.import(prep2).await?;
2365+
2366+
Ok(())
2367+
}

0 commit comments

Comments
 (0)