From 58e42f4f8ea3d41da257878b0bc1adfe4bba57a4 Mon Sep 17 00:00:00 2001
From: Brent Gardner
Date: Tue, 3 Dec 2024 10:58:06 -0700
Subject: [PATCH] serialize checksum

Capture the x-amz-checksum-sha256 header returned when a part is uploaded
and, when it is present, carry it together with the ETag in
PartId::content_id as serialized XML. When the multipart upload is
completed the metadata is parsed back out and the checksum is sent as
ChecksumSHA256; a content_id that is not XML is treated as a bare ETag.

---
 object_store/src/aws/client.rs | 22 ++++++++++++++++++++--
 object_store/src/client/s3.rs  | 29 ++++++++++++++++++++++++++---
 2 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs
index c4261f875c3..68efc9254e8 100644
--- a/object_store/src/aws/client.rs
+++ b/object_store/src/aws/client.rs
@@ -29,7 +29,7 @@ use crate::client::list::ListClient;
 use crate::client::retry::RetryExt;
 use crate::client::s3::{
     CompleteMultipartUpload, CompleteMultipartUploadResult, CopyPartResult,
-    InitiateMultipartUploadResult, ListResponse,
+    InitiateMultipartUploadResult, ListResponse, PartMetadata,
 };
 use crate::client::GetOptionsExt;
 use crate::multipart::PartId;
@@ -690,8 +690,14 @@ impl S3Client {
             request = request.with_encryption_headers();
         }
         let response = request.send().await?;
+        // Checksum from the `x-amz-checksum-sha256` response header, if present
+        let checksum = response
+            .headers()
+            .get("x-amz-checksum-sha256")
+            .and_then(|v| v.to_str().ok())
+            .map(|v| v.to_string());
 
-        let content_id = match is_copy {
+        let e_tag = match is_copy {
             false => get_etag(response.headers()).context(MetadataSnafu)?,
             true => {
                 let response = response
@@ -703,6 +709,18 @@ impl S3Client {
                 response.e_tag
             }
         };
+
+        // Only wrap the ETag in XML when there is a checksum to carry alongside
+        // it; otherwise keep the bare ETag, which completing the upload still
+        // accepts through its non-XML fallback
+        let content_id = if checksum.is_some() {
+            let meta = PartMetadata { e_tag, checksum };
+            quick_xml::se::to_string(&meta)
+                .expect("PartMetadata has only string fields and should serialize to XML")
+        } else {
+            e_tag
+        };
+
         Ok(PartId { content_id })
     }
 
diff --git a/object_store/src/client/s3.rs b/object_store/src/client/s3.rs
index dba752cb125..e49e59040d3 100644
--- a/object_store/src/client/s3.rs
+++ b/object_store/src/client/s3.rs
@@ -106,14 +106,34 @@ pub(crate) struct CompleteMultipartUpload {
     pub part: Vec<MultipartPart>,
 }
 
+/// ETag and optional SHA-256 checksum of one uploaded part, serialized to XML
+/// so both can be carried in a `PartId::content_id`
+#[derive(Debug, Serialize, Deserialize)]
+pub(crate) struct PartMetadata {
+    pub e_tag: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub checksum: Option<String>,
+}
+
 impl From<Vec<PartId>> for CompleteMultipartUpload {
     fn from(value: Vec<PartId>) -> Self {
         let part = value
             .into_iter()
             .enumerate()
-            .map(|(part_number, part)| MultipartPart {
-                e_tag: part.content_id,
-                part_number: part_number + 1,
+            .map(|(part_idx, part)| {
+                let md = match quick_xml::de::from_str::<PartMetadata>(&part.content_id) {
+                    Ok(md) => md,
+                    // Not XML: content_id is a bare ETag (old format or no checksum)
+                    Err(_) => PartMetadata {
+                        e_tag: part.content_id,
+                        checksum: None,
+                    },
+                };
+                MultipartPart {
+                    e_tag: md.e_tag,
+                    part_number: part_idx + 1,
+                    checksum_sha256: md.checksum,
+                }
             })
             .collect();
         Self { part }
@@ -126,6 +146,9 @@ pub(crate) struct MultipartPart {
     pub e_tag: String,
     #[serde(rename = "PartNumber")]
     pub part_number: usize,
+    #[serde(rename = "ChecksumSHA256")]
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub checksum_sha256: Option<String>,
 }
 
 #[derive(Debug, Deserialize)]