From 932bab0ef1c29b93f2a92d3218e8caf9f90dc241 Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Thu, 15 Feb 2024 16:00:56 -0800 Subject: [PATCH] feat(storage): make it possible to disable Content-Type sniffing As described in https://github.com/google/go-cloud/issues/3298#issuecomment-1856821618, we want to disable automatic `Content-Type` detection when inserting an object to Google Cloud Storage (GCS). Previously it wasn't possible to disable this auto-detection, even though `googleapi.MediaOptions` provides a `ForceEmptyContentType` option (https://github.com/googleapis/google-api-go-client/blob/v0.165.0/googleapi/googleapi.go#L283). We enable this by adding a `Writer` option to set this value. Closes https://github.com/googleapis/google-cloud-go/issues/9430 --- storage/client.go | 3 +++ storage/grpc_client.go | 30 ++++++++++++++++-------------- storage/http_client.go | 2 +- storage/integration_test.go | 34 +++++++++++++++++++++++++++------- storage/writer.go | 30 ++++++++++++++++++------------ 5 files changed, 65 insertions(+), 34 deletions(-) diff --git a/storage/client.go b/storage/client.go index 4906b1d1f704..70b2a280e3b3 100644 --- a/storage/client.go +++ b/storage/client.go @@ -254,6 +254,9 @@ type openWriterParams struct { // attrs - see `Writer.ObjectAttrs`. // Required. attrs *ObjectAttrs + // forceEmptyContentType - Disables auto-detect of Content-Type + // Optional. + forceEmptyContentType bool // conds - see `Writer.o.conds`. // Optional. conds *Conditions diff --git a/storage/grpc_client.go b/storage/grpc_client.go index e9e95993011b..bdbf3acfea23 100644 --- a/storage/grpc_client.go +++ b/storage/grpc_client.go @@ -1529,16 +1529,17 @@ func newGRPCWriter(c *grpcStorageClient, params *openWriterParams, r io.Reader) } return &gRPCWriter{ - buf: make([]byte, size), - c: c, - ctx: params.ctx, - reader: r, - bucket: params.bucket, - attrs: params.attrs, - conds: params.conds, - encryptionKey: params.encryptionKey, - sendCRC32C: params.sendCRC32C, - chunkSize: params.chunkSize, + buf: make([]byte, size), + c: c, + ctx: params.ctx, + reader: r, + bucket: params.bucket, + attrs: params.attrs, + conds: params.conds, + encryptionKey: params.encryptionKey, + sendCRC32C: params.sendCRC32C, + chunkSize: params.chunkSize, + forceEmptyContentType: params.forceEmptyContentType, } } @@ -1557,8 +1558,9 @@ type gRPCWriter struct { encryptionKey []byte settings *settings - sendCRC32C bool - chunkSize int + sendCRC32C bool + chunkSize int + forceEmptyContentType bool // The gRPC client-stream used for sending buffers. stream storagepb.Storage_BidiWriteObjectClient @@ -1857,9 +1859,9 @@ func (w *gRPCWriter) writeObjectSpec() (*storagepb.WriteObjectSpec, error) { // read copies the data in the reader to the given buffer and reports how much // data was read into the buffer and if there is no more data to read (EOF). // Furthermore, if the attrs.ContentType is unset, the first bytes of content -// will be sniffed for a matching content type. +// will be sniffed for a matching content type unless forceEmptyContentType is enabled. func (w *gRPCWriter) read() (int, bool, error) { - if w.attrs.ContentType == "" { + if w.attrs.ContentType == "" && !w.forceEmptyContentType { w.reader, w.attrs.ContentType = gax.DetermineContentType(w.reader) } // Set n to -1 to start the Read loop. diff --git a/storage/http_client.go b/storage/http_client.go index a0f3c00a7373..e3e0d761bb08 100644 --- a/storage/http_client.go +++ b/storage/http_client.go @@ -885,7 +885,7 @@ func (c *httpStorageClient) OpenWriter(params *openWriterParams, opts ...storage mediaOpts := []googleapi.MediaOption{ googleapi.ChunkSize(params.chunkSize), } - if c := attrs.ContentType; c != "" { + if c := attrs.ContentType; c != "" || params.forceEmptyContentType { mediaOpts = append(mediaOpts, googleapi.ContentType(c)) } if params.chunkRetryDeadline != 0 { diff --git a/storage/integration_test.go b/storage/integration_test.go index 0278b7005078..18f2b5e89bd1 100644 --- a/storage/integration_test.go +++ b/storage/integration_test.go @@ -2415,8 +2415,9 @@ func TestIntegration_WriterContentType(t *testing.T) { multiTransportTest(ctx, t, func(t *testing.T, ctx context.Context, bucket, _ string, client *Client) { obj := client.Bucket(bucket).Object("content") testCases := []struct { - content string - setType, wantType string + content string + setType, wantType string + forceEmptyContentType bool }{ { // Sniffed content type. @@ -2438,9 +2439,17 @@ func TestIntegration_WriterContentType(t *testing.T) { setType: "image/jpeg", wantType: "image/jpeg", }, + { + // Content type sniffing disabled. + content: "My first page", + setType: "", + wantType: "", + forceEmptyContentType: true, + }, } for i, tt := range testCases { - if err := writeObject(ctx, obj, tt.setType, []byte(tt.content)); err != nil { + writer := newWriter(ctx, obj, tt.setType, tt.forceEmptyContentType) + if err := writeContents(writer, []byte(tt.content)); err != nil { t.Errorf("writing #%d: %v", i, err) } attrs, err := obj.Attrs(ctx) @@ -5649,10 +5658,7 @@ func deleteObjectIfExists(o *ObjectHandle, retryOpts ...RetryOption) error { return nil } -func writeObject(ctx context.Context, obj *ObjectHandle, contentType string, contents []byte) error { - w := obj.Retryer(WithPolicy(RetryAlways)).NewWriter(ctx) - w.ContentType = contentType - +func writeContents(w *Writer, contents []byte) error { if contents != nil { if _, err := w.Write(contents); err != nil { _ = w.Close() @@ -5662,6 +5668,20 @@ func writeObject(ctx context.Context, obj *ObjectHandle, contentType string, con return w.Close() } +func writeObject(ctx context.Context, obj *ObjectHandle, contentType string, contents []byte) error { + w := newWriter(ctx, obj, contentType, false) + + return writeContents(w, contents) +} + +func newWriter(ctx context.Context, obj *ObjectHandle, contentType string, forceEmptyContentType bool) *Writer { + w := obj.Retryer(WithPolicy(RetryAlways)).NewWriter(ctx) + w.ContentType = contentType + w.ForceEmptyContentType = forceEmptyContentType + + return w +} + func readObject(ctx context.Context, obj *ObjectHandle) ([]byte, error) { r, err := obj.NewReader(ctx) if err != nil { diff --git a/storage/writer.go b/storage/writer.go index aeb7ed418c8d..43a0f0d10937 100644 --- a/storage/writer.go +++ b/storage/writer.go @@ -88,6 +88,11 @@ type Writer struct { // cancellation. ChunkRetryDeadline time.Duration + // ForceEmptyContentType is an optional parameter that is used to disable + // auto-detection of Content-Type. By default, if a blank Content-Type + // is provided, then gax.DetermineContentType is called to sniff the type. + ForceEmptyContentType bool + // ProgressFunc can be used to monitor the progress of a large write // operation. If ProgressFunc is not nil and writing requires multiple // calls to the underlying service (see @@ -180,18 +185,19 @@ func (w *Writer) openWriter() (err error) { isIdempotent := w.o.conds != nil && (w.o.conds.GenerationMatch >= 0 || w.o.conds.DoesNotExist == true) opts := makeStorageOpts(isIdempotent, w.o.retry, w.o.userProject) params := &openWriterParams{ - ctx: w.ctx, - chunkSize: w.ChunkSize, - chunkRetryDeadline: w.ChunkRetryDeadline, - bucket: w.o.bucket, - attrs: &w.ObjectAttrs, - conds: w.o.conds, - encryptionKey: w.o.encryptionKey, - sendCRC32C: w.SendCRC32C, - donec: w.donec, - setError: w.error, - progress: w.progress, - setObj: func(o *ObjectAttrs) { w.obj = o }, + ctx: w.ctx, + chunkSize: w.ChunkSize, + chunkRetryDeadline: w.ChunkRetryDeadline, + bucket: w.o.bucket, + attrs: &w.ObjectAttrs, + conds: w.o.conds, + encryptionKey: w.o.encryptionKey, + sendCRC32C: w.SendCRC32C, + donec: w.donec, + setError: w.error, + progress: w.progress, + setObj: func(o *ObjectAttrs) { w.obj = o }, + forceEmptyContentType: w.ForceEmptyContentType, } if err := w.ctx.Err(); err != nil { return err // short-circuit