From b07781c04ce6f807fcb8622188ddec21e7765bc5 Mon Sep 17 00:00:00 2001 From: Vikas Bhansali <64532198+vibhansa-msft@users.noreply.github.com> Date: Mon, 25 Nov 2024 11:22:07 +0530 Subject: [PATCH] Preserve ACL/Permissions while uploading file over datalake (#1571) * Add code to reset the ACLs post file upload in adls --- CHANGELOG.md | 1 + component/azstorage/azstorage.go | 3 + component/azstorage/config.go | 7 +- component/azstorage/connection.go | 1 + component/azstorage/datalake.go | 37 +++++++- component/azstorage/datalake_test.go | 125 +++++++++++++++++++++++++++ setup/advancedConfig.yaml | 1 + 7 files changed, 171 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26d62cae7..979260459 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - Rename file was calling an additional getProperties call. - Delete empty directories from local cache on rmdir operation. - [#1547](https://github.com/Azure/azure-storage-fuse/issues/1547) Truncate logic of file cache is modified to prevent downloading and uploading the entire file. +- Updating a file via Blobfuse2 was resetting the ACLs and Permissions applied to file in Datalake. **Features** - Added 'gen-config' command to auto generate blobfuse2 config file. diff --git a/component/azstorage/azstorage.go b/component/azstorage/azstorage.go index 15f721273..ec9a991b6 100644 --- a/component/azstorage/azstorage.go +++ b/component/azstorage/azstorage.go @@ -662,6 +662,9 @@ func init() { cpkEnabled := config.AddBoolFlag("cpk-enabled", false, "Enable client provided key.") config.BindPFlag(compName+".cpk-enabled", cpkEnabled) + preserveACL := config.AddBoolFlag("preserve-acl", false, "Preserve ACL and Permissions set on file during updates") + config.BindPFlag(compName+".preserve-acl", preserveACL) + config.RegisterFlagCompletionFunc("container-name", func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { return nil, cobra.ShellCompDirectiveNoFileComp }) diff --git a/component/azstorage/config.go b/component/azstorage/config.go index 6cc60957c..007d91736 100644 --- a/component/azstorage/config.go +++ b/component/azstorage/config.go @@ -184,6 +184,7 @@ type AzStorageOptions struct { CPKEnabled bool `config:"cpk-enabled" yaml:"cpk-enabled"` CPKEncryptionKey string `config:"cpk-encryption-key" yaml:"cpk-encryption-key"` CPKEncryptionKeySha256 string `config:"cpk-encryption-key-sha256" yaml:"cpk-encryption-key-sha256"` + PreserveACL bool `config:"preserve-acl" yaml:"preserve-acl"` // v1 support UseAdls bool `config:"use-adls" yaml:"-"` @@ -498,12 +499,14 @@ func ParseAndValidateConfig(az *AzStorage, opt AzStorageOptions) error { log.Warn("unsupported v1 CLI parameter: debug-libcurl is not applicable in blobfuse2.") } + az.stConfig.preserveACL = opt.PreserveACL + log.Crit("ParseAndValidateConfig : account %s, container %s, account-type %s, auth %s, prefix %s, endpoint %s, MD5 %v %v, virtual-directory %v, disable-compression %v, CPK %v", az.stConfig.authConfig.AccountName, az.stConfig.container, az.stConfig.authConfig.AccountType, az.stConfig.authConfig.AuthMode, az.stConfig.prefixPath, az.stConfig.authConfig.Endpoint, az.stConfig.validateMD5, az.stConfig.updateMD5, az.stConfig.virtualDirectory, az.stConfig.disableCompression, az.stConfig.cpkEnabled) log.Crit("ParseAndValidateConfig : use-HTTP %t, block-size %d, max-concurrency %d, default-tier %s, fail-unsupported-op %t, mount-all-containers %t", az.stConfig.authConfig.UseHTTP, az.stConfig.blockSize, az.stConfig.maxConcurrency, az.stConfig.defaultTier, az.stConfig.ignoreAccessModifiers, az.stConfig.mountAllContainers) - log.Crit("ParseAndValidateConfig : Retry Config: retry-count %d, max-timeout %d, backoff-time %d, max-delay %d", - az.stConfig.maxRetries, az.stConfig.maxTimeout, az.stConfig.backoffTime, az.stConfig.maxRetryDelay) + log.Crit("ParseAndValidateConfig : Retry Config: retry-count %d, max-timeout %d, backoff-time %d, max-delay %d, preserve-acl: %v", + az.stConfig.maxRetries, az.stConfig.maxTimeout, az.stConfig.backoffTime, az.stConfig.maxRetryDelay, az.stConfig.preserveACL) log.Crit("ParseAndValidateConfig : Telemetry : %s, honour-ACL %v, disable-symlink %v", az.stConfig.telemetry, az.stConfig.honourACL, az.stConfig.disableSymlink) diff --git a/component/azstorage/connection.go b/component/azstorage/connection.go index c052f955c..80b5481fc 100644 --- a/component/azstorage/connection.go +++ b/component/azstorage/connection.go @@ -76,6 +76,7 @@ type AzStorageConfig struct { telemetry string honourACL bool disableSymlink bool + preserveACL bool // CPK related config cpkEnabled bool diff --git a/component/azstorage/datalake.go b/component/azstorage/datalake.go index 7b18e97d9..0ccf6fb1e 100644 --- a/component/azstorage/datalake.go +++ b/component/azstorage/datalake.go @@ -553,7 +553,40 @@ func (dl *Datalake) ReadInBuffer(name string, offset int64, len int64, data []by // WriteFromFile : Upload local file to file func (dl *Datalake) WriteFromFile(name string, metadata map[string]*string, fi *os.File) (err error) { - return dl.BlockBlob.WriteFromFile(name, metadata, fi) + // File in DataLake may have permissions and ACL set. Just uploading the file will override them. + // So, we need to get the existing permissions and ACL and set them back after uploading the file. + + var acl string = "" + var fileClient *file.Client = nil + + if dl.Config.preserveACL { + fileClient = dl.Filesystem.NewFileClient(filepath.Join(dl.Config.prefixPath, name)) + resp, err := fileClient.GetAccessControl(context.Background(), nil) + if err != nil { + log.Err("Datalake::getACL : Failed to get ACLs for file %s [%s]", name, err.Error()) + } else if resp.ACL != nil { + acl = *resp.ACL + } + } + + // Upload the file, which will override the permissions and ACL + retCode := dl.BlockBlob.WriteFromFile(name, metadata, fi) + + if acl != "" { + // Cannot set both permissions and ACL in one call. ACL includes permission as well so just setting those back + // Just setting up the permissions will delete existing ACLs applied on the blob so do not convert this code to + // just set the permissions. + _, err := fileClient.SetAccessControl(context.Background(), &file.SetAccessControlOptions{ + ACL: &acl, + }) + + if err != nil { + // Earlier code was ignoring this so it might break customer cases where they do not have auth to update ACL + log.Err("Datalake::WriteFromFile : Failed to set ACL for %s [%s]", name, err.Error()) + } + } + + return retCode } // WriteFromBuffer : Upload from a buffer to a file @@ -588,7 +621,7 @@ func (dl *Datalake) ChangeMod(name string, mode os.FileMode) error { // and create new string with the username included in the string // Keeping this code here so in future if its required we can get the string and manipulate - currPerm, err := fileURL.GetAccessControl(context.Background()) + currPerm, err := fileURL.getACL(context.Background()) e := storeDatalakeErrToErr(err) if e == ErrFileNotFound { return syscall.ENOENT diff --git a/component/azstorage/datalake_test.go b/component/azstorage/datalake_test.go index d376946c5..a15b654f1 100644 --- a/component/azstorage/datalake_test.go +++ b/component/azstorage/datalake_test.go @@ -39,12 +39,15 @@ package azstorage import ( "bytes" "container/list" + "context" "crypto/rand" "encoding/base64" "encoding/json" "fmt" "io" + "io/fs" "os" + "path/filepath" "strings" "syscall" "testing" @@ -2568,6 +2571,128 @@ func (s *datalakeTestSuite) TestUploadWithCPKEnabled() { _ = os.Remove(name1) } +func getACL(dl *Datalake, name string) (string, error) { + fileClient := dl.Filesystem.NewFileClient(filepath.Join(dl.Config.prefixPath, name)) + acl, err := fileClient.GetAccessControl(context.Background(), nil) + + if err != nil || acl.ACL == nil { + return "", err + } + + return *acl.ACL, nil +} + +func (s *datalakeTestSuite) createFileWithData(name string, data []byte, mode os.FileMode) { + h, _ := s.az.CreateFile(internal.CreateFileOptions{Name: name}) + _, err := s.az.WriteFile(internal.WriteFileOptions{Handle: h, Offset: 0, Data: data}) + s.assert.Nil(err) + + err = s.az.Chmod(internal.ChmodOptions{Name: name, Mode: mode}) + s.assert.Nil(err) + + s.az.CloseFile(internal.CloseFileOptions{Handle: h}) + s.assert.Nil(err) +} + +func (s *datalakeTestSuite) TestPermissionPreservationWithoutFlag() { + defer s.cleanupTest() + name := generateFileName() + + data := []byte("test data") + mode := fs.FileMode(0764) + s.createFileWithData(name, data, mode) + // Simulate file copy and permission checks + _ = os.WriteFile(name+"_local", []byte("123123"), mode) + f, err := os.OpenFile(name+"_local", os.O_RDWR, mode) + s.assert.Nil(err) + + err = s.az.CopyFromFile(internal.CopyFromFileOptions{Name: name, File: f, Metadata: nil}) + s.assert.Nil(err) + attr, err := s.az.GetAttr(internal.GetAttrOptions{Name: name}) + s.assert.Nil(err) + s.assert.NotNil(attr) + s.assert.NotEqual(os.FileMode(0764), attr.Mode) + + acl, err := getACL(s.az.storage.(*Datalake), name) + s.assert.Nil(err) + s.assert.Contains(acl, "user::rw-") + s.assert.Contains(acl, "group::r--") + s.assert.Contains(acl, "other::---") + + os.Remove(name + "_local") +} + +func (s *datalakeTestSuite) TestPermissionPreservationWithFlag() { + defer s.cleanupTest() + // Setup + conf := fmt.Sprintf("azstorage:\n preserve-acl: true\n account-name: %s\n endpoint: https://%s.dfs.core.windows.net/\n type: adls\n account-key: %s\n mode: key\n container: %s\n fail-unsupported-op: true", + storageTestConfigurationParameters.AdlsAccount, storageTestConfigurationParameters.AdlsAccount, storageTestConfigurationParameters.AdlsKey, s.container) + s.setupTestHelper(conf, s.container, false) + + name := generateFileName() + data := []byte("test data") + mode := fs.FileMode(0764) + s.createFileWithData(name, data, mode) + // Simulate file copy and permission checks + _ = os.WriteFile(name+"_local", []byte("123123"), mode) + f, err := os.OpenFile(name+"_local", os.O_RDWR, mode) + s.assert.Nil(err) + + err = s.az.CopyFromFile(internal.CopyFromFileOptions{Name: name, File: f, Metadata: nil}) + s.assert.Nil(err) + + attr, err := s.az.GetAttr(internal.GetAttrOptions{Name: name}) + s.assert.Nil(err) + s.assert.NotNil(attr) + s.assert.Equal(os.FileMode(0764), attr.Mode) + + acl, err := getACL(s.az.storage.(*Datalake), name) + s.assert.Nil(err) + s.assert.Contains(acl, "user::rwx") + s.assert.Contains(acl, "group::rw-") + s.assert.Contains(acl, "other::r--") + + os.Remove(name + "_local") +} + +func (s *datalakeTestSuite) TestPermissionPreservationWithCommit() { + defer s.cleanupTest() + // Setup + s.setupTestHelper("", s.container, false) + name := generateFileName() + s.createFileWithData(name, []byte("test data"), fs.FileMode(0767)) + data := []byte("123123") + + id := base64.StdEncoding.EncodeToString(common.NewUUIDWithLength(16)) + err := s.az.StageData(internal.StageDataOptions{ + Name: name, + Id: id, + Data: data, + Offset: 0, + }) + s.assert.Nil(err) + + ids := []string{} + ids = append(ids, id) + err = s.az.CommitData(internal.CommitDataOptions{ + Name: name, + List: ids, + BlockSize: 1, + }) + s.assert.Nil(err) + + attr, err := s.az.GetAttr(internal.GetAttrOptions{Name: name}) + s.assert.Nil(err) + s.assert.NotNil(attr) + s.assert.EqualValues(os.FileMode(0767), attr.Mode) + + acl, err := getACL(s.az.storage.(*Datalake), name) + s.assert.Nil(err) + s.assert.Contains(acl, "user::rwx") + s.assert.Contains(acl, "group::rw-") + s.assert.Contains(acl, "other::rwx") +} + // func (s *datalakeTestSuite) TestRAGRS() { // defer s.cleanupTest() // // Setup diff --git a/setup/advancedConfig.yaml b/setup/advancedConfig.yaml index 86518547b..2f888b4d0 100644 --- a/setup/advancedConfig.yaml +++ b/setup/advancedConfig.yaml @@ -150,6 +150,7 @@ azstorage: cpk-enabled: true|false cpk-encryption-key: cpk-encryption-key-sha256: + preserve-acl: true|false # Mount all configuration mountall: