From 8e96e967abc387978ba7e2f654b8d25c2f27a5f6 Mon Sep 17 00:00:00 2001 From: Noritaka Sekiyama Date: Tue, 19 Mar 2024 16:11:25 +0900 Subject: [PATCH] fix(glue): s3 path specified in --spark-event-logs-path needs to end with slash (#29357) ### Issue # (if applicable) Closes #29356. ### Reason for this change Currently, the S3 path specified in `--spark-event-logs-path` does not end with a slash when only a bucket is provided and no prefix is given. A path without the trailing slash causes errors when viewing the event log through the Spark UI / Spark history server. ### Description of changes Add a trailing slash when the path does not already end with one. ### Description of how you validated changes Ran the unit tests and the integ test. ### Checklist - [x] My code adheres to the [CONTRIBUTING GUIDE](https://github.com/aws/aws-cdk/blob/main/CONTRIBUTING.md) and [DESIGN GUIDELINES](https://github.com/aws/aws-cdk/blob/main/docs/DESIGN_GUIDELINES.md) ---- *By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license* --- packages/@aws-cdk/aws-glue-alpha/lib/job.ts | 2 +- .../aws-glue-job.assets.json | 4 +- .../aws-glue-job.template.json | 174 +++++++++++++- .../test/integ.job.js.snapshot/manifest.json | 20 +- .../test/integ.job.js.snapshot/tree.json | 225 +++++++++++++++++- .../@aws-cdk/aws-glue-alpha/test/integ.job.ts | 3 + .../@aws-cdk/aws-glue-alpha/test/job.test.ts | 3 +- 7 files changed, 420 insertions(+), 11 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/job.ts index 8420d380bb675..813894f0b6898 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/job.ts @@ -826,7 +826,7 @@ export class Job extends JobBase { bucket.grantReadWrite(role, this.cleanPrefixForGrant(props.prefix)); const args = { '--enable-spark-ui': 'true', - '--spark-event-logs-path': bucket.s3UrlForObject(props.prefix), + '--spark-event-logs-path': 
bucket.s3UrlForObject(props.prefix).replace(/\/?$/, '/'), // path will always end with a slash }; return { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/aws-glue-job.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/aws-glue-job.assets.json index 4be1daf9e3601..79fcfa4848c78 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/aws-glue-job.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/aws-glue-job.assets.json @@ -40,7 +40,7 @@ } } }, - "4595f761e767da2650d387dfba7358d632f1da57744033b6023677b8bc150f15": { + "dd8261e4900491c4aaaec8bcd31c57b1f30085f1d579f19089bd55a4bdb023b7": { "source": { "path": "aws-glue-job.template.json", "packaging": "file" @@ -48,7 +48,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "4595f761e767da2650d387dfba7358d632f1da57744033b6023677b8bc150f15.json", + "objectKey": "dd8261e4900491c4aaaec8bcd31c57b1f30085f1d579f19089bd55a4bdb023b7.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/aws-glue-job.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/aws-glue-job.template.json index 6ed2d571b6a3e..a198186232829 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/aws-glue-job.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/aws-glue-job.template.json @@ -162,7 +162,8 @@ "s3://", { "Ref": "EtlJob20SparkUIBucketFD07FBD8" - } + }, + "/" ] ] }, @@ -267,6 +268,43 @@ "Properties": { "PolicyDocument": { "Statement": [ + { + "Action": [ + "s3:Abort*", + "s3:DeleteObject*", + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*", + "s3:PutObject", + "s3:PutObjectLegalHold", + "s3:PutObjectRetention", + "s3:PutObjectTagging", 
+ "s3:PutObjectVersionTagging" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::GetAtt": [ + "StreamingJob20SparkUIBucket92EF3706", + "Arn" + ] + }, + { + "Fn::Join": [ + "", + [ + { + "Fn::GetAtt": [ + "StreamingJob20SparkUIBucket92EF3706", + "Arn" + ] + }, + "/*" + ] + ] + } + ] + }, { "Action": [ "s3:GetBucket*", @@ -319,6 +357,11 @@ ] } }, + "StreamingJob20SparkUIBucket92EF3706": { + "Type": "AWS::S3::Bucket", + "UpdateReplacePolicy": "Retain", + "DeletionPolicy": "Retain" + }, "StreamingJob20355B58C7": { "Type": "AWS::Glue::Job", "Properties": { @@ -340,6 +383,19 @@ }, "DefaultArguments": { "--job-language": "python", + "--enable-spark-ui": "true", + "--spark-event-logs-path": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "StreamingJob20SparkUIBucket92EF3706" + }, + "/" + ] + ] + }, "arg1": "value1", "arg2": "value2" }, @@ -520,7 +576,8 @@ "s3://", { "Ref": "EtlJob30SparkUIBucket9D789346" - } + }, + "/" ] ] }, @@ -625,6 +682,43 @@ "Properties": { "PolicyDocument": { "Statement": [ + { + "Action": [ + "s3:Abort*", + "s3:DeleteObject*", + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*", + "s3:PutObject", + "s3:PutObjectLegalHold", + "s3:PutObjectRetention", + "s3:PutObjectTagging", + "s3:PutObjectVersionTagging" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::GetAtt": [ + "StreamingJob30SparkUIBucketEFBF52D3", + "Arn" + ] + }, + { + "Fn::Join": [ + "", + [ + { + "Fn::GetAtt": [ + "StreamingJob30SparkUIBucketEFBF52D3", + "Arn" + ] + }, + "/*" + ] + ] + } + ] + }, { "Action": [ "s3:GetBucket*", @@ -677,6 +771,11 @@ ] } }, + "StreamingJob30SparkUIBucketEFBF52D3": { + "Type": "AWS::S3::Bucket", + "UpdateReplacePolicy": "Retain", + "DeletionPolicy": "Retain" + }, "StreamingJob30E005FBEB": { "Type": "AWS::Glue::Job", "Properties": { @@ -698,6 +797,19 @@ }, "DefaultArguments": { "--job-language": "python", + "--enable-spark-ui": "true", + "--spark-event-logs-path": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "StreamingJob30SparkUIBucketEFBF52D3" 
+ }, + "/" + ] + ] + }, "arg1": "value1", "arg2": "value2" }, @@ -878,7 +990,8 @@ "s3://", { "Ref": "EtlJob40SparkUIBucket02F50B0D" - } + }, + "/" ] ] }, @@ -983,6 +1096,43 @@ "Properties": { "PolicyDocument": { "Statement": [ + { + "Action": [ + "s3:Abort*", + "s3:DeleteObject*", + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*", + "s3:PutObject", + "s3:PutObjectLegalHold", + "s3:PutObjectRetention", + "s3:PutObjectTagging", + "s3:PutObjectVersionTagging" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::GetAtt": [ + "StreamingJob40SparkUIBucketA97E24C6", + "Arn" + ] + }, + { + "Fn::Join": [ + "", + [ + { + "Fn::GetAtt": [ + "StreamingJob40SparkUIBucketA97E24C6", + "Arn" + ] + }, + "/*" + ] + ] + } + ] + }, { "Action": [ "s3:GetBucket*", @@ -1035,6 +1185,11 @@ ] } }, + "StreamingJob40SparkUIBucketA97E24C6": { + "Type": "AWS::S3::Bucket", + "UpdateReplacePolicy": "Retain", + "DeletionPolicy": "Retain" + }, "StreamingJob40E284A782": { "Type": "AWS::Glue::Job", "Properties": { @@ -1056,6 +1211,19 @@ }, "DefaultArguments": { "--job-language": "python", + "--enable-spark-ui": "true", + "--spark-event-logs-path": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "StreamingJob40SparkUIBucketA97E24C6" + }, + "/" + ] + ] + }, "arg1": "value1", "arg2": "value2" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/manifest.json index e0a0106226e31..1d980ef4d57bb 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - 
"stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/4595f761e767da2650d387dfba7358d632f1da57744033b6023677b8bc150f15.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/dd8261e4900491c4aaaec8bcd31c57b1f30085f1d579f19089bd55a4bdb023b7.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ @@ -76,6 +76,12 @@ "data": "StreamingJob20ServiceRoleDefaultPolicy3CF74B2D" } ], + "/aws-glue-job/StreamingJob2.0/SparkUIBucket/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "StreamingJob20SparkUIBucket92EF3706" + } + ], "/aws-glue-job/StreamingJob2.0/Resource": [ { "type": "aws:cdk:logicalId", @@ -124,6 +130,12 @@ "data": "StreamingJob30ServiceRoleDefaultPolicy0C15D010" } ], + "/aws-glue-job/StreamingJob3.0/SparkUIBucket/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "StreamingJob30SparkUIBucketEFBF52D3" + } + ], "/aws-glue-job/StreamingJob3.0/Resource": [ { "type": "aws:cdk:logicalId", @@ -172,6 +184,12 @@ "data": "StreamingJob40ServiceRoleDefaultPolicy0667C434" } ], + "/aws-glue-job/StreamingJob4.0/SparkUIBucket/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "StreamingJob40SparkUIBucketA97E24C6" + } + ], "/aws-glue-job/StreamingJob4.0/Resource": [ { "type": "aws:cdk:logicalId", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/tree.json index 6d88a4cb8fd4a..91a8fd605214a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/tree.json @@ -264,7 +264,8 @@ "s3://", { "Ref": "EtlJob20SparkUIBucketFD07FBD8" - } + }, + "/" ] ] }, @@ -429,6 +430,43 @@ "aws:cdk:cloudformation:props": { "policyDocument": { "Statement": [ + { + "Action": [ + "s3:Abort*", + "s3:DeleteObject*", + "s3:GetBucket*", + 
"s3:GetObject*", + "s3:List*", + "s3:PutObject", + "s3:PutObjectLegalHold", + "s3:PutObjectRetention", + "s3:PutObjectTagging", + "s3:PutObjectVersionTagging" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::GetAtt": [ + "StreamingJob20SparkUIBucket92EF3706", + "Arn" + ] + }, + { + "Fn::Join": [ + "", + [ + { + "Fn::GetAtt": [ + "StreamingJob20SparkUIBucket92EF3706", + "Arn" + ] + }, + "/*" + ] + ] + } + ] + }, { "Action": [ "s3:GetBucket*", @@ -498,6 +536,28 @@ "version": "0.0.0" } }, + "SparkUIBucket": { + "id": "SparkUIBucket", + "path": "aws-glue-job/StreamingJob2.0/SparkUIBucket", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job/StreamingJob2.0/SparkUIBucket/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::S3::Bucket", + "aws:cdk:cloudformation:props": {} + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.CfnBucket", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.Bucket", + "version": "0.0.0" + } + }, "Resource": { "id": "Resource", "path": "aws-glue-job/StreamingJob2.0/Resource", @@ -522,6 +582,19 @@ }, "defaultArguments": { "--job-language": "python", + "--enable-spark-ui": "true", + "--spark-event-logs-path": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "StreamingJob20SparkUIBucket92EF3706" + }, + "/" + ] + ] + }, "arg1": "value1", "arg2": "value2" }, @@ -781,7 +854,8 @@ "s3://", { "Ref": "EtlJob30SparkUIBucket9D789346" - } + }, + "/" ] ] }, @@ -946,6 +1020,43 @@ "aws:cdk:cloudformation:props": { "policyDocument": { "Statement": [ + { + "Action": [ + "s3:Abort*", + "s3:DeleteObject*", + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*", + "s3:PutObject", + "s3:PutObjectLegalHold", + "s3:PutObjectRetention", + "s3:PutObjectTagging", + "s3:PutObjectVersionTagging" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::GetAtt": [ + "StreamingJob30SparkUIBucketEFBF52D3", + "Arn" + ] + }, + { + "Fn::Join": [ + "", + [ + { + "Fn::GetAtt": [ + 
"StreamingJob30SparkUIBucketEFBF52D3", + "Arn" + ] + }, + "/*" + ] + ] + } + ] + }, { "Action": [ "s3:GetBucket*", @@ -1015,6 +1126,28 @@ "version": "0.0.0" } }, + "SparkUIBucket": { + "id": "SparkUIBucket", + "path": "aws-glue-job/StreamingJob3.0/SparkUIBucket", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job/StreamingJob3.0/SparkUIBucket/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::S3::Bucket", + "aws:cdk:cloudformation:props": {} + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.CfnBucket", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.Bucket", + "version": "0.0.0" + } + }, "Resource": { "id": "Resource", "path": "aws-glue-job/StreamingJob3.0/Resource", @@ -1039,6 +1172,19 @@ }, "defaultArguments": { "--job-language": "python", + "--enable-spark-ui": "true", + "--spark-event-logs-path": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "StreamingJob30SparkUIBucketEFBF52D3" + }, + "/" + ] + ] + }, "arg1": "value1", "arg2": "value2" }, @@ -1298,7 +1444,8 @@ "s3://", { "Ref": "EtlJob40SparkUIBucket02F50B0D" - } + }, + "/" ] ] }, @@ -1463,6 +1610,43 @@ "aws:cdk:cloudformation:props": { "policyDocument": { "Statement": [ + { + "Action": [ + "s3:Abort*", + "s3:DeleteObject*", + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*", + "s3:PutObject", + "s3:PutObjectLegalHold", + "s3:PutObjectRetention", + "s3:PutObjectTagging", + "s3:PutObjectVersionTagging" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::GetAtt": [ + "StreamingJob40SparkUIBucketA97E24C6", + "Arn" + ] + }, + { + "Fn::Join": [ + "", + [ + { + "Fn::GetAtt": [ + "StreamingJob40SparkUIBucketA97E24C6", + "Arn" + ] + }, + "/*" + ] + ] + } + ] + }, { "Action": [ "s3:GetBucket*", @@ -1532,6 +1716,28 @@ "version": "0.0.0" } }, + "SparkUIBucket": { + "id": "SparkUIBucket", + "path": "aws-glue-job/StreamingJob4.0/SparkUIBucket", + "children": { + "Resource": { + "id": "Resource", + "path": 
"aws-glue-job/StreamingJob4.0/SparkUIBucket/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::S3::Bucket", + "aws:cdk:cloudformation:props": {} + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.CfnBucket", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.Bucket", + "version": "0.0.0" + } + }, "Resource": { "id": "Resource", "path": "aws-glue-job/StreamingJob4.0/Resource", @@ -1556,6 +1762,19 @@ }, "defaultArguments": { "--job-language": "python", + "--enable-spark-ui": "true", + "--spark-event-logs-path": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "StreamingJob40SparkUIBucketA97E24C6" + }, + "/" + ] + ] + }, "arg1": "value1", "arg2": "value2" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts index 7b405d53ca490..91bf9bab212fc 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts @@ -71,6 +71,9 @@ const moduleUtils = glue.Code.fromAsset(path.join(__dirname, 'module', 'utils.zi arg1: 'value1', arg2: 'value2', }, + sparkUI: { + enabled: true, + }, tags: { key: 'value', }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts index 48e395a3fedc9..0e6db582c1d71 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts @@ -542,6 +542,7 @@ describe('Job', () => { { Ref: 'JobSparkUIBucket8E6A0139', }, + '/', ], ], }, @@ -625,7 +626,7 @@ describe('Job', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { DefaultArguments: { '--enable-spark-ui': 'true', - '--spark-event-logs-path': `s3://${sparkUIBucketName}`, + '--spark-event-logs-path': `s3://${sparkUIBucketName}/`, }, }); });