Skip to content

Commit

Permalink
added workaround for glue tables
Browse files Browse the repository at this point in the history
  • Loading branch information
awsandy committed Aug 16, 2024
1 parent db838eb commit 63708b1
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 32 deletions.
19 changes: 16 additions & 3 deletions .python/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,9 +392,15 @@ def tfplan3():
x = glob.glob("import__*.tf")
impf=len(x)
if awsf != impf:
print("awsf="+str(awsf)+" impf="+str(impf))
print("ERROR: "+str(awsf)+ " aws_*.tf and " + str(impf) +" import__*.tf file counts do not match - exiting")
#exit()
if globals.workaround=="":
print("ERROR: "+str(awsf)+ "x aws_*.tf and " + str(impf) +"x import__*.tf file counts do not match - exiting")
print("\nLikely import error - do the following and report errors in github issue:")
print("cd "+globals.path1)
print("terraform plan -generate-config-out=resources.out")
exit()
else:
print("INFO: "+str(awsf)+ "x aws_*.tf and " + str(impf) +"x import__*.tf file counts do not match")
print("INFO: Continuing due to workaround "+globals.workaround)
else:
print("PASSED: aws_*.tf and import__*.tf file counts match")

Expand Down Expand Up @@ -517,6 +523,13 @@ def tfplan3():
print("PASSED: import count = file counts =",str(zeroi))
else:
print("INFO: import count "+str(zeroi) +" != file counts "+ str(awsf))
if globals.workaround=="":
print("\nLikely import error - do the following and report errors in github issue")
print("cd "+globals.path1)
print("terraform plan -generate-config-out=resources.out")
exit()
else:
print("INFO: Continuing due to workaround "+globals.workaround)

if globals.merge:
print("Merge check")
Expand Down
4 changes: 2 additions & 2 deletions .python/fixtf_aws_resources/aws_no_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,6 @@
"aws_rds_cluster_activity_stream": True, # issues with import identifier
"aws_ebs_default_kms_key": True, # do we need to import ?
"aws_rds_custom_db_engine_version": True, # import crashed provider
"aws_glue_catalog_table": True, # Error: setting storage_descriptor: Invalid address to set: []string{"storage_descriptor", "0", "additional_locations"}
"aws_glue_partition": True # as child to glue table
#"aws_glue_catalog_table": True, # Error: setting storage_descriptor: Invalid address to set: []string{"storage_descriptor", "0", "additional_locations"}
#"aws_glue_partition": True # as child to glue table
}
69 changes: 44 additions & 25 deletions .python/get_aws_resources/aws_glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,35 +48,54 @@ def get_aws_glue_catalog_table(type, id, clfn, descfn, topkey, key, filterid):
if globals.debug:
print("--> In get_aws_glue_catalog_table doing " + type + ' with id ' + str(id) +
" clfn="+clfn+" descfn="+descfn+" topkey="+topkey+" key="+key+" filterid="+filterid)

try:
response = []
client = boto3.client(clfn)
print("WORKAROUND: Traditional terraform import for glue tables as import errors with new method currently - This operation can be slow if many resources are involved.")
globals.workaround=type
if id is None:
print("WARNING: ID cannot be None - must pass catalog:database or catalog:database:tablename" )

else:
## Do have table name
cc=id.count(':')
if cc==0:
print("WARNING: ID - must pass catalog:database or catalog:database:tablename" )
return True
if cc == 1:
catalogn=id.split(':')[0]
databasen=id.split(':')[1]
if cc == 2:
catalogn=id.split(':')[0]
databasen=id.split(':')[1]
tabnam=id.split(':')[2]
print("WARNING: ID cannot be None - must pass catalog:database or catalog:database:tablename" )
cc=id.count(':')
if cc==0:
print("WARNING: ID - must pass catalog:database or catalog:database:tablename" )
return True
if cc == 1:
catalogn=id.split(':')[0]
databasen=id.split(':')[1]
com="../../.scripts/get-glue-table.sh "+catalogn+" "+databasen
if cc == 2:
catalogn=id.split(':')[0]
databasen=id.split(':')[1]
tabnam=id.split(':')[2]
com="../../.scripts/get-glue-table.sh "+catalogn+" "+databasen+" "+tabnam
#print("Running "+com)

rout=common.rc(com)
print(rout.stderr.decode())
print(rout.stdout.decode())

tn=rout.stdout.decode('utf-8').rstrip().split("PARTITION:")[1]

if tn != "NOTABLE99-99":
pkey=catalogn+":"+databasen+":"+tn
common.add_dependancy("aws_glue_partition", pkey)

tkey="aws_glue_catalog_table"+"."+catalogn+":"+databasen
#print("Setting True "+tkey)
globals.rproc[tkey]=True


## Do have table name
if cc == 1:
return True

response = []
client = boto3.client(clfn)

if cc == 1:
response = client.get_tables(CatalogId=catalogn,DatabaseName=databasen)
if cc == 2:
if cc == 2:
response = client.get_tables(CatalogId=catalogn,DatabaseName=databasen,Expression=tabnam)

if response == []: print("Empty response for "+type+ " id="+str(id)+" returning"); return True
for j in response[topkey]:
if response == []: print("Empty response for "+type+ " id="+str(id)+" returning"); return True
for j in response[topkey]:
#Terraform import id = "123456789012:MyDatabase:MyTable"
pkey=catalogn+":"+databasen+":"+j[key]
tfid="d-"+pkey.replace(":","__")
Expand All @@ -85,9 +104,9 @@ def get_aws_glue_catalog_table(type, id, clfn, descfn, topkey, key, filterid):
common.add_dependancy("aws_glue_partition",pkey)

# set dependency false
tkey="aws_glue_catalog_table"+"."+catalogn+":"+databasen
tkey="aws_glue_catalog_table"+"."+catalogn+":"+databasen
#print("Setting True "+tkey)
globals.rproc[tkey]=True
globals.rproc[tkey]=True

except Exception as e:
common.handle_error(e,str(inspect.currentframe().f_code.co_name),clfn,descfn,topkey,id)
Expand Down
1 change: 1 addition & 0 deletions .python/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
secvid=""

meshname=""
workaround=""


#specials=["aws_iam_role_policy","aws_route_table_association","aws_iam_policy","aws_iam_policy_attchment",
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,14 @@ Finally aws2tf runs a `terraform plan` command and there should hopefully be no
+ boto3 1.34.93 or later (pip3 install -r requirements.txt).
+ AWS cli (v2) **version 2.17.0 or higher** needs to be installed and you need a login with at least "Read" privileges.
+ Terraform **version v1.7.5** or higher needs to be installed. (recommend you avoid early point releases eg. 1.9.0/1.9.1)
+ jq **version 1.6 or higher**

## Optional but recommended

+ pyenv - to help manage Python versions and environments (https://github.com/pyenv/pyenv)
+ tfenv - to help manage multiple Terraform versions (https://github.com/tfutils/tfenv)
+ trivy **version 0.48.0 or later** (https://aquasecurity.github.io/trivy/v0.54/)
+ jq **version 1.6 or higher**



(*This tool is currently developed/tested using Python 3.9.16 on macOS 14.6.1*)
Expand Down
12 changes: 11 additions & 1 deletion aws2tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def build_lists():

globals.region = region
globals.regionl = len(region)
os.environ["AWS"] = "aws --region "+region+" "

# get the current env and set directory

Expand Down Expand Up @@ -361,5 +362,14 @@ def build_lists():
com = "sort -u pyprocessed.txt -o pyprocessed.txt"
rout = common.rc(com)

print("\nTerraform files & state in sub-directory: "+ globals.path1+"\n")
print("\nTerraform files & state in sub-directory: "+ globals.path1)

#### Trivy - if installed
#
#
com = "../../.scripts/trivy-check.sh"
rout = common.rc(com)
print(rout.stdout.decode())


exit(0)

0 comments on commit 63708b1

Please sign in to comment.