ASN.1 changes

Signed-off-by: Clemens Vasters <clemens@vasters.com>
clemensv · Feb 16, 2024 · 8092a9c · 8092a9c
1 parent 57f0776
commit 8092a9c
Show file tree

Hide file tree

Showing 4 changed files with 101 additions and 26 deletions.
diff --git a/avrotize/asn1toavro.py b/avrotize/asn1toavro.py
@@ -1,17 +1,22 @@
 import asn1tools
-from asn1tools.codecs.ber import Sequence, SequenceOf, Integer, Boolean, Enumerated, OctetString, IA5String, UTF8String
+from asn1tools.codecs.ber import Sequence, SequenceOf, Integer, Boolean, Enumerated, OctetString, IA5String, UTF8String, Date, Real
 import json
 
-def asn1_type_to_avro_type(asn1_type, avro_schema):
+def asn1_type_to_avro_type(asn1_type, avro_schema, namespace):
     """Convert an ASN.1 type to an Avro type."""
     avro_type = None
     if isinstance(asn1_type,Integer):
         avro_type = 'int'
     elif isinstance(asn1_type, Boolean):
         avro_type = 'boolean'
     elif isinstance(asn1_type, Enumerated):
-        symbols = [member['name'] for member in asn1_type['members']]
-        avro_type = {'type': 'enum', 'symbols': symbols, 'name': 'EnumType'}
+        symbols = [member for member in asn1_type.data_to_value.keys()]
+        avro_type = { 
+            'type': 'enum', 
+            'name': asn1_type.type_name,
+            'namespace': namespace, 
+            'symbols': symbols
+            }
     elif isinstance(asn1_type, Sequence):
         if avro_schema and next((s for s in avro_schema if s['name'] == asn1_type.type_name), None):
             return asn1_type.type_name
@@ -20,32 +25,46 @@ def asn1_type_to_avro_type(asn1_type, avro_schema):
             for member in asn1_type.root_members:
                 fields.append({
                     'name': member.name,
-                    'type': asn1_type_to_avro_type(member, avro_schema)
+                    'type': asn1_type_to_avro_type(member, avro_schema, namespace)
                 })
-            avro_type = {'type': 'record', 'name': asn1_type.name, 'fields': fields}
+            avro_type = {
+                'type': 'record', 
+                'name': asn1_type.type_name if asn1_type.type_name != 'SEQUENCE' else asn1_type.name, 
+                'namespace': namespace,
+                'fields': fields}
     elif isinstance(asn1_type, SequenceOf):
-        item_type = asn1_type_to_avro_type(asn1_type['element'])
-        avro_type = {'type': 'array', 'items': item_type}
+        item_type = asn1_type_to_avro_type(asn1_type.element_type, avro_schema, namespace)
+        avro_type = {
+            'type': 'array',
+            'namespace': namespace, 
+            'items': item_type
+            }
     elif isinstance(asn1_type, OctetString) and asn1_type.type == 'OCTET STRING':
         avro_type = 'bytes'
     elif isinstance(asn1_type, IA5String) or isinstance(asn1_type, UTF8String):
         avro_type = 'string'
+    elif isinstance(asn1_type, Date):
+        avro_type = {'type': 'int', 'logicalType': 'date'}
+    elif isinstance(asn1_type, Real):
+        avro_type = 'double'
+
+    if len(avro_schema) > 0 and 'name' in avro_type:
+        existing_type = next((t for t in avro_schema if t.get('name') == avro_type['name'] and t.get('namespace') == avro_type.get('namespace') ), None)
+        if existing_type:
+            return existing_type.get('name')
+
     return avro_type
 
 def convert_asn1_to_avro_schema(asn1_spec_path):
     """Convert ASN.1 specification to Avro schema."""
+
     spec = asn1tools.compile_files(asn1_spec_path)
     avro_schema = []
     for module_name, module in spec.modules.items():
         for type_name, asn1_type in module.items():
-            avro_type = asn1_type_to_avro_type(asn1_type.type, avro_schema)
-            if avro_type:
-                avro_schema.append({
-                    'namespace': module_name,
-                    'type': avro_type if isinstance(avro_type, str) else avro_type['type'],
-                    'name': type_name,
-                    **({'fields': avro_type['fields']} if isinstance(avro_type, dict) and 'fields' in avro_type else {})
-                })
+            avro_type = asn1_type_to_avro_type(asn1_type.type, avro_schema, module_name)
+            if avro_type and not isinstance(avro_type, str):
+                avro_schema.append(avro_type)
 
     if len(avro_schema) == 1:
         return avro_schema[0]
@@ -56,9 +75,3 @@ def convert_asn1_to_avro(asn1_spec_path, avro_file_path):
     avro_schema = convert_asn1_to_avro_schema(asn1_spec_path)
     with open(avro_file_path, 'w') as file:
         json.dump(avro_schema, file, indent=4)
-
-# Example usage:
-# asn1_spec_path = 'path/to/your/asn1_spec.asn'
-# output_file_path = 'path/to/save/avro_schema.json'
-# avro_schema = convert_asn1_to_avro(asn1_spec_path)
-# save_avro_schema_to_file(avro_schema, output_file_path)
diff --git a/avrotize/avrotize.py b/avrotize/avrotize.py
@@ -36,19 +36,19 @@ def main():
     a2k_parser.add_argument('--avsc', type=str, help='Path to the Avro schema file', required=True)
     a2k_parser.add_argument('--kusto', type=str, help='Path to the Kusto table', required=True)
     a2k_parser.add_argument('--record_type', type=str, help='Record type in the Avro schema', required=False)
-    a2k_parser.add_argument('--emit_cloud_events_columns', action='store_true', help='Add CloudEvents columns to the Kusto table')
+    a2k_parser.add_argument('--emit_cloud_events_columns', action='store_true', help='Add CloudEvents columns to the Kusto table', default=False)
 
     a2tsql_parser = subparsers.add_parser('a2tsql', help='Convert Avro schema to T-SQL schema')
     a2tsql_parser.add_argument('--avsc', type=str, help='Path to the Avro schema file', required=True)
     a2tsql_parser.add_argument('--tsql', type=str, help='Path to the T-SQL table', required=True)
     a2tsql_parser.add_argument('--record_type', type=str, help='Record type in the Avro schema', required=False)
-    a2tsql_parser.add_argument('--emit_cloud_events_columns', action='store_true', help='Add CloudEvents columns to the T-SQL table')
+    a2tsql_parser.add_argument('--emit_cloud_events_columns', action='store_true', help='Add CloudEvents columns to the T-SQL table', default=False)
 
     a2pq_parser = subparsers.add_parser('a2pq', help='Convert Avro schema to Parquet schema')
     a2pq_parser.add_argument('--avsc', type=str, help='Path to the Avro schema file', required=True)
     a2pq_parser.add_argument('--parquet', type=str, help='Path to the Parquet file', required=True)
     a2pq_parser.add_argument('--record_type', type=str, help='Record type in the Avro schema', required=False)
-    a2pq_parser.add_argument('--emit_cloud_events_columns', action='store_true', help='Add CloudEvents columns to the Parquet file')
+    a2pq_parser.add_argument('--emit_cloud_events_columns', action='store_true', help='Add CloudEvents columns to the Parquet file', default=False)
 
     asn2a_parser = subparsers.add_parser('asn2a', help='Convert ASN.1 schema to Avro schema')
     asn2a_parser.add_argument('--asn', type=str, help='Path to the ASN.1 schema file', required=True)

diff --git a/test/asn1/movie.asn b/test/asn1/movie.asn
@@ -0,0 +1,53 @@
+MovieSchema DEFINITIONS AUTOMATIC TAGS ::= BEGIN
+
+-- Defines the structure for a movie within the schema
+Movie ::= SEQUENCE {
+    title UTF8String (SIZE (1..100)), -- Movie title with a length restriction for brevity and clarity
+    director Person, -- Reference to the Person structure, specifying the movie's director
+    releaseDate DATE, -- The official release date of the movie
+    genre Genre OPTIONAL, -- Genre of the movie, chosen from a predefined list
+    duration Duration, -- Duration of the movie, including hours and minutes
+    rating MovieRating OPTIONAL, -- The movie's rating, indicating suitability for different audiences
+    cast SEQUENCE SIZE (0..50) OF Person OPTIONAL, -- A list of cast members, limited to 50 to avoid excessive length
+    productionDetails ProductionDetails OPTIONAL -- Additional details about the movie's production
+}
+
+-- Represents an individual involved in the movie, either in cast or crew
+Person ::= SEQUENCE {
+    name UTF8String (SIZE (1..100)), -- The person's name, with length restrictions
+    role UTF8String OPTIONAL, -- The person's role in the movie, e.g., actor, producer (optional)
+    birthDate DATE OPTIONAL, -- The person's birth date (optional)
+    nationality UTF8String OPTIONAL -- The person's nationality (optional)
+}
+
+-- Enumerated list of possible movie genres
+Genre ::= ENUMERATED {
+    action (0), -- Action genre
+    comedy (1), -- Comedy genre
+    drama (2), -- Drama genre
+    scienceFiction (3), -- Science Fiction genre
+    documentary (4), -- Documentary genre
+    thriller (5), -- Thriller genre
+    horror (6) -- Horror genre
+}
+
+-- Structure defining the duration of the movie
+Duration ::= SEQUENCE {
+    hours INTEGER (0..99), -- Number of hours, with a maximum for practicality
+    minutes INTEGER (0..59) -- Number of minutes, constrained to valid time representation
+}
+
+-- Structure for the movie's rating, including category and age restrictions
+MovieRating ::= SEQUENCE {
+    category UTF8String, -- The rating category (e.g., PG, PG-13, R)
+    minimumAge INTEGER OPTIONAL -- The minimum age recommended to watch the movie (optional)
+}
+
+-- Details about the production of the movie
+ProductionDetails ::= SEQUENCE {
+    productionCompany UTF8String, -- The name of the company that produced the movie
+    budget INTEGER OPTIONAL, -- The total budget of the movie in USD (optional)
+    filmingLocations SEQUENCE OF UTF8String OPTIONAL -- List of locations where the movie was filmed (optional)
+}
+
+END
diff --git a/test/test_asn1toavro.py b/test/test_asn1toavro.py
@@ -11,7 +11,7 @@
 from avrotize.asn1toavro import convert_asn1_to_avro
 
 class TestAsn1ToAvro(unittest.TestCase):
-    def test_convert_address_jsons_to_avro(self):
+    def test_convert_address_asn_to_avro(self):
         cwd = os.getcwd()        
         asn1_path = os.path.join(cwd, "test", "asn1", "person.asn")
         avro_path = os.path.join(cwd, "test", "tmp", "personasn.avsc")
@@ -20,3 +20,12 @@ def test_convert_address_jsons_to_avro(self):
             os.makedirs(dir)
 
         convert_asn1_to_avro(asn1_path, avro_path)           
+
+    def test_convert_movie_asn_to_avro(self):
+        cwd = os.getcwd()        
+        asn1_path = os.path.join(cwd, "test", "asn1", "movie.asn")
+        avro_path = os.path.join(cwd, "test", "tmp", "movieasn.avsc")
+        dir = os.path.dirname(avro_path)
+        if not os.path.exists(dir):
+            os.makedirs(dir)
+        convert_asn1_to_avro(asn1_path, avro_path)