Skip to content

Commit

Permalink
ASN.1 changes
Browse files Browse the repository at this point in the history
Signed-off-by: Clemens Vasters <clemens@vasters.com>
  • Loading branch information
clemensv committed Feb 16, 2024
1 parent 57f0776 commit 8092a9c
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 26 deletions.
57 changes: 35 additions & 22 deletions avrotize/asn1toavro.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
import asn1tools
from asn1tools.codecs.ber import Sequence, SequenceOf, Integer, Boolean, Enumerated, OctetString, IA5String, UTF8String
from asn1tools.codecs.ber import Sequence, SequenceOf, Integer, Boolean, Enumerated, OctetString, IA5String, UTF8String, Date, Real
import json

def asn1_type_to_avro_type(asn1_type, avro_schema):
def asn1_type_to_avro_type(asn1_type, avro_schema, namespace):
"""Convert an ASN.1 type to an Avro type."""
avro_type = None
if isinstance(asn1_type,Integer):
avro_type = 'int'
elif isinstance(asn1_type, Boolean):
avro_type = 'boolean'
elif isinstance(asn1_type, Enumerated):
symbols = [member['name'] for member in asn1_type['members']]
avro_type = {'type': 'enum', 'symbols': symbols, 'name': 'EnumType'}
symbols = [member for member in asn1_type.data_to_value.keys()]
avro_type = {
'type': 'enum',
'name': asn1_type.type_name,
'namespace': namespace,
'symbols': symbols
}
elif isinstance(asn1_type, Sequence):
if avro_schema and next((s for s in avro_schema if s['name'] == asn1_type.type_name), None):
return asn1_type.type_name
Expand All @@ -20,32 +25,46 @@ def asn1_type_to_avro_type(asn1_type, avro_schema):
for member in asn1_type.root_members:
fields.append({
'name': member.name,
'type': asn1_type_to_avro_type(member, avro_schema)
'type': asn1_type_to_avro_type(member, avro_schema, namespace)
})
avro_type = {'type': 'record', 'name': asn1_type.name, 'fields': fields}
avro_type = {
'type': 'record',
'name': asn1_type.type_name if asn1_type.type_name != 'SEQUENCE' else asn1_type.name,
'namespace': namespace,
'fields': fields}
elif isinstance(asn1_type, SequenceOf):
item_type = asn1_type_to_avro_type(asn1_type['element'])
avro_type = {'type': 'array', 'items': item_type}
item_type = asn1_type_to_avro_type(asn1_type.element_type, avro_schema, namespace)
avro_type = {
'type': 'array',
'namespace': namespace,
'items': item_type
}
elif isinstance(asn1_type, OctetString) and asn1_type.type == 'OCTET STRING':
avro_type = 'bytes'
elif isinstance(asn1_type, IA5String) or isinstance(asn1_type, UTF8String):
avro_type = 'string'
elif isinstance(asn1_type, Date):
avro_type = {'type': 'int', 'logicalType': 'date'}
elif isinstance(asn1_type, Real):
avro_type = 'double'

if len(avro_schema) > 0 and 'name' in avro_type:
existing_type = next((t for t in avro_schema if t.get('name') == avro_type['name'] and t.get('namespace') == avro_type.get('namespace') ), None)
if existing_type:
return existing_type.get('name')

return avro_type

def convert_asn1_to_avro_schema(asn1_spec_path):
"""Convert ASN.1 specification to Avro schema."""

spec = asn1tools.compile_files(asn1_spec_path)
avro_schema = []
for module_name, module in spec.modules.items():
for type_name, asn1_type in module.items():
avro_type = asn1_type_to_avro_type(asn1_type.type, avro_schema)
if avro_type:
avro_schema.append({
'namespace': module_name,
'type': avro_type if isinstance(avro_type, str) else avro_type['type'],
'name': type_name,
**({'fields': avro_type['fields']} if isinstance(avro_type, dict) and 'fields' in avro_type else {})
})
avro_type = asn1_type_to_avro_type(asn1_type.type, avro_schema, module_name)
if avro_type and not isinstance(avro_type, str):
avro_schema.append(avro_type)

if len(avro_schema) == 1:
return avro_schema[0]
Expand All @@ -56,9 +75,3 @@ def convert_asn1_to_avro(asn1_spec_path, avro_file_path):
avro_schema = convert_asn1_to_avro_schema(asn1_spec_path)
with open(avro_file_path, 'w') as file:
json.dump(avro_schema, file, indent=4)

# Example usage:
# asn1_spec_path = 'path/to/your/asn1_spec.asn'
# output_file_path = 'path/to/save/avro_schema.json'
# avro_schema = convert_asn1_to_avro(asn1_spec_path)
# save_avro_schema_to_file(avro_schema, output_file_path)
6 changes: 3 additions & 3 deletions avrotize/avrotize.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,19 @@ def main():
a2k_parser.add_argument('--avsc', type=str, help='Path to the Avro schema file', required=True)
a2k_parser.add_argument('--kusto', type=str, help='Path to the Kusto table', required=True)
a2k_parser.add_argument('--record_type', type=str, help='Record type in the Avro schema', required=False)
a2k_parser.add_argument('--emit_cloud_events_columns', action='store_true', help='Add CloudEvents columns to the Kusto table')
a2k_parser.add_argument('--emit_cloud_events_columns', action='store_true', help='Add CloudEvents columns to the Kusto table', default=False)

a2tsql_parser = subparsers.add_parser('a2tsql', help='Convert Avro schema to T-SQL schema')
a2tsql_parser.add_argument('--avsc', type=str, help='Path to the Avro schema file', required=True)
a2tsql_parser.add_argument('--tsql', type=str, help='Path to the T-SQL table', required=True)
a2tsql_parser.add_argument('--record_type', type=str, help='Record type in the Avro schema', required=False)
a2tsql_parser.add_argument('--emit_cloud_events_columns', action='store_true', help='Add CloudEvents columns to the T-SQL table')
a2tsql_parser.add_argument('--emit_cloud_events_columns', action='store_true', help='Add CloudEvents columns to the T-SQL table', default=False)

a2pq_parser = subparsers.add_parser('a2pq', help='Convert Avro schema to Parquet schema')
a2pq_parser.add_argument('--avsc', type=str, help='Path to the Avro schema file', required=True)
a2pq_parser.add_argument('--parquet', type=str, help='Path to the Parquet file', required=True)
a2pq_parser.add_argument('--record_type', type=str, help='Record type in the Avro schema', required=False)
a2pq_parser.add_argument('--emit_cloud_events_columns', action='store_true', help='Add CloudEvents columns to the Parquet file')
a2pq_parser.add_argument('--emit_cloud_events_columns', action='store_true', help='Add CloudEvents columns to the Parquet file', default=False)

asn2a_parser = subparsers.add_parser('asn2a', help='Convert ASN.1 schema to Avro schema')
asn2a_parser.add_argument('--asn', type=str, help='Path to the ASN.1 schema file', required=True)
Expand Down
53 changes: 53 additions & 0 deletions test/asn1/movie.asn
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
-- MovieSchema: sample ASN.1 module describing a movie and its related types.
-- Used as the input fixture for test_convert_movie_asn_to_avro
-- (test/test_asn1toavro.py), which converts this module to an Avro schema file.
-- The module is declared with AUTOMATIC TAGS; no explicit tags are written on
-- any component below.
-- Movie refers to Person, Genre, Duration, MovieRating and ProductionDetails,
-- all of which are defined further down in this same module.
MovieSchema DEFINITIONS AUTOMATIC TAGS ::= BEGIN

-- Defines the structure for a movie within the schema
Movie ::= SEQUENCE {
title UTF8String (SIZE (1..100)), -- Movie title with a length restriction for brevity and clarity
director Person, -- Reference to the Person structure, specifying the movie's director
releaseDate DATE, -- The official release date of the movie
genre Genre OPTIONAL, -- Genre of the movie, chosen from a predefined list
duration Duration, -- Duration of the movie, including hours and minutes
rating MovieRating OPTIONAL, -- The movie's rating, indicating suitability for different audiences
cast SEQUENCE SIZE (0..50) OF Person OPTIONAL, -- A list of cast members, limited to 50 to avoid excessive length
productionDetails ProductionDetails OPTIONAL -- Additional details about the movie's production
}

-- Represents an individual involved in the movie, either in cast or crew
Person ::= SEQUENCE {
name UTF8String (SIZE (1..100)), -- The person's name, with length restrictions
role UTF8String OPTIONAL, -- The person's role in the movie, e.g., actor, producer (optional)
birthDate DATE OPTIONAL, -- The person's birth date (optional)
nationality UTF8String OPTIONAL -- The person's nationality (optional)
}

-- Enumerated list of possible movie genres
Genre ::= ENUMERATED {
action (0), -- Action genre
comedy (1), -- Comedy genre
drama (2), -- Drama genre
scienceFiction (3), -- Science Fiction genre
documentary (4), -- Documentary genre
thriller (5), -- Thriller genre
horror (6) -- Horror genre
}

-- Structure defining the duration of the movie
Duration ::= SEQUENCE {
hours INTEGER (0..99), -- Number of hours, with a maximum for practicality
minutes INTEGER (0..59) -- Number of minutes, constrained to valid time representation
}

-- Structure for the movie's rating, including category and age restrictions
MovieRating ::= SEQUENCE {
category UTF8String, -- The rating category (e.g., PG, PG-13, R)
minimumAge INTEGER OPTIONAL -- The minimum age recommended to watch the movie (optional)
}

-- Details about the production of the movie
ProductionDetails ::= SEQUENCE {
productionCompany UTF8String, -- The name of the company that produced the movie
budget INTEGER OPTIONAL, -- The total budget of the movie in USD (optional)
filmingLocations SEQUENCE OF UTF8String OPTIONAL -- List of locations where the movie was filmed (optional)
}

END
11 changes: 10 additions & 1 deletion test/test_asn1toavro.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from avrotize.asn1toavro import convert_asn1_to_avro

class TestAsn1ToAvro(unittest.TestCase):
def test_convert_address_jsons_to_avro(self):
def test_convert_address_asn_to_avro(self):
cwd = os.getcwd()
asn1_path = os.path.join(cwd, "test", "asn1", "person.asn")
avro_path = os.path.join(cwd, "test", "tmp", "personasn.avsc")
Expand All @@ -20,3 +20,12 @@ def test_convert_address_jsons_to_avro(self):
os.makedirs(dir)

convert_asn1_to_avro(asn1_path, avro_path)

def test_convert_movie_asn_to_avro(self):
    """Convert the movie.asn fixture into an Avro schema file under test/tmp.

    The test succeeds when the conversion completes without raising; it
    makes no assertions about the content of the generated schema.
    """
    cwd = os.getcwd()
    asn1_path = os.path.join(cwd, "test", "asn1", "movie.asn")
    avro_path = os.path.join(cwd, "test", "tmp", "movieasn.avsc")
    # exist_ok=True replaces the check-then-create sequence (which can race
    # with a parallel test run) and avoids shadowing the builtin ``dir``.
    os.makedirs(os.path.dirname(avro_path), exist_ok=True)
    convert_asn1_to_avro(asn1_path, avro_path)

0 comments on commit 8092a9c

Please sign in to comment.