diff --git a/README.md b/README.md index 7cec28f..b671f72 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,12 @@ You can use the tool to convert between Avro Schema and other schema formats like JSON Schema, XML Schema (XSD), Protocol Buffers (Protobuf), ASN.1, and database schema formats like Kusto Data Table Definition (KQL) and T-SQL Table Definition (SQL). That means you can also convert from JSON Schema to Protobuf -going via Avro Schema. +going via Avro Schema. + +You can also generate C# and Java code from the Avro Schema documents with +Avrotize. The difference from the native Avro tools is that Avrotize can emit +data classes without Avro library dependencies and, optionally, with annotations +for JSON serialization libraries like Jackson or System.Text.Json. The tool does not convert data (instances of schemas), only the data structure definitions. @@ -126,6 +131,11 @@ Converting from Avro Schema: - [`avrotize a2tsql`](#convert-avro-schema-to-t-sql-table-definition) - Convert Avro schema to T-SQL table definition. - [`avrotize a2pq`](#convert-avro-schema-to-empty-parquet-file) - Convert Avro schema to empty Parquet file. +Generate code from Avro Schema: + +- [`avrotize a2csharp`](#generate-c-code-from-avro-schema) - Generate C# code from Avro schema. +- [`avrotize a2java`](#generate-java-code-from-avro-schema) - Generate Java code from Avro schema. + ### Convert Proto schema to Avro schema ```bash @@ -392,6 +402,60 @@ Conversion notes: to structures, not to Parquet unions since those are not supported by the PyArrow library used here. +### Generate C# code from Avro schema + +```bash +avrotize a2csharp --avsc <path-to-avro-schema-file> --csharp <path-to-csharp-dir> [--avro-annotation] [--system-text-json-annotation] [--newtonsoft-json-annotation] [--pascal-properties] +``` + +Parameters: +- `--avsc`: The path to the Avro schema file to be converted. +- `--csharp`: The path to the C# directory to write the conversion result to. 
+- `--avro-annotation`: (optional) If set, the tool will add Avro annotations to the C# classes. +- `--system-text-json-annotation`: (optional) If set, the tool will add System.Text.Json annotations to the C# classes. +- `--newtonsoft-json-annotation`: (optional) If set, the tool will add Newtonsoft.Json annotations to the C# classes. +- `--pascal-properties`: (optional) If set, the tool will use PascalCase properties in the C# classes. + +Conversion notes: +- The tool generates C# classes that represent the Avro schema as data classes. +- Using the `--system-text-json-annotation` or `--newtonsoft-json-annotation` option + will add annotations for the respective JSON serialization library to the generated + C# classes. Because the [`JSON Schema to Avro`](#convert-json-schema-to-avro-schema) conversion generally + preserves the JSON Schema structure in the Avro schema, the generated C# classes + can be used to serialize and deserialize data that is valid per the input JSON schema. +- The classes are generated into a directory structure that reflects the Avro namespace + structure. The tool drops a minimal, default `.csproj` project file into the given + directory if none exists. + + +### Generate Java code from Avro schema + +```bash +avrotize a2java --avsc <path-to-avro-schema-file> --java <path-to-java-dir> [--package <java-package-name>] [--avro-annotation] [--jackson-annotation] [--pascal-properties] +``` + +Parameters: +- `--avsc`: The path to the Avro schema file to be converted. +- `--java`: The path to the Java directory to write the conversion result to. +- `--package`: (optional) The Java package name to use in the generated Java classes. +- `--avro-annotation`: (optional) If set, the tool will add Avro annotations to the Java classes. +- `--jackson-annotation`: (optional) If set, the tool will add Jackson annotations to the Java classes. +- `--pascal-properties`: (optional) If set, the tool will use PascalCase properties in the Java classes. 
+ +Conversion notes: + +- The tool generates Java classes that represent the Avro schema as data classes. +- Using the `--jackson-annotation` option will add annotations for the Jackson + JSON serialization library to the generated Java classes. Because the + [`JSON Schema to Avro`](#convert-json-schema-to-avro-schema) conversion generally + preserves the JSON Schema structure in the Avro schema, the generated Java classes + can be used to serialize and deserialize data that is valid per the input JSON schema. +- The directory `/src/main/java` is created in the specified directory and the + generated Java classes are written to this directory. The tool drops a + minimal, default `pom.xml` Maven project file into the given directory if none + exists. + + ## Contributing Contributions are welcome! Please feel free to submit a Pull Request. diff --git a/avrotize/avrotize.py b/avrotize/avrotize.py index aff4d27..b64f4f8 100644 --- a/avrotize/avrotize.py +++ b/avrotize/avrotize.py @@ -1,9 +1,10 @@ import argparse from avrotize.asn1toavro import convert_asn1_to_avro +from avrotize.avrotocsharp import convert_avro_to_csharp +from avrotize.avrotojava import convert_avro_to_java from avrotize.avrotojsons import convert_avro_to_json_schema from avrotize.avrotokusto import convert_avro_to_kusto from avrotize.avrotoparquet import convert_avro_to_parquet - from avrotize.avrotoproto import convert_avro_to_proto from avrotize.avrototsql import convert_avro_to_tsql from avrotize.jsonstoavro import convert_jsons_to_avro @@ -70,6 +71,23 @@ def main(): kstruct2a_parser = subparsers.add_parser('kstruct2a', help='Convert Kafka Struct to Avro schema') kstruct2a_parser.add_argument('--kstruct', type=str, help='Path to the Kafka Struct file', required=True) kstruct2a_parser.add_argument('--avsc', type=str, help='Path to the Avro schema file', required=True) + + a2csharp_parser = subparsers.add_parser('a2csharp', help='Convert Avro schema to C# classes') + 
a2csharp_parser.add_argument('--avsc', type=str, help='Path to the Avro schema file', required=True) + a2csharp_parser.add_argument('--csharp', type=str, help='Output path for the C# classes', required=True) + a2csharp_parser.add_argument('--avro-annotation', action='store_true', help='Use Avro annotations', default=False) + a2csharp_parser.add_argument('--system-text-json-annotation', action='store_true', help='Use System.Text.Json annotations', default=False) + a2csharp_parser.add_argument('--newtonsoft-json-annotation', action='store_true', help='Use Newtonsoft.Json annotations', default=False) + a2csharp_parser.add_argument('--pascal-properties', action='store_true', help='Use PascalCase properties', default=False) + + a2java_parser = subparsers.add_parser('a2java', help='Convert Avro schema to Java classes') + a2java_parser.add_argument('--avsc', type=str, help='Path to the Avro schema file', required=True) + a2java_parser.add_argument('--java', type=str, help='Output path for the Java classes', required=True) + a2java_parser.add_argument('--package', type=str, help='Java package name', required=False) + a2java_parser.add_argument('--avro-annotation', action='store_true', help='Use Avro annotations', default=False) + a2java_parser.add_argument('--jackson-annotation', action='store_true', help='Use Jackson annotations', default=False) + a2java_parser.add_argument('--pascal-properties', action='store_true', help='Use PascalCase properties', default=False) + args = parser.parse_args() if args.command is None: @@ -142,6 +160,24 @@ def main(): avro_schema_path = args.avsc print(f'Converting Kafka Struct {kstruct_file_path} to Avro {avro_schema_path}') convert_kafka_struct_to_avro_schema(kstruct_file_path, avro_schema_path) + elif args.command == 'a2csharp': + avro_schema_path = args.avsc + csharp_path = args.csharp + avro_annotation = args.avro_annotation + system_text_json_annotation = args.system_text_json_annotation + newtonsoft_json_annotation = 
INDENT = '    '
# NOTE(review): the package names and versions below are assumptions chosen to
# match the `using` directives emitted by write_to_file -- confirm them against
# the dependencies the project actually intends to ship.
CSPROJ_CONTENT = """<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <TargetFramework>net8.0</TargetFramework>
    <Nullable>enable</Nullable>
  </PropertyGroup>
  <ItemGroup>
    <PackageReference Include="Apache.Avro" Version="1.11.3" />
    <PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
    <PackageReference Include="System.Text.Json" Version="8.0.3" />
  </ItemGroup>
</Project>
"""


class AvroToCSharp:
    """Converts Avro schema to C# classes.

    The generator walks an Avro schema (already parsed from JSON) and writes
    one ``.cs`` file per record or enum below ``output_dir``, using the
    PascalCased Avro namespace as the directory structure.

    Attributes set by the caller after construction:
        pascal_properties: emit PascalCase property names.
        system_text_json_annotation: add ``[JsonPropertyName]`` attributes.
        newtonsoft_json_annotation: add ``[JsonProperty]`` attributes.
        avro_annotation: implement ``Avro.Specific.ISpecificRecord``.
    """

    def __init__(self, base_namespace: str = '') -> None:
        self.base_namespace = base_namespace
        self.output_dir = os.getcwd()
        self.pascal_properties = False
        self.system_text_json_annotation = False
        self.newtonsoft_json_annotation = False
        self.avro_annotation = False

    def concat_namespace(self, namespace: str, name: str) -> str:
        """Join ``namespace`` and ``name`` with a dot, skipping empty parts.

        An empty ``name`` no longer yields a trailing dot (``"a."``), which
        previously produced an invalid C# namespace for schemas that carry no
        ``namespace`` attribute.
        """
        if namespace and name:
            return f"{namespace}.{name}"
        return namespace or name

    def map_primitive_to_csharp(self, avro_type: str) -> str:
        """Map an Avro primitive type name to a C# type (``object`` if unknown)."""
        mapping = {
            'null': 'void',  # Placeholder, nullable handling lives in the union logic
            'boolean': 'bool',
            'int': 'int',
            'long': 'long',
            'float': 'float',
            'double': 'double',
            'bytes': 'byte[]',
            'string': 'string',
        }
        return mapping.get(avro_type, 'object')

    def convert_avro_type_to_csharp(self, avro_type: Union[str, Dict, List], parent_namespace: str) -> str:
        """Return the C# type expression for an Avro type.

        Records and enums encountered on the way are generated (and written to
        disk) as a side effect. Unions with more than one non-null branch map
        to ``object``.
        """
        if isinstance(avro_type, str):
            return self.map_primitive_to_csharp(avro_type)
        if isinstance(avro_type, list):
            non_null_types = [t for t in avro_type if t != 'null']
            if len(non_null_types) == 1:
                inner = self.convert_avro_type_to_csharp(non_null_types[0], parent_namespace)
                # Only a union that actually contains 'null' is nullable.
                return f"{inner}?" if 'null' in avro_type else inner
            # Still emit the named types of a complex union so they exist on disk.
            for branch in non_null_types:
                if isinstance(branch, dict) and branch.get('type') in ('record', 'enum'):
                    self.generate_class_or_enum(branch, parent_namespace)
            return 'object'  # Placeholder for complex unions
        if isinstance(avro_type, dict):
            kind = avro_type['type']
            if kind in ('record', 'enum'):
                return self.generate_class_or_enum(avro_type, parent_namespace, write_file=True)
            if kind == 'array':
                return f"List<{self.convert_avro_type_to_csharp(avro_type['items'], parent_namespace)}>"
            if kind == 'map':
                # Avro map keys are always strings; the value type comes from 'values'.
                return f"Dictionary<string, {self.convert_avro_type_to_csharp(avro_type['values'], parent_namespace)}>"
            # e.g. {"type": "string", "logicalType": ...}: unwrap the inner type.
            return self.convert_avro_type_to_csharp(kind, parent_namespace)
        return 'object'

    def generate_class_or_enum(self, avro_schema: Dict, parent_namespace: str, write_file: bool = True) -> str:
        """Dispatch to :meth:`generate_class` or :meth:`generate_enum`.

        Returns the qualified C# type name, or ``''`` for any other schema type.
        """
        if avro_schema['type'] == 'record':
            return self.generate_class(avro_schema, parent_namespace, write_file)
        if avro_schema['type'] == 'enum':
            return self.generate_enum(avro_schema, parent_namespace, write_file)
        return ''

    def _property_name(self, field: Dict, class_name: str) -> str:
        """Return the C# member name for ``field``.

        C# forbids a member named like its enclosing type, so a trailing
        underscore is appended on collision.
        """
        name = field['name']
        if self.pascal_properties:
            name = pascal(name)
        if name == class_name:
            name += "_"
        return name

    def _schema_literal(self, avro_schema: Dict) -> str:
        """Render the schema JSON as concatenated C# string-literal chunks.

        The raw JSON is cut into 80-character chunks first and escaped
        afterwards, so an escape sequence can never be split across chunks and
        backslashes already present in the JSON stay valid in C#.
        """
        raw = json.dumps(avro_schema)
        chunks = (raw[i:i + 80] for i in range(0, len(raw), 80))
        escaped = (chunk.replace('\\', '\\\\').replace('"', '\\"') for chunk in chunks)
        return f"\"+\n{INDENT}\"".join(escaped)

    def _generate_avro_members(self, avro_schema: Dict, class_name: str, parent_namespace: str) -> str:
        """Build the ``ISpecificRecord`` members (schema, Get, Put) of a class."""
        getter = [
            f"{INDENT}object global::Avro.Specific.ISpecificRecord.Get(int fieldPos)",
            f"{INDENT}{{",
            f"{INDENT * 2}switch (fieldPos)",
            f"{INDENT * 2}{{",
        ]
        putter = [
            f"{INDENT}void global::Avro.Specific.ISpecificRecord.Put(int fieldPos, object fieldValue)",
            f"{INDENT}{{",
            f"{INDENT * 2}switch (fieldPos)",
            f"{INDENT * 2}{{",
        ]
        for pos, field in enumerate(avro_schema.get('fields', [])):
            name = self._property_name(field, class_name)
            field_type = self.convert_avro_type_to_csharp(field['type'], parent_namespace)
            getter.append(f"{INDENT * 3}case {pos}: return this.{name};")
            putter.append(f"{INDENT * 3}case {pos}: this.{name} = ({field_type})fieldValue; break;")
        getter.append(f"{INDENT * 3}default: throw new global::Avro.AvroRuntimeException($\"Bad index {{fieldPos}} in Get()\");")
        putter.append(f"{INDENT * 3}default: throw new global::Avro.AvroRuntimeException($\"Bad index {{fieldPos}} in Put()\");")
        closing = [f"{INDENT * 2}}}", f"{INDENT}}}"]

        members = (
            f"\n\n{INDENT}public static global::Avro.Schema AvroSchema = global::Avro.Schema.Parse(\n"
            f"{INDENT}\"{self._schema_literal(avro_schema)}\");\n"
        )
        members += f"\n{INDENT}global::Avro.Schema global::Avro.Specific.ISpecificRecord.Schema => AvroSchema;\n"
        members += "\n" + "\n".join(getter + closing) + "\n" + "\n".join(putter + closing)
        return members

    def generate_class(self, avro_schema: Dict, parent_namespace: str, write_file: bool) -> str:
        """Generate a C# class for an Avro record and return its qualified name."""
        namespace = pascal(self.concat_namespace(parent_namespace, avro_schema.get('namespace', '')))
        class_name = pascal(avro_schema['name'])
        properties = "\n".join(
            self.generate_property(field, class_name, parent_namespace)
            for field in avro_schema.get('fields', [])
        )

        parts = []
        if 'doc' in avro_schema:
            parts.append(f"/// <summary>\n/// {avro_schema['doc']}\n/// </summary>\n")
        parts.append(f"public class {class_name}")
        if self.avro_annotation:
            parts.append(" : global::Avro.Specific.ISpecificRecord")
        parts.append("\n{\n" + properties)
        if self.avro_annotation:
            parts.append(self._generate_avro_members(avro_schema, class_name, parent_namespace))
        parts.append("\n}")
        class_definition = ''.join(parts)

        if write_file:
            self.write_to_file(namespace, class_name, class_definition)
        return self.concat_namespace(namespace, class_name)

    def generate_enum(self, avro_schema: Dict, parent_namespace: str, write_file: bool) -> str:
        """Generate a C# enum for an Avro enum and return its qualified name."""
        namespace = pascal(self.concat_namespace(parent_namespace, avro_schema.get('namespace', '')))
        enum_name = pascal(avro_schema['name'])
        enum_body = ",\n".join(f"{INDENT}{symbol}" for symbol in avro_schema['symbols'])

        enum_definition = ''
        if 'doc' in avro_schema:
            enum_definition += f"/// <summary>\n/// {avro_schema['doc']}\n/// </summary>\n"
        enum_definition += f"public enum {enum_name}\n{{\n{enum_body}\n}}"

        if write_file:
            self.write_to_file(namespace, enum_name, enum_definition)
        return self.concat_namespace(namespace, enum_name)

    def generate_property(self, field: Dict, class_name: str, parent_namespace: str) -> str:
        """Generate one auto-property (with optional doc and JSON attributes)."""
        field_type = self.convert_avro_type_to_csharp(field['type'], parent_namespace)
        # JSON attributes always carry the original Avro field name.
        annotation_name = field['name']
        field_name = self._property_name(field, class_name)

        prop = ''
        if 'doc' in field:
            prop += f"{INDENT}/// <summary>\n{INDENT}/// {field['doc']}\n{INDENT}/// </summary>\n"
        if self.system_text_json_annotation:
            prop += f"{INDENT}[JsonPropertyName(\"{annotation_name}\")]\n"
        if self.newtonsoft_json_annotation:
            prop += f"{INDENT}[JsonProperty(\"{annotation_name}\")]\n"
        prop += f"{INDENT}public {field_type} {field_name} {{ get; set; }}"
        return prop

    def write_to_file(self, namespace: str, name: str, definition: str):
        """Write a class or enum definition to ``<output_dir>/<namespace path>/<name>.cs``.

        When ``namespace`` is empty the definition is written at file scope
        instead of inside an (invalid) unnamed ``namespace`` block.
        """
        directory_path = os.path.join(self.output_dir, namespace.replace('.', os.sep))
        os.makedirs(directory_path, exist_ok=True)
        file_path = os.path.join(directory_path, f"{name}.cs")

        # Common using statements (add more as needed)
        file_content = "#pragma warning disable CS8618\n#pragma warning disable CS8603\n\nusing System;\nusing System.Collections.Generic;\n"
        if self.system_text_json_annotation:
            file_content += "using System.Text.Json.Serialization;\n"
        if self.newtonsoft_json_annotation:
            file_content += "using Newtonsoft.Json;\n"
        if self.avro_annotation:
            file_content += "using Avro;\nusing Avro.Specific;\n"

        if namespace:
            indented_definition = '\n'.join(f"{INDENT}{line}" for line in definition.split('\n'))
            file_content += f"\nnamespace {namespace}\n{{\n{indented_definition}\n}}"
        else:
            file_content += f"\n{definition}"

        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(file_content)

    def convert(self, avro_schema_path: str, output_dir: str):
        """Convert the Avro schema file at ``avro_schema_path`` to C# in ``output_dir``.

        The output directory is created first; a default ``.csproj`` named
        after the directory is then written unless one already exists.
        """
        with open(avro_schema_path, 'r', encoding='utf-8') as file:
            schema = json.load(file)

        if isinstance(schema, dict):
            schema = [schema]

        # Create the directory before writing into it (the project file used
        # to be written first, which failed for a not-yet-existing directory).
        os.makedirs(output_dir, exist_ok=True)
        # abspath drops a trailing separator so the project gets a real name.
        project_name = os.path.basename(os.path.abspath(output_dir))
        csproj_file = os.path.join(output_dir, f"{project_name}.csproj")
        if not os.path.exists(csproj_file):
            with open(csproj_file, 'w', encoding='utf-8') as file:
                file.write(CSPROJ_CONTENT)

        self.output_dir = output_dir
        for avro_schema in schema:
            self.generate_class_or_enum(avro_schema, self.base_namespace)
INDENT = '    '
# NOTE(review): jackson-bom is kept as a plain pom-type dependency because
# those are the values present in the original constant; confirm whether
# jackson-databind / jackson-annotations should be declared instead.
POM_CONTENT = """<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>demo</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <maven.compiler.source>21</maven.compiler.source>
        <maven.compiler.target>21</maven.compiler.target>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.avro</groupId>
            <artifactId>avro</artifactId>
            <version>1.11.3</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson</groupId>
            <artifactId>jackson-bom</artifactId>
            <version>2.17.0</version>
            <type>pom</type>
        </dependency>
    </dependencies>
</project>
"""

_JAVA_RESERVED_WORDS = frozenset([
    'abstract', 'assert', 'boolean', 'break', 'byte', 'case', 'catch', 'char', 'class', 'const',
    'continue', 'default', 'do', 'double', 'else', 'enum', 'extends', 'final', 'finally', 'float',
    'for', 'goto', 'if', 'implements', 'import', 'instanceof', 'int', 'interface', 'long', 'native',
    'new', 'package', 'private', 'protected', 'public', 'return', 'short', 'static', 'strictfp',
    'super', 'switch', 'synchronized', 'this', 'throw', 'throws', 'transient', 'try', 'void', 'volatile',
    'while', 'true', 'false', 'null',
])


def flatten_type_name(name: str) -> str:
    """Strip the namespace and generic punctuation from a type name."""
    base_name = pascal(name.replace(' ', '')).split('.')[-1]
    for char in '><,':
        base_name = base_name.replace(char, '')
    return base_name


def is_java_reserved_word(word: str) -> bool:
    """Return True if ``word`` is a Java keyword or reserved literal."""
    return word in _JAVA_RESERVED_WORDS


class AvroToJava:
    """Converts Avro schema to Java classes.

    Optionally adds Jackson annotations and Avro ``SpecificRecord`` methods.
    Packages are tracked internally as ``/``-separated paths and converted to
    dotted names only when written into Java source.
    """

    def __init__(self, base_package: str = '') -> None:
        self.base_package = base_package.replace('.', '/')
        self.output_dir = os.getcwd()
        self.avro_annotation = False
        self.jackson_annotations = False
        self.pascal_properties = False

    def concat_package(self, package: str, name: str) -> str:
        """Join ``package`` and ``name`` with a dot, skipping empty parts."""
        if package and name:
            return f"{package}.{name}"
        return package or name

    class JavaType:
        """Java type definition: a type name plus the branch types of a union."""

        def __init__(self, type_name: str, union_types: Union[List['AvroToJava.JavaType'], None] = None):
            self.type_name = type_name
            self.union_types = union_types

    def map_primitive_to_java(self, avro_type: str) -> JavaType:
        """Map an Avro primitive type name to a (boxed) Java type."""
        mapping = {
            'null': 'Void',
            'boolean': 'Boolean',
            'int': 'Integer',
            'long': 'Long',
            'float': 'Float',
            'double': 'Double',
            'bytes': 'byte[]',
            'string': 'String',
        }
        return AvroToJava.JavaType(mapping.get(avro_type, 'Object'))

    def convert_avro_type_to_java(self, avro_type: Union[str, Dict, List], parent_package: str) -> JavaType:
        """Return the Java type for an Avro type.

        Records and enums are generated (and written) as a side effect.
        Multi-branch unions become ``Object`` with the branch types attached
        as ``union_types``. Always returns a ``JavaType``.
        """
        if isinstance(avro_type, str):
            return self.map_primitive_to_java(avro_type)
        if isinstance(avro_type, list):
            non_null_types = [t for t in avro_type if t != 'null']
            if len(non_null_types) == 1:
                return self.convert_avro_type_to_java(non_null_types[0], parent_package)
            branches: List[AvroToJava.JavaType] = [
                self.convert_avro_type_to_java(t, parent_package) for t in non_null_types
            ]
            return AvroToJava.JavaType('Object', branches)
        if isinstance(avro_type, dict):
            kind = avro_type['type']
            if kind in ('record', 'enum'):
                return self.generate_class_or_enum(avro_type, parent_package, write_file=True)
            if kind == 'array':
                item_type = self.convert_avro_type_to_java(avro_type['items'], parent_package)
                return AvroToJava.JavaType(f"List<{item_type.type_name}>")
            if kind == 'map':
                # Avro map keys are always strings; the value type comes from 'values'.
                value_type = self.convert_avro_type_to_java(avro_type['values'], parent_package)
                return AvroToJava.JavaType(f"Map<String, {value_type.type_name}>")
            return self.convert_avro_type_to_java(kind, parent_package)
        # Callers read .type_name, so the fallback must be a JavaType too.
        return AvroToJava.JavaType('Object')

    def generate_class_or_enum(self, avro_schema: Dict, parent_package: str, write_file: bool = True) -> JavaType:
        """Generate a Java class or enum from an Avro schema."""
        if avro_schema['type'] == 'record':
            return self.generate_class(avro_schema, parent_package, write_file)
        if avro_schema['type'] == 'enum':
            return self.generate_enum(avro_schema, parent_package, write_file)
        return AvroToJava.JavaType('Object')

    def _package_path(self, namespace: str) -> str:
        """Return the lower-cased ``/``-separated package path for ``namespace``.

        Empty parts are skipped and dots are normalised, so the directory
        layout always matches the dotted ``package`` declaration, even when
        ``base_package`` was assigned directly in dotted form or is ``None``.
        """
        parts = [part for part in (self.base_package or '', namespace or '') if part]
        return '/'.join(parts).replace('.', '/').lower()

    def _field_name(self, field: Dict) -> str:
        """Return the Java member name; reserved words get a trailing underscore."""
        name = pascal(field['name']) if self.pascal_properties else field['name']
        if is_java_reserved_word(name):
            name += "_"
        return name

    @staticmethod
    def _accessor_suffix(field_name: str) -> str:
        """Upper-case only the first character (JavaBeans accessor naming)."""
        return field_name[:1].upper() + field_name[1:]

    def _schema_literal(self, avro_schema: Dict) -> str:
        """Render the schema JSON as concatenated Java string-literal chunks.

        Chunks are cut from the raw JSON and escaped afterwards so escape
        sequences are never split and existing backslashes remain valid.
        """
        raw = json.dumps(avro_schema)
        chunks = (raw[i:i + 80] for i in range(0, len(raw), 80))
        escaped = (chunk.replace('\\', '\\\\').replace('"', '\\"') for chunk in chunks)
        return f"\"+\n{INDENT}\"".join(escaped)

    def generate_class(self, avro_schema: Dict, parent_package: str, write_file: bool) -> JavaType:
        """Generate a Java class from an Avro record schema."""
        package = self._package_path(avro_schema.get('namespace', ''))
        class_name = pascal(avro_schema['name'])
        fields = avro_schema.get('fields', [])
        class_body = "\n".join(self.generate_property(field, parent_package) for field in fields)

        class_definition = ''
        if 'doc' in avro_schema:
            class_definition += f"/** {avro_schema['doc']} */\n"
        class_definition += f"public class {class_name}"
        if self.avro_annotation:
            class_definition += " implements SpecificRecord"
        class_definition += " {\n" + class_body
        if self.avro_annotation:
            class_definition += (
                f"\n\n{INDENT}public static Schema AvroSchema = new Schema.Parser().parse(\n"
                f"{INDENT}\"{self._schema_literal(avro_schema)}\");\n"
            )
            class_definition += f"\n{INDENT}@Override\n{INDENT}public Schema getSchema(){{ return AvroSchema; }}\n"
            class_definition += self.generate_get_method(fields, parent_package)
            class_definition += self.generate_put_method(fields, parent_package)
        class_definition += "\n}"

        if write_file:
            self.write_to_file(package, class_name, class_definition)
        return AvroToJava.JavaType(self.concat_package(package.replace('/', '.'), class_name))

    def generate_get_method(self, fields: List[Dict], parent_package: str) -> str:
        """Generate the ``get`` method for SpecificRecord."""
        get_method = f"\n{INDENT}@Override\n{INDENT}public Object get(int field$) {{\n"
        get_method += f"{INDENT * 2}switch (field$) {{\n"
        for index, field in enumerate(fields):
            # Same naming rule as generate_property so the member always exists.
            get_method += f"{INDENT * 3}case {index}: return this.{self._field_name(field)};\n"
        get_method += f"{INDENT * 3}default: throw new AvroRuntimeException(\"Bad index: \" + field$);\n"
        get_method += f"{INDENT * 2}}}\n{INDENT}}}\n"
        return get_method

    def generate_put_method(self, fields: List[Dict], parent_package: str) -> str:
        """Generate the ``put`` method for SpecificRecord."""
        put_method = f"\n{INDENT}@Override\n{INDENT}public void put(int field$, Object value$) {{\n"
        put_method += f"{INDENT * 2}switch (field$) {{\n"
        for index, field in enumerate(fields):
            java_type = self.convert_avro_type_to_java(field['type'], parent_package)
            put_method += (
                f"{INDENT * 3}case {index}: this.{self._field_name(field)} = "
                f"({java_type.type_name})value$; break;\n"
            )
        put_method += f"{INDENT * 3}default: throw new AvroRuntimeException(\"Bad index: \" + field$);\n"
        put_method += f"{INDENT * 2}}}\n{INDENT}}}\n"
        return put_method

    def generate_enum(self, avro_schema: Dict, parent_package: str, write_file: bool) -> JavaType:
        """Generate a Java enum from an Avro enum schema."""
        package = self._package_path(avro_schema.get('namespace', ''))
        enum_name = pascal(avro_schema['name'])
        symbols_str = ', '.join(avro_schema.get('symbols', []))

        enum_definition = ''
        if 'doc' in avro_schema:
            enum_definition += f"/** {avro_schema['doc']} */\n"
        enum_definition += f"public enum {enum_name} {{\n"
        enum_definition += f"{INDENT}{symbols_str};\n"
        enum_definition += "}\n"

        if write_file:
            self.write_to_file(package, enum_name, enum_definition)
        return AvroToJava.JavaType(self.concat_package(package.replace('/', '.'), enum_name))

    def generate_property(self, field: Dict, parent_package: str) -> str:
        """Generate a private field with getter/setter (plus typed union accessors)."""
        field_type = self.convert_avro_type_to_java(field['type'], parent_package)
        field_name = self._field_name(field)
        # str.capitalize() would lower-case the tail ("getFirstname"), which no
        # longer matches the field name under JavaBeans rules.
        accessor = self._accessor_suffix(field_name)
        type_name = field_type.type_name

        lines = []
        if 'doc' in field:
            lines.append(f"{INDENT}/** {field['doc']} */")
        if self.jackson_annotations:
            lines.append(f"{INDENT}@JsonProperty(\"{field['name']}\")")
        lines.append(f"{INDENT}private {type_name} {field_name};")
        lines.append(f"{INDENT}public {type_name} get{accessor}() {{ return {field_name}; }}")
        lines.append(f"{INDENT}public void set{accessor}({type_name} {field_name}) {{ this.{field_name} = {field_name}; }}")
        for union_type in field_type.union_types or []:
            branch = union_type.type_name
            flat = flatten_type_name(branch)
            lines.append(f"{INDENT}public {branch} get{accessor}As{flat}() {{ return ({branch}){field_name}; }}")
            lines.append(f"{INDENT}public void set{accessor}As{flat}({branch} {field_name}) {{ this.{field_name} = {field_name}; }}")
        return "\n".join(lines) + "\n"

    def write_to_file(self, package: str, name: str, definition: str):
        """Write a Java class or enum to ``<output_dir>/<package>/<name>.java``."""
        directory_path = os.path.join(self.output_dir, package)
        os.makedirs(directory_path, exist_ok=True)
        file_path = os.path.join(directory_path, f"{name}.java")

        with open(file_path, 'w', encoding='utf-8') as file:
            if package:
                file.write(f"package {package.replace('/', '.')};\n\n")
            file.write("import java.util.List;\n")
            file.write("import java.util.Map;\n")
            if self.avro_annotation:
                file.write("import org.apache.avro.specific.SpecificRecord;\n")
                file.write("import org.apache.avro.AvroRuntimeException;\n")
                file.write("import org.apache.avro.Schema;\n")
            if self.jackson_annotations:
                file.write("import com.fasterxml.jackson.annotation.JsonProperty;\n")
            file.write("\n")
            file.write(definition)

    def convert(self, avro_schema_path: str, output_dir: str):
        """Convert the Avro schema file to Java sources below ``output_dir``.

        The target directory is created first; a default ``pom.xml`` is
        written unless one exists, and sources go to ``src/main/java``.
        """
        with open(avro_schema_path, 'r', encoding='utf-8') as file:
            schema = json.load(file)

        if isinstance(schema, dict):
            schema = [schema]

        # Create the directory before writing pom.xml into it.
        os.makedirs(output_dir, exist_ok=True)
        pom_path = os.path.join(output_dir, "pom.xml")
        if not os.path.exists(pom_path):
            with open(pom_path, 'w', encoding='utf-8') as file:
                file.write(POM_CONTENT)

        output_dir = os.path.join(output_dir, "src/main/java".replace('/', os.sep))
        os.makedirs(output_dir, exist_ok=True)
        self.output_dir = output_dir
        for avro_schema in schema:
            self.generate_class_or_enum(avro_schema, self.base_package)
def pascal(string):
    """Convert a string to PascalCase.

    Handles three compound forms recursively:

    * ``a::b::c`` -- the first segment is kept verbatim, the remaining
      segments are converted and re-joined with ``::``.
    * ``a.b.c`` -- every dot-separated segment is converted individually.
    * a single identifier in snake_case, PascalCase or camelCase.

    Falsy input (``''`` or ``None``) is returned unchanged. The guard runs
    first so that ``None`` no longer raises ``TypeError`` on the ``in`` checks.
    """
    if not string:
        return string
    if '::' in string:
        head, *rest = string.split('::')
        return head + '::' + '::'.join(pascal(segment) for segment in rest)
    if '.' in string:
        return '.'.join(pascal(segment) for segment in string.split('.'))

    if '_' in string:
        # snake_case
        words = string.split('_')
    elif string[0].isupper():
        # PascalCase
        words = re.findall(r'[A-Z][a-z0-9_]*\.?', string)
    else:
        # camelCase
        words = re.findall(r'[a-z0-9]+\.?|[A-Z][a-z0-9_]*\.?', string)
    return ''.join(word.capitalize() for word in words)
class TestAvroToCSharp(unittest.TestCase):
    """Smoke tests for ``convert_avro_to_csharp``.

    Every test resolves its paths below ``<cwd>/test``, recreates an empty
    output directory under ``test/tmp`` and runs the conversion. A test
    passes when the conversion completes without raising.
    """

    @staticmethod
    def _in_test_dir(*parts):
        """Join ``parts`` onto ``<cwd>/test``."""
        return os.path.join(os.getcwd(), "test", *parts)

    def _fresh_output_dir(self, name):
        """Return ``<cwd>/test/tmp/<name>`` as a newly created, empty directory."""
        target = self._in_test_dir("tmp", name)
        if os.path.exists(target):
            shutil.rmtree(target)
        os.makedirs(target)
        return target

    def test_convert_address_avsc_to_csharp(self):
        """Convert address.avsc to C# with default options."""
        schema = self._in_test_dir("avsc", "address.avsc")
        out_dir = self._fresh_output_dir("address-cs")

        convert_avro_to_csharp(schema, out_dir)

    def test_convert_address_avsc_to_csharp_avro_annotation(self):
        """Convert address.avsc to C# with ``avro_annotation`` enabled."""
        schema = self._in_test_dir("avsc", "address.avsc")
        out_dir = self._fresh_output_dir("address-cs-avro")

        convert_avro_to_csharp(schema, out_dir, avro_annotation=True)

    def test_convert_address_avsc_to_csharp_system_text_json_annotation(self):
        """Convert address.avsc to C# with System.Text.Json annotations and PascalCase properties."""
        schema = self._in_test_dir("avsc", "address.avsc")
        out_dir = self._fresh_output_dir("address-cs-stj")

        convert_avro_to_csharp(schema, out_dir, system_text_json_annotation=True, pascal_properties=True)

    def test_convert_address_avsc_to_csharp_newtonsoft_json_annotation(self):
        """Convert address.avsc to C# with Newtonsoft.Json annotations and PascalCase properties."""
        schema = self._in_test_dir("avsc", "address.avsc")
        out_dir = self._fresh_output_dir("address-cs-nj")

        convert_avro_to_csharp(schema, out_dir, newtonsoft_json_annotation=True, pascal_properties=True)

    def test_convert_telemetry_avsc_to_csharp(self):
        """Convert telemetry.avsc to C# with default options."""
        schema = self._in_test_dir("avsc", "telemetry.avsc")
        out_dir = self._fresh_output_dir("telemetry-cs")

        convert_avro_to_csharp(schema, out_dir)

    def test_convert_jfrog_pipelines_jsons_to_avro_to_csharp(self):
        """Convert jfrog-pipelines.json to Avro, then the Avro schema to C#."""
        json_schema = self._in_test_dir("jsons", "jfrog-pipelines.json")
        schema = self._in_test_dir("tmp", "jfrog-pipelines.avsc")
        out_dir = self._fresh_output_dir("jfrog-pipelines-cs")

        convert_jsons_to_avro(json_schema, schema)
        convert_avro_to_csharp(schema, out_dir)

    def test_convert_jfrog_pipelines_jsons_to_avro_to_csharp_annotated(self):
        """Convert jfrog-pipelines.json to Avro, then to C# with every annotation option enabled."""
        json_schema = self._in_test_dir("jsons", "jfrog-pipelines.json")
        schema = self._in_test_dir("tmp", "jfrog-pipelines.avsc")
        out_dir = self._fresh_output_dir("jfrog-pipelines-cs-ann")

        convert_jsons_to_avro(json_schema, schema)
        convert_avro_to_csharp(schema, out_dir, pascal_properties=True, avro_annotation=True, system_text_json_annotation=True, newtonsoft_json_annotation=True)
class TestAvroToJava(unittest.TestCase):
    """Smoke tests for ``convert_avro_to_java``.

    Every test resolves its paths below ``<cwd>/test``, recreates an empty
    output directory under ``test/tmp`` and runs the conversion. A test
    passes when the conversion completes without raising.
    """

    @staticmethod
    def _in_test_dir(*parts):
        """Join ``parts`` onto ``<cwd>/test``."""
        return os.path.join(os.getcwd(), "test", *parts)

    def _fresh_output_dir(self, name):
        """Return ``<cwd>/test/tmp/<name>`` as a newly created, empty directory."""
        target = self._in_test_dir("tmp", name)
        if os.path.exists(target):
            shutil.rmtree(target)
        os.makedirs(target)
        return target

    def test_convert_address_avsc_to_java(self):
        """Convert address.avsc to Java with default options."""
        schema = self._in_test_dir("avsc", "address.avsc")
        out_dir = self._fresh_output_dir("address-java")

        convert_avro_to_java(schema, out_dir)

    def test_convert_address_avsc_to_java_avro_annotation(self):
        """Convert address.avsc to Java with ``avro_annotation`` enabled."""
        schema = self._in_test_dir("avsc", "address.avsc")
        out_dir = self._fresh_output_dir("address-java-avro")

        convert_avro_to_java(schema, out_dir, avro_annotation=True)

    def test_convert_address_avsc_to_java_jackson_annotation(self):
        """Convert address.avsc to Java with Jackson annotations and PascalCase properties."""
        schema = self._in_test_dir("avsc", "address.avsc")
        out_dir = self._fresh_output_dir("address-java-jackson")

        convert_avro_to_java(schema, out_dir, jackson_annotation=True, pascal_properties=True)

    def test_convert_telemetry_avsc_to_java(self):
        """Convert telemetry.avsc to Java with default options."""
        schema = self._in_test_dir("avsc", "telemetry.avsc")
        out_dir = self._fresh_output_dir("telemetry-java")

        convert_avro_to_java(schema, out_dir)

    def test_convert_jfrog_pipelines_jsons_to_avro_to_java(self):
        """Convert jfrog-pipelines.json to Avro, then the Avro schema to Java."""
        json_schema = self._in_test_dir("jsons", "jfrog-pipelines.json")
        schema = self._in_test_dir("tmp", "jfrog-pipelines.avsc")
        out_dir = self._fresh_output_dir("jfrog-pipelines-java")

        convert_jsons_to_avro(json_schema, schema)
        convert_avro_to_java(schema, out_dir)

    def test_convert_jfrog_pipelines_jsons_to_avro_to_java_annotated(self):
        """Convert jfrog-pipelines.json to Avro, then to Java with every annotation option enabled."""
        json_schema = self._in_test_dir("jsons", "jfrog-pipelines.json")
        schema = self._in_test_dir("tmp", "jfrog-pipelines.avsc")
        out_dir = self._fresh_output_dir("jfrog-pipelines-java-ann")

        convert_jsons_to_avro(json_schema, schema)
        convert_avro_to_java(schema, out_dir, pascal_properties=True, avro_annotation=True, jackson_annotation=True)