From 682491fac6d917d25c994e85d04d71b5abfff30e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aaditya=20Ura=20=28looking=20for=20PhD=20Fall=E2=80=9924?= =?UTF-8?q?=29?= Date: Sun, 11 Aug 2024 15:15:26 +0530 Subject: [PATCH] Update pyparser.py --- pyparser.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/pyparser.py b/pyparser.py index e907433..770c6f7 100644 --- a/pyparser.py +++ b/pyparser.py @@ -6,9 +6,6 @@ class MarkdownParsingError(Exception): pass def parse_markdown_to_json(markdown_text): - - line_ending = '\r\n' if '\r\n' in markdown_text else '\n' - task = { "name": "", "description": "", @@ -21,7 +18,6 @@ def parse_markdown_to_json(markdown_text): # Helper function to extract content between headers def extract_section(start_pattern, end_pattern=None): - if end_pattern: match = re.search(f"{start_pattern}(.*?){end_pattern}", markdown_text, re.DOTALL) else: @@ -61,22 +57,21 @@ def extract_section(start_pattern, end_pattern=None): examples_text = extract_section(r'## Examples:', r'## Tags:') if not examples_text: raise MarkdownParsingError("Examples section not found or empty.") - + examples = re.split(r'###\s+Example\s+\d+:', examples_text) if len(examples) < 2: # First split is empty, so we need at least 2 elements raise MarkdownParsingError("No examples found. Expected at least one '### Example X:' section.") - + for example in examples[1:]: # Skip the first split as it's empty - input_match = re.search(r'Input:\s*(.+?)Output:', example, re.DOTALL) - output_match = re.search(r'Output:\s*(.+?)$', example, re.DOTALL) + input_match = re.search(r'Input:\s*```(.+?)```\s*Output:', example, re.DOTALL) + output_match = re.search(r'Output:\s*```(.+?)```\s*$', example, re.DOTALL) if not input_match or not output_match: raise MarkdownParsingError(f"Invalid example format. Expected 'Input:' and 'Output:' sections with code blocks.") task["examples"].append([{ - "input": input_match.group(1).replace('\`\`\`', '').strip(), - "output": output_match.group(1).replace('\`\`\`', '').strip() # Remove the .replace('\n', line_ending) + "input": input_match.group(1).strip(), + "output": output_match.group(1).strip() }]) - # Extract tags (required) tags_text = extract_section(r'## Tags:') if not tags_text: