Fix output (#14)

* fix the error where output was split by comma * fix doc * fix flake8 error --------- Co-authored-by: parisa-zahedi <p.zahedi@uu.nl>
UtrechtUniversity · Apr 15, 2024 · a48aac7 · a48aac7
1 parent 122d367
commit a48aac7
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 10 deletions.
diff --git a/interest/output_generator/text_formater.py b/interest/output_generator/text_formater.py
@@ -39,18 +39,18 @@ def __init__(self, output_unit: str, sentences_per_segment: int,
         self.texts: List[str] = []
 
     def format_output(self, texts: Union[None, List[str]]) -> (
-            Union)[str, List[str], None]:
+            Union)[str, List[str], List[List[str]], None]:
         """
         Formats input texts based on the specified output unit.
 
         Args:
             texts (List[str]): List of input texts to be formatted.
 
         Returns:
-            Union[str, List[List[str]]]: Formatted output text based on the
-            selected output_unit. For 'full_text', returns a single string.
-            For 'paragraph' and 'segmented_text', returns a list of segmented
-             text lists.
+            Union[str, List[str], List[List[str]]]: Formatted output text
+            based on the selected output_unit. For 'full_text', returns a
+            single string. For 'paragraph' and 'segmented_text', returns a
+            list of segmented text lists.
 
         Raises:
             ValueError: If input 'texts' is not a list of strings.
@@ -91,11 +91,11 @@ def _format_fulltext(self) -> str:
         """
         return '\n'.join(self.texts)
 
-    def _format_segmented_text(self) -> List[str]:
+    def _format_segmented_text(self) -> List[List[str]]:
         """Formats texts as segmented text based on sentences_per_segment.
 
         Returns:
-            List[str]: Flattened list of segmented text strings.
+            List[List[str]]: List of segments, each a list of sentences.
         """
         segmented_texts = []
         for text in self.texts:
@@ -104,7 +104,7 @@ def _format_segmented_text(self) -> List[str]:
 
             for i in range(0, len(sentences), self.sentences_per_segment):
                 segment = sentences[i:i + self.sentences_per_segment]
-                segmented_texts.extend(segment)
+                segmented_texts.append(segment)
 
         return segmented_texts
 

diff --git a/scripts/step4_generate_output.py b/scripts/step4_generate_output.py
@@ -135,6 +135,8 @@ def find_articles_in_file(filepath: str, formatter: TextFormatter) -> (
                                    spacy_model=SPACY_MODEL)
     for articles_filepath in args.input_dir.rglob(args.glob):
         df = find_articles_in_file(articles_filepath, text_formatter)
+        if df is None:
+            continue
         file_name = get_file_name_without_extension(articles_filepath)
-        df.to_csv(os.path.join(args.output_dir, 'articles_to_label_'+file_name+'.csv'), index = False)
-
+        df.to_csv(os.path.join(args.output_dir, 'articles_to_label_'
+                               + file_name+'.csv'), index=False)