Skip to content

Commit

Permalink
Added tests to telegram_export_text_generator.py + updated on Documen…
Browse files Browse the repository at this point in the history
…tation

Signed-off-by: Guilherme Bacellar Moralez <guibacellar@gmail.com>
  • Loading branch information
guibacellar committed Aug 22, 2023
1 parent 93b507c commit 66a3631
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 2 deletions.
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ TEx listen --config CONFIGURATION_FILE_PATH --group_id 1234,5678
* **group_id** > Optional - If present, Download the Messages only from the Specified Group IDs

### Download Messages (Download since first message for each group)
Scrape Messages from Telegram Server
```bash
TEx download_messages --config CONFIGURATION_FILE_PATH --group_id 1234,5678
```
Expand All @@ -132,6 +133,7 @@ TEx download_messages --config CONFIGURATION_FILE_PATH --group_id 1234,5678
* **group_id** > Optional - If present, Download the Messages only from the Specified Group IDs

### Generate Report
Generate HTML Report
```bash
TEx report --config CONFIGURATION_FILE_PATH --report_folder REPORT_FOLDER_PATH --group_id * --around_messages NUM --order_desc --limit_days 3 --filter FILTER_EXPRESSION_1,FILTER_EXPRESSION_2,FILTER_EXPRESSION_N
```
Expand All @@ -145,6 +147,7 @@ TEx report --config CONFIGURATION_FILE_PATH --report_folder REPORT_FOLDER_PATH -
* **suppress_repeating_messages** > Optional - If present, suppress all repeating messages in the same report

### Export Downloaded Files
Export Downloaded Files by MimeType
```bash
TEx export_file --config CONFIGURATION_FILE_PATH --report_folder REPORT_FOLDER_PATH --group_id * --filter * --limit_days 3 --mime_type text/plain
```
Expand All @@ -155,6 +158,17 @@ TEx export_file --config CONFIGURATION_FILE_PATH -report_folder REPORT_FOLDER_PA
* **limit_days** > Optional - Number of past days used to filter the Messages
* **mime_type** > Optional - File MIME Type. Ex: application/vnd.android.package-archive

### Export Texts
Export Messages (Texts) using Regex finder
```bash
TEx export_text --config CONFIGURATION_FILE_PATH --order_desc --limit_days 3 --regex REGEX --report_folder REPORT_FOLDER_PATH --group_id *
```
* **config** > Required - Created Configuration File Path
* **report_folder** > Optional - Defines the Report Files Folder
* **group_id** > Optional - If present, Export the Messages only from the Specified Group IDs
* **limit_days** > Optional - Number of past days used to filter the Messages
* **regex** > Required - Regex to find the messages.
* Ex: Export Links from Messages (.*http://.*),(.*https://.*)

<!-- LICENSE -->
## License
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None:

# Load Groups from DB
db_groups: List[TelegramGroupOrmEntity] = TelegramGroupDatabaseManager.get_all_by_phone_number(
args['target_phone_number'])
config['CONFIGURATION']['phone_number'])
logger.info(f'\t\tFound {len(db_groups)} Groups')

# Map to Facade Entities
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
"""Telegram Report - Export Texts Generator."""

import asyncio
import datetime
import os.path
import shutil
import unittest
from configparser import ConfigParser
from typing import Dict
from unittest import mock

import pytz
from sqlalchemy import select, insert, delete
from telethon.tl.functions.messages import GetDialogsRequest

from TEx.core.dir_manager import DirectoryManagerUtils
from TEx.database.db_initializer import DbInitializer
from TEx.database.db_manager import DbManager
from TEx.database.telegram_group_database import TelegramGroupDatabaseManager, TelegramMediaDatabaseManager, \
TelegramMessageDatabaseManager
from TEx.models.database.telegram_db_model import (
TelegramGroupOrmEntity,
TelegramMediaOrmEntity, TelegramMessageOrmEntity, TelegramUserOrmEntity,
)
from TEx.modules.execution_configuration_handler import ExecutionConfigurationHandler
from TEx.modules.telegram_messages_scrapper import TelegramGroupMessageScrapper
from TEx.modules.telegram_report_generator.telegram_export_text_generator import TelegramExportTextGenerator
from TEx.modules.telegram_report_generator.telegram_html_report_generator import TelegramReportGenerator
from tests.modules.common import TestsCommon
from tests.modules.mockups_groups_mockup_data import base_groups_mockup_data, base_messages_mockup_data, \
base_users_mockup_data


class TelegramExportTextGeneratorTest(unittest.TestCase):
    """Run TelegramExportTextGenerator against a small, freshly seeded database.

    Each test seeds two groups (ids 2 and 3) with four messages each, runs the
    generator and then checks which ``result_*.txt`` files exist in the report
    folder.
    """

    # Folder handed to the generator through args['report_folder'].
    REPORT_FOLDER: str = '_report'

    def setUp(self) -> None:
        """Load the configuration, reset the database and seed the fixtures."""
        self.config = ConfigParser()
        self.config.read('../../config.ini')

        # Start from an empty report folder: the filtered test asserts that a
        # file is ABSENT, which must not depend on whether an earlier test
        # (or an earlier run) already produced it.
        shutil.rmtree(self.REPORT_FOLDER, ignore_errors=True)

        for folder in ('_data', '_data/resources', '_data/media', '_data/media/2'):
            DirectoryManagerUtils.ensure_dir_struct(folder)

        DbInitializer.init(data_path='_data/')

        # Reset SQLite tables
        for entity in (TelegramMessageOrmEntity, TelegramGroupOrmEntity, TelegramMediaOrmEntity):
            DbManager.SESSIONS['data'].execute(delete(entity))
        DbManager.SESSIONS['data'].commit()

        # Add Group 2 - With Previous Messages
        self._insert_group(group_id=2, group_username='UN-b', title='UT-02')
        self._insert_message(55, 2, 'Message 1', 'Raw Message 1')
        self._insert_message(56, 2, 'Message 2', 'Raw Message 2')
        self._insert_message(57, 2, 'Message 2', 'Raw Message 3')
        self._insert_message(58, 2, 'Message 3', 'Raw Message 4 - http://www.url.domain.com')

        # Add Group 3 - With Previous Messages
        self._insert_group(group_id=3, group_username='UN-c', title='UT-03')
        self._insert_message(60, 3, 'Message 7', 'Raw Message 1')
        self._insert_message(61, 3, 'Message 8', 'Raw Message 2')
        self._insert_message(62, 3, 'Message 9', 'Raw Message 3')
        self._insert_message(63, 3, 'Message 10', 'Raw Message 4 - http://www.url.domain.com/2')

        DbManager.SESSIONS['data'].commit()

    def tearDown(self) -> None:
        """Close the database session opened for the test."""
        DbManager.SESSIONS['data'].close()

    @staticmethod
    def _insert_group(group_id: int, group_username: str, title: str) -> None:
        """Insert one group; every field except id, username and title is fixed."""
        TelegramGroupDatabaseManager.insert_or_update({
            'id': group_id, 'constructor_id': 'B', 'access_hash': 'BBBBBB',
            'fake': False, 'gigagroup': False, 'has_geo': False,
            'participants_count': 2, 'restricted': False,
            'scam': False, 'group_username': group_username,
            'verified': False, 'title': title, 'source': '5526986587745'
        })

    @staticmethod
    def _insert_message(message_id: int, group_id: int, message: str, raw: str) -> None:
        """Insert one text-only message timestamped with the current UTC time."""
        TelegramMessageDatabaseManager.insert({
            'id': message_id, 'group_id': group_id,
            'date_time': datetime.datetime.now(tz=pytz.utc),
            'message': message, 'raw': raw, 'from_id': None, 'from_type': None,
            'to_id': None, 'media_id': None
        })

    def _run_export(self, group_id: str) -> None:
        """Run the generator for ``group_id`` ('*' or a group id as a string)."""
        target: TelegramExportTextGenerator = TelegramExportTextGenerator()
        args: Dict = {
            'export_text': True,
            'config': 'unittest_configfile.config',
            'report_folder': self.REPORT_FOLDER,
            'group_id': group_id,
            'order_desc': True,
            'filter': 'Message',
            'limit_days': 30,
            'regex': '(.*http://.*),(.*https://.*)'
        }
        data: Dict = {}
        TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data)

        # assertLogs() also fails the test when the run emits no log record.
        with self.assertLogs():
            loop = asyncio.get_event_loop()
            loop.run_until_complete(
                target.run(
                    config=self.config,
                    args=args,
                    data=data
                )
            )

    def _report_exists(self, file_name: str) -> bool:
        """Return True if ``file_name`` exists inside the report folder."""
        return os.path.exists(os.path.join(self.REPORT_FOLDER, file_name))

    def test_run_generate_report_all(self):
        """With group_id '*', a result file exists for both seeded groups."""
        self._run_export(group_id='*')

        # Check Output Files
        self.assertTrue(self._report_exists('result_UN-b_2.txt'))
        self.assertTrue(self._report_exists('result_UN-c_3.txt'))

    def test_run_generate_report_filtered(self):
        """With group_id '2', only the result file for group 2 exists."""
        self._run_export(group_id='2')

        # Check Output Files
        self.assertTrue(self._report_exists('result_UN-b_2.txt'))
        self.assertFalse(self._report_exists('result_UN-c_3.txt'))
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ commands =
poetry run coverage erase
poetry run coverage run --source='../TEx' -m pytest . {posargs} --color=yes
poetry run coverage report --rcfile=../coverage.rc
poetry run coverage html --rcfile=../coverage.rc --fail-under=78
poetry run coverage html --rcfile=../coverage.rc --fail-under=85


[testenv]
Expand Down

0 comments on commit 66a3631

Please sign in to comment.