From 474422c7f1cc07662804eff13176fcf8b5a6af8c Mon Sep 17 00:00:00 2001 From: Umputun Date: Tue, 5 Dec 2023 19:22:50 -0600 Subject: [PATCH] add more docs --- README.md | 57 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index b4533cda..f4ef4afd 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,20 @@ Anti-Spam bot for Telegram. +## What is it and how does it work? + +The bot is designed to run as a Docker container. It requires a token and a group name/id to work. The bot will listen to all messages in the group and ban users who post spam. + +Spam detection is based on several factors: +- similarity to known spam messages +- number of emojis in the message +- check user against [Combot Anti-Spam System](https://cas.chat) (CAS) +- check the overall similarity of the message to the known spam messages +- compare with the list of stop words + +If the message is considered spam, the bot will delete it and ban the user. + + ## Getting bot token for Telegram To get a token, talk to [BotFather](https://core.telegram.org/bots#6-botfather). All you need is to send `/newbot` command and choose the name for your bot (it must end in `bot`). That is it, and you got a token which you'll need to write down into remark42 configuration as `TELEGRAM_TOKEN`. 
@@ -41,34 +55,37 @@ Use this token to access the HTTP API: ## Application Options ``` - -l, --logs= path to logs (default: logs) [$TELEGRAM_LOGS] - --super= super-users - --min-msg-len= min message length to check (default: 100) [$MIN_MSG_LEN] - --max-emoji= max emoji count in message (default: 5) [$MAX_EMOJI] - --stop-words= path to stop words file [$STOP_WORDS] - --dry dry mode, no bans [$DRY] - --dbg debug mode [$DEBUG] + -l, --logs= path to logs (default: logs) [$TELEGRAM_LOGS] + --super= super-users + --similarity-threshold= spam threshold (default: 0.5) [$SIMILARITY_THRESHOLD] + --min-msg-len= min message length to check (default: 50) [$MIN_MSG_LEN] + --max-emoji= max emoji count in message (default: 5) [$MAX_EMOJI] + --paranoid paranoid mode, check all messages [$PARANOID] + --dry dry mode, no bans [$DRY] + --dbg debug mode [$DEBUG] telegram: - --telegram.token= telegram bot token (default: test) [$TELEGRAM_TOKEN] - --telegram.group= group name/id (default: test) [$TELEGRAM_GROUP] - --telegram.timeout= http client timeout for telegram (default: 30s) [$TELEGRAM_TIMEOUT] - --telegram.idle= idle duration (default: 30s) [$TELEGRAM_IDLE] + --telegram.token= telegram bot token (default: test) [$TELEGRAM_TOKEN] + --telegram.group= group name/id (default: test) [$TELEGRAM_GROUP] + --telegram.timeout= http client timeout for telegram (default: 30s) [$TELEGRAM_TIMEOUT] + --telegram.idle= idle duration (default: 30s) [$TELEGRAM_IDLE] cas: - --cas.api= CAS API (default: https://api.cas.chat) [$CAS_API] - --cas.timeout= CAS timeout (default: 5s) [$CAS_TIMEOUT] + --cas.api= CAS API (default: https://api.cas.chat) [$CAS_API] + --cas.timeout= CAS timeout (default: 5s) [$CAS_TIMEOUT] -similarity: - --similarity.threshold= spam threshold (default: 0.5) [$SIMILARITY_THRESHOLD] - --similarity.samples= path to spam samples [$SIMILARITY_SAMPLES] - --similarity.exclude-tokens= path to exclude tokens file [$SIMILARITY_EXCLUDE_TOKENS] +files: + --files.samples-spam= path to spam 
samples (default: spam-samples.txt) [$FILES_SAMPLES_SPAM] + --files.samples-ham= path to ham samples (default: ham-samples.txt) [$FILES_SAMPLES_HAM] + --files.exclude-tokens= path to exclude tokens file (default: exclude-tokens.txt) [$FILES_EXCLUDE_TOKENS] + --files.stop-words= path to stop words file (default: stop-words.txt) [$FILES_STOP_WORDS] message: - --message.spam= spam message (default: this is spam) [$MESSAGE_SPAM] - --message.dry= spam dry message (default: this is spam (dry mode)) [$MESSAGE_DRY] + --message.startup= startup message [$MESSAGE_STARTUP] + --message.spam= spam message (default: this is spam) [$MESSAGE_SPAM] + --message.dry= spam dry message (default: this is spam (dry mode)) [$MESSAGE_DRY] Help Options: - -h, --help Show this help message + -h, --help Show this help message ``` \ No newline at end of file