Merge branch 'feature/transcribe-audio-messages'
n3d1117 committed Mar 4, 2023
2 parents 71209d6 + dd12bdd commit c164f0a
Showing 6 changed files with 95 additions and 6 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,3 +1,4 @@
/__pycache__
/.idea
.env
.DS_Store
1 change: 1 addition & 0 deletions Pipfile
@@ -7,6 +7,7 @@ name = "pypi"
requests = "*"
python-telegram-bot = "==20.1"
openai = "==0.27.0"
pydub = "==0.25.1"
python-dotenv = "*"

[dev-packages]
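
pydub is added here to convert Telegram voice notes (OGG/Opus) into MP3 before they are handed to Whisper; as the README change below notes, this may require ffmpeg to be installed. A minimal sketch of that conversion step, assuming ffmpeg is on the PATH and using an illustrative `voice.ogg` filename:

```python
# Minimal sketch of the OGG -> MP3 conversion pydub is used for.
# Assumes ffmpeg is installed and a local voice.ogg exists (illustrative names).
from pydub import AudioSegment

ogg_audio = AudioSegment.from_ogg("voice.ogg")  # decode the Telegram voice note
ogg_audio.export("voice.mp3", format="mp3")     # re-encode as MP3 for the Whisper API
```
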
18 changes: 13 additions & 5 deletions Pipfile.lock

Some generated files are not rendered by default.

4 changes: 3 additions & 1 deletion README.md
@@ -6,7 +6,7 @@
A [Telegram bot](https://core.telegram.org/bots/api) that integrates with OpenAI's _official_ [ChatGPT](https://openai.com/blog/chatgpt/) APIs to provide answers. Ready to use with minimal configuration required.

## Screenshots
![demo.pdf](https://github.com/n3d1117/chatgpt-telegram-bot/files/10876708/demo.pdf)
![demo.pdf](https://github.com/n3d1117/chatgpt-telegram-bot/files/10889253/demo.pdf)

## Features
- [x] Support markdown in answers
@@ -20,6 +20,7 @@ A [Telegram bot](https://core.telegram.org/bots/api) that integrates with OpenAI
- [x] (NEW!) See token usage after each answer
- [x] (NEW!) Multi-chat support
- [x] (NEW!) Image generation using DALL·E via the `/image` command
- [x] (NEW!) Transcribe audio messages using Whisper (may require [ffmpeg](https://ffmpeg.org))

## Additional Features - help needed!
- [ ] Group chat support
@@ -117,6 +118,7 @@ docker-compose up
## Credits
- [ChatGPT](https://chat.openai.com/chat) from [OpenAI](https://openai.com)
- [python-telegram-bot](https://python-telegram-bot.org)
- [jiaaro/pydub](https://github.com/jiaaro/pydub)

## Disclaimer
This is a personal project and is not affiliated with OpenAI in any way.
11 changes: 11 additions & 0 deletions openai_helper.py
@@ -97,6 +97,17 @@ def generate_image(self, prompt: str) -> str:
logging.exception(e)
raise e

def transcribe(self, filename):
"""
Transcribes the audio file using the Whisper model.
"""
try:
with open(filename, "rb") as audio:
result = openai.Audio.transcribe("whisper-1", audio)
return result.text
except Exception as e:
logging.exception(e)
raise e

def reset_chat_history(self, chat_id):
"""
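
The new `transcribe` helper wraps the Whisper endpoint exposed by `openai==0.27.0`. A standalone sketch of the same call, assuming `OPENAI_API_KEY` is set in the environment and an illustrative `sample.mp3` file exists:

```python
# Standalone sketch of the Whisper call wrapped by OpenAIHelper.transcribe().
# Assumes openai==0.27.0, OPENAI_API_KEY in the environment, and a local
# sample.mp3 (illustrative filename).
import os

import openai

openai.api_key = os.environ["OPENAI_API_KEY"]

with open("sample.mp3", "rb") as audio:
    result = openai.Audio.transcribe("whisper-1", audio)

print(result.text)  # the transcription returned by the API
```
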
66 changes: 66 additions & 0 deletions telegram_bot.py
@@ -1,10 +1,12 @@
import logging
import os

import telegram.constants as constants
from telegram import Update
from telegram.ext import ApplicationBuilder, ContextTypes, CommandHandler, MessageHandler, filters

from openai_helper import OpenAIHelper
from pydub import AudioSegment


class ChatGPT3TelegramBot:
@@ -43,6 +45,7 @@ async def reset(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
return

logging.info(f'Resetting the conversation for user {update.message.from_user.name}...')

chat_id = update.effective_chat.id
self.openai.reset_chat_history(chat_id=chat_id)
await context.bot.send_message(chat_id=chat_id, text='Done!')
@@ -56,6 +59,8 @@ async def image(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
await self.send_disallowed_message(update, context)
return

logging.info(f'New image generation request received from user {update.message.from_user.name}')

chat_id = update.effective_chat.id
image_query = update.message.text.replace('/image', '').strip()
if image_query == '':
@@ -77,6 +82,66 @@ async def image(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
text='Failed to generate image'
)

async def transcribe(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
"""
Transcribe audio messages.
"""
if not self.is_allowed(update):
logging.warning(f'User {update.message.from_user.name} is not allowed to transcribe audio messages')
await self.send_disallowed_message(update, context)
return

if not update.message.voice and not update.message.audio:
await context.bot.send_message(
chat_id=update.effective_chat.id,
reply_to_message_id=update.message.message_id,
text='Unsupported file type'
)
return

logging.info(f'New transcribe request received from user {update.message.from_user.name}')

chat_id = update.effective_chat.id
await context.bot.send_chat_action(chat_id=chat_id, action=constants.ChatAction.TYPING)
filename = update.message.voice.file_unique_id if update.message.voice else update.message.audio.file_unique_id
filename_ogg = f'{filename}.ogg'
filename_mp3 = f'{filename}.mp3'

try:
if update.message.voice:
audio_file = await context.bot.get_file(update.message.voice.file_id)
await audio_file.download_to_drive(filename_ogg)
ogg_audio = AudioSegment.from_ogg(filename_ogg)
ogg_audio.export(filename_mp3, format="mp3")

elif update.message.audio:
audio_file = await context.bot.get_file(update.message.audio.file_id)
await audio_file.download_to_drive(filename_mp3)

# Transcribe the audio file
transcript = self.openai.transcribe(filename_mp3)

# Send the transcript
await context.bot.send_message(
chat_id=chat_id,
reply_to_message_id=update.message.message_id,
text=transcript,
parse_mode=constants.ParseMode.MARKDOWN
)
except Exception:
await context.bot.send_message(
chat_id=chat_id,
reply_to_message_id=update.message.message_id,
text='Failed to transcribe audio'
)

finally:
# Cleanup files
if os.path.exists(filename_mp3):
os.remove(filename_mp3)
if os.path.exists(filename_ogg):
os.remove(filename_ogg)

async def prompt(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
"""
React to incoming messages and respond accordingly.
@@ -136,6 +201,7 @@ def run(self):
application.add_handler(CommandHandler('help', self.help))
application.add_handler(CommandHandler('image', self.image))
application.add_handler(CommandHandler('start', self.help))
application.add_handler(MessageHandler(filters.VOICE | filters.AUDIO, self.transcribe))
application.add_handler(MessageHandler(filters.TEXT & (~filters.COMMAND), self.prompt))

application.add_error_handler(self.error_handler)
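
With the new `MessageHandler`, voice and audio messages are routed to `transcribe`, while non-command text messages still go to `prompt`. A stripped-down sketch of that wiring, assuming `python-telegram-bot==20.1` and an illustrative `TELEGRAM_BOT_TOKEN` environment variable, with the allow-list check and OpenAI calls omitted:

```python
# Stripped-down sketch of the handler wiring; the real bot also checks an
# allow-list and calls OpenAIHelper. Assumes python-telegram-bot==20.1 and an
# illustrative TELEGRAM_BOT_TOKEN environment variable.
import os

from telegram import Update
from telegram.ext import ApplicationBuilder, ContextTypes, MessageHandler, filters


async def transcribe(update: Update, context: ContextTypes.DEFAULT_TYPE):
    # Placeholder: the real handler downloads the file, converts OGG to MP3
    # with pydub, and sends it to Whisper via OpenAIHelper.transcribe().
    await update.message.reply_text('Got a voice/audio message')


async def prompt(update: Update, context: ContextTypes.DEFAULT_TYPE):
    await update.message.reply_text('Got a text message')


if __name__ == '__main__':
    application = ApplicationBuilder().token(os.environ['TELEGRAM_BOT_TOKEN']).build()
    # Voice notes and audio files go to the transcribe handler...
    application.add_handler(MessageHandler(filters.VOICE | filters.AUDIO, transcribe))
    # ...while plain (non-command) text keeps going to the prompt handler.
    application.add_handler(MessageHandler(filters.TEXT & (~filters.COMMAND), prompt))
    application.run_polling()
```
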
