diff --git a/.env.example b/.env.example
index ee827a0b..d6eb078b 100644
--- a/.env.example
+++ b/.env.example
@@ -28,7 +28,7 @@ ALLOWED_TELEGRAM_USER_IDS=USER_ID_1,USER_ID_2
 # MAX_TOKENS=1200
 # MAX_HISTORY_SIZE=15
 # MAX_CONVERSATION_AGE_MINUTES=180
-# VOICE_REPLY_WITH_TRANSCRIPT_ONLY=false
+# VOICE_REPLY_WITH_TRANSCRIPT_ONLY=true
 # VOICE_REPLY_PROMPTS="Hi bot;Hey bot;Hi chat;Hey chat"
 # N_CHOICES=1
 # TEMPERATURE=1.0
diff --git a/README.md b/README.md
index ca06d689..944e0b75 100644
--- a/README.md
+++ b/README.md
@@ -82,7 +82,7 @@ Check out the [Budget Manual](https://github.com/n3d1117/chatgpt-telegram-bot/di
 | `MAX_HISTORY_SIZE`                 | Max number of messages to keep in memory, after which the conversation will be summarised to avoid excessive token usage                                                                                                       | `15`                               |
 | `MAX_CONVERSATION_AGE_MINUTES`     | Maximum number of minutes a conversation should live since the last message, after which the conversation will be reset                                                                                                        | `180`                              |
 | `VOICE_REPLY_WITH_TRANSCRIPT_ONLY` | Whether to answer to voice messages with the transcript only or with a ChatGPT response of the transcript                                                                                                                      | `false`                            |
-| `VOICE_REPLY_PROMPTS`              | A semicolon separated list of phrases (i.e. `Hi bot;Hello chat`). If the transcript starts with any of them, it will be treated as a prompt                                                                                    | -
+| `VOICE_REPLY_PROMPTS`              | A semicolon-separated list of phrases (e.g. `Hi bot;Hello chat`). If the transcript starts with any of them, it will be treated as a prompt even if `VOICE_REPLY_WITH_TRANSCRIPT_ONLY` is set to `true`                        | -                                  |
 | `N_CHOICES`                        | Number of answers to generate for each input message. **Note**: setting this to a number higher than 1 will not work properly if `STREAM` is enabled                                                                           | `1`                                |
 | `TEMPERATURE`                      | Number between 0 and 2. Higher values will make the output more random                                                                                                                                                         | `1.0`                              |
 | `PRESENCE_PENALTY`                 | Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far                                                                                                               | `0.0`                              |
diff --git a/bot/telegram_bot.py b/bot/telegram_bot.py
index eb8cbe09..3d37e23c 100644
--- a/bot/telegram_bot.py
+++ b/bot/telegram_bot.py
@@ -322,7 +322,8 @@ async def _execute():
                     self.usage["guests"].add_transcription_seconds(audio_track.duration_seconds, transcription_price)
 
                 # check if transcript starts with any of the prefixes
-                response_to_transcription = any(transcript.startswith(prefix) if prefix else False for prefix in self.config['voice_reply_prompts'])
+                response_to_transcription = any(transcript.lower().startswith(prefix.lower()) if prefix else False
+                                                for prefix in self.config['voice_reply_prompts'])
 
                 if self.config['voice_reply_transcript'] and not response_to_transcription: