From b97022af4cedb35fde493596d6fc2c2caa7ecb81 Mon Sep 17 00:00:00 2001 From: merefield Date: Sun, 20 Aug 2023 23:08:55 +0100 Subject: [PATCH 01/28] wip --- .../regular/chatbot_post_embedding_job.rb | 12 ++++ app/models/embedding.rb | 7 ++ ...230820010101_enable_embedding_extension.rb | 18 +++++ ...0010103_create_chatbot_embeddings_table.rb | 11 +++ ...0010105_create_chatbot_embeddings_index.rb | 16 +++++ lib/discourse_chatbot/embedding_process.rb | 69 +++++++++++++++++++ lib/tasks/chatbot.rake | 66 ++++++++++++++++++ plugin.rb | 4 ++ 8 files changed, 203 insertions(+) create mode 100644 app/jobs/regular/chatbot_post_embedding_job.rb create mode 100644 app/models/embedding.rb create mode 100644 db/migrate/20230820010101_enable_embedding_extension.rb create mode 100644 db/migrate/20230820010103_create_chatbot_embeddings_table.rb create mode 100644 db/migrate/20230820010105_create_chatbot_embeddings_index.rb create mode 100644 lib/discourse_chatbot/embedding_process.rb create mode 100644 lib/tasks/chatbot.rake diff --git a/app/jobs/regular/chatbot_post_embedding_job.rb b/app/jobs/regular/chatbot_post_embedding_job.rb new file mode 100644 index 0000000..4e90703 --- /dev/null +++ b/app/jobs/regular/chatbot_post_embedding_job.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +# Job is triggered on an update to a Post. +class ::Jobs::ChatbotPostEmbeddingJob < Jobs::Base + def execute(opts) + post_id = opts[:post_id] + + post_embedding = ::DiscourseChatbot::EmbeddingProcess.new + + post_embedding.upsert_embedding(post_id) + end +end diff --git a/app/models/embedding.rb b/app/models/embedding.rb new file mode 100644 index 0000000..81269be --- /dev/null +++ b/app/models/embedding.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +class ::DiscourseChatbot::Embedding < ActiveRecord::Base + self.table_name = 'chatbot_embeddings' + + validates :post_id, presence: true, uniqueness: true +end diff --git a/db/migrate/20230820010101_enable_embedding_extension.rb b/db/migrate/20230820010101_enable_embedding_extension.rb new file mode 100644 index 0000000..a57666c --- /dev/null +++ b/db/migrate/20230820010101_enable_embedding_extension.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +class EnableEmbeddingExtension < ActiveRecord::Migration[7.0] + def change + begin + enable_extension :embedding + rescue Exception => e + if DB.query_single("SELECT 1 FROM pg_available_extensions WHERE name = 'embedding';").empty? + STDERR.puts "----------------------------DISCOURSE CHATBOT ERROR----------------------------------" + STDERR.puts " Discourse Chatbot now requires the embedding extension on the PostgreSQL database." + STDERR.puts " Run a `./launcher rebuild app` to fix it on a standard install." + STDERR.puts " Alternatively, you can remove Discourse Chatbot to rebuild." + STDERR.puts "----------------------------DISCOURSE CHATBOT ERROR----------------------------------" + end + raise e + end + end +end diff --git a/db/migrate/20230820010103_create_chatbot_embeddings_table.rb b/db/migrate/20230820010103_create_chatbot_embeddings_table.rb new file mode 100644 index 0000000..53cfdb5 --- /dev/null +++ b/db/migrate/20230820010103_create_chatbot_embeddings_table.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class CreateChatbotEmbeddingsTable < ActiveRecord::Migration[7.0] + def change + create_table :chatbot_embeddings do |t| + t.integer :post_id, null: false, index: { unique: true }, foreign_key: true + t.column :embedding, "real[]", null: false + t.timestamps + end + end +end diff --git a/db/migrate/20230820010105_create_chatbot_embeddings_index.rb b/db/migrate/20230820010105_create_chatbot_embeddings_index.rb new file mode 100644 index 0000000..f0dce3f --- /dev/null +++ b/db/migrate/20230820010105_create_chatbot_embeddings_index.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +class CreateChatbotEmbeddingsIndex < ActiveRecord::Migration[7.0] + def up + execute <<-SQL + CREATE INDEX hnsw_index_on_chatbot_embeddings ON chatbot_embeddings USING hnsw(embedding) + WITH (dims=1536, m=8, efconstruction=8, efsearch=8); + SQL + end + + def down + execute <<-SQL + DROP INDEX hnsw_index_on_chatbot_embeddings; + SQL + end +end diff --git a/lib/discourse_chatbot/embedding_process.rb b/lib/discourse_chatbot/embedding_process.rb new file mode 100644 index 0000000..79ec878 --- /dev/null +++ b/lib/discourse_chatbot/embedding_process.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true +require "openai" + +module ::DiscourseChatbot + + EMBEDDING_MODEL = "text-embedding-ada-002".freeze + + class EmbeddingProcess + + def initialize + if SiteSetting.chatbot_azure_open_ai_model_url.include?("azure") + ::OpenAI.configure do |config| + config.access_token = SiteSetting.chatbot_azure_open_ai_token + config.uri_base = SiteSetting.chatbot_azure_open_ai_model_url + config.api_type = :azure + config.api_version = "2023-05-15" + end + @client = ::OpenAI::Client.new + else + @client = ::OpenAI::Client.new(access_token: SiteSetting.chatbot_open_ai_token) + end + end + + def upsert_embedding(post_id) + response = @client.embeddings( + parameters: { + model: EMBEDDING_MODEL, + input: ::Post.find(post_id).raw + } + ) + + embedding_vector = response.dig("data", 0, "embedding") + + ::DiscourseChatbot::Embedding.upsert({post_id: post_id, embedding: embedding_vector}, on_duplicate: :update, unique_by: :post_id) + end + + def semantic_search(query) + response = @client.embeddings( + parameters: { + model: EMBEDDING_MODEL, + input: query + } + ) + + query_vector = response.dig("data", 0, "embedding") + + begin + search_result_post_ids = + DB.query(<<~SQL, query_embedding: query_vector, limit: 8).map( + SELECT + post_id + FROM + chatbot_embeddings + ORDER BY + embedding::real[] <-> array[:query_embedding] + LIMIT :limit + SQL + &:post_id + ) + rescue PG::Error => e + Rails.logger.error( + "Error #{e} querying embeddings for search #{query}", + ) + raise MissingEmbeddingError + end + search_result_post_ids + end + end +end diff --git a/lib/tasks/chatbot.rake b/lib/tasks/chatbot.rake new file mode 100644 index 0000000..1ba2da9 --- /dev/null +++ b/lib/tasks/chatbot.rake @@ -0,0 +1,66 @@ +desc "Update embeddings for each post" +task "chatbot:refresh_embeddings", %i[delay] => :environment do |_, args| + ENV["RAILS_DB"] ? refresh_embeddings(args) : refresh_embeddings_all_sites(args) +end + +desc "Refresh embeddings for all posts matching string/regex and optionally delay the loop" +task "chatbot:refresh_embeddings_match", %i[pattern type delay] => [:environment] do |_, args| + args.with_defaults(type: "string") + pattern = args[:pattern] + type = args[:type]&.downcase + delay = args[:delay]&.to_i + + if !pattern + puts "ERROR: Expecting rake chatbot:refresh_embeddings_match[pattern,type,delay]" + exit 1 + elsif delay && delay < 1 + puts "ERROR: delay parameter should be an integer and greater than 0" + exit 1 + elsif type != "string" && type != "regex" + puts "ERROR: Expecting rake chatbot:refresh_embeddings_match[pattern,type] where type is string or regex" + exit 1 + end + + search = Post.raw_match(pattern, type) + + refreshed = 0 + total = search.count + + search.find_each do |post| + post_embedding = ::DiscourseChatbot::EmbeddingProcess.new + post_embedding.upsert_embedding(post.id) + print_status(refreshed += 1, total) + sleep(delay) if delay + end + + puts "", "#{refreshed} posts done!", "" +end + +def refresh_embeddings_all_sites(opts) + RailsMultisite::ConnectionManagement.each_connection { |db| refresh_embeddings(opts)} +end + +def refresh_embeddings(opts) + puts "Refreshing embeddings for all posts for '#{RailsMultisite::ConnectionManagement.current_db}'" + + begin + total = Post.count + refreshed = 0 + batch = 1000 + + (0..(total - 1).abs).step(batch) do |i| + Post + .order(id: :desc) + .offset(i) + .limit(batch) + .each do |post| + post_embedding = ::DiscourseChatbot::EmbeddingProcess.new + post_embedding.upsert_embedding(post.id) + print_status(refreshed += 1, total) + sleep(delay) if delay + end + end + end + + puts "", "#{refreshed} posts done!", "-" * 50 +end diff --git a/plugin.rb b/plugin.rb index 12e4229..a3f7dd4 100644 --- a/plugin.rb +++ b/plugin.rb @@ -48,6 +48,10 @@ def progress_debug_message(message) %w( ../lib/discourse_chatbot/event_evaluation.rb + ../app/models/embedding.rb + ../lib/discourse_chatbot/embedding_process.rb + ../lib/tasks/chatbot.rake + ../app/jobs/regular/chatbot_post_embedding_job.rb ../lib/discourse_chatbot/message/message_evaluation.rb ../lib/discourse_chatbot/post/post_evaluation.rb ../lib/discourse_chatbot/bot.rb From 05dee2854ef99ff712f037bc30635b6c56bace35 Mon Sep 17 00:00:00 2001 From: merefield Date: Mon, 21 Aug 2023 10:57:48 +0100 Subject: [PATCH 02/28] add semantic search function, post event embedding maintenance --- .../chatbot_post_embedding_delete_job.rb | 18 +++++++ .../regular/chatbot_post_embedding_job.rb | 14 +++-- lib/discourse_chatbot/bots/open_ai_agent.rb | 4 +- .../functions/forum_search_function.rb | 53 +++++++++++++++++++ lib/tasks/chatbot.rake | 13 +++-- plugin.rb | 49 ++++++++++++++--- 6 files changed, 137 insertions(+), 14 deletions(-) create mode 100644 app/jobs/regular/chatbot_post_embedding_delete_job.rb create mode 100644 lib/discourse_chatbot/functions/forum_search_function.rb diff --git a/app/jobs/regular/chatbot_post_embedding_delete_job.rb b/app/jobs/regular/chatbot_post_embedding_delete_job.rb new file mode 100644 index 0000000..681605a --- /dev/null +++ b/app/jobs/regular/chatbot_post_embedding_delete_job.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +# Job is triggered on a Post destruction. +class ::Jobs::ChatbotPostEmbeddingDeleteJob < Jobs::Base + sidekiq_options retry: false + + def execute(opts) + begin + post_id = opts[:id] + + ::DiscourseChatbot.progress_debug_message("101. Deleting a Post Embedding for Post id: #{post_id}") + + ::DiscourseChatbot::Embedding.find_by(post_id: post_id).destroy! + rescue => e + Rails.logger.error ("OpenAIBot Post Embedding: There was a problem, but will retry til limit: #{e}") + end + end +end diff --git a/app/jobs/regular/chatbot_post_embedding_job.rb b/app/jobs/regular/chatbot_post_embedding_job.rb index 4e90703..700f417 100644 --- a/app/jobs/regular/chatbot_post_embedding_job.rb +++ b/app/jobs/regular/chatbot_post_embedding_job.rb @@ -2,11 +2,19 @@ # Job is triggered on an update to a Post. class ::Jobs::ChatbotPostEmbeddingJob < Jobs::Base + sidekiq_options retry: 5, dead: false + def execute(opts) - post_id = opts[:post_id] + begin + post_id = opts[:id] + + ::DiscourseChatbot.progress_debug_message("100. Creating/updating a Post Embedding for Post id: #{post_id}") - post_embedding = ::DiscourseChatbot::EmbeddingProcess.new + post_embedding = ::DiscourseChatbot::EmbeddingProcess.new - post_embedding.upsert_embedding(post_id) + post_embedding.upsert_embedding(post_id) + rescue => e + Rails.logger.error ("OpenAIBot Post Embedding: There was a problem, but will retry til limit: #{e}") + end end end diff --git a/lib/discourse_chatbot/bots/open_ai_agent.rb b/lib/discourse_chatbot/bots/open_ai_agent.rb index e70e210..b529397 100644 --- a/lib/discourse_chatbot/bots/open_ai_agent.rb +++ b/lib/discourse_chatbot/bots/open_ai_agent.rb @@ -24,8 +24,10 @@ def initialize wikipedia_function = ::DiscourseChatbot::WikipediaFunction.new news_function = ::DiscourseChatbot::NewsFunction.new google_search_function = ::DiscourseChatbot::GoogleSearchFunction.new + forum_search_function = ::DiscourseChatbot::ForumSearchFunction.new stock_data_function = ::DiscourseChatbot::StockDataFunction.new - functions = [calculator_function, wikipedia_function] + + functions = [calculator_function, wikipedia_function, forum_search_function] functions << news_function if !SiteSetting.chatbot_news_api_token.blank? functions << google_search_function if !SiteSetting.chatbot_serp_api_key.blank? diff --git a/lib/discourse_chatbot/functions/forum_search_function.rb b/lib/discourse_chatbot/functions/forum_search_function.rb new file mode 100644 index 0000000..ae08323 --- /dev/null +++ b/lib/discourse_chatbot/functions/forum_search_function.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +require_relative '../function' + +module DiscourseChatbot + class ForumSearchFunction < Function + + def name + 'local_forum_search' + end + + def description + <<~EOS + Search the local forum for information that may help you answer the question. Especially useful when the forum specialises in the subject matter of the query. + Searching the local forum is preferable to searching google or the internet and should be considered higher priority. + + Input should be a search query. + + Outputs text from the Post and a url you can provide the user with to link them to the relevant Post. + EOS + end + + def parameters + [ + { name: "query", type: String, description: "search query for looking up information on the forum" } , + ] + end + + def required + ['query'] + end + + def process(args) + begin + super(args) + query = args[parameters[0][:name]] + + post_embedding = ::DiscourseChatbot::EmbeddingProcess.new + results = post_embedding.semantic_search(query) + + top_result = results[0].to_i + + top_post = ::Post.find(top_result) + url = "https://localhost:4200/t/slug/#{top_post.topic_id}/#{top_post.post_number}" + raw = top_post.raw + + "The top Post on the forum with related information can be accessed here: #{url} and the text is #{raw}" + rescue + "\"#{args[parameters[0][:name]]}\": my search for this on the forum failed." + end + end + end +end \ No newline at end of file diff --git a/lib/tasks/chatbot.rake b/lib/tasks/chatbot.rake index 1ba2da9..6b39520 100644 --- a/lib/tasks/chatbot.rake +++ b/lib/tasks/chatbot.rake @@ -36,13 +36,20 @@ task "chatbot:refresh_embeddings_match", %i[pattern type delay] => [:environment puts "", "#{refreshed} posts done!", "" end -def refresh_embeddings_all_sites(opts) - RailsMultisite::ConnectionManagement.each_connection { |db| refresh_embeddings(opts)} +def refresh_embeddings_all_sites(args) + RailsMultisite::ConnectionManagement.each_connection { |db| refresh_embeddings(args)} end -def refresh_embeddings(opts) +def refresh_embeddings(args) puts "Refreshing embeddings for all posts for '#{RailsMultisite::ConnectionManagement.current_db}'" + delay = args[:delay]&.to_i + + if delay && delay < 1 + puts "ERROR: delay parameter should be an integer and greater than 0" + exit 1 + end + begin total = Post.count refreshed = 0 diff --git a/plugin.rb b/plugin.rb index a3f7dd4..1a3507c 100644 --- a/plugin.rb +++ b/plugin.rb @@ -62,6 +62,7 @@ def progress_debug_message(message) ../lib/discourse_chatbot/functions/news_function.rb ../lib/discourse_chatbot/functions/wikipedia_function.rb ../lib/discourse_chatbot/functions/google_search_function.rb + ../lib/discourse_chatbot/functions/forum_search_function.rb ../lib/discourse_chatbot/functions/stock_data_function.rb ../lib/discourse_chatbot/functions/parser.rb ../lib/discourse_chatbot/prompt_utils.rb @@ -81,19 +82,53 @@ def progress_debug_message(message) DiscourseEvent.on(:post_created) do |*params| post, opts, user = params - if SiteSetting.chatbot_enabled && (post.post_type == 1 || post.post_type == 4 && SiteSetting.chatbot_can_trigger_from_whisper) - ::DiscourseChatbot.progress_debug_message("1. trigger") + if SiteSetting.chatbot_enabled + if post.post_type == 1 + job_class = ::Jobs::ChatbotPostEmbeddingJob + job_class.perform_async(post.as_json) + end - bot_username = SiteSetting.chatbot_bot_user - bot_user = User.find_by(username: bot_username) + if (post.post_type == 1 || post.post_type == 4 && SiteSetting.chatbot_can_trigger_from_whisper) + ::DiscourseChatbot.progress_debug_message("1. trigger") - if bot_user && (user.id != bot_user.id) - event_evaluation = ::DiscourseChatbot::PostEvaluation.new - event_evaluation.on_submission(post) + bot_username = SiteSetting.chatbot_bot_user + bot_user = User.find_by(username: bot_username) + + if bot_user && (user.id != bot_user.id) + event_evaluation = ::DiscourseChatbot::PostEvaluation.new + event_evaluation.on_submission(post) + end end end end + DiscourseEvent.on(:post_edited) do |*params| + post, opts = params + + if SiteSetting.chatbot_enabled && post.post_type == 1 + job_class = ::Jobs::ChatbotPostEmbeddingJob + job_class.perform_async(post.as_json) + end + end + + DiscourseEvent.on(:post_recovered) do |*params| + post, opts = params + + if SiteSetting.chatbot_enabled && post.post_type == 1 + job_class = ::Jobs::ChatbotPostEmbeddingJob + job_class.perform_async(post.as_json) + end + end + + DiscourseEvent.on(:post_destroyed) do |*params| + post, opts, user = params + + if SiteSetting.chatbot_enabled && post.post_type == 1 + job_class = ::Jobs::ChatbotPostEmbeddingDeleteJob + job_class.perform_async(post.as_json) + end + end + DiscourseEvent.on(:chat_message_created) do |*params| chat_message, chat_channel, user = params From 47722b909b19ec7119e12dbe12b63a6b292bedfb Mon Sep 17 00:00:00 2001 From: merefield Date: Mon, 21 Aug 2023 15:34:05 +0100 Subject: [PATCH 03/28] move narrative strings to localisation file, rubocop --- config/locales/server.en.yml | 76 +++++++++++++++++++ ...0010103_create_chatbot_embeddings_table.rb | 2 +- lib/discourse_chatbot/embedding_process.rb | 18 ++--- .../functions/calculator_function.rb | 22 +----- .../functions/forum_search_function.rb | 17 ++--- .../functions/google_search_function.rb | 13 +--- .../functions/news_function.rb | 16 ++-- .../functions/stock_data_function.rb | 12 ++- .../functions/wikipedia_function.rb | 13 +--- lib/tasks/chatbot.rake | 5 +- plugin.rb | 6 +- 11 files changed, 116 insertions(+), 84 deletions(-) diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index ac50a05..c4ad287 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -46,6 +46,82 @@ en: title: "The subject of this conversation is %{topic_title}" first_post: "The first thing someone said was %{username} who said %{raw}" post: "%{username} said %{raw}" + function: + calculator: + description: | + Useful for getting the result of a math expression. It is a general purpose calculator. It works with Ruby expressions. + + You can retrieve the current date from it too and using the core Ruby Time method to calculate dates. + + The input to this tool should be a valid mathematical expression that could be executed by the base Ruby programming language with no extensions. + + Be certain to prefix any functions with 'Math.' + + Usage: + Action Input: 1 + 1 + Action Input: 3 * 2 / 4 + Action Input: 9 - 7 + Action Input: Time.now - 2 * 24 * 60 * 60 + Action Input: Math.cbrt(13) + Math.cbrt(12) + Action Input: Math.sqrt(8) + Action Input: (4.1 + 2.3) / (2.0 - 5.6) * 3 + parameters: + input: the mathematical expression you need to process and get the answer to. Make sure it is Ruby compatible. + error: "'%{parameter}' is an invalid mathematical expression, make sure if you are trying to calculate dates use Ruby Time class" + forum_search: + description: | + Search the local forum for information that may help you answer the question. Especially useful when the forum specialises in the subject matter of the query. + Searching the local forum is preferable to searching google or the internet and should be considered higher priority. It is quicker and cheaper. + + Input should be a search query. + + Outputs text from the Post and a url you can provide the user with to link them to the relevant Post. + parameters: + query: "search query for looking up information on the forum" + answer: "The top Post on the forum with related information can be accessed here: %{url} and the text is %{raw}" + error: "'%{query}': my search for this on the forum failed." + google_search: + description: | + A wrapper around Google Search. + + Useful for when you need to answer questions about current events. + Always one of the first options when you need to find information on internet. + + Input should be a search query. + parameters: + query: "search query for looking up information on the internet" + error: "%{query}: my search for this on the internet failed." + news: + description: | + A wrapper around the News API. + + Useful for when you need to answer questions about current events in the news, current events or affairs. + + Input should be a search query and a date from which to search news, so if the request is today, the search should be for todays date + parameters: + query: "query string for searching current news and events" + start_date: "start date from which to search for news in format YYYY-MM-DD" + answer: "The latest news about this is: " + error: "ERROR: Had trouble retrieving the news!" + stock_data: + description: | + An API for MarketStack stock data. You need to call it using the stock ticker. You can optionally also provide a specific date. + parameters: + ticker: "ticker for share or stock query" + date: "date for data in format YYYY-MM-DD" + answer: "Ticker %{ticker} had a day close of %{close} on %{date}, with a high of %{high} and a low of %{low}" + error: "ERROR: Had trouble retrieving information from Market Stack for stock market information!" + wikipedia: + description: | + A wrapper around Wikipedia. + + Useful for when you need to answer general questions about + people, places, companies, facts, historical events, or other subjects. + + Input should be a search query + parameters: + query: "query string for wikipedia search" + error: "ERROR: Had trouble retrieving information from Wikipedia!" errors: general: "Sorry, I'm not well right now. Lets talk some other time. Meanwhile, please ask the admin to check the logs, thank you!" retries: "I've tried working out a response for you several times, but ultimately failed. Please contact the admin if this persists, thank you!" diff --git a/db/migrate/20230820010103_create_chatbot_embeddings_table.rb b/db/migrate/20230820010103_create_chatbot_embeddings_table.rb index 53cfdb5..37bc53e 100644 --- a/db/migrate/20230820010103_create_chatbot_embeddings_table.rb +++ b/db/migrate/20230820010103_create_chatbot_embeddings_table.rb @@ -3,7 +3,7 @@ class CreateChatbotEmbeddingsTable < ActiveRecord::Migration[7.0] def change create_table :chatbot_embeddings do |t| - t.integer :post_id, null: false, index: { unique: true }, foreign_key: true + t.integer :post_id, null: false, index: { unique: true }, foreign_key: true t.column :embedding, "real[]", null: false t.timestamps end diff --git a/lib/discourse_chatbot/embedding_process.rb b/lib/discourse_chatbot/embedding_process.rb index 79ec878..c2eab00 100644 --- a/lib/discourse_chatbot/embedding_process.rb +++ b/lib/discourse_chatbot/embedding_process.rb @@ -31,7 +31,7 @@ def upsert_embedding(post_id) embedding_vector = response.dig("data", 0, "embedding") - ::DiscourseChatbot::Embedding.upsert({post_id: post_id, embedding: embedding_vector}, on_duplicate: :update, unique_by: :post_id) + ::DiscourseChatbot::Embedding.upsert({ post_id: post_id, embedding: embedding_vector }, on_duplicate: :update, unique_by: :post_id) end def semantic_search(query) @@ -45,8 +45,8 @@ def semantic_search(query) query_vector = response.dig("data", 0, "embedding") begin - search_result_post_ids = - DB.query(<<~SQL, query_embedding: query_vector, limit: 8).map( + search_result_post_ids = + DB.query(<<~SQL, query_embedding: query_vector, limit: 8).map( SELECT post_id FROM @@ -55,13 +55,13 @@ def semantic_search(query) embedding::real[] <-> array[:query_embedding] LIMIT :limit SQL - &:post_id + &:post_id + ) + rescue PG::Error => e + Rails.logger.error( + "Error #{e} querying embeddings for search #{query}", ) - rescue PG::Error => e - Rails.logger.error( - "Error #{e} querying embeddings for search #{query}", - ) - raise MissingEmbeddingError + raise MissingEmbeddingError end search_result_post_ids end diff --git a/lib/discourse_chatbot/functions/calculator_function.rb b/lib/discourse_chatbot/functions/calculator_function.rb index 0dbdf2b..626001b 100644 --- a/lib/discourse_chatbot/functions/calculator_function.rb +++ b/lib/discourse_chatbot/functions/calculator_function.rb @@ -10,28 +10,12 @@ def name end def description - <<~EOS - Useful for getting the result of a math expression. It is a general purpose calculator. It works with Ruby expressions. - - You can retrieve the current date from it too and using the core Ruby Time method to calculate dates. - - The input to this tool should be a valid mathematical expression that could be executed by the base Ruby programming language with no extensions. - - Be certain to prefix any functions with 'Math.' - Usage: - Action Input: 1 + 1 - Action Input: 3 * 2 / 4 - Action Input: 9 - 7 - Action Input: Time.now - 2 * 24 * 60 * 60 - Action Input: Math.cbrt(13) + Math.cbrt(12) - Action Input: Math.sqrt(8) - Action Input: (4.1 + 2.3) / (2.0 - 5.6) * 3" - EOS + I18n.t("chatbot.prompt.function.calculator.description") end def parameters [ - { name: "input", type: String, description: "the mathematical expression you need to process and get the answer to. Make sure it is Ruby compatible." } , + { name: "input", type: String, description: I18n.t("chatbot.prompt.function.calculator.parameters.input") } , ] end @@ -45,7 +29,7 @@ def process(args) SafeRuby.eval(args[parameters[0][:name]], timeout: 5) rescue - "\"#{args[parameters[0][:name]]}\" is an invalid mathematical expression, make sure if you are trying to calculate dates use Ruby Time class" + I18n.t("chatbot.prompt.function.calculator.error", parameter: args[parameters[0][:name]]) end end end diff --git a/lib/discourse_chatbot/functions/forum_search_function.rb b/lib/discourse_chatbot/functions/forum_search_function.rb index ae08323..25fca33 100644 --- a/lib/discourse_chatbot/functions/forum_search_function.rb +++ b/lib/discourse_chatbot/functions/forum_search_function.rb @@ -10,19 +10,12 @@ def name end def description - <<~EOS - Search the local forum for information that may help you answer the question. Especially useful when the forum specialises in the subject matter of the query. - Searching the local forum is preferable to searching google or the internet and should be considered higher priority. - - Input should be a search query. - - Outputs text from the Post and a url you can provide the user with to link them to the relevant Post. - EOS + I18n.t("chatbot.prompt.function.forum_search.description") end def parameters [ - { name: "query", type: String, description: "search query for looking up information on the forum" } , + { name: "query", type: String, description: I18n.t("chatbot.prompt.function.forum_search.parameters.query") } , ] end @@ -44,10 +37,10 @@ def process(args) url = "https://localhost:4200/t/slug/#{top_post.topic_id}/#{top_post.post_number}" raw = top_post.raw - "The top Post on the forum with related information can be accessed here: #{url} and the text is #{raw}" + I18n.t("chatbot.prompt.function.forum_search.answer", url: url, raw: raw) rescue - "\"#{args[parameters[0][:name]]}\": my search for this on the forum failed." + I18n.t("chatbot.prompt.function.forum_search.error", query: args[parameters[0][:name]]) end end end -end \ No newline at end of file +end diff --git a/lib/discourse_chatbot/functions/google_search_function.rb b/lib/discourse_chatbot/functions/google_search_function.rb index 25743fb..d8b1aaf 100644 --- a/lib/discourse_chatbot/functions/google_search_function.rb +++ b/lib/discourse_chatbot/functions/google_search_function.rb @@ -11,19 +11,12 @@ def name end def description - <<~EOS - A wrapper around Google Search. - - Useful for when you need to answer questions about current events. - Always one of the first options when you need to find information on internet. - - Input should be a search query. - EOS + I18n.t("chatbot.prompt.function.google_search.description") end def parameters [ - { name: "query", type: String, description: "search query for looking up information on the internet" } , + { name: "query", type: String, description: I18n.t("chatbot.prompt.function.google_search.parameters.query") } , ] end @@ -42,7 +35,7 @@ def process(args) hash_results.dig(:answer_box, :snippet) || hash_results.dig(:organic_results, 0, :snippet) rescue - "\"#{args[parameters[0][:name]]}\": my search for this on the internet failed." + I18n.t("chatbot.prompt.function.google_search.error", query: args[parameters[0][:name]]) end end end diff --git a/lib/discourse_chatbot/functions/news_function.rb b/lib/discourse_chatbot/functions/news_function.rb index f08865a..8a707d2 100644 --- a/lib/discourse_chatbot/functions/news_function.rb +++ b/lib/discourse_chatbot/functions/news_function.rb @@ -10,19 +10,13 @@ def name end def description - <<~EOS - A wrapper around the News API. - - Useful for when you need to answer questions about current events in the news, current events or affairs. - - Input should be a search query and a date from which to search news, so if the request is today, the search should be for todays date' - EOS + I18n.t("chatbot.prompt.function.news.description") end def parameters [ - { name: 'query', type: String, description: "query string for searching current news and events" }, - { name: 'start_date', type: String, description: "start date from which to search for news in format YYYY-MM-DD" } + { name: 'query', type: String, description: I18n.t("chatbot.prompt.function.news.parameters.query") }, + { name: 'start_date', type: String, description: I18n.t("chatbot.prompt.function.news.parameters.start_date") } ] end @@ -57,13 +51,13 @@ def process(args) all_articles = response_body["articles"] - news = "The latest news about this is: " + news = I18n.t("chatbot.prompt.function.news.answer") all_articles.each do |a| news += "#{a["title"]}. " end news rescue - "ERROR: Had trouble retrieving the news!" + I18n.t("chatbot.prompt.function.news.error") end end end diff --git a/lib/discourse_chatbot/functions/stock_data_function.rb b/lib/discourse_chatbot/functions/stock_data_function.rb index 4633559..17f5f45 100644 --- a/lib/discourse_chatbot/functions/stock_data_function.rb +++ b/lib/discourse_chatbot/functions/stock_data_function.rb @@ -14,15 +14,13 @@ def name end def description - <<~EOS - An API for MarketStack stock data. You need to call it using the stock ticker. You can optionally also provide a specific date. - EOS + I18n.t("chatbot.prompt.function.stock_data.description") end def parameters [ - { name: 'ticker', type: String, description: "ticker for share or stock query" }, - { name: 'date', type: String, description: "date for data in format YYYY-MM-DD" } + { name: 'ticker', type: String, description: I18n.t("chatbot.prompt.function.stock_data.parameters.ticker") }, + { name: 'date', type: String, description: I18n.t("chatbot.prompt.function.stock_data.parameters.date") } ] end @@ -58,9 +56,9 @@ def process(args) stock_data = api_response['data'][0] - "Ticker #{stock_data['symbol']} had a day close of #{stock_data['close'].to_s} on #{stock_data['date'].to_s}, with a high of #{stock_data['high'].to_s} and a low of #{stock_data['low'].to_s}" + I18n.t("chatbot.prompt.function.stock_data.answer", ticker: stock_data['symbol'], close: stock_data['close'].to_s, date: stock_data['date'].to_s, high: stock_data['high'].to_s, low: stock_data['low'].to_s) rescue - "ERROR: Had trouble retrieving information from Market Stack for stock market information!" + I18n.t("chatbot.prompt.function.stock_data.error") end end end diff --git a/lib/discourse_chatbot/functions/wikipedia_function.rb b/lib/discourse_chatbot/functions/wikipedia_function.rb index 3678d2a..596b2d4 100644 --- a/lib/discourse_chatbot/functions/wikipedia_function.rb +++ b/lib/discourse_chatbot/functions/wikipedia_function.rb @@ -12,19 +12,12 @@ def name end def description - <<~EOS - A wrapper around Wikipedia. - - Useful for when you need to answer general questions about - people, places, companies, facts, historical events, or other subjects. - - Input should be a search query - EOS + I18n.t("chatbot.prompt.function.wikipedia.description") end def parameters [ - { name: 'query', type: String, description: "query string for wikipedia search" } + { name: 'query', type: String, description: I18n.t("chatbot.prompt.function.wikipedia.parameters.query") } ] end @@ -40,7 +33,7 @@ def process(args) page.summary rescue - "ERROR: Had trouble retrieving information from Wikipedia!" + I18n.t("chatbot.prompt.function.wikipedia.error") end end end diff --git a/lib/tasks/chatbot.rake b/lib/tasks/chatbot.rake index 6b39520..6d9bd48 100644 --- a/lib/tasks/chatbot.rake +++ b/lib/tasks/chatbot.rake @@ -1,3 +1,4 @@ +# frozen_string_literal: true desc "Update embeddings for each post" task "chatbot:refresh_embeddings", %i[delay] => :environment do |_, args| ENV["RAILS_DB"] ? refresh_embeddings(args) : refresh_embeddings_all_sites(args) @@ -37,7 +38,7 @@ task "chatbot:refresh_embeddings_match", %i[pattern type delay] => [:environment end def refresh_embeddings_all_sites(args) - RailsMultisite::ConnectionManagement.each_connection { |db| refresh_embeddings(args)} + RailsMultisite::ConnectionManagement.each_connection { |db| refresh_embeddings(args) } end def refresh_embeddings(args) @@ -54,7 +55,7 @@ def refresh_embeddings(args) total = Post.count refreshed = 0 batch = 1000 - + (0..(total - 1).abs).step(batch) do |i| Post .order(id: :desc) diff --git a/plugin.rb b/plugin.rb index 1a3507c..3a8f4ea 100644 --- a/plugin.rb +++ b/plugin.rb @@ -82,7 +82,7 @@ def progress_debug_message(message) DiscourseEvent.on(:post_created) do |*params| post, opts, user = params - if SiteSetting.chatbot_enabled + if SiteSetting.chatbot_enabled if post.post_type == 1 job_class = ::Jobs::ChatbotPostEmbeddingJob job_class.perform_async(post.as_json) @@ -113,7 +113,7 @@ def progress_debug_message(message) DiscourseEvent.on(:post_recovered) do |*params| post, opts = params - + if SiteSetting.chatbot_enabled && post.post_type == 1 job_class = ::Jobs::ChatbotPostEmbeddingJob job_class.perform_async(post.as_json) @@ -125,7 +125,7 @@ def progress_debug_message(message) if SiteSetting.chatbot_enabled && post.post_type == 1 job_class = ::Jobs::ChatbotPostEmbeddingDeleteJob - job_class.perform_async(post.as_json) + job_class.perform_async(post.as_json) end end From a580570902fd6875f6c39da23fb218ab163d69ec Mon Sep 17 00:00:00 2001 From: merefield Date: Mon, 21 Aug 2023 18:34:48 +0100 Subject: [PATCH 04/28] move prompt text from agent to localisation --- config/locales/server.en.yml | 13 ++++++++++++- lib/discourse_chatbot/bots/open_ai_agent.rb | 12 +++++++----- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index c4ad287..95b8c1f 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -75,7 +75,7 @@ en: Input should be a search query. - Outputs text from the Post and a url you can provide the user with to link them to the relevant Post. + Outputs text from the Post and a url link to it you can provide the user. parameters: query: "search query for looking up information on the forum" answer: "The top Post on the forum with related information can be accessed here: %{url} and the text is %{raw}" @@ -121,7 +121,18 @@ en: Input should be a search query parameters: query: "query string for wikipedia search" + answer: "The relevant wikipedia page has the following summary: '%{summary}' and the article can be found at this url link: %{url}" error: "ERROR: Had trouble retrieving information from Wikipedia!" + agent: + handle_function_call: + answer: "The answer is %{result}." + call_function: + error: "There was something wrong with your function arguments" + final_thought_answer: + opener: "To answer the question I will use these step by step instructions.\n\n" + thought_declaration: "I will use the %{function_name} function to calculate the answer with arguments %{arguments}.\n\n" + final_thought: "%{thoughts} Based on the above, I will now answer the question, this message will only be seen by me so answer with the assumption with that the user has not seen this message." + errors: general: "Sorry, I'm not well right now. Lets talk some other time. Meanwhile, please ask the admin to check the logs, thank you!" retries: "I've tried working out a response for you several times, but ultimately failed. Please contact the admin if this persists, thank you!" diff --git a/lib/discourse_chatbot/bots/open_ai_agent.rb b/lib/discourse_chatbot/bots/open_ai_agent.rb index b529397..3da8252 100644 --- a/lib/discourse_chatbot/bots/open_ai_agent.rb +++ b/lib/discourse_chatbot/bots/open_ai_agent.rb @@ -118,7 +118,7 @@ def handle_function_call(res) func_name = first_message["function_call"]["name"] args_str = first_message["function_call"]["arguments"] result = call_function(func_name, args_str) - res_msg = { 'role' => 'assistant', 'content' => "The answer is #{result}." } + res_msg = { 'role' => 'assistant', 'content' => I18n.t("chatbot.prompt.agent.handle_function_call.answer", result: result) } @internal_thoughts << res_msg end @@ -134,23 +134,25 @@ def call_function(func_name, args_str) res = func.process(args) res rescue - "There was something wrong with your function arguments" + I18n.t("chatbot.prompt.agent.call_function.error") end end def final_thought_answer - thoughts = "To answer the question I will use these step by step instructions.\n\n" + thoughts = I18n.t("chatbot.prompt.agent.final_thought_answer.opener") @internal_thoughts.each do |thought| if thought.key?('function_call') - thoughts += "I will use the #{thought['function_call']['name']} function to calculate the answer with arguments #{thought['function_call']['arguments']}.\n\n" + thoughts += I18n.t("chatbot.prompt.agent.final_thought_answer.function_declaration", function_name: thought['function_call']['name'], arguments: thought['function_call']['arguments']) else thoughts += "#{thought['content']}\n\n" end end + final_thought = { 'role' => 'assistant', - 'content' => "#{thoughts} Based on the above, I will now answer the question, this message will only be seen by me so answer with the assumption with that the user has not seen this message." + 'content' => I18n.t("chatbot.prompt.agent.final_thought_answer.final_thought", thoughts: thoughts) } + final_thought end From 688579c88ff9e4d8faccf24fd94687f9e13e1617 Mon Sep 17 00:00:00 2001 From: merefield Date: Mon, 21 Aug 2023 18:36:06 +0100 Subject: [PATCH 05/28] improve wikipedia output to include link to source page --- lib/discourse_chatbot/functions/wikipedia_function.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/discourse_chatbot/functions/wikipedia_function.rb b/lib/discourse_chatbot/functions/wikipedia_function.rb index 596b2d4..35653c3 100644 --- a/lib/discourse_chatbot/functions/wikipedia_function.rb +++ b/lib/discourse_chatbot/functions/wikipedia_function.rb @@ -31,7 +31,7 @@ def process(args) page = ::Wikipedia.find(args[parameters[0][:name]]) - page.summary + I18n.t("chatbot.prompt.function.wikipedia.answer", summary: page.summary, url: page.fullurl) rescue I18n.t("chatbot.prompt.function.wikipedia.error") end From 9842864c6db6c1f25a715b641a893392967cbf44 Mon Sep 17 00:00:00 2001 From: merefield Date: Mon, 21 Aug 2023 19:00:49 +0100 Subject: [PATCH 06/28] experimental improvement to forum search --- config/locales/server.en.yml | 3 ++- .../functions/forum_search_function.rb | 18 +++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 95b8c1f..bf61380 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -78,7 +78,8 @@ en: Outputs text from the Post and a url link to it you can provide the user. parameters: query: "search query for looking up information on the forum" - answer: "The top Post on the forum with related information can be accessed here: %{url} and the text is %{raw}" + answer_summary: "The top three posts on the forum related to this query are, best match first:\n\n" + answer: "Number %{rank}: the post is at this link url: %{url} and the text is '%{raw}'.\n\n" error: "'%{query}': my search for this on the forum failed." google_search: description: | diff --git a/lib/discourse_chatbot/functions/forum_search_function.rb b/lib/discourse_chatbot/functions/forum_search_function.rb index 25fca33..1fecd81 100644 --- a/lib/discourse_chatbot/functions/forum_search_function.rb +++ b/lib/discourse_chatbot/functions/forum_search_function.rb @@ -31,13 +31,17 @@ def process(args) post_embedding = ::DiscourseChatbot::EmbeddingProcess.new results = post_embedding.semantic_search(query) - top_result = results[0].to_i - - top_post = ::Post.find(top_result) - url = "https://localhost:4200/t/slug/#{top_post.topic_id}/#{top_post.post_number}" - raw = top_post.raw - - I18n.t("chatbot.prompt.function.forum_search.answer", url: url, raw: raw) + top_results = results[0..2] + + response = I18n.t("chatbot.prompt.function.forum_search.answer_summary") + + top_results.each_with_index do |result, index| + current_post = ::Post.find(result.to_i) + url = "#{Discourse.current_hostname}/t/slug/#{current_post.topic_id}/#{current_post.post_number}" + raw = current_post.raw + response += I18n.t("chatbot.prompt.function.forum_search.answer", url: url, raw: raw, rank: index + 1) + end + response rescue I18n.t("chatbot.prompt.function.forum_search.error", query: args[parameters[0][:name]]) end From 6a6045abf7d2ef84c45d9cbbf4a61e1104cdd136 Mon Sep 17 00:00:00 2001 From: merefield Date: Tue, 22 Aug 2023 10:38:06 +0100 Subject: [PATCH 07/28] Dont embed posts hidden to basic users --- lib/discourse_chatbot/embedding_process.rb | 31 +++++++++++++++------- plugin.rb | 2 +- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/lib/discourse_chatbot/embedding_process.rb b/lib/discourse_chatbot/embedding_process.rb index c2eab00..79a27dd 100644 --- a/lib/discourse_chatbot/embedding_process.rb +++ b/lib/discourse_chatbot/embedding_process.rb @@ -4,6 +4,7 @@ module ::DiscourseChatbot EMBEDDING_MODEL = "text-embedding-ada-002".freeze + CHAR_LIMIT = 32000 class EmbeddingProcess @@ -22,23 +23,35 @@ def initialize end def upsert_embedding(post_id) - response = @client.embeddings( - parameters: { - model: EMBEDDING_MODEL, - input: ::Post.find(post_id).raw - } - ) + benchmark_user = User.where(trust_level: 1, active: true, admin: false, suspended_at: nil).last + if benchmark_user.nil? + raise StandardError, "No benchmark user exists for Post embedding suitability check, please add a basic user" + end + benchmark_user_guardian = Guardian.new(benchmark_user) + + post = ::Post.find_by(id: post_id) + + return if post.nil? - embedding_vector = response.dig("data", 0, "embedding") + if benchmark_user_guardian.can_see?(post) + response = @client.embeddings( + parameters: { + model: EMBEDDING_MODEL, + input: post.raw[0..CHAR_LIMIT] + } + ) - ::DiscourseChatbot::Embedding.upsert({ post_id: post_id, embedding: embedding_vector }, on_duplicate: :update, unique_by: :post_id) + embedding_vector = response.dig("data", 0, "embedding") + + ::DiscourseChatbot::Embedding.upsert({ post_id: post_id, embedding: embedding_vector }, on_duplicate: :update, unique_by: :post_id) + end end def semantic_search(query) response = @client.embeddings( parameters: { model: EMBEDDING_MODEL, - input: query + input: query[0..CHAR_LIMIT] } ) diff --git a/plugin.rb b/plugin.rb index 3a8f4ea..68cb0e5 100644 --- a/plugin.rb +++ b/plugin.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true # name: discourse-chatbot # about: a plugin that allows you to have a conversation with a configurable chatbot in Discourse Chat, Topics and Private Messages -# version: 0.29 +# version: 0.30 # authors: merefield # url: https://github.com/merefield/discourse-chatbot From 0bec60d0068448352fdcbefcf1bf953f6ab40723 Mon Sep 17 00:00:00 2001 From: merefield Date: Wed, 23 Aug 2023 12:44:45 +0100 Subject: [PATCH 08/28] add missing only option to embeddings rake task, remove load of rake task --- lib/tasks/chatbot.rake | 19 ++++++++++++++----- plugin.rb | 3 +-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/lib/tasks/chatbot.rake b/lib/tasks/chatbot.rake index 6d9bd48..f2c812e 100644 --- a/lib/tasks/chatbot.rake +++ b/lib/tasks/chatbot.rake @@ -1,6 +1,6 @@ # frozen_string_literal: true desc "Update embeddings for each post" -task "chatbot:refresh_embeddings", %i[delay] => :environment do |_, args| +task "chatbot:refresh_embeddings", %i[missing_only delay] => :environment do |_, args| ENV["RAILS_DB"] ? refresh_embeddings(args) : refresh_embeddings_all_sites(args) end @@ -42,10 +42,17 @@ def refresh_embeddings_all_sites(args) end def refresh_embeddings(args) - puts "Refreshing embeddings for all posts for '#{RailsMultisite::ConnectionManagement.current_db}'" + puts "-" * 50 + puts "Refreshing embeddings for posts for '#{RailsMultisite::ConnectionManagement.current_db}'" + puts "-" * 50 + missing_only = args[:missing_only]&.to_i delay = args[:delay]&.to_i + puts "for missing only" if !missing_only.to_i.zero? + puts "with a delay of #{delay} second(s) between API calls" if !delay.to_i.zero? + puts "-" * 50 + if delay && delay < 1 puts "ERROR: delay parameter should be an integer and greater than 0" exit 1 @@ -62,10 +69,12 @@ def refresh_embeddings(args) .offset(i) .limit(batch) .each do |post| - post_embedding = ::DiscourseChatbot::EmbeddingProcess.new - post_embedding.upsert_embedding(post.id) + if !missing_only.to_i.zero? && ::DiscourseChatbot::Embedding.find_by(post_id: post.id).nil? || missing_only.to_i.zero? + post_embedding = ::DiscourseChatbot::EmbeddingProcess.new + post_embedding.upsert_embedding(post.id) + sleep(delay) if delay + end print_status(refreshed += 1, total) - sleep(delay) if delay end end end diff --git a/plugin.rb b/plugin.rb index 68cb0e5..0afda27 100644 --- a/plugin.rb +++ b/plugin.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true # name: discourse-chatbot # about: a plugin that allows you to have a conversation with a configurable chatbot in Discourse Chat, Topics and Private Messages -# version: 0.30 +# version: 0.31 # authors: merefield # url: https://github.com/merefield/discourse-chatbot @@ -50,7 +50,6 @@ def progress_debug_message(message) ../lib/discourse_chatbot/event_evaluation.rb ../app/models/embedding.rb ../lib/discourse_chatbot/embedding_process.rb - ../lib/tasks/chatbot.rake ../app/jobs/regular/chatbot_post_embedding_job.rb ../lib/discourse_chatbot/message/message_evaluation.rb ../lib/discourse_chatbot/post/post_evaluation.rb From f35012b12fa0c5b2dfc20514589a3d7a7bef362f Mon Sep 17 00:00:00 2001 From: merefield Date: Thu, 24 Aug 2023 09:56:19 +0100 Subject: [PATCH 09/28] uprate vector search accuracy --- db/migrate/20230820010105_create_chatbot_embeddings_index.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/migrate/20230820010105_create_chatbot_embeddings_index.rb b/db/migrate/20230820010105_create_chatbot_embeddings_index.rb index f0dce3f..a52490f 100644 --- a/db/migrate/20230820010105_create_chatbot_embeddings_index.rb +++ b/db/migrate/20230820010105_create_chatbot_embeddings_index.rb @@ -4,7 +4,7 @@ class CreateChatbotEmbeddingsIndex < ActiveRecord::Migration[7.0] def up execute <<-SQL CREATE INDEX hnsw_index_on_chatbot_embeddings ON chatbot_embeddings USING hnsw(embedding) - WITH (dims=1536, m=8, efconstruction=8, efsearch=8); + WITH (dims=1536, m=32, efconstruction=32, efsearch=32); SQL end From 2c8b18ae10ce016c68ffcaa3aaf2b0f9e3a1a7f9 Mon Sep 17 00:00:00 2001 From: merefield Date: Thu, 24 Aug 2023 10:07:00 +0100 Subject: [PATCH 10/28] uprate vector search accuracy --- db/migrate/20230820010105_create_chatbot_embeddings_index.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/migrate/20230820010105_create_chatbot_embeddings_index.rb b/db/migrate/20230820010105_create_chatbot_embeddings_index.rb index a52490f..47d5d85 100644 --- a/db/migrate/20230820010105_create_chatbot_embeddings_index.rb +++ b/db/migrate/20230820010105_create_chatbot_embeddings_index.rb @@ -4,7 +4,7 @@ class CreateChatbotEmbeddingsIndex < ActiveRecord::Migration[7.0] def up execute <<-SQL CREATE INDEX hnsw_index_on_chatbot_embeddings ON chatbot_embeddings USING hnsw(embedding) - WITH (dims=1536, m=32, efconstruction=32, efsearch=32); + WITH (dims=1536, m=64, efconstruction=64, efsearch=64); SQL end From 4a80d930ede7d29ab03ca4cf96f568dcf19f03ca Mon Sep 17 00:00:00 2001 From: merefield Date: Thu, 24 Aug 2023 12:34:30 +0100 Subject: [PATCH 11/28] improve data in post search response --- config/locales/server.en.yml | 2 +- lib/discourse_chatbot/functions/forum_search_function.rb | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index bf61380..7518d05 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -79,7 +79,7 @@ en: parameters: query: "search query for looking up information on the forum" answer_summary: "The top three posts on the forum related to this query are, best match first:\n\n" - answer: "Number %{rank}: the post is at this link url: %{url} and the text is '%{raw}'.\n\n" + answer: "Number %{rank}: the post is at this web address: %{url}, it was written by '%{username}' on %{date} and the text is '%{raw}'.\n\n" error: "'%{query}': my search for this on the forum failed." google_search: description: | diff --git a/lib/discourse_chatbot/functions/forum_search_function.rb b/lib/discourse_chatbot/functions/forum_search_function.rb index 1fecd81..34c505e 100644 --- a/lib/discourse_chatbot/functions/forum_search_function.rb +++ b/lib/discourse_chatbot/functions/forum_search_function.rb @@ -39,7 +39,9 @@ def process(args) current_post = ::Post.find(result.to_i) url = "#{Discourse.current_hostname}/t/slug/#{current_post.topic_id}/#{current_post.post_number}" raw = current_post.raw - response += I18n.t("chatbot.prompt.function.forum_search.answer", url: url, raw: raw, rank: index + 1) + username = User.find(current_post.user_id).username + date = current_post.created_at.to_date + response += I18n.t("chatbot.prompt.function.forum_search.answer", url: url, username: username, date: date, raw: raw, rank: index + 1) end response rescue From ad1a83c98e962b75e0b2dcebfea0a6a839a1054d Mon Sep 17 00:00:00 2001 From: merefield Date: Thu, 24 Aug 2023 12:48:42 +0100 Subject: [PATCH 12/28] improve bot class inheritance --- lib/discourse_chatbot/bots/open_ai_agent.rb | 4 +-- lib/discourse_chatbot/bots/open_ai_bot.rb | 6 ++-- .../bots/open_ai_bot_base.rb | 35 +++++++++++++++++++ plugin.rb | 1 + 4 files changed, 39 insertions(+), 7 deletions(-) create mode 100644 lib/discourse_chatbot/bots/open_ai_bot_base.rb diff --git a/lib/discourse_chatbot/bots/open_ai_agent.rb b/lib/discourse_chatbot/bots/open_ai_agent.rb index 5f807ba..3dacf4a 100644 --- a/lib/discourse_chatbot/bots/open_ai_agent.rb +++ b/lib/discourse_chatbot/bots/open_ai_agent.rb @@ -3,13 +3,11 @@ module ::DiscourseChatbot - class OpenAIAgent < Bot + class OpenAiAgent < OpenAiBotBase def initialize super - @model_name = SiteSetting.chatbot_open_ai_model_custom ? SiteSetting.chatbot_open_ai_model_custom_name : SiteSetting.chatbot_open_ai_model - calculator_function = ::DiscourseChatbot::CalculatorFunction.new wikipedia_function = ::DiscourseChatbot::WikipediaFunction.new news_function = ::DiscourseChatbot::NewsFunction.new diff --git a/lib/discourse_chatbot/bots/open_ai_bot.rb b/lib/discourse_chatbot/bots/open_ai_bot.rb index f44f30b..1213c59 100644 --- a/lib/discourse_chatbot/bots/open_ai_bot.rb +++ b/lib/discourse_chatbot/bots/open_ai_bot.rb @@ -3,7 +3,7 @@ module ::DiscourseChatbot - class OpenAIBot < Bot + class OpenAIBot < OpenAiBotBase def initialize super @@ -13,11 +13,9 @@ def get_response(prompt) system_message = { "role": "system", "content": I18n.t("chatbot.prompt.system.basic") } prompt.unshift(system_message) - model_name = SiteSetting.chatbot_open_ai_model_custom ? SiteSetting.chatbot_open_ai_model_custom_name : SiteSetting.chatbot_open_ai_model - response = @client.chat( parameters: { - model: model_name, + model: @model_name, messages: prompt, max_tokens: SiteSetting.chatbot_max_response_tokens, temperature: SiteSetting.chatbot_request_temperature / 100.0, diff --git a/lib/discourse_chatbot/bots/open_ai_bot_base.rb b/lib/discourse_chatbot/bots/open_ai_bot_base.rb new file mode 100644 index 0000000..018049e --- /dev/null +++ b/lib/discourse_chatbot/bots/open_ai_bot_base.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true +require "openai" + +module ::DiscourseChatbot + + class OpenAiBotBase < Bot + def initialize + if SiteSetting.chatbot_open_ai_model_custom_api_type == "azure" + ::OpenAI.configure do |config| + config.access_token = SiteSetting.chatbot_open_ai_token + config.uri_base = SiteSetting.chatbot_open_ai_model_custom_url + config.api_type = :azure + config.api_version = SiteSetting.chatbot_open_ai_model_custom_api_version + end + else + if !SiteSetting.chatbot_open_ai_model_custom_url.blank? + ::OpenAI.configure do |config| + config.access_token = SiteSetting.chatbot_open_ai_token + config.uri_base = SiteSetting.chatbot_open_ai_model_custom_url + end + @client = ::OpenAI::Client.new + else + @client = ::OpenAI::Client.new(access_token: SiteSetting.chatbot_open_ai_token) + end + end + + @model_name = SiteSetting.chatbot_open_ai_model_custom ? SiteSetting.chatbot_open_ai_model_custom_name : SiteSetting.chatbot_open_ai_model + end + + def get_response(prompt) + raise "Overwrite me!" + end + + end +end diff --git a/plugin.rb b/plugin.rb index 2a903e5..8efb503 100644 --- a/plugin.rb +++ b/plugin.rb @@ -54,6 +54,7 @@ def progress_debug_message(message) ../lib/discourse_chatbot/message/message_evaluation.rb ../lib/discourse_chatbot/post/post_evaluation.rb ../lib/discourse_chatbot/bot.rb + ../lib/discourse_chatbot/bots/open_ai_bot_base.rb ../lib/discourse_chatbot/bots/open_ai_bot.rb ../lib/discourse_chatbot/bots/open_ai_agent.rb ../lib/discourse_chatbot/function.rb From 9af3b1cc8780684ea9fd616bbe03dd2b2092d3ae Mon Sep 17 00:00:00 2001 From: merefield Date: Thu, 24 Aug 2023 13:06:54 +0100 Subject: [PATCH 13/28] rubocop --- lib/discourse_chatbot/functions/forum_search_function.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/discourse_chatbot/functions/forum_search_function.rb b/lib/discourse_chatbot/functions/forum_search_function.rb index 34c505e..30379e2 100644 --- a/lib/discourse_chatbot/functions/forum_search_function.rb +++ b/lib/discourse_chatbot/functions/forum_search_function.rb @@ -32,7 +32,7 @@ def process(args) results = post_embedding.semantic_search(query) top_results = results[0..2] - + response = I18n.t("chatbot.prompt.function.forum_search.answer_summary") top_results.each_with_index do |result, index| From 46872a85861e967d897ee729c8d3f59e770b2630 Mon Sep 17 00:00:00 2001 From: merefield Date: Thu, 24 Aug 2023 15:11:05 +0100 Subject: [PATCH 14/28] walk back name change --- lib/discourse_chatbot/bots/open_ai_agent.rb | 2 +- lib/discourse_chatbot/bots/open_ai_bot.rb | 2 +- lib/discourse_chatbot/bots/open_ai_bot_base.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/discourse_chatbot/bots/open_ai_agent.rb b/lib/discourse_chatbot/bots/open_ai_agent.rb index 3dacf4a..0a93987 100644 --- a/lib/discourse_chatbot/bots/open_ai_agent.rb +++ b/lib/discourse_chatbot/bots/open_ai_agent.rb @@ -3,7 +3,7 @@ module ::DiscourseChatbot - class OpenAiAgent < OpenAiBotBase + class OpenAIAgent < OpenAIBotBase def initialize super diff --git a/lib/discourse_chatbot/bots/open_ai_bot.rb b/lib/discourse_chatbot/bots/open_ai_bot.rb index 1213c59..fc75aba 100644 --- a/lib/discourse_chatbot/bots/open_ai_bot.rb +++ b/lib/discourse_chatbot/bots/open_ai_bot.rb @@ -3,7 +3,7 @@ module ::DiscourseChatbot - class OpenAIBot < OpenAiBotBase + class OpenAIBot < OpenAIBotBase def initialize super diff --git a/lib/discourse_chatbot/bots/open_ai_bot_base.rb b/lib/discourse_chatbot/bots/open_ai_bot_base.rb index 018049e..b4560d0 100644 --- a/lib/discourse_chatbot/bots/open_ai_bot_base.rb +++ b/lib/discourse_chatbot/bots/open_ai_bot_base.rb @@ -3,7 +3,7 @@ module ::DiscourseChatbot - class OpenAiBotBase < Bot + class OpenAIBotBase < Bot def initialize if SiteSetting.chatbot_open_ai_model_custom_api_type == "azure" ::OpenAI.configure do |config| From 3470fefc5ea14de8960510b3bca9e4bb8bc56ca9 Mon Sep 17 00:00:00 2001 From: merefield Date: Thu, 24 Aug 2023 19:49:04 +0100 Subject: [PATCH 15/28] expand github ci workflow to install pg_embeddings --- .github/workflows/plugin-tests.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/plugin-tests.yml b/.github/workflows/plugin-tests.yml index 622c4c1..2f75ff3 100644 --- a/.github/workflows/plugin-tests.yml +++ b/.github/workflows/plugin-tests.yml @@ -59,6 +59,14 @@ jobs: sudo -E -u postgres script/start_test_db.rb sudo -u postgres psql -c "CREATE ROLE $PGUSER LOGIN SUPERUSER PASSWORD '$PGPASSWORD';" + - name: Install pg_embeddings + run: | + sudo apt apt-get -y install -y postgresql-server-dev-13 + git clone https://github.com/neondatabase/pg_embedding.git + cd pg_embedding + make + make install + - name: Bundler cache uses: actions/cache@v3 with: From f06311eba65bc3edc0ce861153411fad2ff0931a Mon Sep 17 00:00:00 2001 From: merefield Date: Thu, 24 Aug 2023 19:51:39 +0100 Subject: [PATCH 16/28] fix workflow command --- .github/workflows/plugin-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/plugin-tests.yml b/.github/workflows/plugin-tests.yml index 2f75ff3..290acfb 100644 --- a/.github/workflows/plugin-tests.yml +++ b/.github/workflows/plugin-tests.yml @@ -61,7 +61,7 @@ jobs: - name: Install pg_embeddings run: | - sudo apt apt-get -y install -y postgresql-server-dev-13 + sudo apt-get -y install -y postgresql-server-dev-13 git clone https://github.com/neondatabase/pg_embedding.git cd pg_embedding make From 79db1b6af04fe50f7a448781bb511a5534608df3 Mon Sep 17 00:00:00 2001 From: merefield Date: Thu, 24 Aug 2023 19:53:40 +0100 Subject: [PATCH 17/28] fix yml indenting for workflow --- .github/workflows/plugin-tests.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/plugin-tests.yml b/.github/workflows/plugin-tests.yml index 290acfb..f196711 100644 --- a/.github/workflows/plugin-tests.yml +++ b/.github/workflows/plugin-tests.yml @@ -60,12 +60,12 @@ jobs: sudo -u postgres psql -c "CREATE ROLE $PGUSER LOGIN SUPERUSER PASSWORD '$PGPASSWORD';" - name: Install pg_embeddings - run: | - sudo apt-get -y install -y postgresql-server-dev-13 - git clone https://github.com/neondatabase/pg_embedding.git - cd pg_embedding - make - make install + run: | + sudo apt-get -y install -y postgresql-server-dev-13 + git clone https://github.com/neondatabase/pg_embedding.git + cd pg_embedding + make + make install - name: Bundler cache uses: actions/cache@v3 From 4dd96dea4e281b776cd846934d55cb31e02e0c20 Mon Sep 17 00:00:00 2001 From: merefield Date: Thu, 24 Aug 2023 20:00:50 +0100 Subject: [PATCH 18/28] add postgres repo to apt sources for github workflow --- .github/workflows/plugin-tests.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/plugin-tests.yml b/.github/workflows/plugin-tests.yml index f196711..6c9d2d6 100644 --- a/.github/workflows/plugin-tests.yml +++ b/.github/workflows/plugin-tests.yml @@ -61,6 +61,10 @@ jobs: - name: Install pg_embeddings run: | + sudo apt-get install wget ca-certificates + wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add - + sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt/ `lsb_release -cs`-pgdg main" >> /etc/apt/sources.list.d/pgdg.list' + sudo apt-get update sudo apt-get -y install -y postgresql-server-dev-13 git clone https://github.com/neondatabase/pg_embedding.git cd pg_embedding From 070cb946929668756a73c7cf93a4c8e1b48e42a6 Mon Sep 17 00:00:00 2001 From: merefield Date: Thu, 24 Aug 2023 20:32:23 +0100 Subject: [PATCH 19/28] fix locale key --- lib/discourse_chatbot/bots/open_ai_agent.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/discourse_chatbot/bots/open_ai_agent.rb b/lib/discourse_chatbot/bots/open_ai_agent.rb index 0a93987..2e6c7ba 100644 --- a/lib/discourse_chatbot/bots/open_ai_agent.rb +++ b/lib/discourse_chatbot/bots/open_ai_agent.rb @@ -130,7 +130,7 @@ def final_thought_answer thoughts = I18n.t("chatbot.prompt.agent.final_thought_answer.opener") @internal_thoughts.each do |thought| if thought.key?('function_call') - thoughts += I18n.t("chatbot.prompt.agent.final_thought_answer.function_declaration", function_name: thought['function_call']['name'], arguments: thought['function_call']['arguments']) + thoughts += I18n.t("chatbot.prompt.agent.final_thought_answer.thought_declaration", function_name: thought['function_call']['name'], arguments: thought['function_call']['arguments']) else thoughts += "#{thought['content']}\n\n" end From a2a45e80e496da25ff25dbc27d10907a1f336911 Mon Sep 17 00:00:00 2001 From: merefield Date: Sat, 26 Aug 2023 08:25:34 +0100 Subject: [PATCH 20/28] rename embeddings table to better reflect granularity --- .../regular/chatbot_post_embedding_delete_job.rb | 2 +- app/jobs/regular/chatbot_post_embedding_job.rb | 2 +- app/models/embedding.rb | 4 ++-- ...0230826010101_rename_chatbot_embeddings_table.rb | 13 +++++++++++++ ...0230826010103_rename_chatbot_embeddings_index.rb | 7 +++++++ lib/discourse_chatbot/embedding_process.rb | 2 +- .../functions/forum_search_function.rb | 2 +- lib/tasks/chatbot.rake | 6 +++--- 8 files changed, 29 insertions(+), 9 deletions(-) create mode 100644 db/migrate/20230826010101_rename_chatbot_embeddings_table.rb create mode 100644 db/migrate/20230826010103_rename_chatbot_embeddings_index.rb diff --git a/app/jobs/regular/chatbot_post_embedding_delete_job.rb b/app/jobs/regular/chatbot_post_embedding_delete_job.rb index 681605a..a09c8ef 100644 --- a/app/jobs/regular/chatbot_post_embedding_delete_job.rb +++ b/app/jobs/regular/chatbot_post_embedding_delete_job.rb @@ -10,7 +10,7 @@ def execute(opts) ::DiscourseChatbot.progress_debug_message("101. Deleting a Post Embedding for Post id: #{post_id}") - ::DiscourseChatbot::Embedding.find_by(post_id: post_id).destroy! + ::DiscourseChatbot::PostEmbedding.find_by(post_id: post_id).destroy! rescue => e Rails.logger.error ("OpenAIBot Post Embedding: There was a problem, but will retry til limit: #{e}") end diff --git a/app/jobs/regular/chatbot_post_embedding_job.rb b/app/jobs/regular/chatbot_post_embedding_job.rb index 700f417..226abc5 100644 --- a/app/jobs/regular/chatbot_post_embedding_job.rb +++ b/app/jobs/regular/chatbot_post_embedding_job.rb @@ -10,7 +10,7 @@ def execute(opts) ::DiscourseChatbot.progress_debug_message("100. Creating/updating a Post Embedding for Post id: #{post_id}") - post_embedding = ::DiscourseChatbot::EmbeddingProcess.new + post_embedding = ::DiscourseChatbot::PostEmbeddingProcess.new post_embedding.upsert_embedding(post_id) rescue => e diff --git a/app/models/embedding.rb b/app/models/embedding.rb index 81269be..a2ef3b4 100644 --- a/app/models/embedding.rb +++ b/app/models/embedding.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true -class ::DiscourseChatbot::Embedding < ActiveRecord::Base - self.table_name = 'chatbot_embeddings' +class ::DiscourseChatbot::PostEmbedding < ActiveRecord::Base + self.table_name = 'chatbot_post_embeddings' validates :post_id, presence: true, uniqueness: true end diff --git a/db/migrate/20230826010101_rename_chatbot_embeddings_table.rb b/db/migrate/20230826010101_rename_chatbot_embeddings_table.rb new file mode 100644 index 0000000..983900a --- /dev/null +++ b/db/migrate/20230826010101_rename_chatbot_embeddings_table.rb @@ -0,0 +1,13 @@ + +# frozen_string_literal: true + +class RenameChatbotEmbeddingsTable < ActiveRecord::Migration[7.0] + def change + begin + Migration::SafeMigrate.disable! + rename_table :chatbot_embeddings, :chatbot_post_embeddings + ensure + Migration::SafeMigrate.enable! + end + end +end diff --git a/db/migrate/20230826010103_rename_chatbot_embeddings_index.rb b/db/migrate/20230826010103_rename_chatbot_embeddings_index.rb new file mode 100644 index 0000000..0e3763f --- /dev/null +++ b/db/migrate/20230826010103_rename_chatbot_embeddings_index.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +class RenameChatbotEmbeddingsIndex < ActiveRecord::Migration[7.0] + def change + rename_index :chatbot_post_embeddings, 'hnsw_index_on_chatbot_embeddings', 'hnsw_index_on_chatbot_post_embeddings' + end +end diff --git a/lib/discourse_chatbot/embedding_process.rb b/lib/discourse_chatbot/embedding_process.rb index 79a27dd..7fcf1b2 100644 --- a/lib/discourse_chatbot/embedding_process.rb +++ b/lib/discourse_chatbot/embedding_process.rb @@ -43,7 +43,7 @@ def upsert_embedding(post_id) embedding_vector = response.dig("data", 0, "embedding") - ::DiscourseChatbot::Embedding.upsert({ post_id: post_id, embedding: embedding_vector }, on_duplicate: :update, unique_by: :post_id) + ::DiscourseChatbot::PostEmbedding.upsert({ post_id: post_id, embedding: embedding_vector }, on_duplicate: :update, unique_by: :post_id) end end diff --git a/lib/discourse_chatbot/functions/forum_search_function.rb b/lib/discourse_chatbot/functions/forum_search_function.rb index 30379e2..8bf2e3d 100644 --- a/lib/discourse_chatbot/functions/forum_search_function.rb +++ b/lib/discourse_chatbot/functions/forum_search_function.rb @@ -28,7 +28,7 @@ def process(args) super(args) query = args[parameters[0][:name]] - post_embedding = ::DiscourseChatbot::EmbeddingProcess.new + post_embedding = ::DiscourseChatbot::PostEmbeddingProcess.new results = post_embedding.semantic_search(query) top_results = results[0..2] diff --git a/lib/tasks/chatbot.rake b/lib/tasks/chatbot.rake index f2c812e..057c2fd 100644 --- a/lib/tasks/chatbot.rake +++ b/lib/tasks/chatbot.rake @@ -28,7 +28,7 @@ task "chatbot:refresh_embeddings_match", %i[pattern type delay] => [:environment total = search.count search.find_each do |post| - post_embedding = ::DiscourseChatbot::EmbeddingProcess.new + post_embedding = ::DiscourseChatbot::PostEmbeddingProcess.new post_embedding.upsert_embedding(post.id) print_status(refreshed += 1, total) sleep(delay) if delay @@ -69,8 +69,8 @@ def refresh_embeddings(args) .offset(i) .limit(batch) .each do |post| - if !missing_only.to_i.zero? && ::DiscourseChatbot::Embedding.find_by(post_id: post.id).nil? || missing_only.to_i.zero? - post_embedding = ::DiscourseChatbot::EmbeddingProcess.new + if !missing_only.to_i.zero? && ::DiscourseChatbot::PostEmbedding.find_by(post_id: post.id).nil? || missing_only.to_i.zero? + post_embedding = ::DiscourseChatbot::PostEmbeddingProcess.new post_embedding.upsert_embedding(post.id) sleep(delay) if delay end From d0e66ad1add2a0ee6e170280ffad6240dc3ff1eb Mon Sep 17 00:00:00 2001 From: merefield Date: Sat, 26 Aug 2023 08:46:23 +0100 Subject: [PATCH 21/28] move constants to plugin.rb --- lib/discourse_chatbot/embedding_process.rb | 11 ++++------- plugin.rb | 2 ++ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/discourse_chatbot/embedding_process.rb b/lib/discourse_chatbot/embedding_process.rb index 7fcf1b2..29b1fb4 100644 --- a/lib/discourse_chatbot/embedding_process.rb +++ b/lib/discourse_chatbot/embedding_process.rb @@ -3,9 +3,6 @@ module ::DiscourseChatbot - EMBEDDING_MODEL = "text-embedding-ada-002".freeze - CHAR_LIMIT = 32000 - class EmbeddingProcess def initialize @@ -36,8 +33,8 @@ def upsert_embedding(post_id) if benchmark_user_guardian.can_see?(post) response = @client.embeddings( parameters: { - model: EMBEDDING_MODEL, - input: post.raw[0..CHAR_LIMIT] + model: ::DiscourseChatbot::EMBEDDING_MODEL, + input: post.raw[0..::DiscourseChatbot::EMBEDDING_CHAR_LIMIT] } ) @@ -50,8 +47,8 @@ def upsert_embedding(post_id) def semantic_search(query) response = @client.embeddings( parameters: { - model: EMBEDDING_MODEL, - input: query[0..CHAR_LIMIT] + model: ::DiscourseChatbot::EMBEDDING_MODEL, + input: query[0..::DiscourseChatbot::EMBEDDING_CHAR_LIMIT] } ) diff --git a/plugin.rb b/plugin.rb index 8efb503..c52a959 100644 --- a/plugin.rb +++ b/plugin.rb @@ -23,6 +23,8 @@ module ::DiscourseChatbot CHATBOT_QUERIES_CUSTOM_FIELD = "chatbot_queries" POST_TYPES_REGULAR_ONLY = [1] POST_TYPES_INC_WHISPERS = [1, 4] + EMBEDDING_MODEL = "text-embedding-ada-002".freeze + EMBEDDING_CHAR_LIMIT = 32000 def progress_debug_message(message) if SiteSetting.chatbot_enable_verbose_console_response_progress_logging From 72ed6b981e0a29dc2f44c8f2265371645c305143 Mon Sep 17 00:00:00 2001 From: merefield Date: Sat, 26 Aug 2023 08:52:51 +0100 Subject: [PATCH 22/28] remove explicit repo source --- .github/workflows/plugin-tests.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/plugin-tests.yml b/.github/workflows/plugin-tests.yml index 6c9d2d6..fba9852 100644 --- a/.github/workflows/plugin-tests.yml +++ b/.github/workflows/plugin-tests.yml @@ -61,9 +61,6 @@ jobs: - name: Install pg_embeddings run: | - sudo apt-get install wget ca-certificates - wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add - - sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt/ `lsb_release -cs`-pgdg main" >> /etc/apt/sources.list.d/pgdg.list' sudo apt-get update sudo apt-get -y install -y postgresql-server-dev-13 git clone https://github.com/neondatabase/pg_embedding.git From 55d8306197e88fade57ad02407b501170ab4927c Mon Sep 17 00:00:00 2001 From: merefield Date: Sat, 26 Aug 2023 22:07:46 +0100 Subject: [PATCH 23/28] rename post embedding process --- .../{embedding_process.rb => post_embedding_process.rb} | 2 +- plugin.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename lib/discourse_chatbot/{embedding_process.rb => post_embedding_process.rb} (98%) diff --git a/lib/discourse_chatbot/embedding_process.rb b/lib/discourse_chatbot/post_embedding_process.rb similarity index 98% rename from lib/discourse_chatbot/embedding_process.rb rename to lib/discourse_chatbot/post_embedding_process.rb index 29b1fb4..b70d568 100644 --- a/lib/discourse_chatbot/embedding_process.rb +++ b/lib/discourse_chatbot/post_embedding_process.rb @@ -3,7 +3,7 @@ module ::DiscourseChatbot - class EmbeddingProcess + class PostEmbeddingProcess def initialize if SiteSetting.chatbot_azure_open_ai_model_url.include?("azure") diff --git a/plugin.rb b/plugin.rb index c52a959..b7646e8 100644 --- a/plugin.rb +++ b/plugin.rb @@ -51,7 +51,7 @@ def progress_debug_message(message) %w( ../lib/discourse_chatbot/event_evaluation.rb ../app/models/embedding.rb - ../lib/discourse_chatbot/embedding_process.rb + ../lib/discourse_chatbot/post_embedding_process.rb ../app/jobs/regular/chatbot_post_embedding_job.rb ../lib/discourse_chatbot/message/message_evaluation.rb ../lib/discourse_chatbot/post/post_evaluation.rb From a210a2dd421dd1f6808e1e2db11cf69185db57d9 Mon Sep 17 00:00:00 2001 From: merefield Date: Sat, 26 Aug 2023 22:46:00 +0100 Subject: [PATCH 24/28] streamline client setup code --- .../bots/open_ai_bot_base.rb | 22 ++++++++---------- .../post_embedding_process.rb | 23 +++++++++++-------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/lib/discourse_chatbot/bots/open_ai_bot_base.rb b/lib/discourse_chatbot/bots/open_ai_bot_base.rb index b4560d0..f329be0 100644 --- a/lib/discourse_chatbot/bots/open_ai_bot_base.rb +++ b/lib/discourse_chatbot/bots/open_ai_bot_base.rb @@ -5,25 +5,21 @@ module ::DiscourseChatbot class OpenAIBotBase < Bot def initialize - if SiteSetting.chatbot_open_ai_model_custom_api_type == "azure" + ::OpenAI.configure do |config| + config.access_token = SiteSetting.chatbot_open_ai_token + end + if !SiteSetting.chatbot_open_ai_model_custom_url.blank? ::OpenAI.configure do |config| - config.access_token = SiteSetting.chatbot_open_ai_token config.uri_base = SiteSetting.chatbot_open_ai_model_custom_url + end + end + if SiteSetting.chatbot_open_ai_model_custom_api_type == "azure" + ::OpenAI.configure do |config| config.api_type = :azure config.api_version = SiteSetting.chatbot_open_ai_model_custom_api_version end - else - if !SiteSetting.chatbot_open_ai_model_custom_url.blank? - ::OpenAI.configure do |config| - config.access_token = SiteSetting.chatbot_open_ai_token - config.uri_base = SiteSetting.chatbot_open_ai_model_custom_url - end - @client = ::OpenAI::Client.new - else - @client = ::OpenAI::Client.new(access_token: SiteSetting.chatbot_open_ai_token) - end end - + @client = ::OpenAI::Client.new @model_name = SiteSetting.chatbot_open_ai_model_custom ? SiteSetting.chatbot_open_ai_model_custom_name : SiteSetting.chatbot_open_ai_model end diff --git a/lib/discourse_chatbot/post_embedding_process.rb b/lib/discourse_chatbot/post_embedding_process.rb index b70d568..89f054c 100644 --- a/lib/discourse_chatbot/post_embedding_process.rb +++ b/lib/discourse_chatbot/post_embedding_process.rb @@ -6,17 +6,22 @@ module ::DiscourseChatbot class PostEmbeddingProcess def initialize - if SiteSetting.chatbot_azure_open_ai_model_url.include?("azure") + ::OpenAI.configure do |config| + config.access_token = SiteSetting.chatbot_open_ai_token + end + if !SiteSetting.chatbot_open_ai_model_custom_url.blank? + ::OpenAI.configure do |config| + config.uri_base = SiteSetting.chatbot_open_ai_model_custom_url + end + end + if SiteSetting.chatbot_open_ai_model_custom_api_type == "azure" ::OpenAI.configure do |config| - config.access_token = SiteSetting.chatbot_azure_open_ai_token - config.uri_base = SiteSetting.chatbot_azure_open_ai_model_url config.api_type = :azure - config.api_version = "2023-05-15" + config.api_version = SiteSetting.chatbot_open_ai_model_custom_api_version end - @client = ::OpenAI::Client.new - else - @client = ::OpenAI::Client.new(access_token: SiteSetting.chatbot_open_ai_token) end + @model_name = ::DiscourseChatbot::EMBEDDING_MODEL + @client = ::OpenAI::Client.new end def upsert_embedding(post_id) @@ -33,7 +38,7 @@ def upsert_embedding(post_id) if benchmark_user_guardian.can_see?(post) response = @client.embeddings( parameters: { - model: ::DiscourseChatbot::EMBEDDING_MODEL, + model: @model_name, input: post.raw[0..::DiscourseChatbot::EMBEDDING_CHAR_LIMIT] } ) @@ -47,7 +52,7 @@ def upsert_embedding(post_id) def semantic_search(query) response = @client.embeddings( parameters: { - model: ::DiscourseChatbot::EMBEDDING_MODEL, + model: @model_name, input: query[0..::DiscourseChatbot::EMBEDDING_CHAR_LIMIT] } ) From 5bdc646ca3178f228c6c87de949e3010409d8642 Mon Sep 17 00:00:00 2001 From: merefield Date: Sun, 27 Aug 2023 18:39:57 +0100 Subject: [PATCH 25/28] fix workflow by adding correct pg_config path for current ver --- .github/workflows/plugin-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/plugin-tests.yml b/.github/workflows/plugin-tests.yml index fba9852..2245e01 100644 --- a/.github/workflows/plugin-tests.yml +++ b/.github/workflows/plugin-tests.yml @@ -65,8 +65,8 @@ jobs: sudo apt-get -y install -y postgresql-server-dev-13 git clone https://github.com/neondatabase/pg_embedding.git cd pg_embedding - make - make install + make PG_CONFIG=/usr/lib/postgresql/13/bin/pg_config + make PG_CONFIG=/usr/lib/postgresql/13/bin/pg_config install - name: Bundler cache uses: actions/cache@v3 From 8b6b40449e3ae157944d609dd14181f182c2ad5e Mon Sep 17 00:00:00 2001 From: merefield Date: Sun, 27 Aug 2023 21:12:29 +0100 Subject: [PATCH 26/28] fix search query for new table name --- lib/discourse_chatbot/post_embedding_process.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/discourse_chatbot/post_embedding_process.rb b/lib/discourse_chatbot/post_embedding_process.rb index 89f054c..6aa8cac 100644 --- a/lib/discourse_chatbot/post_embedding_process.rb +++ b/lib/discourse_chatbot/post_embedding_process.rb @@ -65,7 +65,7 @@ def semantic_search(query) SELECT post_id FROM - chatbot_embeddings + chatbot_post_embeddings ORDER BY embedding::real[] <-> array[:query_embedding] LIMIT :limit From 76a6e913e09d9f822ef4ff19b3420c0d349c6f3e Mon Sep 17 00:00:00 2001 From: merefield Date: Mon, 28 Aug 2023 12:44:38 +0100 Subject: [PATCH 27/28] Add long prompt capable function calling models --- config/settings.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/config/settings.yml b/config/settings.yml index 6c38e55..a9d5f1e 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -66,11 +66,13 @@ plugins: default: gpt-3.5-turbo choices: - gpt-3.5-turbo - - gpt-3.5-turbo-16k - gpt-3.5-turbo-0613 + - gpt-3.5-turbo-16k + - gpt-3.5-turbo-16k-0613 - gpt-4 - - gpt-4-32k - gpt-4-0613 + - gpt-4-32k + - gpt-4-32k-0613 chatbot_reply_job_time_delay: client: false default: 3 From 281ded4314b4f6dab5a66473f7bef69420bc1fed Mon Sep 17 00:00:00 2001 From: merefield Date: Mon, 28 Aug 2023 12:46:12 +0100 Subject: [PATCH 28/28] Add optional post count parameter to forum search queries --- config/locales/server.en.yml | 7 ++++--- lib/discourse_chatbot/functions/forum_search_function.rb | 9 ++++++--- lib/discourse_chatbot/post_embedding_process.rb | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 7ac6cbc..168dcd7 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -74,12 +74,13 @@ en: Search the local forum for information that may help you answer the question. Especially useful when the forum specialises in the subject matter of the query. Searching the local forum is preferable to searching google or the internet and should be considered higher priority. It is quicker and cheaper. - Input should be a search query. + Input should be a search query. You can optionally also specify the number of posts you wish returned from your query. - Outputs text from the Post and a url link to it you can provide the user. + Outputs text from the Post and a url link to it you can provide the user. When presenting the url in your reply, do not embed in an anchor, just write the straight link. parameters: query: "search query for looking up information on the forum" - answer_summary: "The top three posts on the forum related to this query are, best match first:\n\n" + number_of_posts: "specify the number of posts you want returned from your query" + answer_summary: "The top %{number_of_posts} posts on the forum related to this query are, best match first:\n\n" answer: "Number %{rank}: the post is at this web address: %{url}, it was written by '%{username}' on %{date} and the text is '%{raw}'.\n\n" error: "'%{query}': my search for this on the forum failed." google_search: diff --git a/lib/discourse_chatbot/functions/forum_search_function.rb b/lib/discourse_chatbot/functions/forum_search_function.rb index 8bf2e3d..ab89e52 100644 --- a/lib/discourse_chatbot/functions/forum_search_function.rb +++ b/lib/discourse_chatbot/functions/forum_search_function.rb @@ -16,6 +16,7 @@ def description def parameters [ { name: "query", type: String, description: I18n.t("chatbot.prompt.function.forum_search.parameters.query") } , + { name: "number_of_posts", type: Integer, description: I18n.t("chatbot.prompt.function.stock_data.parameters.number_of_posts") } ] end @@ -27,17 +28,19 @@ def process(args) begin super(args) query = args[parameters[0][:name]] + number_of_posts = args[parameters[1][:name]].blank? ? 3 : args[parameters[1][:name]] + number_of_posts = number_of_posts > 10 ? 10 : number_of_posts post_embedding = ::DiscourseChatbot::PostEmbeddingProcess.new results = post_embedding.semantic_search(query) - top_results = results[0..2] + top_results = results[0..(number_of_posts - 1)] - response = I18n.t("chatbot.prompt.function.forum_search.answer_summary") + response = I18n.t("chatbot.prompt.function.forum_search.answer_summary", number_of_posts: number_of_posts) top_results.each_with_index do |result, index| current_post = ::Post.find(result.to_i) - url = "#{Discourse.current_hostname}/t/slug/#{current_post.topic_id}/#{current_post.post_number}" + url = "https://#{Discourse.current_hostname}/t/slug/#{current_post.topic_id}/#{current_post.post_number}" raw = current_post.raw username = User.find(current_post.user_id).username date = current_post.created_at.to_date diff --git a/lib/discourse_chatbot/post_embedding_process.rb b/lib/discourse_chatbot/post_embedding_process.rb index 6aa8cac..abbf05c 100644 --- a/lib/discourse_chatbot/post_embedding_process.rb +++ b/lib/discourse_chatbot/post_embedding_process.rb @@ -61,7 +61,7 @@ def semantic_search(query) begin search_result_post_ids = - DB.query(<<~SQL, query_embedding: query_vector, limit: 8).map( + DB.query(<<~SQL, query_embedding: query_vector, limit: 10).map( SELECT post_id FROM