From 3f95f6b24163531c0336a6c38ba971fde9762860 Mon Sep 17 00:00:00 2001 From: Kyo Lee Date: Fri, 1 Sep 2023 19:30:00 +0000 Subject: [PATCH 01/11] [demos][python] Add a new Python PaLM demo: Docs Agent Doc Agents allows PaLM API users to launch a chat application using their own documents as a dataset. --- demos/palm/python/docs-agent/CONTRIBUTING.md | 28 + demos/palm/python/docs-agent/LICENSE | 202 + demos/palm/python/docs-agent/README.md | 694 +++ .../python/docs-agent/chatbot/__init__.py | 21 + .../palm/python/docs-agent/chatbot/chatui.py | 286 + .../palm/python/docs-agent/chatbot/launch.sh | 38 + .../docs-agent/chatbot/static/css/chatbox.css | 1 + .../docs-agent/chatbot/static/css/style.css | 324 ++ .../chatbot/static/images/favicon.png | Bin 0 -> 608 bytes .../chatbot/static/javascript/app.js | 146 + .../chatbot/templates/chatui/base.html | 25 + .../chatbot/templates/chatui/index.html | 18 + .../chatbot/templates/chatui/result.html | 58 + demos/palm/python/docs-agent/chroma.py | 200 + demos/palm/python/docs-agent/condition.txt | 2 + demos/palm/python/docs-agent/config.yaml | 49 + .../images/docs-agent-architecture-01.png | Bin 0 -> 137642 bytes .../images/docs-agent-architecture-02.png | Bin 0 -> 134403 bytes .../docs/images/docs-agent-embeddings-01.png | Bin 0 -> 139420 bytes .../docs/images/docs-agent-embeddings-02.png | Bin 0 -> 412176 bytes .../images/docs-agent-prompt-structure-01.png | Bin 0 -> 266254 bytes .../images/docs-agent-ui-screenshot-01.png | Bin 0 -> 408671 bytes demos/palm/python/docs-agent/docs_agent.py | 151 + demos/palm/python/docs-agent/hello_world.py | 60 + demos/palm/python/docs-agent/palm.py | 153 + demos/palm/python/docs-agent/poetry.lock | 4605 +++++++++++++++++ demos/palm/python/docs-agent/pylintrc | 429 ++ demos/palm/python/docs-agent/pyproject.toml | 42 + demos/palm/python/docs-agent/run_console.py | 72 + .../scripts/markdown_to_plain_text.py | 416 ++ .../scripts/populate_vector_database.py | 302 ++ 
.../python/docs-agent/scripts/read_config.py | 96 + .../scripts/test_vector_database.py | 127 + demos/palm/python/docs-agent/setup.py | 24 + .../docs-agent/third_party/css/chatbox.css | 42 + .../python/docs-agent/vector_stores/.gitkeep | 0 36 files changed, 8611 insertions(+) create mode 100644 demos/palm/python/docs-agent/CONTRIBUTING.md create mode 100644 demos/palm/python/docs-agent/LICENSE create mode 100644 demos/palm/python/docs-agent/README.md create mode 100644 demos/palm/python/docs-agent/chatbot/__init__.py create mode 100644 demos/palm/python/docs-agent/chatbot/chatui.py create mode 100755 demos/palm/python/docs-agent/chatbot/launch.sh create mode 120000 demos/palm/python/docs-agent/chatbot/static/css/chatbox.css create mode 100644 demos/palm/python/docs-agent/chatbot/static/css/style.css create mode 100644 demos/palm/python/docs-agent/chatbot/static/images/favicon.png create mode 100644 demos/palm/python/docs-agent/chatbot/static/javascript/app.js create mode 100644 demos/palm/python/docs-agent/chatbot/templates/chatui/base.html create mode 100644 demos/palm/python/docs-agent/chatbot/templates/chatui/index.html create mode 100644 demos/palm/python/docs-agent/chatbot/templates/chatui/result.html create mode 100644 demos/palm/python/docs-agent/chroma.py create mode 100644 demos/palm/python/docs-agent/condition.txt create mode 100644 demos/palm/python/docs-agent/config.yaml create mode 100644 demos/palm/python/docs-agent/docs/images/docs-agent-architecture-01.png create mode 100644 demos/palm/python/docs-agent/docs/images/docs-agent-architecture-02.png create mode 100644 demos/palm/python/docs-agent/docs/images/docs-agent-embeddings-01.png create mode 100644 demos/palm/python/docs-agent/docs/images/docs-agent-embeddings-02.png create mode 100644 demos/palm/python/docs-agent/docs/images/docs-agent-prompt-structure-01.png create mode 100644 demos/palm/python/docs-agent/docs/images/docs-agent-ui-screenshot-01.png create mode 100644 
demos/palm/python/docs-agent/docs_agent.py create mode 100644 demos/palm/python/docs-agent/hello_world.py create mode 100644 demos/palm/python/docs-agent/palm.py create mode 100644 demos/palm/python/docs-agent/poetry.lock create mode 100644 demos/palm/python/docs-agent/pylintrc create mode 100644 demos/palm/python/docs-agent/pyproject.toml create mode 100644 demos/palm/python/docs-agent/run_console.py create mode 100644 demos/palm/python/docs-agent/scripts/markdown_to_plain_text.py create mode 100644 demos/palm/python/docs-agent/scripts/populate_vector_database.py create mode 100644 demos/palm/python/docs-agent/scripts/read_config.py create mode 100644 demos/palm/python/docs-agent/scripts/test_vector_database.py create mode 100644 demos/palm/python/docs-agent/setup.py create mode 100644 demos/palm/python/docs-agent/third_party/css/chatbox.css create mode 100644 demos/palm/python/docs-agent/vector_stores/.gitkeep diff --git a/demos/palm/python/docs-agent/CONTRIBUTING.md b/demos/palm/python/docs-agent/CONTRIBUTING.md new file mode 100644 index 000000000..294f64ae1 --- /dev/null +++ b/demos/palm/python/docs-agent/CONTRIBUTING.md @@ -0,0 +1,28 @@ +# Contributing to Docs Agent + +Docs Agent lets anyone contribute to the project, regardless of their employer. +The Docs Agent project reviews and encourages well-tested, high-quality +contributions from anyone who wants to contribute to Docs Agent. + +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement (CLA). + +To see any Contributor License Agreements on file or to sign a CLA, +go to <https://cla.developers.google.com/>. + +For more information about the Google CLA, +see [Contributor License Agreements](https://cla.developers.google.com/about). + +## Contributing changes and submitting code reviews + +All changes require review, including changes by project members. + +For detailed instructions on how to contribute changes, +see README.md. 
+ +## Community guidelines + +This project observes Google's +[Open Source Community Guidelines](https://opensource.google/conduct/). diff --git a/demos/palm/python/docs-agent/LICENSE b/demos/palm/python/docs-agent/LICENSE new file mode 100644 index 000000000..7a4a3ea24 --- /dev/null +++ b/demos/palm/python/docs-agent/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/demos/palm/python/docs-agent/README.md b/demos/palm/python/docs-agent/README.md new file mode 100644 index 000000000..3550b45bf --- /dev/null +++ b/demos/palm/python/docs-agent/README.md @@ -0,0 +1,694 @@ +# Docs Agent + +The Docs Agent project is for [PaLM API][genai-doc-site] users who want to check out and set +up the Docs Agent sample app and [contribute][contribute-to-docs-agent] to the Docs project. + +**Note**: If you're interested in setting up and launching the Docs Agent sample app on your +host machine, see the [Set up Docs Agent][set-up-docs-agent] section below. + +## Overview + +The Docs Agent sample app is being developed to demonstrate an AI-powered chatbot application +(including a backend server and web UI) that can answer questions specific to any product, +service, or topic that has a great quantity of information available as documentation (which +can be from various sources such as Markdown, HTML, Google Docs, chat conversations, etc.). + +The main goal of the Docs Agent project is: + +- You can supply your own set of documents to enable a PaLM 2 model to synthesize useful, + relevant, and accurate responses that are grounded on the documented information. 
+ +The Docs Agent sample app is designed to be easily set up and configured in a Linux environment +and requires that you have access to Google’s [PaLM API][genai-doc-site]. + +Keep in mind that this approach does not “fine-tune” an LLM (large language model) +itself. Instead, the Docs Agent sample app uses a mixture of prompt engineering and +embeddings techniques on top of a publicly available LLM such as PaLM 2. + +![Docs Agent architecture](docs/images/docs-agent-architecture-01.png) + +**Figure 1**. Docs Agent uses a vector database to retrieve context for augmenting prompts. + +However, once you launch this chatbot using your own set of documents, you may soon realize +that your existing documents alone are not sufficient for this chatbot to be effective in +production. Besides continuing to enhance the quality of your documents, you may want to +consider setting up a type of reinforcement learning process for improving the system and +refining your dataset using real-world feedback from users. + +## Main features + +The key features of the Docs Agent sample app are: + +- Add context to user questions to augment their prompts to an LLM. +- Process documents into embeddings and store them in a vector database for context retrieval. + +![Docs Agent flow](docs/images/docs-agent-architecture-02.png) + +**Figure 2**. A user question is augmented by the Docs Agent server and passed to an LLM. + +**Note**: For the moment, the Docs Agent project focuses on providing Python scripts that make it +easy to process Markdown files into embeddings. However, there is no hard requirement that the +source documents must exist in Markdown format. What’s important is that the processed content +is available as embeddings in the vector database. 
+ +### Structure of a prompt to a PaLM 2 model + +To enable an LLM to answer questions that are not part of the public knowledge (which the LLM +is likely trained on), the Docs Agent project applies a mixture of prompt engineering and +embeddings techniques. That is, we process a set of documents (which contain domain-specific +knowledge) into embeddings and store them in a vector database. This vector database allows +the Docs Agent server to perform semantic search on stored embeddings to find the most relevant +content from the source documents given user questions. + +Once the most relevant content is returned, the Docs Agent server uses the prompt structure +shown in Figure 3 to augment the user question with a preset **condition** and a list of +**context**. (When the Docs Agent server starts, the condition value is read from the +[`condition.txt`][condition-txt] file.) Then the Docs Agent server sends this prompt to a +PaLM 2 model using the PaLM API and receives a response generated by the model. + +![Docs Agent prompt structure](docs/images/docs-agent-prompt-structure-01.png) + +**Figure 3**. Prompt structure for augmenting a user question with related context +(Context source: [eventhorizontelescope.org][context-source-01]) + +### Processing of Markdown files into embeddings + +To process information into embeddings using the Python scripts in the project, the +information needs to be stored in Markdown format. Once you have a set of Markdown files +stored in a directory on your host machine, you can run the +[`markdown_to_plain_text.py`][markdown-to-plain-text] script to process those Markdown +files into small plain text files – the script splits the content by the top three Markdown +headers (`#`, `##`, and `###`). 
+ +Once Markdown files are processed into small plain text files, you can run the +[`populate_vector_database.py`][populate-vector-database] script to generate embeddings +for each text file and store those embeddings into a [Chroma][chroma-docs] vector database +running on the host machine. + +The embeddings in this vector database enable the Docs Agent server to perform semantic search +and retrieve context related to user questions for augmenting prompts. + +![Document to embeddings](docs/images/docs-agent-embeddings-01.png) + +**Figure 4**. A document is split into small semantic chunks, which are then used to generate +embeddings. + +![Markdown to embeddings](docs/images/docs-agent-embeddings-02.png) + +**Figure 5**. A Markdown page is split by headers and processed into embeddings. + +## Summary of tasks and features + +The following list summarizes the tasks and features of the Docs Agent sample app: + +- **Process Markdown**: Split Markdown files into small plain text files. (See the + [`markdown_to_plain_text.py`][markdown-to-plain-text] script.) +- **Generate embeddings**: Use small plain text files to generate embeddings, processed by + an embedding model (`embedding-gecko-001`), and store them in a local Chroma vector + database. (See the [`populate_vector_database.py`][populate-vector-database] script.) +- **Semantic search using embeddings**: Compare embeddings in the vector database for most + relevant content given user questions (which are also processed into embeddings using + the same `embedding-gecko-001` model). +- **Add context to a user question in a prompt**: Add the list of content returned from + the semantic search as context to the user question and send the prompt to a PaLM 2 + model using the PaLM API. +- **(Experimental) “Fact-check” responses**: This experimental feature composes a + follow-up prompt and asks the PaLM 2 model to “fact-check” its own previous response. 
+ (See the [Using a PaLM 2 model to fact-check its own response][fact-check-section] section.) +- **Generate 5 related questions**: In addition to displaying a response to the user + question, the web UI displays five questions generated by the PaLM 2 model based on + the context of the user question. (See the + [Using a PaLM 2 model to suggest related questions][related-questions-section] section.) +- **Display URLs of knowledge sources**: The vector database stores URLs as metadata for + embeddings. Whenever the vector database is used to retrieve context (for instance, to + provide context to user questions), the database can also return the URLs of the sources + that were originally used to generate the embeddings. +- **Submit rewrites and likes**: The web UI includes the buttons at the bottom of the + display that allow users to like generated responses or submit rewrites of + the responses. (See the + [Enabling users to submit a rewrite of a generated response][submit-a-rewrite] and + [Enabling users to like generated responses][like-generate-responses] sections.) + +## Flow of events + +The following events take place in the Docs Agent sample app: + +1. The [`markdown_to_plain_text.py`][markdown-to-plain-text] script converts input + Markdown documents into small plain text files, split by Markdown headings + (`#`, `##`, and `###`). +2. The [`populate_vector_database.py`][populate-vector-database] script generates + embeddings from the small plain text files and populates a vector database. +3. When the [`chatbot/launch.sh`][launch-script] script is run, it starts the + Docs Agent server and vector database, which loads generated embeddings and + metadata (URLs and filenames) stored in the `vector_stores` directory. +4. When the user asks a question, the Docs Agent server uses the vector database to + perform semantic search on embeddings, which represent content in the source + documents. +5. 
Using this semantic search capability, the Docs Agent server finds a list of + text chunks that are most relevant to the user question. +6. The Docs Agent server adds this list of text chunks as context (plus a condition + for responses) to the user question and constructs them into a prompt. +7. The system sends the prompt to a PaLM 2 model via the PaLM API. +8. The PaLM 2 model generates a response and the Docs Agent server renders it on + the chat UI. + +Additional events for [“fact-checking” a generated response][fact-check-section]: + +9. The Docs Agent server prepares another prompt that compares the generated response + (in step 8) to the context (in step 6) and asks the PaLM model to look for + a discrepancy in the response. +10. The PaLM model generates a response that points out one major discrepancy + (if it exists) between its previous response and the context. +11. The Docs Agent server renders this response on the chat UI as a call-out note. +12. The Docs Agent server passes this second response to the vector database to + perform semantic search. +13. The vector database returns a list of relevant content (that is closely related + to the second response). +14. The Docs Agent server renders the top URL of this list on the chat UI and + suggests that the user checks out this URL for fact-checking. + +Additional events for +[suggesting 5 questions related to the user question][related-questions-section]: + +15. The Docs Agent server prepares another prompt that asks the PaLM model to + generate 5 questions based on the context (in step 6). +16. The PaLM model generates a response that contains a list of questions related + to the context. +17. The Docs Agent server renders the questions on the chat UI. + +## Supplementary features + +This section describes additional features implemented on the Docs Agent sample app for +enhancing the usability of the Q&A experience powered by generative AI. 
+ +![Docs Agent UI](docs/images/docs-agent-ui-screenshot-01.png) + +**Figure 6**. A screenshot of the Docs Agent chat UI showing the sections generated by +three distinct prompts. + +### Using a PaLM 2 model to fact-check its own response + +In addition to using the prompt structure above (shown in Figure 3), we‘re currently +experimenting with the following prompt setup for “fact-checking” responses generated +by the PaLM model: + +- Condition: + + ``` + You are a helpful chatbot answering questions from users. Read the following context + first and answer the question at the end: + ``` + +- Context: + + ``` + + ``` + +- Additional condition (for fact-checking): + + ``` + Compare the following body of text to the context provided in this prompt and write + a short message that warns the readers about which part of the text below they + should consider fact-checking for themselves? (please keep your response concise and + mention only one important point): + ``` + +- Previously generated response + + ``` + + ``` + +This "fact-checking" prompt returns a response similar to the following example: + +``` +The text states that Flutter chose to use Dart because it is a fast, productive, object-oriented +language that is well-suited for building user interfaces. However, the context provided in the +prompt states that Flutter chose Dart because it is a fast, productive language that is well-suited +for Flutter's problem domain: creating visual user experiences. Therefore, readers should consider +fact-checking the claim that Dart is well-suited for building user interfaces. 
+``` + +After the second response, notice that the Docs Agent chat UI also suggests a URL to visit for +fact-checking (see Figure 6), which looks similar to the following example: + +``` +To verify this information, please check out: + +https://docs.flutter.dev/resources/faq +``` + +To identify this URL, the Docs Agent server takes the second response (which is the paragraph that +begins with “The text states that ...” in the example above) and uses it to query the vector +database. Once the vector database returns a list of the most relevant content to this response, +the UI only displays the top URL to the user. + +Keep in mind that this "fact-checking" prompt setup is currently considered **experimental** +because we’ve seen cases where a PaLM model would end up adding incorrect information into its +second response as well. However, we saw that adding this second response (which brings attention +to the PaLM model’s possible hallucinations) seems to improve the usability of the system since it +serves as a reminder to the users that the PaLM model’s response is far from being perfect, which +helps encourage the users to take more steps to validate generated responses for themselves. + +### Using a PaLM 2 model to suggest related questions + +The project’s latest web UI includes the “Related questions” section, which displays five +questions that are related to the user question (see Figure 6). These five questions are also +generated by a PaLM model (via the PaLM API). Using the list of contents returned from the vector +database as context, the system prepares another prompt asking the PaLM model to generate five +questions from the included context. + +The following is the exact structure of this prompt: + +- Condition: + + ``` + You are a helpful chatbot answering questions from users. 
Read the following context first + and answer the question at the end: + ``` + +- Context: + + ``` + + ``` + +- Question: + + ``` + What are 5 questions developers might ask after reading the context? + ``` + +### Enabling users to submit a rewrite of a generated response + +The project‘s latest web UI includes the **Rewrite this response** button at the bottom of +the panel (see Figure 6). When this button is clicked, a widget opens up, expanding the +main UI panel, and reveals a textarea containing the generated response to the user's question. +The user is then allowed to edit this response in the textarea and click the **Submit** button +to submit the updated response to the system. + +The system stores the submitted response as a Markdown file in the project's local `rewrites` +directory. The user may re-click the **Submit** button to update the submitted rewrite multiple +times. + +### Enabling users to like generated responses + +The project's latest web UI includes the **Like this response** button at the bottom of the panel +(see Figure 6). When this button is clicked, the server logs the event of "like" for the response. +However, clicking the **Liked** button again will reset the button. Then the server logs this reset +event of "like" for the response. + +The user may click this like button multiple times to toggle the state of the like button. But when +examining the logs, only the final state of the like button will be considered for the response. + +## Issues identified + +The following issues have been identified and need to be worked on: + +- **Logical content chunking**: When splitting documents, content is divided into chunks only by the + current 1500-character limit. This approach splits large docs into small chunks, which results in + losing context, especially in large how-to guides with a long sequence of instructions. 
**[Done]** +- **Clean plain text for embeddings**: The current Markdown processing method doesn’t fully filter + out all Markdown and HTML syntax, which seems to have a negative influence on embeddings. +- **Database support for embeddings**: The system needs a proper database setup for faster lookup + and for enabling us to store metadata (such as URLs) next to embeddings. **[Done]** +- **Better prompting**: We haven’t widely explored all best practices in prompting. Also consider + supporting dynamic prompting given user questions. +- **Real-world feedback**: We need to set up a feedback loop to collect real-world user + interactions, including example prompts and responses, and start using them as part of embeddings. + +## Set up Docs Agent + +This section provides instructions on how to set up the Docs Agent project on a Linux host machine. + +### 1. Prerequisites + +1. Update the Linux package repositories on the host machine: + + ```posix-terminal + sudo apt update + ``` + +2. Install the following dependencies: + + ```posix-terminal + sudo apt install git pip python3-venv + ``` + +3. Install `poetry`: + + ```posix-terminal + curl -sSL https://install.python-poetry.org | python3 - + ``` + + **Important**: Make sure that `$HOME/.local/bin` is in your `PATH` variable. + +4. Set the following environment variable: + + ```posix-terminal + export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring + ``` + + This is a workaround for a [known issue][poetry-known-issue] in `poetry`. + +5. Set the PaLM API key as an environment variable: + + ``` + export PALM_API_KEY=<YOUR_API_KEY_HERE> + ``` + + Replace `<YOUR_API_KEY_HERE>` with your API key for the + [Generative Language API][genai-doc-site]. + + **Tip**: To avoid repeating these `export` lines, add them to your + `$HOME/.bashrc` file. + +### 2. Clone this project repository and install dependencies + +**Note**: This guide assumes that you're creating a new project directory +from your `$HOME` directory. + +1.
Clone the `generative-ai-docs` repo: + + ```posix-terminal + git clone https://github.com/google/generative-ai-docs + ``` + +2. Go to the Docs Agent project directory: + + ```posix-terminal + cd ./generative-ai-docs/demos/palm/python/docs-agent + ``` + +3. (**Optional**) If you plan on contributing to the Docs agent project, + run the following command to set up your commit hook: + + ``` + curl -Lo `git rev-parse --git-dir`/hooks/commit-msg https://gerrit-review.googlesource.com/tools/hooks/commit-msg ; chmod +x `git rev-parse --git-dir`/hooks/commit-msg + ``` + +4. Install dependencies using `poetry`: + + ```posix-terminal + poetry install + ``` + + This may take some time to complete. + +5. Enter the `poetry` shell environment: + + ```posix-terminal + poetry shell + ``` + + **Important**: From this point, all command lines in the sections below need to run + in this `poetry shell` environment. + + +Now, the next step is to populate a vector database with your own documents. See the +[Populate a new vector database from Markdown files][populate-db-steps] section below. + +## Populate a new vector database from Markdown files + +This section provides instructions on how to bring your own set of documents and create and +populate a vector database (`vector_stores/chroma`) on your host machine. The Python scripts +in the project's `scripts` directory can help you populate documents, embeddings and metadata +from Markdown files (`.md`). + +This section uses the [open source Flutter documents][flutter-docs-src] as an example dataset, +which are the source Markdown files for the [Flutter website][flutter-docs-site]. To download +the open source Flutter documents on your host machine, run the following command: + +``` +git clone --recurse-submodules https://github.com/flutter/website.git +``` + +**Note**: The Flutter documents are used in this section as an example dataset only. 
The +Python scripts below are designed to work with any documents in the standard Markdown format. + +### 1. Convert Markdown files to plain text files + +Before generating embeddings, you need to process Markdown files into small chunks of +plain text files. + +To convert Markdown files to plain text files: + +1. Go to the project directory, for example: + + ``` + cd $HOME/docs-agent + ``` + +2. Open the `config.yaml` file using a text editor, for example: + + ``` + nano config.yaml + ``` + +3. (**Optional**) Edit `output_path` to a directory that will store plain text files, + for example: + + ``` + output_path: "data/plain_docs" + ``` + + The example above creates a new directory named `data/plain_docs` in the current + project directory (`$HOME/docs-agent`). Then the project uses this `output_path` + directory to store the plain text files processed from the input Markdown files. + +4. Under the `input` field, define the following entries to specify the directories + that contain your source Markdown files. + + - `path`: The directory where the source Markdown files are stored. + - `url_prefix`: The prefix used to create URLs for the source Markdown files. + If the URLs do not exist for the source files, provide a mock string. + - (**Optional**) `exclude_path`: The sub-directory to be excluded from + the path directory. + + The example below shows the entries for the Flutter documents downloaded on the + host machine (that is, in the `/home/downloads/website` directory): + + ``` + input: + - path: "/home/downloads/website/src" + url_prefix: "https://docs.flutter.dev" + ``` + + You can also provide a number of input directories (`path` and `url_prefix` sets) under + the input field, for example: + + ``` + input: + - path: "/home/downloads/website/src/ui" + url_prefix: "https://docs.flutter.dev/ui" + - path: "/home/downloads/website/src/codelabs" + url_prefix: "https://docs.flutter.dev/codelabs" + ``` + +5. Save the file and exit the text editor. + +6. 
Run the Python script: + + ``` + python3 scripts/markdown_to_plain_text.py + ``` + + For a large number of Markdown files, it may take a few minutes to process + them. + +### 2. Populate a new vector database + +**Important**: If the `vector_stores/chroma` directory already exists, delete +(or move) the `chroma` directory before populating a new vector database. Also, +if the Docs Agent chat app is already running using this `chroma` directory, shut down +the app before deleting the directory. + +Once you have plain text files processed and stored in the `output_path` directory, +you can run the `populate_vector_database.py` script to populate a vector database +with the contents of the plain text files and their embeddings (and metadata). + +To populate a new vector database: + +1. Go to the project directory, for example: + + ``` + cd $HOME/docs-agent + ``` + +2. Create and populate a new vector database: + + ``` + python3 ./scripts/populate_vector_database.py + ``` + + This script uses the `output_path` directory from the `config.yaml` file + to locate plain text files and creates a new directory at + `$HOME/docs-agent/vector_stores/chroma`, which contains embeddings and metadata. + +3. To test the new vector database, run the following script: + + **Note**: Adjust `QUESTION` in `scripts/test_vector_database.py` to be suitable for + the content in your database. + + ``` + python3 ./scripts/test_vector_database.py + ``` + +The next step is to launch the Docs Agent chat app to use the new vector database. See +the [Start the Docs Agent chat app][start-the-app-steps] section below. + +## Start the Docs Agent chat app + +**Important**: This section assumes that you've already created a `vector_stores/chroma` +directory, which contains artifacts for the vector database. If you haven't, see the +[Populate a new vector database from Markdown files][populate-db-steps] section above.
+ +This Flask app lets users interact with the Docs Agent service through a web browser. The +`launch.sh` script deploys the Flask app in a Python virtual environment (`poetry`), +allowing you to easily bring up and destroy the Flask app instance. + +### 1. Configure the Docs Agent chat app + +To customize settings in the Docs Agent chat app, do the following: + +1. (**Optional**) Update the `condition.txt` file to provide a more specific prompt condition + for your custom dataset, for example: + + ``` + You are a helpful chatbot answering questions from developers working on Flutter apps. + Read the following context first and answer the question at the end: + ``` + +2. Edit the `config.yaml` file to update the following field: + + ``` + product_name: "My product" + ``` + + Replace `My product` with your product name (which shows up as the main label on the UI), + for example: + + ``` + product_name: "Flutter" + ``` + +### 2. Launch the Docs Agent chat app + +To launch the Docs Agent chat app, do the following: + +1. Go to the project directory, for example: + + ``` + cd $HOME/docs-agent + ``` + +2. Launch the Docs Agent chat app: + + ``` + poetry run ./chatbot/launch.sh + ``` + + **Note**: The Docs Agent chat app runs on port 5000 by default. If you have an application + already running on port 5000 on your host machine, you can use the `-p` flag to specify + a different port (for example, `poetry run ./chatbot/launch.sh -p 5050`). + + Once the app starts running, this command prints output similar to the following: + + ``` + $ poetry run ./chatbot/launch.sh + This script starts your flask app in a virtual environment + Installing all dependencies through pip... + Using the local vector database created at /home/alice/docs-agent/vector_database + Using embedded DuckDB with persistence: data will be stored in: /home/alice/docs-agent/vector_database + * Serving Flask app 'chatbot' + * Debug mode: on + WARNING: This is a development server.
Do not use it in a production deployment. Use a production WSGI server instead. + * Running on http://example.com:5000 + Press CTRL+C to quit + * Restarting with stat + Using the local vector database created at /home/alice/docs-agent/vector_database + Using embedded DuckDB with persistence: data will be stored in: /home/alice/docs-agent/vector_database + * Debugger is active! + * Debugger PIN: 129-640-957 + ``` + + Notice the line that shows the URL of this server (`http://example.com:5000` in + the example above). + +3. Open the URL above on a browser. + + Now, users can start asking questions related to the source dataset. + +**The Docs Agent chat app is all set!** + +## Contribute to Docs Agent + +The section provides instructions on how to set up an account with Google +and start contributing to the Docs Agent project. + +To set up your account and contribute to the Docs Agent project, +do the following: + +1. Visit https://cla.developers.google.com/ to see your current agreements + or to sign a new one. + +1. Clone the `generative-ai-docs` repository on your host machine: + + ``` + git clone https://github.com/google/generative-ai-docs + ``` + +1. Go to the Docs Agent project directory: + + ``` + cd ./generative-ai-docs/demos/palm/python/docs-agent + ``` + +1. To set up your commit hook, run the following command: + + ``` + curl -Lo `git rev-parse --git-dir`/hooks/commit-msg https://gerrit-review.googlesource.com/tools/hooks/commit-msg ; chmod +x `git rev-parse --git-dir`/hooks/commit-msg + ``` + +1. Create a new Gerrit change, for example: + + ``` + git add + ``` + + ``` + git commit [--amend] + ``` + +1. Upload the change for review: + + ``` + git push origin HEAD:refs/for/main + ``` + +## Contributors + +Rundong Du (`rundongdu@`), Kyo Lee (`kyol@`), Nick Van der Auwermeulen (`nickvander@`), +and Meggin Kearney (`mkearney@`). 
+ + + +[contribute-to-docs-agent]: #contribute-to-docs-agent +[set-up-docs-agent]: #set-up-docs-agent +[markdown-to-plain-text]: ./scripts/markdown_to_plain_text.py +[populate-vector-database]: ./scripts/populate_vector_database.py +[condition-txt]: ./condition.txt +[context-source-01]: http://eventhorizontelescope.org +[fact-check-section]: #using-a-palm-2-model-to-fact_check-its-own-response +[related-questions-section]: #using-a-palm-2-model-to-suggest-related-questions +[submit-a-rewrite]: #enabling-users-to-submit-a-rewrite-of-a-generated-response +[like-generate-responses]: #enabling-users-to-like-generated-responses +[populate-db-steps]: #populate-a-new-vector-database-from-markdown-files +[start-the-app-steps]: #start-the-docs-agent-chat-app +[launch-script]: ./chatbot/launch.sh +[genai-doc-site]: https://developers.generativeai.google/ +[chroma-docs]: https://docs.trychroma.com/ +[flutter-docs-src]: https://github.com/flutter/website/tree/main/src +[flutter-docs-site]: https://docs.flutter.dev/ +[poetry-known-issue]: https://github.com/python-poetry/poetry/issues/1917 diff --git a/demos/palm/python/docs-agent/chatbot/__init__.py b/demos/palm/python/docs-agent/chatbot/__init__.py new file mode 100644 index 000000000..e99469e0c --- /dev/null +++ b/demos/palm/python/docs-agent/chatbot/__init__.py @@ -0,0 +1,21 @@ +# +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from flask import Flask +from chatbot import chatui + +app = Flask(__name__) +app.register_blueprint(chatui.bp) diff --git a/demos/palm/python/docs-agent/chatbot/chatui.py b/demos/palm/python/docs-agent/chatbot/chatui.py new file mode 100644 index 000000000..c0f0a3322 --- /dev/null +++ b/demos/palm/python/docs-agent/chatbot/chatui.py @@ -0,0 +1,286 @@ +# +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Chatbot web service for Docs Agent""" + +from flask import ( + Blueprint, + render_template, + request, + redirect, + url_for, + json, +) +import markdown +from bs4 import BeautifulSoup +import urllib +import os +from datetime import datetime +from pytz import timezone +import pytz +import uuid +from scripts import read_config + +from chroma import Format +from docs_agent import DocsAgent + + +# Read the configuration file +config = read_config.ReadConfig() +# Create the 'rewrites' directory if it does not exist. 
rewrites_dir = "rewrites"
# exist_ok=True closes the gap between checking for the directory and
# creating it, so a concurrent start-up cannot fail with FileExistsError.
os.makedirs(rewrites_dir, exist_ok=True)

product = config.returnConfigValue("product_name")
bp = Blueprint("chatui", __name__)
docs_agent = DocsAgent()

# Shown in place of an answer whenever the text model returns None.
_NO_ANSWER_MESSAGE = (
    "The PaLM API is not able to answer this question at the moment. "
    "Try to rephrase the question and ask again."
)
# Prompt used to generate the "Related questions" list.
_RELATED_QUESTIONS_PROMPT = (
    "What are 5 questions developers might ask after reading the context?"
)


def _https_server_url():
    """Return the request's root URL with a plain ``http://`` scheme upgraded.

    Only the scheme prefix is rewritten. A blanket
    ``str.replace("http", "https")`` would turn an ``https://`` root into
    ``httpss://`` and would also touch any later "http" in the host name.
    """
    root = request.url_root
    if root.startswith("http://"):
        return "https://" + root[len("http://"):]
    return root


def _json_text(payload, key):
    """Return ``payload[key]`` as a string, or "" when the key is absent/null."""
    value = payload.get(key)
    return "" if value is None else str(value)


def _rewrite_filename(question_text, timestamp):
    """Build the path of the Markdown file that stores a submitted rewrite.

    Args:
        question_text: The user's question; it becomes the file name slug.
        timestamp: Already formatted date string appended to the slug.

    Returns:
        A path inside ``rewrites_dir`` ending in ``-<timestamp>.md``.
    """
    slug = (
        question_text.strip()
        .replace(" ", "-")
        .replace("?", "")
        .replace("'", "")
        .lower()
    )
    # The question is user-supplied: neutralise path separators (and NUL,
    # which open() rejects) so the file always lands inside rewrites_dir.
    for unsafe in ("/", "\\", "\x00"):
        slug = slug.replace(unsafe, "-")
    return rewrites_dir + "/" + slug + "-" + timestamp + ".md"


def _link_related_questions(related_questions_html):
    """Wrap the text of every ``<li>`` in a link to the ``question`` view.

    Items whose ``.string`` is None are left untouched.

    Returns:
        The modified ``BeautifulSoup`` document.
    """
    # "import urllib" alone does not guarantee the submodule is loaded.
    import urllib.parse

    soup = BeautifulSoup(related_questions_html, "html.parser")
    for item in soup.find_all("li"):
        if item.string is None:
            continue
        link = soup.new_tag(
            "a",
            href=url_for(
                "chatui.question", ask=urllib.parse.quote_plus(item.string)
            ),
        )
        link.string = item.string
        item.string = ""
        item.append(link)
    return soup


def _render_answer(question_captured):
    """Answer ``question_captured`` and render the result page.

    Queries the vector store for context, asks the text model for an answer,
    a fact-check of that answer and a list of related questions, logs the
    question/response pair and renders ``chatui/index.html``.
    """
    uuid_value = uuid.uuid1()
    query_result = docs_agent.query_vector_store(question_captured)
    context = markdown.markdown(query_result.fetch_formatted(Format.CONTEXT))
    context_with_prefix = docs_agent.add_instruction_to_context(context)

    response_in_markdown = docs_agent.ask_text_model_with_context(
        context_with_prefix, question_captured
    )
    if response_in_markdown is None:
        response_in_markdown = _NO_ANSWER_MESSAGE
    response_in_html = markdown.markdown(response_in_markdown)
    metadatas = markdown.markdown(
        query_result.fetch_formatted(Format.CLICKABLE_URL)
    )

    fact_checked_answer_in_markdown = docs_agent.ask_text_model_to_fact_check(
        context_with_prefix, response_in_markdown
    )
    if fact_checked_answer_in_markdown is None:
        fact_checked_answer_in_markdown = _NO_ANSWER_MESSAGE
    fact_checked_answer_in_html = markdown.markdown(fact_checked_answer_in_markdown)

    related_in_markdown = docs_agent.ask_text_model_with_context(
        response_in_markdown, _RELATED_QUESTIONS_PROMPT
    )
    # The same model call can return None (see the check above) and
    # markdown.markdown() cannot render None; fall back to an empty list.
    related_questions = _link_related_questions(
        markdown.markdown(related_in_markdown or "")
    )

    fact_link = markdown.markdown(
        query_result.fetch_nearest_formatted(Format.CLICKABLE_URL)
    )
    server_url = _https_server_url()
    # Log the question and response to the log file.
    log_question(uuid_value, question_captured, response_in_markdown)
    return render_template(
        "chatui/index.html",
        question=question_captured,
        context=context,
        context_with_prefix=context_with_prefix,
        response_in_markdown=response_in_markdown,
        response_in_html=response_in_html,
        product=product,
        metadatas=metadatas,
        fact_checked_answer=fact_checked_answer_in_html,
        fact_link=fact_link,
        related_questions=related_questions,
        server_url=server_url,
        uuid=uuid_value,
    )


@bp.route("/", methods=["GET", "POST"])
def index():
    """Render the landing page of the chat UI."""
    return render_template(
        "chatui/index.html", product=product, server_url=_https_server_url()
    )


@bp.route("/like", methods=["GET", "POST"])
def like():
    """Log a like/unlike event posted as JSON (keys ``like`` and ``uuid``).

    Non-POST requests are redirected to the index page.
    """
    if request.method != "POST":
        return redirect(url_for("chatui.index"))
    json_data = json.loads(request.data)
    is_like = json_data.get("like")
    uuid_found = json_data.get("uuid")
    log_like(is_like, str(uuid_found).strip())
    return "OK"


@bp.route("/rewrite", methods=["GET", "POST"])
def rewrite():
    """Store a user-submitted rewrite of a generated response.

    Expects a JSON body with ``user_id``, ``question``, ``original_response``
    and ``rewrite``. The submission is printed to stdout and written as a
    Markdown file in ``rewrites_dir``. Non-POST requests are redirected to the
    index page.
    """
    if request.method != "POST":
        return redirect(url_for("chatui.index"))
    json_data = json.loads(request.data)
    # Absent or null fields become "" instead of raising on concatenation.
    user_id = _json_text(json_data, "user_id")
    question_captured = _json_text(json_data, "question")
    original_response = _json_text(json_data, "original_response")
    rewrite_captured = _json_text(json_data, "rewrite")

    date = datetime.now(tz=pytz.utc).astimezone(timezone("US/Pacific"))
    timestamp = date.strftime("%m%d%Y-%H%M%S")
    report = (
        "Submitted by: " + user_id + "\n\n"
        + "# " + question_captured.strip() + "\n\n"
        + "## Original response\n\n"
        + original_response.strip() + "\n\n"
        + "## Rewrite\n\n"
        + rewrite_captured + "\n"
    )
    print("[" + timestamp + "] A user has submitted a rewrite.")
    print(report)
    filename = _rewrite_filename(question_captured, timestamp)
    # The with-statement closes the file; no explicit close() is needed.
    with open(filename, "w", encoding="utf-8") as file:
        file.write(report)
    return "OK"


@bp.route("/result", methods=["GET", "POST"])
def result():
    """Answer the question submitted through the search form (POST only)."""
    if request.method != "POST":
        return redirect(url_for("chatui.index"))
    return _render_answer(request.form["question"])


# The <ask> URL variable feeds the view's ``ask`` parameter; without it the
# view cannot be called and the related-question links cannot be built.
@bp.route("/question/<ask>", methods=["GET", "POST"])
def question(ask):
    """Answer a related question passed in the URL (GET only).

    Args:
        ask: The question, encoded with ``urllib.parse.quote_plus``.
    """
    # "import urllib" alone does not guarantee the submodule is loaded.
    import urllib.parse

    if request.method != "GET":
        return redirect(url_for("chatui.index"))
    return _render_answer(urllib.parse.unquote_plus(ask))


# Log the question and response to the server's log file.
def _log_header(uid):
    """Return the ``[timestamp][UID ...]`` line that opens a log entry.

    The timestamp is the current time converted from UTC to US/Pacific.
    """
    pacific_now = datetime.now(tz=pytz.utc).astimezone(timezone("US/Pacific"))
    stamp = pacific_now.strftime("%m/%d/%Y %H:%M:%S %Z")
    return "[" + stamp + "][UID " + str(uid) + "]\n"


def log_question(uid, user_question, response):
    """Echo a question/response pair to stdout and append it to the log file.

    Args:
        uid: Identifier of the interaction; converted with ``str()``.
        user_question: The question text; surrounding whitespace is stripped.
        response: The generated response; surrounding whitespace is stripped.
    """
    header = _log_header(uid)
    print("UID: " + str(uid))
    question_text = user_question.strip()
    print("Question: " + question_text + "\n")
    print("Response:")
    response_text = response.strip()
    print(response_text + "\n")
    with open("chatui_logs.txt", "a", encoding="utf-8") as log_file:
        log_file.writelines(
            [
                header,
                "# " + question_text + "\n\n",
                response_text + "\n\n",
            ]
        )


def log_like(is_like, uid):
    """Echo a like event to stdout and append it to the log file.

    Args:
        is_like: The like state reported by the client; logged via ``str()``.
        uid: Identifier of the interaction the like refers to.
    """
    header = _log_header(uid)
    print("UID: " + str(uid))
    like_text = "Like: " + str(is_like)
    print(like_text)
    with open("chatui_logs.txt", "a", encoding="utf-8") as log_file:
        log_file.writelines([header, like_text + "\n\n"])
#

# Default values
port=5000
name='chatbot'

# Print the command-line synopsis.
usage() {
  echo "Usage: $0 [-n app_name] [-p port]"
}

# Specify port number with -p argument `launch.sh -p 5555`
# (-n overrides the Flask app name; it was accepted but ignored before).
while getopts "n:p:h" opt; do
  case $opt in
    n) name="${OPTARG}";;
    p) port="${OPTARG}";;
    # Asking for help is not an error, so exit with status 0.
    h) usage; exit 0;;
    # getopts has already reported the bad option; OPTARG is unset here.
    \?) usage >&2; exit 1;;
  esac
done
# Define your hostname
if [[ -z "$HOSTNAME" ]]; then
  export HOSTNAME="localhost"
fi
export FLASK_PORT="$port"
export FLASK_APP="$name"
export FLASK_DEBUG=true

# Quote the expansions so unusual values are passed as single arguments.
flask run --host="$HOSTNAME" --port="$FLASK_PORT"
+ */ + +/* ======= General style for HTML elements ======= */ + +body { + font: 16px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; + color: #333; + font-weight: 300; + max-width: 960px; + margin: auto; + background-color: #d9d9d9; + padding-top: 15px; + padding-bottom: 15px; +} + +a { + color: #22578c; +} + +p { + margin: 0 0 1em; + line-height: 130%; +} + +h1 { + margin: 0 0 0.5em; + font-weight: 500; + font-size: 2.0em; + margin-left: 0.8em; + margin-top: 0.3em; +} + +h2 { + margin: 0; + margin-top: 17px; + margin-bottom: 15px; +} + +h3 { + margin: 0; + margin-top: 10px; + margin-bottom: 10px; +} + +h4 { + color: #505050; + margin: 0; + margin-top: 3px; + margin-bottom: 10px; +} + +li { + margin: 0 0 0.3em; +} + +/* ======= Style layout by ID ======= */ + +#callout-box { + margin: auto; + max-width: 800px; + font: 13px arial, sans-serif; + background-color: white; + border-style: solid; + border-width: 1px; + padding: 10px 25px; + box-shadow: 5px 5px 5px grey; + border-radius: 15px; +} + +/* ======= Style by class ======= */ + +.hidden { + display: none; +} + +.disable { + display: none; +} + +.header-wrapper { + display: flex; +} + +.loading { + font: 15px arial, sans-serif; + width: 100%; + margin-left: 12px; + color: #505050; + padding: 2px; +} + +.notselected { + background-color: #303936e6; + padding-top: 3px; + padding-bottom: 5px; +} + +.notselected:hover { + background-color: #121a17e6; + cursor:pointer; +} + +.selected { + background-color: #1e6a9c; + padding-top: 7px; + padding-bottom: 7px; +} + +.selected:hover { + background-color: #0a619a; + cursor:pointer; +} + +.rewrite { + padding: 15px; + border: 2px solid #000; + margin-top: 6px; + border-radius: 15px; +} + +.question, .response, .response-text, .fact-checked-text, .related-questions { + max-width: 700px; + margin-left: 3px; +} + +.full-response { + max-width: 700px; + margin-left: 10px; +} + +/* ======= Style buttons by ID ======= */ + +#rewrite-button { + border: 0; + background-color: 
#cf633ff2; + color: #fff; + padding: 7px; + border-radius: 5px; + cursor:pointer; +} + +#rewrite-button:hover { + background: #ce3705f2; + cursor:pointer; +} + +#like-button { + border: 0; + color: #fff; + padding-left: 7px; + padding-right: 7px; + border-radius: 5px; + cursor:pointer; +} + +#submit-button { + border: 0; + background: none; + background-color: #CF5C3F; + color: #fff; + padding: 7px; + border-radius: 5px; + cursor:pointer; +} + +#submit-button:hover { + background: #ce3705f2; + cursor:pointer; +} + +#submit-result { + color: #027f02d6; +} + +#edit-text-area { + font: 13px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; + max-height: 500px; + max-width: 650px; + height: 300px; + width: 650px; + padding: 8px; +} + +#rewrite-question-header { + margin: 0; + margin-bottom: 5px; +} + +#rewrite-response-header { + margin: 0; + margin-top: 10px; + margin-bottom: 5px; +} + +#user-id { + margin: 0; + margin-top: 10px; + margin-bottom: 15px; +} + +#fact-check-url { + margin: 0 0 0.7em; +} + +/* ======= Search Box ======= */ + +.search { + border: 2px solid #CF5C3F; + overflow: auto; + max-width: 700px; + margin-top: 15px; + margin-left: 10px; + margin-bottom: 10px; + border-radius: 5px; +} + +.search input[type="text"] { + border: 0; + width: 91%; + padding: 10px; +} + +.search input[type="text"]:focus { + outline: 0; +} + +.search input[type="submit"] { + border: 0; + background: none; + background-color: #CF5C3F; + color: #fff; + float: right; + padding: 10px; + -moz-border-radius-top-right: 5px; + -webkit-border-radius-top-right: 5px; + -moz-border-radius-bottom-right: 5px; + -webkit-border-radius-bottom-right: 5px; + cursor:pointer; +} + +/* ======= Accordion ======= */ + +.accordion { + max-width: 65em; + margin-bottom: 1em; +} + +.accordion > input[type="checkbox"] { + position: absolute; + left: -100vw; +} + +.accordion .content { + overflow-y: hidden; + height: 0; + transition: height 0.3s ease; +} + +.accordion .reference-content { + font-size: 
13px; +} + +.accordion > input[type="checkbox"]:checked ~ .content { + height: auto; + overflow: visible; + padding: 15px; + border: 2px solid #000; + margin-top: 6px; + border-radius: 15px; +} + +.accordion .handle { + margin: 0; + font-size: 1.125em; + line-height: 1.2em; +} + +.accordion label { + display: block; + font-weight: normal; + border: 2px solid #000; + padding: 12px; + background: #4490b8ab; + border-radius: 15px; +} + +.accordion label:hover, +.accordion label:focus { + background: #d9d9d9; + cursor:pointer; +} + +.accordion .handle label::before { + font-family: fontawesome, sans-serif; + display: inline-block; + content: "\2964"; + margin-right: 10px; + font-size: .58em; + line-height: 1.556em; + vertical-align: middle; +} + +.accordion > input[type="checkbox"]:checked ~ .handle label::before { + content: "\2965"; +} + +.accordion p:last-child { + margin-bottom: 0; +} + diff --git a/demos/palm/python/docs-agent/chatbot/static/images/favicon.png b/demos/palm/python/docs-agent/chatbot/static/images/favicon.png new file mode 100644 index 0000000000000000000000000000000000000000..dd83d19ceb4ed5362644b083dda458b2455a8ad6 GIT binary patch literal 608 zcmV-m0-ybfP)Px%8c9S!RA@u(m}^c0F${#$lL03KP6nI|G!jNrukAQ5br+?-+OCtG&mMcz-pa*a zw~Om98{p9u+!gq56?ppvdMc$n6Wk}ge=nu*=b;?Tu#kv;ZBzn?VVDtsUg4T^2&5MP z;;oD$qj*6S+l1|eb^KX?uvU_iE@lsaCBEn8);@rKNvkiV`9b!fvezSA>(d}j5G@tR ziiaRfA-u#YVkfp3gLFe=0rU+}W<3fy2G=Z~M}9{QmuBNVJ*wi%uoqdki*F=idI4EiWoPExeCmvd@ z{?G$JGewo{28gon35ssk3c*!y6CG860oWE<>sZc-dZMce5kRYbEbDmouQfl`ag1Bp z9stVLf4|xtqk5Tp@3*_&c9{Y)Dq(iEG5LLiiEZ@&K2i2IC0Kl92*o)8l z2px&b#(OUK+uN&v_IY+%Wvas7uEpzz;Gx$nVOEny06Z-+&D;{N0S>@30OZ(=%w~U4 zAvP0l9YgLkTL9HXwa%M(y{cscOw6{m)vkbKrFQIIRfw(%SO=}M0341ko-j>*7J$@) zOy)ZOgLVc0^C9^tVcDJ;I?QEnvNCQ1jAS?ScS?4D_lMKU9+(N>o^sC)fXwsn~S)MVay(rcI0000