diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 57b376c..f4af91b 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -21,6 +21,5 @@ jobs: - name: Python pylint run: | pip install pylint==2.10.2 - pylint --rcfile=.pylintrc --output-format=colorized src_towhee - pylint --rcfile=.pylintrc --output-format=colorized src_langchain + pylint --rcfile=.pylintrc --output-format=colorized src pylint --rcfile=.pylintrc --output-format=colorized offline_tools diff --git a/.gitignore b/.gitignore index 1af3908..5446e6d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ **/__pycache__ **/tmp +**/*.egg-info +**/*.db +**/build diff --git a/Contributing.md b/Contributing.md index 0302fd0..4fdc619 100644 --- a/Contributing.md +++ b/Contributing.md @@ -65,8 +65,8 @@ If you're interested in contributing to the `zilliztech/akcio` codebase, follow 4. During development, you might want to run `pylint`. You can do so with one of the commands below: ```bash $ pip install pylint==2.10.2 - $ pylint --rcfile=.pylintrc --output-format=colorized src_towhee - $ pylint --rcfile=.pylintrc --output-format=colorized src_langchain + $ pylint --rcfile=.pylintrc --output-format=colorized src.towhee + $ pylint --rcfile=.pylintrc --output-format=colorized src.langchain $ pylint --rcfile=.pylintrc --output-format=colorized offline_tools ``` diff --git a/README.md b/README.md index 75f8a78..6044f7a 100644 --- a/README.md +++ b/README.md @@ -71,34 +71,34 @@ It also supports different integrations of LLM service and databases: The option using Towhee simplifies the process of building a system by providing [pre-defined pipelines](https://towhee.io/tasks/pipeline). These built-in pipelines require less coding and make system building much easier. If you require customization, you can either simply modify configuration or create your own pipeline with rich options of [Towhee Operators](https://towhee.io/tasks/operator). 
-- [Pipelines](./src_towhee/pipelines) +- [Pipelines](./src/towhee/pipelines) - **Insert:** The insert pipeline builds a knowledge base by saving documents and corresponding data in database(s). - **Search:** The search pipeline enables the question-answering capability powered by information retrieval (semantic search and optional keyword match) and LLM service. - **Prompt:** a prompt operator prepares messages for LLM by assembling system message, chat history, and the user's query processed by template. -- [Memory](./src_towhee/memory): - The memory storage stores chat history to support context in conversation. (available: [most SQL](./src_towhee/memory/sql.py)) +- [Memory](./src/towhee/memory): + The memory storage stores chat history to support context in conversation. (available: [most SQL](./src/towhee/memory/sql.py)) ### Option 2: LangChain The option using LangChain employs the use of [Agent](https://python.langchain.com/docs/modules/agents) in order to enable LLM to utilize specific tools, resulting in a greater demand for LLM's ability to comprehend tasks and make informed decisions. -- [Agent](./src_langchain/agent) +- [Agent](./src/langchain/agent) - **ChatAgent:** agent ensembles all modules together to build up qa system. - Other agents (todo) -- [LLM](./src_langchain/llm) +- [LLM](./src/langchain/llm) - **ChatLLM:** large language model or service to generate answers. -- [Embedding](./src_langchain/embedding/) +- [Embedding](./src/langchain/embedding/) - **TextEncoder:** encoder converts each text input to a vector. - Other encoders (todo) -- [Store](./src_langchain/store) +- [Store](./src/langchain/store) - **VectorStore:** vector database stores document chunks in embeddings, and performs document retrieval via semantic search. - - **ScalarStore:** optional, database stores metadata for each document chunk, which supports additional information retrieval. 
(available: [Elastic](src_langchain/store/scalar_store/es.py)) + - **ScalarStore:** optional, database stores metadata for each document chunk, which supports additional information retrieval. (available: [Elastic](src/langchain/store/scalar_store/es.py)) - **MemoryStore:** memory storage stores chat history to support context in conversation. -- [DataLoader](./src_langchain/data_loader/) +- [DataLoader](./src/langchain/data_loader/) - **DataParser:** tool loads data from given source and then splits documents into processed doc chunks. ## Deployment @@ -228,7 +228,7 @@ The option using LangChain employs the use of [Agent](https://python.langchain.c ## Load data -The `insert` function in [operations](./src_langchain/operations.py) loads project data from url(s) or file(s). +The `insert` function in [operations](./src/langchain/operations.py) loads project data from url(s) or file(s). There are 2 options to load project data: diff --git a/config.py b/config.py index 4f778c5..831e0ce 100644 --- a/config.py +++ b/config.py @@ -115,7 +115,7 @@ raise NotImplementedError RERANK_CONFIG = { - 'rerank': True, # or False + 'rerank': False, # or True 'rerank_model': rerank_model, 'threshold': 0.0, 'rerank_device': -1 # -1 will use cpu @@ -126,7 +126,7 @@ 'chunk_size': 300 } -QUESTIONGENERATOR_CONFIG = { - 'model_name': 'gpt-3.5-turbo', - 'temperature': 0, -} +# QUESTIONGENERATOR_CONFIG = { +# 'model_name': 'gpt-3.5-turbo', +# 'temperature': 0, +# } diff --git a/Dockerfile b/docker/Dockerfile similarity index 100% rename from Dockerfile rename to docker/Dockerfile diff --git a/gradio_demo.py b/gradio_demo.py index a59282a..8390559 100644 --- a/gradio_demo.py +++ b/gradio_demo.py @@ -17,9 +17,9 @@ 'The service should start with either "--langchain" or "--towhee".' 
if USE_LANGCHAIN: - from src_langchain.operations import chat, insert, check, drop, get_history, clear_history, count # pylint: disable=C0413 + from src.langchain.operations import chat, insert, check, drop, get_history, clear_history, count # pylint: disable=C0413 if USE_TOWHEE: - from src_towhee.operations import chat, insert, check, drop, get_history, clear_history, count # pylint: disable=C0413 + from src.towhee.operations import chat, insert, check, drop, get_history, clear_history, count # pylint: disable=C0413 def create_session_id(): diff --git a/main.py b/main.py index 76e3655..b1894cc 100644 --- a/main.py +++ b/main.py @@ -40,10 +40,10 @@ 'The service should start with either "--langchain" or "--towhee".' if USE_LANGCHAIN: - from src_langchain.operations import chat, insert, drop, check, get_history, clear_history, count # pylint: disable=C0413 + from src.langchain.operations import chat, insert, drop, check, get_history, clear_history, count # pylint: disable=C0413 chat = partial(chat, enable_agent=ENABLE_AGENT) if USE_TOWHEE: - from src_towhee.operations import chat, insert, drop, check, get_history, clear_history, count # pylint: disable=C0413 + from src.towhee.operations import chat, insert, drop, check, get_history, clear_history, count # pylint: disable=C0413 if ENABLE_MONITER: from moniter import enable_moniter # pylint: disable=C0413 from prometheus_client import generate_latest, REGISTRY # pylint: disable=C0413 diff --git a/offline_tools/insert.py b/offline_tools/insert.py index 4e2f8d8..d6bc0c0 100644 --- a/offline_tools/insert.py +++ b/offline_tools/insert.py @@ -7,7 +7,7 @@ sys.path.append(os.path.join(os.path.dirname(__file__), '..')) -from src_langchain.embedding import TextEncoder # pylint: disable=C0413 +from src.langchain.embedding import TextEncoder # pylint: disable=C0413 from offline_tools.generator_questions import get_output_csv # pylint: disable=C0413 from offline_tools.utils.stackoverflow_json2csv import stackoverflow_json2csv # 
pylint: disable=C0413 from offline_tools.utils.load_npy import langchain_load # pylint: disable=C0413 diff --git a/offline_tools/utils/load_npy.py b/offline_tools/utils/load_npy.py index c96e43e..364fe40 100644 --- a/offline_tools/utils/load_npy.py +++ b/offline_tools/utils/load_npy.py @@ -5,7 +5,7 @@ sys.path.append(os.path.join(os.path.dirname(__file__), '..')) -from src_langchain.store import DocStore # pylint: disable=C0413 +from src.langchain.store import DocStore # pylint: disable=C0413 class DBReader(object): diff --git a/requirements.txt b/requirements.txt index f73fc10..c467913 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,11 +4,12 @@ pexpect pdf2image SQLAlchemy>=2.0.15 psycopg2-binary -openai +openai==0.28 gradio>=3.30.0 fastapi uvicorn towhee>=1.1.0 +pydantic<2.0 pymilvus elasticsearch>=8.0.0 prometheus-client diff --git a/src_langchain/__init__.py b/src/__init__.py similarity index 100% rename from src_langchain/__init__.py rename to src/__init__.py diff --git a/src_langchain/store/memory_store/__init__.py b/src/langchain/__init__.py similarity index 100% rename from src_langchain/store/memory_store/__init__.py rename to src/langchain/__init__.py diff --git a/src_langchain/agent/README.md b/src/langchain/agent/README.md similarity index 97% rename from src_langchain/agent/README.md rename to src/langchain/agent/README.md index fbab754..c429ff8 100644 --- a/src_langchain/agent/README.md +++ b/src/langchain/agent/README.md @@ -47,8 +47,7 @@ agent = ChatAgent.from_llm_and_tools( # Define a chain agent_chain = AgentExecutor.from_agent_and_tools( agent=agent, - tools=tools, - verbose=False + tools=tools ) # Run a test diff --git a/src_langchain/agent/__init__.py b/src/langchain/agent/__init__.py similarity index 100% rename from src_langchain/agent/__init__.py rename to src/langchain/agent/__init__.py diff --git a/src_langchain/agent/chat_agent.py b/src/langchain/agent/chat_agent.py similarity index 100% rename from 
src_langchain/agent/chat_agent.py rename to src/langchain/agent/chat_agent.py diff --git a/src_langchain/agent/output_parser.py b/src/langchain/agent/output_parser.py similarity index 100% rename from src_langchain/agent/output_parser.py rename to src/langchain/agent/output_parser.py diff --git a/src_langchain/agent/prompt.py b/src/langchain/agent/prompt.py similarity index 100% rename from src_langchain/agent/prompt.py rename to src/langchain/agent/prompt.py diff --git a/src_langchain/data_loader/README.md b/src/langchain/data_loader/README.md similarity index 100% rename from src_langchain/data_loader/README.md rename to src/langchain/data_loader/README.md diff --git a/src_langchain/data_loader/__init__.py b/src/langchain/data_loader/__init__.py similarity index 100% rename from src_langchain/data_loader/__init__.py rename to src/langchain/data_loader/__init__.py diff --git a/src_langchain/data_loader/data_parser.py b/src/langchain/data_loader/data_parser.py similarity index 100% rename from src_langchain/data_loader/data_parser.py rename to src/langchain/data_loader/data_parser.py diff --git a/src_langchain/data_loader/data_splitter.py b/src/langchain/data_loader/data_splitter.py similarity index 100% rename from src_langchain/data_loader/data_splitter.py rename to src/langchain/data_loader/data_splitter.py diff --git a/src_langchain/embedding/README.md b/src/langchain/embedding/README.md similarity index 100% rename from src_langchain/embedding/README.md rename to src/langchain/embedding/README.md diff --git a/src_langchain/embedding/__init__.py b/src/langchain/embedding/__init__.py similarity index 100% rename from src_langchain/embedding/__init__.py rename to src/langchain/embedding/__init__.py diff --git a/src_langchain/embedding/langchain_huggingface.py b/src/langchain/embedding/langchain_huggingface.py similarity index 100% rename from src_langchain/embedding/langchain_huggingface.py rename to src/langchain/embedding/langchain_huggingface.py diff --git 
a/src_langchain/embedding/openai_embedding.py b/src/langchain/embedding/openai_embedding.py similarity index 100% rename from src_langchain/embedding/openai_embedding.py rename to src/langchain/embedding/openai_embedding.py diff --git a/src_langchain/llm/README.md b/src/langchain/llm/README.md similarity index 100% rename from src_langchain/llm/README.md rename to src/langchain/llm/README.md diff --git a/src_langchain/llm/__init__.py b/src/langchain/llm/__init__.py similarity index 100% rename from src_langchain/llm/__init__.py rename to src/langchain/llm/__init__.py diff --git a/src_langchain/llm/dolly_chat.py b/src/langchain/llm/dolly_chat.py similarity index 100% rename from src_langchain/llm/dolly_chat.py rename to src/langchain/llm/dolly_chat.py diff --git a/src_langchain/llm/ernie.py b/src/langchain/llm/ernie.py similarity index 100% rename from src_langchain/llm/ernie.py rename to src/langchain/llm/ernie.py diff --git a/src_langchain/llm/minimax_chat.py b/src/langchain/llm/minimax_chat.py similarity index 100% rename from src_langchain/llm/minimax_chat.py rename to src/langchain/llm/minimax_chat.py diff --git a/src_langchain/llm/openai_chat.py b/src/langchain/llm/openai_chat.py similarity index 100% rename from src_langchain/llm/openai_chat.py rename to src/langchain/llm/openai_chat.py diff --git a/src_langchain/operations.py b/src/langchain/operations.py similarity index 99% rename from src_langchain/operations.py rename to src/langchain/operations.py index 8d6ab61..702024f 100644 --- a/src_langchain/operations.py +++ b/src/langchain/operations.py @@ -43,7 +43,6 @@ def chat(session_id, project, question, enable_agent=False): agent=agent, tools=tools, memory=memory_db.memory, - verbose=False ) try: final_answer = agent_chain.run(input=question) diff --git a/src_langchain/store/README.md b/src/langchain/store/README.md similarity index 100% rename from src_langchain/store/README.md rename to src/langchain/store/README.md diff --git 
a/src_langchain/store/__init__.py b/src/langchain/store/__init__.py similarity index 100% rename from src_langchain/store/__init__.py rename to src/langchain/store/__init__.py diff --git a/src_langchain/store/scalar_store/__init__.py b/src/langchain/store/memory_store/__init__.py similarity index 100% rename from src_langchain/store/scalar_store/__init__.py rename to src/langchain/store/memory_store/__init__.py diff --git a/src_langchain/store/memory_store/pg.py b/src/langchain/store/memory_store/pg.py similarity index 100% rename from src_langchain/store/memory_store/pg.py rename to src/langchain/store/memory_store/pg.py diff --git a/src_langchain/store/memory_store/sql.py b/src/langchain/store/memory_store/sql.py similarity index 100% rename from src_langchain/store/memory_store/sql.py rename to src/langchain/store/memory_store/sql.py diff --git a/src_langchain/store/vector_store/__init__.py b/src/langchain/store/scalar_store/__init__.py similarity index 100% rename from src_langchain/store/vector_store/__init__.py rename to src/langchain/store/scalar_store/__init__.py diff --git a/src_langchain/store/scalar_store/es.py b/src/langchain/store/scalar_store/es.py similarity index 100% rename from src_langchain/store/scalar_store/es.py rename to src/langchain/store/scalar_store/es.py diff --git a/src_towhee/prompts/__init__.py b/src/langchain/store/vector_store/__init__.py similarity index 100% rename from src_towhee/prompts/__init__.py rename to src/langchain/store/vector_store/__init__.py diff --git a/src_langchain/store/vector_store/milvus.py b/src/langchain/store/vector_store/milvus.py similarity index 100% rename from src_langchain/store/vector_store/milvus.py rename to src/langchain/store/vector_store/milvus.py diff --git a/tests/unit_tests/src_langchain/__init__.py b/src/llamaindex/__init__.py similarity index 100% rename from tests/unit_tests/src_langchain/__init__.py rename to src/llamaindex/__init__.py diff --git a/src_towhee/base.py b/src/towhee/base.py 
similarity index 100% rename from src_towhee/base.py rename to src/towhee/base.py diff --git a/src_towhee/memory/README.md b/src/towhee/memory/README.md similarity index 100% rename from src_towhee/memory/README.md rename to src/towhee/memory/README.md diff --git a/src_towhee/memory/__init__.py b/src/towhee/memory/__init__.py similarity index 100% rename from src_towhee/memory/__init__.py rename to src/towhee/memory/__init__.py diff --git a/src_towhee/memory/sql.py b/src/towhee/memory/sql.py similarity index 98% rename from src_towhee/memory/sql.py rename to src/towhee/memory/sql.py index 05cf32d..d4888b2 100644 --- a/src_towhee/memory/sql.py +++ b/src/towhee/memory/sql.py @@ -8,7 +8,7 @@ sys.path.append(os.path.join(os.path.dirname(__file__), '../..')) -from src_towhee.base import BaseMemory # pylint: disable=C0413 +from src.towhee.base import BaseMemory # pylint: disable=C0413 from config import MEMORYDB_CONFIG # pylint: disable=C0413 diff --git a/src_towhee/operations.py b/src/towhee/operations.py similarity index 97% rename from src_towhee/operations.py rename to src/towhee/operations.py index 8b7f87a..ef0a7af 100644 --- a/src_towhee/operations.py +++ b/src/towhee/operations.py @@ -4,8 +4,8 @@ sys.path.append(os.path.join(os.path.dirname(__file__), '..')) -from src_towhee.pipelines import TowheePipelines # pylint: disable=C0413 -from src_towhee.memory import MemoryStore # pylint: disable=C0413 +from src.towhee.pipelines import TowheePipelines # pylint: disable=C0413 +from src.towhee.memory import MemoryStore # pylint: disable=C0413 logger = logging.getLogger(__name__) diff --git a/src_towhee/pipelines/README.md b/src/towhee/pipelines/README.md similarity index 100% rename from src_towhee/pipelines/README.md rename to src/towhee/pipelines/README.md diff --git a/src_towhee/pipelines/__init__.py b/src/towhee/pipelines/__init__.py similarity index 97% rename from src_towhee/pipelines/__init__.py rename to src/towhee/pipelines/__init__.py index 859465c..623fe33 
100644 --- a/src_towhee/pipelines/__init__.py +++ b/src/towhee/pipelines/__init__.py @@ -14,9 +14,9 @@ RERANK_CONFIG, QUERY_MODE, INSERT_MODE, DATAPARSER_CONFIG ) -from src_towhee.base import BasePipelines # pylint: disable=C0413 -from src_towhee.pipelines.search import build_search_pipeline # pylint: disable=C0413 -from src_towhee.pipelines.insert import build_insert_pipeline # pylint: disable=C0413 +from src.towhee.base import BasePipelines # pylint: disable=C0413 +from src.towhee.pipelines.search import build_search_pipeline # pylint: disable=C0413 +from src.towhee.pipelines.insert import build_insert_pipeline # pylint: disable=C0413 class TowheePipelines(BasePipelines): diff --git a/src_towhee/pipelines/insert/__init__.py b/src/towhee/pipelines/insert/__init__.py similarity index 100% rename from src_towhee/pipelines/insert/__init__.py rename to src/towhee/pipelines/insert/__init__.py diff --git a/src_towhee/pipelines/insert/generate_questions.py b/src/towhee/pipelines/insert/generate_questions.py similarity index 100% rename from src_towhee/pipelines/insert/generate_questions.py rename to src/towhee/pipelines/insert/generate_questions.py diff --git a/src_towhee/pipelines/search/__init__.py b/src/towhee/pipelines/search/__init__.py similarity index 100% rename from src_towhee/pipelines/search/__init__.py rename to src/towhee/pipelines/search/__init__.py diff --git a/src_towhee/pipelines/search/rewrite_query.py b/src/towhee/pipelines/search/rewrite_query.py similarity index 100% rename from src_towhee/pipelines/search/rewrite_query.py rename to src/towhee/pipelines/search/rewrite_query.py diff --git a/src_towhee/pipelines/utils.py b/src/towhee/pipelines/utils.py similarity index 100% rename from src_towhee/pipelines/utils.py rename to src/towhee/pipelines/utils.py diff --git a/tests/unit_tests/src_langchain/agent/__init__.py b/src/towhee/prompts/__init__.py similarity index 100% rename from tests/unit_tests/src_langchain/agent/__init__.py rename to 
src/towhee/prompts/__init__.py diff --git a/src_towhee/prompts/en.py b/src/towhee/prompts/en.py similarity index 100% rename from src_towhee/prompts/en.py rename to src/towhee/prompts/en.py diff --git a/src_towhee/prompts/zh.py b/src/towhee/prompts/zh.py similarity index 100% rename from src_towhee/prompts/zh.py rename to src/towhee/prompts/zh.py diff --git a/tests/unit_tests/src_langchain/data_loader/__init__.py b/tests/unit_tests/src/__init__.py similarity index 100% rename from tests/unit_tests/src_langchain/data_loader/__init__.py rename to tests/unit_tests/src/__init__.py diff --git a/tests/unit_tests/src_langchain/embedding/__init__.py b/tests/unit_tests/src/langchain/__init__.py similarity index 100% rename from tests/unit_tests/src_langchain/embedding/__init__.py rename to tests/unit_tests/src/langchain/__init__.py diff --git a/tests/unit_tests/src_langchain/llm/__init__.py b/tests/unit_tests/src/langchain/agent/__init__.py similarity index 100% rename from tests/unit_tests/src_langchain/llm/__init__.py rename to tests/unit_tests/src/langchain/agent/__init__.py diff --git a/tests/unit_tests/src_langchain/agent/test_agent.py b/tests/unit_tests/src/langchain/agent/test_agent.py similarity index 82% rename from tests/unit_tests/src_langchain/agent/test_agent.py rename to tests/unit_tests/src/langchain/agent/test_agent.py index 3d6145c..cd8ff10 100644 --- a/tests/unit_tests/src_langchain/agent/test_agent.py +++ b/tests/unit_tests/src/langchain/agent/test_agent.py @@ -1,13 +1,8 @@ -import os -import sys import unittest - from langchain.agents import AgentExecutor, Tool from langchain.llms.fake import FakeListLLM -sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..')) - -from src_langchain.agent import ChatAgent +from src.langchain.agent import ChatAgent class TestChatAgent(unittest.TestCase): @@ -25,8 +20,7 @@ class TestChatAgent(unittest.TestCase): def test_run_chat_agent(self): agent_executor = AgentExecutor.from_agent_and_tools( 
agent=self.chat_agent, - tools=self.tools, - verbose=False + tools=self.tools ) final_answer = agent_executor.run(input='whats 2 + 2', chat_history=[]) assert final_answer == self.responses[1] diff --git a/tests/unit_tests/src_langchain/agent/test_output_parser.py b/tests/unit_tests/src/langchain/agent/test_output_parser.py similarity index 82% rename from tests/unit_tests/src_langchain/agent/test_output_parser.py rename to tests/unit_tests/src/langchain/agent/test_output_parser.py index 9396bec..2a3eadc 100644 --- a/tests/unit_tests/src_langchain/agent/test_output_parser.py +++ b/tests/unit_tests/src/langchain/agent/test_output_parser.py @@ -1,13 +1,8 @@ -import os -import sys import unittest - from langchain.schema import AgentAction, AgentFinish -sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..')) - -from src_langchain.agent.prompt import FORMAT_INSTRUCTIONS -from src_langchain.agent.output_parser import OutputParser +from src.langchain.agent.prompt import FORMAT_INSTRUCTIONS +from src.langchain.agent.output_parser import OutputParser class TestOutputParser(unittest.TestCase): diff --git a/tests/unit_tests/src_towhee/__init__.py b/tests/unit_tests/src/langchain/data_loader/__init__.py similarity index 100% rename from tests/unit_tests/src_towhee/__init__.py rename to tests/unit_tests/src/langchain/data_loader/__init__.py diff --git a/tests/unit_tests/src_langchain/data_loader/test_data_parser.py b/tests/unit_tests/src/langchain/data_loader/test_data_parser.py similarity index 89% rename from tests/unit_tests/src_langchain/data_loader/test_data_parser.py rename to tests/unit_tests/src/langchain/data_loader/test_data_parser.py index 9a9a168..dfca605 100644 --- a/tests/unit_tests/src_langchain/data_loader/test_data_parser.py +++ b/tests/unit_tests/src/langchain/data_loader/test_data_parser.py @@ -1,6 +1,4 @@ import io -import os -import sys import tempfile import unittest from unittest.mock import patch @@ -8,9 +6,8 @@ from langchain.schema 
import Document from langchain.text_splitter import RecursiveCharacterTextSplitter -sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..')) -from src_langchain.data_loader import DataParser +from src.langchain.data_loader import DataParser class TestDataParser(unittest.TestCase): diff --git a/tests/unit_tests/src_langchain/data_loader/test_data_splitter.py b/tests/unit_tests/src/langchain/data_loader/test_data_splitter.py similarity index 73% rename from tests/unit_tests/src_langchain/data_loader/test_data_splitter.py rename to tests/unit_tests/src/langchain/data_loader/test_data_splitter.py index 01e2a57..6261b8d 100644 --- a/tests/unit_tests/src_langchain/data_loader/test_data_splitter.py +++ b/tests/unit_tests/src/langchain/data_loader/test_data_splitter.py @@ -1,10 +1,6 @@ -import os -import sys import unittest -sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..')) - -from src_langchain.data_loader.data_splitter import MarkDownSplitter +from src.langchain.data_loader.data_splitter import MarkDownSplitter class TestMarkDownSplitter(unittest.TestCase): diff --git a/tests/unit_tests/src_towhee/memory/__init__.py b/tests/unit_tests/src/langchain/embedding/__init__.py similarity index 100% rename from tests/unit_tests/src_towhee/memory/__init__.py rename to tests/unit_tests/src/langchain/embedding/__init__.py diff --git a/tests/unit_tests/src_langchain/embedding/test_langchain_huggingface.py b/tests/unit_tests/src/langchain/embedding/test_langchain_huggingface.py similarity index 85% rename from tests/unit_tests/src_langchain/embedding/test_langchain_huggingface.py rename to tests/unit_tests/src/langchain/embedding/test_langchain_huggingface.py index 4eb4ddc..81c0c36 100644 --- a/tests/unit_tests/src_langchain/embedding/test_langchain_huggingface.py +++ b/tests/unit_tests/src/langchain/embedding/test_langchain_huggingface.py @@ -1,12 +1,8 @@ -import os -import sys import unittest from unittest.mock import patch - import numpy as np 
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../../../..')) -from src_langchain.embedding.langchain_huggingface import TextEncoder +from src.langchain.embedding.langchain_huggingface import TextEncoder class TestLangchainHuggingface(unittest.TestCase): diff --git a/tests/unit_tests/src_langchain/embedding/test_openai_embedding.py b/tests/unit_tests/src/langchain/embedding/test_openai_embedding.py similarity index 85% rename from tests/unit_tests/src_langchain/embedding/test_openai_embedding.py rename to tests/unit_tests/src/langchain/embedding/test_openai_embedding.py index 176324e..4854d55 100644 --- a/tests/unit_tests/src_langchain/embedding/test_openai_embedding.py +++ b/tests/unit_tests/src/langchain/embedding/test_openai_embedding.py @@ -1,12 +1,8 @@ -import os -import sys import unittest from unittest.mock import patch - import numpy as np -sys.path.append(os.path.join(os.path.dirname(__file__), '../../../../..')) -from src_langchain.embedding.openai_embedding import TextEncoder +from src.langchain.embedding.openai_embedding import TextEncoder class TestOpenAIEmbedding(unittest.TestCase): diff --git a/tests/unit_tests/src_towhee/pipelines/__init__.py b/tests/unit_tests/src/langchain/llm/__init__.py similarity index 100% rename from tests/unit_tests/src_towhee/pipelines/__init__.py rename to tests/unit_tests/src/langchain/llm/__init__.py diff --git a/tests/unit_tests/src_langchain/llm/test_dolly_chat.py b/tests/unit_tests/src/langchain/llm/test_dolly_chat.py similarity index 82% rename from tests/unit_tests/src_langchain/llm/test_dolly_chat.py rename to tests/unit_tests/src/langchain/llm/test_dolly_chat.py index 41ac8ad..52a0e78 100644 --- a/tests/unit_tests/src_langchain/llm/test_dolly_chat.py +++ b/tests/unit_tests/src/langchain/llm/test_dolly_chat.py @@ -1,11 +1,8 @@ -import os -import sys import unittest from unittest.mock import patch from langchain.schema import HumanMessage -sys.path.append(os.path.join(os.path.dirname(__file__), 
'../../../../..')) MOCK_ANSWER = 'mock answer' @@ -18,7 +15,7 @@ def __call__(self, prompt): with patch('transformers.pipeline') as mock_pipelines: mock_pipelines.return_value = MockGenerateText() - from src_langchain.llm.dolly_chat import ChatLLM + from src.langchain.llm.dolly_chat import ChatLLM chat_llm = ChatLLM(model_name='mock', device='cpu', ) messages = [HumanMessage(content='hello')] diff --git a/tests/unit_tests/src_langchain/llm/test_ernie.py b/tests/unit_tests/src/langchain/llm/test_ernie.py similarity index 93% rename from tests/unit_tests/src_langchain/llm/test_ernie.py rename to tests/unit_tests/src/langchain/llm/test_ernie.py index 62e52aa..9cc23e3 100644 --- a/tests/unit_tests/src_langchain/llm/test_ernie.py +++ b/tests/unit_tests/src/langchain/llm/test_ernie.py @@ -1,11 +1,7 @@ -import os -import sys import unittest from unittest.mock import patch from langchain.schema import HumanMessage, AIMessage -sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..')) - class TestERNIE(unittest.TestCase): def test_generate(self): @@ -27,7 +23,7 @@ def test_generate(self): ) mock_post.return_value = mock_res - from src_langchain.llm.ernie import ChatLLM + from src.langchain.llm.ernie import ChatLLM EB_API_TYPE = 'mock_type' EB_ACCESS_TOKEN = 'mock_token' diff --git a/tests/unit_tests/src_langchain/llm/test_openai_chat.py b/tests/unit_tests/src/langchain/llm/test_openai_chat.py similarity index 61% rename from tests/unit_tests/src_langchain/llm/test_openai_chat.py rename to tests/unit_tests/src/langchain/llm/test_openai_chat.py index c2ee7c1..ce90666 100644 --- a/tests/unit_tests/src_langchain/llm/test_openai_chat.py +++ b/tests/unit_tests/src/langchain/llm/test_openai_chat.py @@ -1,13 +1,9 @@ -import os -import sys import unittest -sys.path.append(os.path.join(os.path.dirname(__file__), '../../../../..')) - class TestOpenAIChat(unittest.TestCase): def test_init(self): - from src_langchain.llm.openai_chat import ChatLLM + from 
src.langchain.llm.openai_chat import ChatLLM chat_llm = ChatLLM(openai_api_key='mock-key') self.assertEqual(chat_llm.__class__.__name__, 'ChatLLM') diff --git a/tests/unit_tests/src/towhee/__init__.py b/tests/unit_tests/src/towhee/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit_tests/src/towhee/akcio_ut.txt b/tests/unit_tests/src/towhee/akcio_ut.txt new file mode 100644 index 0000000..f711cfb --- /dev/null +++ b/tests/unit_tests/src/towhee/akcio_ut.txt @@ -0,0 +1 @@ +This is test content. \ No newline at end of file diff --git a/tests/unit_tests/src/towhee/memory/__init__.py b/tests/unit_tests/src/towhee/memory/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit_tests/src_towhee/memory/test_sql.py b/tests/unit_tests/src/towhee/memory/test_sql.py similarity index 86% rename from tests/unit_tests/src_towhee/memory/test_sql.py rename to tests/unit_tests/src/towhee/memory/test_sql.py index 1e42ff8..bae5794 100644 --- a/tests/unit_tests/src_towhee/memory/test_sql.py +++ b/tests/unit_tests/src/towhee/memory/test_sql.py @@ -1,11 +1,8 @@ import os -import sys import unittest -sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..')) - -from src_towhee.base import BaseMemory # pylint: disable=C0413 -from src_towhee.memory.sql import MemoryStore # pylint: disable=C0413 +from src.towhee.base import BaseMemory +from src.towhee.memory.sql import MemoryStore class TestSql(unittest.TestCase): diff --git a/tests/unit_tests/src/towhee/pipelines/__init__.py b/tests/unit_tests/src/towhee/pipelines/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit_tests/src_towhee/pipelines/test_pipelines.py b/tests/unit_tests/src/towhee/pipelines/test_pipelines.py similarity index 98% rename from tests/unit_tests/src_towhee/pipelines/test_pipelines.py rename to tests/unit_tests/src/towhee/pipelines/test_pipelines.py index 0266a74..b81a3e9 100644 --- 
a/tests/unit_tests/src_towhee/pipelines/test_pipelines.py +++ b/tests/unit_tests/src/towhee/pipelines/test_pipelines.py @@ -1,19 +1,15 @@ import unittest from unittest.mock import patch - import json -import sys import os - from milvus import MilvusServer -sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..')) from config import ( # pylint: disable=C0413 CHAT_CONFIG, TEXTENCODER_CONFIG, VECTORDB_CONFIG, RERANK_CONFIG, ) -from src_towhee.pipelines import TowheePipelines # pylint: disable=C0413 +from src.towhee.pipelines import TowheePipelines # pylint: disable=C0413 milvus_server = MilvusServer() diff --git a/tests/unit_tests/src_towhee/test_operations.py b/tests/unit_tests/src/towhee/test_operations.py similarity index 85% rename from tests/unit_tests/src_towhee/test_operations.py rename to tests/unit_tests/src/towhee/test_operations.py index d6e8063..74ab7f2 100644 --- a/tests/unit_tests/src_towhee/test_operations.py +++ b/tests/unit_tests/src/towhee/test_operations.py @@ -3,11 +3,6 @@ from towhee.runtime.data_queue import DataQueue, ColumnType -import sys -import os - -sys.path.append(os.path.join(os.path.dirname(__file__), '../../..')) - class MockStore: def __init__(self, *args, **kwargs): @@ -69,20 +64,20 @@ class TestOperations(unittest.TestCase): def test_chat(self): - with patch('src_towhee.pipelines.TowheePipelines') as mock_pipelines, \ - patch('src_towhee.memory.MemoryStore') as mock_memory: + with patch('src.towhee.pipelines.TowheePipelines') as mock_pipelines, \ + patch('src.towhee.memory.MemoryStore') as mock_memory: mock_pipelines.return_value = MockPipeline() mock_memory.return_value = MockStore() - from src_towhee.pipelines import TowheePipelines - from src_towhee.memory import MemoryStore + from src.towhee.pipelines import TowheePipelines + from src.towhee.memory import MemoryStore with patch.object(TowheePipelines, 'search_pipeline', mock_pipelines.search_pipeline), \ patch.object(MemoryStore, 'add_history', 
mock_memory.add_history), \ patch.object(MemoryStore, 'get_history', mock_memory.get_history), \ patch.object(MemoryStore, 'drop', mock_memory.drop): - from src_towhee.operations import chat, get_history, clear_history + from src.towhee.operations import chat, get_history, clear_history question, answer = chat( self.session_id, self.project, self.question) @@ -97,13 +92,13 @@ def test_chat(self): def test_insert(self): - with patch('src_towhee.pipelines.TowheePipelines') as mock_pipelines, \ - patch('src_towhee.memory.MemoryStore') as mock_memory: + with patch('src.towhee.pipelines.TowheePipelines') as mock_pipelines, \ + patch('src.towhee.memory.MemoryStore') as mock_memory: mock_pipelines.return_value = MockPipeline() mock_memory.return_value = MockStore() - from src_towhee.pipelines import TowheePipelines - from src_towhee.memory import MemoryStore + from src.towhee.pipelines import TowheePipelines + from src.towhee.memory import MemoryStore with patch.object(TowheePipelines, 'insert_pipeline', mock_pipelines.insert_pipeline), \ patch.object(TowheePipelines, 'count_entities', mock_pipelines.count_entities), \ @@ -112,7 +107,7 @@ def test_insert(self): patch.object(MemoryStore, 'check', mock_memory.check), \ patch.object(MemoryStore, 'drop', mock_memory.drop): - from src_towhee.operations import insert, check, drop + from src.towhee.operations import insert, check, drop chunk_count, token_count = insert(self.test_src, self.project) assert chunk_count == self.expect_len, token_count == self.expect_token_count