Merge pull request #42 from Handoni/develop

v0.1.1-demo
Handoni · May 30, 2024 · 9a5da31 · 9a5da31
2 parents bce9503 + c68a701
commit 9a5da31
Show file tree

Hide file tree

Showing 34 changed files with 4,717 additions and 427 deletions.
diff --git a/backend/app/api/routers/user_router.py b/backend/app/api/routers/user_router.py
@@ -0,0 +1,47 @@
+from fastapi import APIRouter, HTTPException, Depends
+from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
+from api.schemas.user import UserCreate, User, Token
+from services.user_service import create_user, get_user_by_email, authenticate_user
+from utils.jwt_handler import create_access_token, decode_access_token
+import jwt
+
+router = APIRouter()
+
+oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
+
+@router.post("/users/", response_model=User)
+def register_user(user: UserCreate):
+    """Register a new account and return its public profile.
+
+    Responds with HTTP 400 when the email is already registered, and also
+    when ``create_user`` or building the response raises; in the latter
+    case the exception text is used as the error detail.
+    """
+    if get_user_by_email(user.email):
+        raise HTTPException(status_code=400, detail="Email already registered")
+
+    try:
+        record = create_user(user)
+        # Copy only the fields exposed by the response schema.
+        profile = {
+            field: record[field]
+            for field in ('id', 'email', 'nickname', 'sex', 'age')
+        }
+        return User(**profile)
+    except Exception as exc:
+        raise HTTPException(status_code=400, detail=str(exc))
+
+@router.post("/token", response_model=Token)
+def login_user(form_data: OAuth2PasswordRequestForm = Depends()):
+    """Exchange form credentials for a bearer access token.
+
+    The form's ``username`` field is passed to ``authenticate_user`` together
+    with the password. On success the token's ``sub`` claim is the user's
+    email; on failure the endpoint responds with HTTP 400.
+    """
+    account = authenticate_user(form_data.username, form_data.password)
+    if account:
+        issued_token = create_access_token(data={"sub": account['email']})
+        return {"access_token": issued_token, "token_type": "bearer"}
+
+    raise HTTPException(status_code=400, detail="Invalid email or password")
+
+@router.get("/users/me/", response_model=User)
+def read_users_me(token: str = Depends(oauth2_scheme)):
+    """Return the profile of the user identified by the bearer token.
+
+    The value returned by ``decode_access_token`` is used as the email to
+    look the user up with.
+
+    Raises:
+        HTTPException: 401 with a ``WWW-Authenticate: Bearer`` header when the
+            token cannot be decoded or no user matches the decoded email.
+    """
+    credentials_exception = HTTPException(
+        status_code=401,
+        detail="Could not validate credentials",
+        headers={"WWW-Authenticate": "Bearer"},
+    )
+    # Only the decode step can raise a JWT error, so keep the try body minimal.
+    try:
+        email = decode_access_token(token)
+    except jwt.PyJWTError:
+        raise credentials_exception
+
+    user = get_user_by_email(email)
+    if user is None:
+        raise credentials_exception
+
+    # ``nickname`` is a required field of the User schema; leaving it out made
+    # every otherwise-successful request fail response validation.
+    return User(
+        id=user['id'],
+        email=user['email'],
+        nickname=user['nickname'],
+        sex=user['sex'],
+        age=user['age'],
+    )
diff --git a/backend/app/api/schemas/disease_prediction_session.py b/backend/app/api/schemas/disease_prediction_session.py
@@ -2,11 +2,7 @@
 from api.schemas.primary_disease_prediction import (
     PrimaryDiseasePredictionResponse,
 )
-from api.schemas.secondary_disease_prediction import (
-    PredictedDisease,
-    UserQuestionResponse,
-)
-from typing import List, Dict, Optional
+from typing import Dict, Optional
 from uuid import uuid4
 from datetime import datetime
 
@@ -21,13 +17,16 @@ class DiseasePredictionSession(BaseModel):
     created_at: datetime = Field(default_factory=datetime.now)
     updated_at: datetime = Field(default_factory=datetime.now)
 
+    user_input: str = None
     primary_symptoms: Dict[str, str] = {}  # 증상ID:증상내용
     primary_diseases: Dict[str, str] = {}  # 질병 코드:질병 이름
     primary_questions: Dict[str, Dict[str, str]] = {}  # 질병 코드:{질문ID:질문내용}
 
-    secondary_symptoms: UserQuestionResponse = None
+    secondary_symptoms: Optional[Dict[str, str]] = {}  # 증상ID:응답
 
-    final_diseases: PredictedDisease = None
+    final_diseases: Optional[str] = None
+    recommended_department: Optional[str] = None 
+    final_disease_description: Optional[str] = None
 
     def prepare_primary_disease_prediction_response(
         self,

diff --git a/backend/app/api/schemas/primary_disease_prediction.py b/backend/app/api/schemas/primary_disease_prediction.py
@@ -1,4 +1,4 @@
-from pydantic import BaseModel, Field
+from pydantic import BaseModel
 from typing import List, Dict
 from uuid import uuid4
 

diff --git a/backend/app/api/schemas/secondary_disease_prediction.py b/backend/app/api/schemas/secondary_disease_prediction.py
@@ -1,5 +1,5 @@
-from pydantic import BaseModel, field_validator, Field
-from typing import List, Dict
+from pydantic import BaseModel
+from typing import Dict
 from uuid import uuid4
 
 
@@ -16,7 +16,3 @@ class PredictedDisease(BaseModel):
     Disease: str
     recommended_department: str
     description: str
-
-
-# class FinalResponse(BaseModel):
-#     response: Dict[str, PredictedDisease]
diff --git a/backend/app/api/schemas/user.py b/backend/app/api/schemas/user.py
@@ -0,0 +1,31 @@
+from pydantic import BaseModel, EmailStr, field_validator, validator
+
+class Token(BaseModel):
+    """Response body describing an issued access token."""
+    access_token: str  # the issued token string
+    token_type: str  # token scheme label, e.g. "bearer"
+
+class TokenData(BaseModel):
+    """Holds an optional email; presumably the identity carried by a token — TODO confirm usage."""
+    email: str | None = None  # defaults to None when no email is supplied
+
+class UserCreate(BaseModel):
+    """Request body for registering a new user.
+
+    ``sex`` must be exactly "male" or "female"; any other value is rejected
+    during validation.
+    """
+
+    email: EmailStr
+    nickname: str
+    password: str
+    sex: str
+    age: int
+
+    # Pydantic v2 validator API; the v1-style ``validator`` decorator is
+    # deprecated there.
+    @field_validator("sex")
+    @classmethod
+    def validate_sex(cls, v):
+        """Return ``v`` unchanged when it is an allowed value, else raise ValueError."""
+        if v not in ('male', 'female'):
+            raise ValueError('Sex field must be either "male" or "female".')
+        return v
+
+class User(BaseModel):
+    """Public user profile used as the response model of the user endpoints (no password field)."""
+    id: str
+    nickname: str
+    email: EmailStr
+    sex: str
+    age: int
+
+    class Config:
+        # Lets the model be built from objects by reading their attributes
+        # NOTE(review): `from_attributes` is the Pydantic v2 option name — confirm the installed version
+        from_attributes = True
diff --git a/backend/app/core/config.py b/backend/app/core/config.py
@@ -9,8 +9,10 @@ class Settings(BaseSettings):
     )
     gpt_api_key: str
     gpt_api_url: str = "https://api.openai.com/v4/completions"
-    google_application_credentials: str
-
+    mongo_uri: str
+    token_secret: str
+    token_algorithm: str
+    token_expire_minutes: int
 
 def get_settings() -> Settings:
     settings = Settings()  # type: ignore

diff --git a/backend/app/core/prompt.py b/backend/app/core/prompt.py
@@ -27,6 +27,8 @@
 As a medical assistant, your role involves extracting main symptoms from user descriptions, predicting potential diseases based on these symptoms, and recommending relevant diagnostic departments.
 This is first step of the process. Carefully follow these instructions:
 Convert user-described "main symptoms" into a list of keywords.
+Include mental and emotional symptoms by recognizing phrases that indicate psychological distress or mental health conditions.
+If the input is not related to the disease or symptoms at all, return 'no symptoms'.
 Use the format Korean symptom name (English Symptom Name), e.g., 두통(headache).
 List no more than 10 symptoms, separated by '|'.
 Example format for "I have a headache and a cough" should be: "두통(headache)|기침(cough)".
@@ -38,7 +40,7 @@
 Ensure the disease names are explicitly detailed, avoiding generic or nonspecific symptomatic descriptions.
 List each disease with its ICD code, formatted as "(ICD Code):Disease Name(Disease in English)", separated by '|'.
 Avoid vague terms like 'other' or 'unspecified'.
-Input Example: "두통(headache)|기침(cough)"
+Input Example: "input of user: 머리가 아프고 기침이 나요. expected symptoms: 두통(headache)|기침(cough)"
 Output Example: "J00:감기(cold)|J45:천식(asthma)|...".
 """
 PRIMARY_DISEASE_PREDICTION_PROMPT3 = """
@@ -101,21 +103,22 @@
 As a knowledgeable medical assistant, your task is to analyze user-reported symptoms, suggest the most probable disease, and recommend an appropriate diagnostic department for further examination.
 
 Input format:
-1. Main Symptoms: A comma-separated list of primary symptoms as reported by the user.
-2. Predicted Diseases: A list of potential diseases related to the main symptoms, formatted as 'ICD code:Disease name'.
-3. Additional Symptoms: A list of secondary symptoms derived from the main symptoms, selected and verified by the user with responses (Yes/No). This helps refine the disease prediction.
+1. User Input: plain text of the user's main symptoms.
+2. Main Symptoms: A comma-separated list of primary symptoms as reported by the user.
+3. Predicted Diseases: A list of potential diseases related to the main symptoms, formatted as 'ICD code:Disease name'.
+4. Additional Symptoms: A list of secondary symptoms derived from the main symptoms, selected and verified by the user with responses (Yes/No). This helps refine the disease prediction.
 
-Example input:
-허리통증(back pain), 다리저림(leg numbness)
-M54.5:요통(low back pain), M51.2:척추 디스크 변성(lumbar disc degeneration), G57.1:경골신경병증(tibial neuropathy), M47.8:기타 척추증(other spondylosis), M54.4:요천추통(lumbosacral pain)
-허리에 통증이 지속된다:yes, 움직일 때 통증이 심해진다:yes, 앉아 있을 때 통증이 느껴진다:no, 허리의 뻣뻣함이 느껴진다:no, 허리를 구부릴 때 통증이 있다: yes ...(and so on)
+-- Example input --
+User Input: 허리가 아프고 다리가 저린다.
+Extracted Symptoms: 허리통증(back pain), 다리저림(leg numbness)
+Predicted Diseases: M54.5:요통(low back pain), M51.2:척추 디스크 변성(lumbar disc degeneration), G57.1:경골신경병증(tibial neuropathy), M47.8:기타 척추증(other spondylosis), M54.4:요천추통(lumbosacral pain)
+Additional Symptoms: 허리에 통증이 지속된다:yes, 움직일 때 통증이 심해진다:yes, 앉아 있을 때 통증이 느껴진다:no, 허리의 뻣뻣함이 느껴진다:no, 허리를 구부릴 때 통증이 있다: yes ...(and so on)
 
 -- Instructions --
 Analyze the input to predict the most likely disease based on the symptoms. Select the most appropriate diagnostic department for further investigation. Ensure that your prediction considers the additional symptoms and is relevant to the disease's common diagnosis pathway.
 - Write down only the information in the instruction without any additional explanation.
 - Do not use delimiters like '|' unless required to distinguish between responses.
 - (Very important) In particular, refer to the example output and output it in the same format.
-- 
 
 Output format:
 'Disease name (in Korean) | Diagnostic department (in Korean and English) | Explanation for your prediction'

diff --git a/backend/app/dependencies.py → backend/app/data/__init__.py b/backend/app/dependencies.py → backend/app/data/__init__.py
diff --git a/backend/app/data/data_processing.py b/backend/app/data/data_processing.py
@@ -0,0 +1,36 @@
+import pandas as pd
+import csv
+
+def clean(symptom):
+    """Strip the code prefix from each '^'-separated entry of a cell value.
+
+    An entry containing '_' is reduced to the text after its first '_';
+    an entry without '_' is kept as it is.
+
+    Args:
+        symptom: Cell value holding one or more entries joined by '^'.
+            May be None or NaN.
+
+    Returns:
+        The cleaned entries re-joined with '^', or None for a missing value.
+    """
+    # Missing values (None / NaN) have nothing to clean.
+    if pd.isna(symptom):
+        return None
+    # Split on the first '_' only, so a name that itself contains '_' is
+    # kept whole instead of being cut at its second underscore.
+    return '^'.join(part.split('_', 1)[-1] for part in symptom.split('^'))
+
+def process_csv(input_file, output_file):
+    """Clean the 'Disease' and 'Symptom' columns of a CSV and save the result.
+
+    Reads ``input_file``, runs ``clean`` over both text columns, converts
+    'Count of Disease Occurrence' to the nullable ``Int64`` dtype, and writes
+    the frame to ``output_file`` with every field quoted and no index column.
+    If ``input_file`` does not exist, a message is printed and the function
+    returns without writing anything.
+    """
+    try:
+        frame = pd.read_csv(input_file)
+    except FileNotFoundError:
+        print(f"파일을 찾을 수 없습니다: {input_file}")
+        return
+
+    # Remove the UMLS codes from the disease and symptom columns.
+    for column in ('Disease', 'Symptom'):
+        frame[column] = frame[column].apply(clean)
+
+    # Nullable Int64 keeps missing counts as NA values.
+    count_column = 'Count of Disease Occurrence'
+    frame[count_column] = frame[count_column].astype('Int64')
+
+    # Save the result as a new CSV file.
+    frame.to_csv(output_file, index=False, quoting=csv.QUOTE_ALL)
+
+# Input and output file paths (absolute paths on one specific machine)
+input_file = "C:/Users/이상윤/Documents/coding/Apayo/backend/app/data/raw_data_2.csv"
+output_file = "C:/Users/이상윤/Documents/coding/Apayo/backend/app/data/output.csv"
+
+# Run the conversion
+# NOTE(review): this call executes whenever the module is imported, not only when run as a script — confirm that is intended
+process_csv(input_file, output_file)
diff --git a/backend/app/data/embedding.py b/backend/app/data/embedding.py
@@ -0,0 +1,64 @@
+from openai import OpenAI
+import pandas as pd
+import numpy as np
+from pymongo import MongoClient, ASCENDING, DESCENDING
+from core.config import get_settings
+
+# Load application settings and build the OpenAI client from the configured API key
+settings = get_settings()
+GPT_API_KEY = settings.gpt_api_key
+client = OpenAI(api_key=GPT_API_KEY)
+
+# MongoDB connection from the configured URI; collections below are accessed through this database handle
+mongo_client = MongoClient(settings.mongo_uri)
+db = mongo_client['disease_embedding_db']
+
+def get_embedding(text):
+    """Return the 'text-embedding-3-small' embedding of ``text``."""
+    response = client.embeddings.create(input=[text], model='text-embedding-3-small')
+    # A single input was sent, so the embedding is read from the first result item.
+    first_item = response.data[0]
+    return first_item.embedding
+
+def get_last_saved_disease():
+    """Return the ``_id`` of the last stored disease document, or None.
+
+    "Last" is the first document when ``diseases_embeddings`` is sorted by
+    ``_id`` in descending order. None is returned when nothing is found.
+    """
+    # Disease documents are inserted with the disease name as "_id" and carry
+    # no "id" field, so both the sort key and the lookup must use "_id".
+    latest = db.diseases_embeddings.find_one(sort=[("_id", DESCENDING)])
+    if latest is None:
+        return None
+    return latest["_id"]
+
+def create_embedding_data(
+    csv_path='C:/Users/이상윤/Documents/coding/Apayo/backend/app/data/output.csv',
+    resume_after='obesity morbid',
+):
+    """Embed every disease and its symptoms from a CSV and store them in MongoDB.
+
+    Rows are grouped by the "Disease" column. For each processed disease one
+    document is inserted into ``diseases_embeddings`` and one document per
+    non-empty symptom is inserted into ``symptoms``.
+
+    Args:
+        csv_path: CSV file to read; must contain "Disease" and "Symptom" columns.
+        resume_after: Disease name to resume after. Every group up to and
+            including this name is skipped. Pass None to process all groups.
+            If the name never occurs, nothing is processed.
+    """
+    df = pd.read_csv(csv_path)
+    # Forward-fill blank cells from the row above.
+    df = df.ffill()
+    # applymap returns a new frame; keep the result so the stray '\xa0' and
+    # '\xa9' characters are actually removed from the text cells.
+    df = df.applymap(
+        lambda x: x.replace('\xa0', '').replace('\xa9', '') if isinstance(x, str) else x
+    )
+
+    start_saving = resume_after is None
+
+    # Group by disease and process each group.
+    for disease, group in df.groupby("Disease"):
+        if not start_saving:
+            # The marker disease itself is skipped too; saving starts with the next group.
+            if disease == resume_after:
+                start_saving = True
+            continue
+        print(f"Processing {disease}")
+
+        # Create the disease document.
+        # NOTE(review): the vector ends up nested as embedding.embedding — kept
+        # as-is because readers of the collection may rely on that shape.
+        disease_data = {
+            "_id": disease,
+            "embedding": {"embedding": get_embedding(disease)},
+        }
+        db.diseases_embeddings.insert_one(disease_data)
+
+        # Add each symptom of this disease to the symptoms collection.
+        for symptom in group["Symptom"].tolist():
+            # A missing value in the first row survives forward-fill; only text can be embedded.
+            if not isinstance(symptom, str):
+                continue
+            symptom = symptom.strip()
+            if not symptom:
+                continue
+            symptom_data = {
+                "disease_id": disease,
+                "symptom": symptom,
+                "embedding": {"embedding": get_embedding(symptom)},
+            }
+            db.symptoms.insert_one(symptom_data)
+
+# if __name__ == "__main__":
+#     create_embedding_data()