Skip to content

Commit

Permalink
Merge pull request #42 from Handoni/develop
Browse files Browse the repository at this point in the history
v0.1.1-demo
  • Loading branch information
Handoni authored May 30, 2024
2 parents bce9503 + c68a701 commit 9a5da31
Show file tree
Hide file tree
Showing 34 changed files with 4,717 additions and 427 deletions.
47 changes: 47 additions & 0 deletions backend/app/api/routers/user_router.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from fastapi import APIRouter, HTTPException, Depends
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
from api.schemas.user import UserCreate, User, Token
from services.user_service import create_user, get_user_by_email, authenticate_user
from utils.jwt_handler import create_access_token, decode_access_token
import jwt

# Router that the user endpoints in this module are registered on.
router = APIRouter()

# Bearer-token dependency consumed by read_users_me; tokenUrl names the
# login route ("/token") declared further down in this module.
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

@router.post("/users/", response_model=User)
def register_user(user: UserCreate):
    """Register a new account and return its public profile.

    Responds with HTTP 400 when the e-mail is already registered, and also
    when ``create_user`` or the response construction raises; in the latter
    case the exception text becomes the error detail.
    """
    # Reject duplicates before attempting to create anything.
    if get_user_by_email(user.email):
        raise HTTPException(status_code=400, detail="Email already registered")

    try:
        created = create_user(user)
        # Copy only the public fields from the stored record into the response.
        profile = {
            field: created[field]
            for field in ("id", "email", "nickname", "sex", "age")
        }
        return User(**profile)
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc))

@router.post("/token", response_model=Token)
def login_user(form_data: OAuth2PasswordRequestForm = Depends()):
    """Exchange form credentials for a bearer access token.

    The form's ``username`` field is passed to ``authenticate_user`` together
    with the password. On success the token's ``sub`` claim is the account's
    e-mail; on failure the endpoint responds with HTTP 400.
    """
    account = authenticate_user(form_data.username, form_data.password)
    if account:
        claims = {"sub": account['email']}
        return {
            "access_token": create_access_token(data=claims),
            "token_type": "bearer",
        }
    raise HTTPException(status_code=400, detail="Invalid email or password")

@router.get("/users/me/", response_model=User)
def read_users_me(token: str = Depends(oauth2_scheme)):
    """Return the profile of the user identified by the bearer token.

    Args:
        token: Bearer token supplied through ``oauth2_scheme``.

    Returns:
        The ``User`` whose e-mail is encoded in the token.

    Raises:
        HTTPException: 401 when the token cannot be decoded or when no user
            exists for the decoded e-mail.
    """
    credentials_exception = HTTPException(
        status_code=401,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )

    # Only the decode step can raise a JWT error, so keep the try minimal.
    try:
        email = decode_access_token(token)
    except jwt.PyJWTError:
        raise credentials_exception

    user = get_user_by_email(email)
    if user is None:
        raise credentials_exception

    # `nickname` is a required field of the User schema; omitting it made
    # every successful lookup fail response validation.
    return User(
        id=user['id'],
        email=user['email'],
        nickname=user['nickname'],
        sex=user['sex'],
        age=user['age'],
    )
13 changes: 6 additions & 7 deletions backend/app/api/schemas/disease_prediction_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@
from api.schemas.primary_disease_prediction import (
PrimaryDiseasePredictionResponse,
)
from api.schemas.secondary_disease_prediction import (
PredictedDisease,
UserQuestionResponse,
)
from typing import List, Dict, Optional
from typing import Dict, Optional
from uuid import uuid4
from datetime import datetime

Expand All @@ -21,13 +17,16 @@ class DiseasePredictionSession(BaseModel):
created_at: datetime = Field(default_factory=datetime.now)
updated_at: datetime = Field(default_factory=datetime.now)

user_input: str = None
primary_symptoms: Dict[str, str] = {} # 증상ID:증상내용
primary_diseases: Dict[str, str] = {} # 질병 코드:질병 이름
primary_questions: Dict[str, Dict[str, str]] = {} # 질병 코드:{질문ID:질문내용}

secondary_symptoms: UserQuestionResponse = None
secondary_symptoms: Optional[Dict[str, str]] = {} # 증상ID:응답

final_diseases: PredictedDisease = None
final_diseases: Optional[str] = None
recommended_department: Optional[str] = None
final_disease_description: Optional[str] = None

def prepare_primary_disease_prediction_response(
self,
Expand Down
2 changes: 1 addition & 1 deletion backend/app/api/schemas/primary_disease_prediction.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pydantic import BaseModel, Field
from pydantic import BaseModel
from typing import List, Dict
from uuid import uuid4

Expand Down
8 changes: 2 additions & 6 deletions backend/app/api/schemas/secondary_disease_prediction.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pydantic import BaseModel, field_validator, Field
from typing import List, Dict
from pydantic import BaseModel
from typing import Dict
from uuid import uuid4


Expand All @@ -16,7 +16,3 @@ class PredictedDisease(BaseModel):
Disease: str
recommended_department: str
description: str


# class FinalResponse(BaseModel):
# response: Dict[str, PredictedDisease]
31 changes: 31 additions & 0 deletions backend/app/api/schemas/user.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from pydantic import BaseModel, EmailStr, validator

class Token(BaseModel):
    # Response body of the login endpoint (used as its response_model).
    access_token: str  # encoded access token string
    token_type: str  # the login endpoint returns "bearer" here

class TokenData(BaseModel):
    # Holds an optional e-mail; presumably the decoded token payload —
    # TODO confirm, no usage is visible in this module.
    email: str | None = None

class UserCreate(BaseModel):
    # Request payload for creating a user.
    email: EmailStr
    nickname: str
    password: str
    sex: str
    age: int

    @validator("sex")
    def validate_sex(cls, v):
        # Only the two literal values below are accepted; anything else is
        # rejected with a ValueError.
        if v in ('male', 'female'):
            return v
        raise ValueError('Sex field must be either "male" or "female".')

class User(BaseModel):
    # Public user representation; every field is required and there is no
    # password field.
    id: str
    nickname: str
    email: EmailStr
    sex: str
    age: int

    class Config:
        # NOTE(review): presumably lets the model be built from objects with
        # matching attributes (pydantic v2 `from_attributes`) — confirm
        # against the installed pydantic version.
        from_attributes = True
6 changes: 4 additions & 2 deletions backend/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ class Settings(BaseSettings):
)
gpt_api_key: str
gpt_api_url: str = "https://api.openai.com/v4/completions"
google_application_credentials: str

mongo_uri: str
token_secret: str
token_algorithm: str
token_expire_minutes: int

def get_settings() -> Settings:
settings = Settings() # type: ignore
Expand Down
21 changes: 12 additions & 9 deletions backend/app/core/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
As a medical assistant, your role involves extracting main symptoms from user descriptions, predicting potential diseases based on these symptoms, and recommending relevant diagnostic departments.
This is first step of the process. Carefully follow these instructions:
Convert user-described "main symptoms" into a list of keywords.
Include mental and emotional symptoms by recognizing phrases that indicate psychological distress or mental health conditions.
If the input is not related to the disease or symptoms at all, return 'no symptoms'.
Use the format Korean symptom name (English Symptom Name), e.g., 두통(headache).
List no more than 10 symptoms, separated by '|'.
Example format for "I have a headache and a cough" should be: "두통(headache)|기침(cough)".
Expand All @@ -38,7 +40,7 @@
Ensure the disease names are explicitly detailed, avoiding generic or nonspecific symptomatic descriptions.
List each disease with its ICD code, formatted as "(ICD Code):Disease Name(Disease in English)", separated by '|'.
Avoid vague terms like 'other' or 'unspecified'.
Input Example: "두통(headache)|기침(cough)"
Input Example: "input of user: 머리가 아프고 기침이 나요. expected symptoms: 두통(headache)|기침(cough)"
Output Example: "J00:감기(cold)|J45:천식(asthma)|...".
"""
PRIMARY_DISEASE_PREDICTION_PROMPT3 = """
Expand Down Expand Up @@ -101,21 +103,22 @@
As a knowledgeable medical assistant, your task is to analyze user-reported symptoms, suggest the most probable disease, and recommend an appropriate diagnostic department for further examination.
Input format:
1. Main Symptoms: A comma-separated list of primary symptoms as reported by the user.
2. Predicted Diseases: A list of potential diseases related to the main symptoms, formatted as 'ICD code:Disease name'.
3. Additional Symptoms: A list of secondary symptoms derived from the main symptoms, selected and verified by the user with responses (Yes/No). This helps refine the disease prediction.
1. User Input: plain text of the user's main symptoms.
2. Main Symptoms: A comma-separated list of primary symptoms as reported by the user.
3. Predicted Diseases: A list of potential diseases related to the main symptoms, formatted as 'ICD code:Disease name'.
4. Additional Symptoms: A list of secondary symptoms derived from the main symptoms, selected and verified by the user with responses (Yes/No). This helps refine the disease prediction.
Example input:
허리통증(back pain), 다리저림(leg numbness)
M54.5:요통(low back pain), M51.2:척추 디스크 변성(lumbar disc degeneration), G57.1:경골신경병증(tibial neuropathy), M47.8:기타 척추증(other spondylosis), M54.4:요천추통(lumbosacral pain)
허리에 통증이 지속된다:yes, 움직일 때 통증이 심해진다:yes, 앉아 있을 때 통증이 느껴진다:no, 허리의 뻣뻣함이 느껴진다:no, 허리를 구부릴 때 통증이 있다: yes ...(and so on)
-- Example input --
User Input: 허리가 아프고 다리가 저린다.
Extracted Symptoms: 허리통증(back pain), 다리저림(leg numbness)
Predicted Diseases: M54.5:요통(low back pain), M51.2:척추 디스크 변성(lumbar disc degeneration), G57.1:경골신경병증(tibial neuropathy), M47.8:기타 척추증(other spondylosis), M54.4:요천추통(lumbosacral pain)
Additional Symptoms: 허리에 통증이 지속된다:yes, 움직일 때 통증이 심해진다:yes, 앉아 있을 때 통증이 느껴진다:no, 허리의 뻣뻣함이 느껴진다:no, 허리를 구부릴 때 통증이 있다: yes ...(and so on)
-- Instructions --
Analyze the input to predict the most likely disease based on the symptoms. Select the most appropriate diagnostic department for further investigation. Ensure that your prediction considers the additional symptoms and is relevant to the disease's common diagnosis pathway.
- Write down only the information in the instruction without any additional explanation.
- Do not use delimiters like '|' unless required to distinguish between responses.
- (Very important) In particular, refer to the example output and output it in the same format.
-
Output format:
'Disease name (in Korean) | Diagnostic department (in Korean and English) | Explanation for your prediction'
Expand Down
File renamed without changes.
36 changes: 36 additions & 0 deletions backend/app/data/data_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import pandas as pd
import csv

def clean(symptom):
    """Strip the code prefix from each '^'-separated entry of *symptom*.

    Each entry is expected to look like ``<code>_<name>``. Everything up to
    and including the FIRST underscore is dropped, so a name that itself
    contains underscores is kept whole. Entries without an underscore are
    returned unchanged.

    Args:
        symptom: A '^'-joined string of entries, or None/NaN.

    Returns:
        The cleaned '^'-joined string, or None when *symptom* is None/NaN.
    """
    # Missing values (None or NaN) have nothing to clean.
    if pd.isna(symptom):
        return None

    # maxsplit=1 so only the leading code is removed, never part of the name.
    cleaned_parts = [
        part.split('_', 1)[1] if '_' in part else part
        for part in symptom.split('^')
    ]
    return '^'.join(cleaned_parts)

def process_csv(input_file, output_file):
    """Read *input_file*, strip code prefixes, and write *output_file*.

    The 'Disease' and 'Symptom' columns are passed through ``clean``; the
    'Count of Disease Occurrence' column is cast to nullable ``Int64``. The
    result is written without the index and with every field quoted. When
    *input_file* does not exist a message is printed and nothing is written.
    """
    try:
        frame = pd.read_csv(input_file)
    except FileNotFoundError:
        print(f"파일을 찾을 수 없습니다: {input_file}")
        return

    # Remove the UMLS codes from both text columns.
    for column in ('Disease', 'Symptom'):
        frame[column] = frame[column].apply(clean)

    # Nullable Int64 keeps missing counts as NaN instead of failing the cast.
    count_column = 'Count of Disease Occurrence'
    frame[count_column] = frame[count_column].astype('Int64')

    # Save the result as a new CSV file.
    frame.to_csv(output_file, index=False, quoting=csv.QUOTE_ALL)

# File paths (absolute locations on the original author's machine).
# NOTE(review): these statements are not behind an `if __name__ == "__main__":`
# guard, so they execute whenever this module is imported.
input_file = "C:/Users/이상윤/Documents/coding/Apayo/backend/app/data/raw_data_2.csv"
output_file = "C:/Users/이상윤/Documents/coding/Apayo/backend/app/data/output.csv"

# Run the conversion.
process_csv(input_file, output_file)
64 changes: 64 additions & 0 deletions backend/app/data/embedding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from openai import OpenAI
import pandas as pd
import numpy as np
from pymongo import MongoClient, ASCENDING, DESCENDING
from core.config import get_settings

# Application settings; supply the OpenAI key and the MongoDB URI used below.
settings = get_settings()
GPT_API_KEY = settings.gpt_api_key
# OpenAI client used by get_embedding().
client = OpenAI(api_key=GPT_API_KEY)

# MongoDB handle; the collections used in this module live in
# 'disease_embedding_db'.
mongo_client = MongoClient(settings.mongo_uri)
db = mongo_client['disease_embedding_db']

def get_embedding(text):
    """Return the 'text-embedding-3-small' embedding of *text*.

    Sends *text* as a single-element input and returns the ``embedding`` of
    the first (only) item in the response.
    """
    response = client.embeddings.create(
        input=[text],
        model='text-embedding-3-small',
    )
    first_item = response.data[0]
    return first_item.embedding

def get_last_saved_disease():
    """Return the key of the disease document that sorts last, or None.

    Documents in ``diseases_embeddings`` are inserted with the disease name
    as ``_id`` and carry no ``id`` field, so both the sort and the lookup
    must use ``_id``; reading ``"id"`` raised ``KeyError`` for any non-empty
    collection.

    Returns:
        The ``_id`` of the last document in descending ``_id`` order, or
        None when the collection is empty.
    """
    last_document = db.diseases_embeddings.find_one(sort=[("_id", DESCENDING)])
    if last_document:
        return last_document["_id"]
    return None

def create_embedding_data(
    csv_path='C:/Users/이상윤/Documents/coding/Apayo/backend/app/data/output.csv',
    resume_after='obesity morbid',
):
    """Embed every disease and symptom in the CSV and store them in MongoDB.

    The CSV is forward-filled, stray ``\\xa0`` / ``\\xa9`` characters are
    removed from every string cell, and rows are grouped by 'Disease'. For
    each processed disease one document is inserted into
    ``diseases_embeddings`` and one document per non-empty symptom into
    ``symptoms``.

    Args:
        csv_path: CSV with at least 'Disease' and 'Symptom' columns. Defaults
            to the path originally hard-coded here.
        resume_after: Disease name marking where a previous run stopped.
            Groups are skipped up to and including this name and processing
            starts with the next group. Pass None to process every disease.
            If the name never appears, nothing is processed.
    """
    df = pd.read_csv(csv_path)
    # ffill() replaces the deprecated fillna(method='ffill').
    df = df.ffill()

    def _strip_stray_chars(value):
        # Remove non-breaking spaces and copyright signs from text cells.
        if isinstance(value, str):
            return value.replace('\xa0', '').replace('\xa9', '')
        return value

    # The original called applymap() and discarded the result, so the cleanup
    # never took effect; assign the cleaned columns back.
    for column in df.columns:
        df[column] = df[column].map(_strip_stray_chars)

    start_saving = resume_after is None

    # Group by disease and process each group.
    for disease, group in df.groupby("Disease"):
        if not start_saving:
            # The marker itself was already saved, so it is skipped as well.
            if disease == resume_after:
                start_saving = True
            continue
        print(f"Processing {disease}")
        symptoms = group["Symptom"].tolist()

        # NOTE(review): the vector is stored nested as
        # {"embedding": {"embedding": [...]}}; kept as-is because readers of
        # this collection may depend on that shape — confirm.
        disease_embedding = {"embedding": get_embedding(disease)}

        # Create the disease document.
        disease_data = {
            "_id": disease,
            "embedding": disease_embedding
        }
        db.diseases_embeddings.insert_one(disease_data)

        # Add each symptom to the symptoms collection.
        for symptom in symptoms:
            # Skip values that are not text (e.g. a leading NaN that ffill
            # could not fill) and blank strings.
            if not isinstance(symptom, str):
                continue
            symptom = symptom.strip()
            if symptom:
                symptom_embedding = {"embedding": get_embedding(symptom)}
                symptom_data = {
                    "disease_id": disease,
                    "symptom": symptom,
                    "embedding": symptom_embedding
                }
                db.symptoms.insert_one(symptom_data)

# if __name__ == "__main__":
# create_embedding_data()
Loading

0 comments on commit 9a5da31

Please sign in to comment.