From 64df82ac7c92cd7dc7ed25669cf702c3bf045794 Mon Sep 17 00:00:00 2001 From: bigbell999 Date: Mon, 9 Dec 2024 17:11:11 +0900 Subject: [PATCH] =?UTF-8?q?PATCH:=20=EB=AA=85=EC=82=AC=ED=99=94=20?= =?UTF-8?q?=ED=95=A8=EC=88=98=20=EC=88=98=EC=A0=95=20=EB=A1=A4=EB=B0=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ai_server_code/ai_modules.py | 84 ------------------------------------ 1 file changed, 84 deletions(-) diff --git a/ai_server_code/ai_modules.py b/ai_server_code/ai_modules.py index a7d5f77..9a92c0d 100644 --- a/ai_server_code/ai_modules.py +++ b/ai_server_code/ai_modules.py @@ -64,7 +64,6 @@ def get_korean_time(): logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') # 단어 명사화 함수. -''' def transformed_query(content): # 중복된 단어를 제거한 명사를 담을 리스트 query_nouns = [] @@ -266,89 +265,6 @@ def transformed_query(content): query_nouns = list(set(query_nouns)) return query_nouns -''' - -def transformed_query(content): - query_nouns = [] - okt = Okt() - - # 1. 패턴 매칭 및 사전 정의 키워드 - patterns = { - r'\d+(?:학년도|년|학년|월|일|학기|시|분|초|기|개|차)?': lambda x: query_nouns.extend(x), - r'[a-zA-Z]+': lambda x: query_nouns.extend(word.upper() for word in x), - } - - # 정규식 패턴 처리 - for pattern, action in patterns.items(): - matches = re.findall(pattern, content) - action(matches) - for match in matches: - content = content.replace(match, '') - - # 2. 키워드 매핑 처리 - keyword_map = { - '시간표': [], - 'EXIT': ['출구'], - '벤처아카데미': ['벤처아카데미'], - '군': ['군', '군휴학', '군입대'], - '인컴': ['인공지능컴퓨팅'], - '학부생': ['학부생'], - '공대': ['E'], - '설명회': ['설명회'], - '컴학': ['컴퓨터학부'], - '컴퓨터': ['컴퓨터학부', '컴퓨터비전'], - '종프': ['종합설계프로젝트'], - '대회': ['경진대회'], - '튜터': ['TUTOR'], - '탑싯': ['TOPCIT'], - '시험': ['시험'], - '하계': ['여름', '하계'], - '동계': ['겨울', '동계'], - '겨울': ['겨울', '동계'], - '여름': ['여름', '하계'], - '성인지': ['성인지'], - '첨성인': ['첨성인'], - '글솦': ['글솝'], - '수꾸': ['수강꾸러미'], - '장학금': ['장학생', '장학'], - '장학생': ['장학금', '장학'], - '에이빅': ['에이빅', 'ABEEK'], - '선이수': ['선이수'], - '학자금': ['학자금'], - '오픈 소스': ['오픈소스'], - '오픈소스': ['오픈소스'], - '재이수': ['재이수'], - '재 수강': ['재이수'], - '과목': ['강의', '강좌'], - '강의': ['과목', '강좌'], - '강좌': ['강좌'], - '외국어': ['외국어'], - '부전공': ['부전공'], - '계절학기': ['수업'], - '채용': ['모집', '공고'], - '공지': ['공지사항'], - '사항': ['공지사항'], - '공지사항': ['공지사항'], - '사원': ['신입'], - '신입사원': ['신입'], - '카테캠': ['카카오', '테크', '캠퍼스'] - } - - # 키워드 매핑 적용 - for keyword, additions in keyword_map.items(): - if keyword in content: - query_nouns.extend(additions) - content = content.replace(keyword, '') - - # 3. Okt 명사 추출 - additional_nouns = [noun for noun in okt.nouns(content) if len(noun) > 1] - query_nouns.extend(additional_nouns) - - # 4. 중복 제거 및 반환 - query_nouns = list(dict.fromkeys(query_nouns)) - return query_nouns - - ###################################################################################################