-
Notifications
You must be signed in to change notification settings - Fork 2
/
spam_detection_api.py
66 lines (49 loc) · 1.61 KB
/
spam_detection_api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from joblib import load
import nltk
import string
import re
# --- Application setup ---------------------------------------------------
# Create the FastAPI application instance.
app = FastAPI()

# Enable CORS so a browser-hosted frontend can call this API.
# NOTE(review): "*" accepts any origin; lock this down to the frontend's
# actual origin before deploying to production.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=False,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# --- Pre-processing resources (built once at import time) ----------------

# English stop words to drop after tokenization.
stop_words = set(nltk.corpus.stopwords.words("english"))

# Every printable character that is neither an ASCII letter nor a space
# (digits, punctuation, whitespace escapes, ...).
special_chars = set(string.printable) - set(string.ascii_letters) - set(" ")
escaped_chars = [re.escape(c) for c in special_chars]

# Alternation pattern matching any single special character.
# NOTE(review): `regex` is never referenced elsewhere in this file — the
# isalpha() filter in preprocess_text makes it redundant; kept for
# backward compatibility.
regex = re.compile(f"({'|'.join(escaped_chars)})")

# Porter stemmer used to reduce tokens to their stems.
stemmer = nltk.stem.porter.PorterStemmer()

# URL matcher. Bug fix: raw string — the original "...[^\s]+" non-raw
# literal contains the invalid escape sequence "\s" (SyntaxWarning on
# Python >= 3.12).
url_regex = re.compile(r"(?P<url>https?://[^\s]+)")
class EmailInput(BaseModel):
    """Request payload for the /predict endpoint."""

    # Raw email body to be classified.
    email_text: str
# --- Model loading -------------------------------------------------------
# Deserialize the pre-trained spam classifier once at import time so every
# request reuses the same in-memory model.
model_path = "spam_detection_model.joblib"
spam_model = load(model_path)
def preprocess_text(text):
    """Normalize raw email text into a whitespace-joined string of stems.

    Pipeline: lowercase -> strip URLs -> tokenize -> drop stop words ->
    drop non-alphabetic tokens -> Porter-stem each surviving token.
    """
    # Lowercase first, then blank out anything that looks like a URL.
    cleaned = url_regex.sub(" ", text.lower())
    tokens = nltk.word_tokenize(cleaned, language='english')
    # Keep only alphabetic, non-stop-word tokens and stem them in one pass.
    stems = [
        stemmer.stem(tok)
        for tok in tokens
        if tok not in stop_words and tok.isalpha()
    ]
    return ' '.join(stems)
# --- REST endpoint -------------------------------------------------------
@app.post("/predict")
def detect_spam(email_input: EmailInput):
    """Classify the submitted email text as spam or not.

    Returns a JSON body of the form {"is_spam": <bool>}.
    """
    cleaned = preprocess_text(email_input.email_text)
    # The model expects an iterable of documents, so wrap the single string.
    verdict = spam_model.predict([cleaned])[0]
    return {"is_spam": bool(verdict)}