-
Notifications
You must be signed in to change notification settings - Fork 3
/
app.py
81 lines (65 loc) · 2.92 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import flask
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Flask application; HTML templates are loaded from the ``templates`` folder.
app = flask.Flask(__name__, template_folder='templates')

# Movie table. The code below reads its ``soup``, ``title``, ``homepage``
# and ``release_date`` columns.
df2 = pd.read_csv('./model/tmdb.csv')

tfidf = TfidfVectorizer(stop_words='english', analyzer='word')
# Construct the required TF-IDF matrix by fitting and transforming the data.
# Missing soups become empty documents: the vectorizer raises on NaN input,
# which would otherwise stop the app from starting.
tfidf_matrix = tfidf.fit_transform(df2['soup'].fillna(''))
print(tfidf_matrix.shape)

# Construct the cosine similarity matrix: entry [i, j] compares the soups of
# rows i and j of df2.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
print(cosine_sim.shape)

df2 = df2.reset_index()

# Map each title to its row position. Deduplicate on the *index* (titles),
# keeping the first occurrence: Series.drop_duplicates() only looks at the
# values, which are unique positions, so it would leave repeated titles in
# place and a lookup of such a title would return several positions.
indices = pd.Series(df2.index, index=df2['title'])
indices = indices[~indices.index.duplicated(keep='first')]

# List of all movie titles, in row order (duplicates included).
all_titles = df2['title'].tolist()
def get_recommendations(title):
    """Return the ten movies most similar to ``title``.

    The row position of ``title`` is looked up in ``indices``; every movie is
    then ranked by its ``cosine_sim`` score against that row, and the ten
    best-scoring movies other than the queried one are returned.

    Args:
        title: Movie title; must be a label of ``indices``.

    Returns:
        A DataFrame with the columns ``Title``, ``Homepage`` and
        ``ReleaseDate`` (copied from ``df2``), ordered from most to least
        similar and carrying the matching ``df2`` row labels as its index.

    Raises:
        KeyError: If ``title`` is not a label of ``indices``.

    Side effects:
        Rebinds the module-level ``sim_scores`` to the list of
        ``(row position, score)`` pairs of the returned movies and prints
        that list to stdout.
    """
    # NOTE: the result page reads this global after calling us, so it has to
    # stay. It is shared by all requests, so concurrent requests can
    # overwrite each other's scores.
    global sim_scores

    # Get the row position of the movie that matches the title. A title that
    # occurs more than once in ``indices`` yields a Series of positions; use
    # the first one so that ``cosine_sim[idx]`` is always a single row.
    idx = indices[title]
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Rank all movies by their similarity to the requested one, best first.
    ranked = sorted(
        enumerate(cosine_sim[idx]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Keep the ten best matches, leaving out the movie itself by position.
    # Simply dropping the first entry is not enough: the movie is not
    # guaranteed to rank first (a movie with an identical soup and a lower
    # row position ties with it and sorts ahead; a movie whose soup has no
    # usable terms scores 0 against itself).
    sim_scores = [pair for pair in ranked if pair[0] != idx][:10]

    # Print similarity scores.
    print("\n movieId score")
    for pair in sim_scores:
        print(pair)

    # Build the result from the selected rows, in ranking order.
    movie_indices = [pair[0] for pair in sim_scores]
    column_names = {
        'title': 'Title',
        'homepage': 'Homepage',
        'release_date': 'ReleaseDate',
    }
    selected = df2.iloc[movie_indices][list(column_names)]
    return selected.rename(columns=column_names)
# Set up the main route
@app.route('/', methods=['GET', 'POST'])
def main():
    """Serve the search form and the recommendation results.

    * Any request other than POST renders ``index.html``.
    * POST reads the ``movie_name`` form field, normalises its whitespace
      and renders ``notFound.html`` when the name is not in ``all_titles``,
      otherwise ``found.html`` with the recommendations for that movie.

    Returns:
        The rendered template for the case that applies.
    """
    # Flask also routes HEAD requests here because GET is allowed. Treat
    # everything that is not a form submission like GET so that the view
    # never falls through and returns None.
    if flask.request.method != 'POST':
        return flask.render_template('index.html')

    # Trim the submitted name and collapse inner runs of whitespace.
    m_name = " ".join(flask.request.form['movie_name'].split())
    if m_name not in all_titles:
        return flask.render_template('notFound.html', name=m_name)

    result_final = get_recommendations(m_name)

    # Read the columns by name rather than by integer position within each
    # row; integer keys on a label-indexed Series are deprecated in pandas.
    names = result_final['Title'].tolist()
    releaseDate = result_final['ReleaseDate'].tolist()
    # A homepage whose text form has at most three characters is replaced by
    # a "#" placeholder link. This also covers missing values, since
    # str(NaN) is the three-character string 'nan'.
    homepage = [
        url if len(str(url)) > 3 else "#"
        for url in result_final['Homepage']
    ]

    # ``sim_scores`` is the module-level list that get_recommendations()
    # rebinds on every call.
    return flask.render_template(
        'found.html',
        movie_names=names,
        movie_homepage=homepage,
        search_name=m_name,
        movie_releaseDate=releaseDate,
        movie_simScore=sim_scores,
    )
if __name__ == "__main__":
    # Run Flask's built-in server on the loopback interface, port 8080, with
    # debug mode enabled, when this file is executed as a script.
    app.run(debug=True, port=8080, host="127.0.0.1")