-
Notifications
You must be signed in to change notification settings - Fork 0
/
testNLTKmodule.py
50 lines (37 loc) · 1.6 KB
/
testNLTKmodule.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# -*- coding: utf-8 -*-
"""
Christina Indudhara
Ignat Kulinka
PIC 16
Professor Ji
Final Project: Demonstration
"""
import FinalNLTKProj as np
# ---------------------------------------------------------------------------
# Demonstration script: builds a character co-occurrence graph for one novel
# using the helpers exposed by FinalNLTKProj (imported above as ``np``).
# ---------------------------------------------------------------------------

# Novels available for the demonstration.
books = ['The Adventures of Sherlock Holmes.txt', 'TheGreatGatsby.txt', '1984.txt', 'BraveNewWorld.txt',
         'Frankenstein.txt', 'CrimeAndPunishment.txt', 'Harry Potter 7 - Deathly Hollows.txt']

# Demonstration settings, gathered in one place so the novel, the number of
# characters shown, and the plot title can be changed without hunting through
# the pipeline below.
BOOK_INDEX = 6                                  # index into `books` of the novel to analyse
TOP_N_CHARACTERS = 15                           # how many characters end up in the graph
GRAPH_TITLE = 'Harry Potter 7 Deathly Hollows'  # title handed to np.drawGraph

# Import the .txt of the novel and store it
novel_text = np.import_txt(books[BOOK_INDEX])

# Break up the novel into a list of words
tokenized_novel = np.word_tokenize(novel_text)

# Classify each word based on the 3-class Stanford NER classifier
classified_novel = np.NER_tagger.tag(tokenized_novel)

# Extract the character list
character_list = np.find_characters(classified_novel)

# Use Counter to make a count of all the elements in the character_list
character_counts = dict(np.Counter(character_list))

# Combine the counts of full names with partial ones
combined_counts = np.flatten(character_counts)

# Sort the names by mentions in the novel
# NOTE(review): sorted_counts is not referenced again in this script; it is
# kept so the name is still available after the script has run.
sorted_counts = np.sort_counts(combined_counts)

# Tag the text in sentences
novel_sent_tagged = np.sent_tagged(novel_text)

# Make edges out of lists
# NOTE(review): edge_list (built from *all* combined counts) is not referenced
# again in this script; the graph below builds its edges from the top-N subset.
edge_list = np.edge_maker(novel_sent_tagged, combined_counts)

# Select the most-mentioned characters once and reuse the result for both the
# nodes and the edges, so the two are guaranteed to describe the same subset.
# Assumes np.top_n returns a reusable collection (not a one-shot iterator) and
# that np.char_full_name does not mutate its argument -- TODO confirm.
top_characters = np.top_n(combined_counts, TOP_N_CHARACTERS)

# Create a graph
G = np.nx.Graph()

# Add nodes and edges (names are title-cased for display)
G.add_nodes_from([name.title() for name in np.char_full_name(top_characters)])
G.add_edges_from([(left.title(), right.title())
                  for left, right in np.edge_maker(novel_sent_tagged, top_characters)])

# Draw the graph!
np.drawGraph(G, GRAPH_TITLE)