-
Notifications
You must be signed in to change notification settings - Fork 1
/
hashtag_cloud.py
51 lines (38 loc) · 1.34 KB
/
hashtag_cloud.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# coding: utf-8
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from extracthashtags import HashtagsExtraction
import codecs
# Location of the text file holding the raw tweets (the value here looks like
# a placeholder -- point it at a real file before running).
source_file = '/path/to/file/containing/tweets'

# Hashtags are only extracted from the file containing tweets.
# The result of extractHashtags() is handed straight to codecs.open() below,
# i.e. it is used as the path of a UTF-8 encoded text file.
extracted_hashtags = HashtagsExtraction(source_file).extractHashtags()

# Read via a context manager so the file handle is closed deterministically
# rather than being left open until garbage collection.
with codecs.open(extracted_hashtags, 'r', 'utf-8') as hashtags_file:
    # One list entry per line of the extracted-hashtags file.
    hashtags_text = hashtags_file.read().splitlines()
def grey_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a randomly chosen grey tone as an HSL colour string.

    This function is passed as ``color_func`` to ``WordCloud.recolor`` later
    in the script. Only ``random_state`` influences the result; the other
    arguments are accepted to satisfy the callback signature and are ignored.

    Args:
        word: The word being coloured (unused).
        font_size: Font size of the word (unused).
        position: Position of the word in the image (unused).
        orientation: Orientation of the word (unused).
        random_state: Optional object exposing ``randint(a, b)`` (for example
            a ``random.Random`` instance). When given, the lightness is drawn
            from it so that a seeded recolouring is reproducible; when
            ``None``, the global ``random`` module is used.
        **kwargs: Any further keyword arguments supplied by the caller
            (ignored).

    Returns:
        A string of the form ``"hsl(0, 0%, N%)"`` where ``N`` is an integer
        lightness between 0 and 60 inclusive.
    """
    # Imported locally because the file's import section does not bring in
    # `random`; without this the call fails with NameError.
    import random

    rng = random if random_state is None else random_state
    return "hsl(0, 0%%, %d%%)" % rng.randint(0, 60)
# Upper bound on how many hashtag lines are fed into the word cloud.
samples = 2000

formatted_text = []
for raw_line in hashtags_text[:samples]:
    try:
        # Interpret backslash escape sequences contained in the line
        # (presumably the extraction step writes non-ASCII characters as
        # "\uXXXX" escapes -- confirm against HashtagsExtraction).
        # Going through ASCII bytes works on both Python 2 and Python 3;
        # a bare `str.decode` only exists on Python 2.
        decoded_line = raw_line.encode('ascii').decode('unicode-escape')
    except UnicodeError:
        # Best effort: skip lines that contain non-ASCII characters or a
        # malformed escape sequence instead of aborting the whole run.
        continue
    formatted_text.append(decoded_line)

# WordCloud.generate() is given a single string, so join the lines with spaces.
formatted_text = ' '.join(formatted_text)
'''
WordCloud library is used to plot the word cloud
'''
# Settings handed to the WordCloud constructor: font file, canvas colour and
# size, font-size limits, and the seed used while generating the cloud.
cloud_options = {
    'font_path': '/home/sivasurya/fonts/steelfish.ttf',
    'background_color': 'white',
    'min_font_size': 25,
    'max_font_size': 250,
    'random_state': 10,
    'width': 1200,
    'height': 800,
}

# Build the cloud from the space-joined hashtag text.
wc = WordCloud(**cloud_options).generate(formatted_text)

# Array rendering of the cloud taken before recolouring (not used further
# in this script).
default_colors = wc.to_array()

# Recolour with the grey palette, display it, then save the image to disk.
recolored_cloud = wc.recolor(color_func=grey_color_func, random_state=3)
plt.imshow(recolored_cloud)
wc.to_file("hashtags_cloud.png")

# Hide the axes and open the plot window.
plt.axis("off")
plt.show()