forked from ZiwenZhuang/Web-Text_mining_Project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
hot-topic.py
24 lines (17 loc) · 717 Bytes
/
hot-topic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import os
from Spiders import *
from Prasers import MainRanker
if __name__ == '__main__':
    # Script entry point: run each crawler in turn, then report completion.

    # Stage 1 -- crawl pages from data.gov.  The path handed to start() is
    # presumably the directory the spider saves into (confirm in Spiders).
    DataGovSpider().start('./data/DataGov')

    # Stage 2 -- crawl pages from USnews, following the same pattern.
    USnewsSpider().start('./data/USnews')

    # Ranking stage (currently disabled).  These calls are kept for reference;
    # note that the ranker is pointed at './data/crawled', which is not one of
    # the two paths passed to the spiders above.
    # ranker = MainRanker('./data/crawled', 29, True)
    # print(ranker.simple_BOW_rank())
    # print(ranker.BOW_stem_stop_rank())
    # print(ranker.POS_rank('NNP'))
    # print(ranker.ngrams_rank())
    # print(ranker.tfidf_rank(myhottest=30, stemmer_name='Snowball', para='"english"', to_remove=['student', 'school', 'safe', 'educ', 'teacher', 'learn', ]))

    # Final status line; the message text is emitted exactly as before.
    script_name = os.path.basename(__file__)
    print('Program ' + script_name + ' ends sucessfully')