-
Notifications
You must be signed in to change notification settings - Fork 0
/
similar_articles.py
54 lines (42 loc) · 1.61 KB
/
similar_articles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import requests
from newspaper import Article
import textrazor
from pprint import pprint
from datetime import datetime, timedelta
import json
def get(url='https://www.politico.com/news/2020/02/21/bernie-sanders-condemns-russian-116640'):
    """Search NewsAPI for articles related to the one at *url*.

    The article is downloaded and parsed with ``newspaper``. The noun phrases
    that TextRazor extracts from the article's title are joined into a search
    query, and NewsAPI is asked for matching articles published within two
    days either side of the article's publish date, sorted by popularity.

    Args:
        url: Address of the article to find related coverage for.

    Returns:
        The decoded JSON body of the NewsAPI ``/v2/everything`` response, or
        an empty dict when the article could not be downloaded or parsed.
    """
    try:
        article = Article(url)
        article.download()
        article.parse()
    except Exception:
        # Best-effort: any download/parse failure means "no results". Unlike
        # a bare ``except:`` this lets KeyboardInterrupt/SystemExit through.
        return {}

    title = article.title
    published = article.publish_date

    # Search window: two days before and after the publish date.
    window = timedelta(days=2)
    try:
        window_start = published - window
    except TypeError:
        # publish_date does not support date arithmetic (e.g. it is None),
        # so centre the window on the current time instead.
        published = datetime.now()
        window_start = published - window
    window_end = published + window
    date_from = window_start.strftime('%Y-%m-%d')
    date_to = window_end.strftime('%Y-%m-%d')

    # NOTE(review): API keys are hard-coded in source; they should be loaded
    # from configuration or the environment and rotated.
    # Spare key carried over from the original code (previously an unused
    # local named alt_api_key):
    #   'feca0c9db3d492ac63a83761a41d003f306c5acfff3b828b8c1319da'
    textrazor.api_key = '3db6ae4b1e8b2e04ee07657ca98d0de9eda7b885b3043dc11ab9b230'
    client = textrazor.TextRazor(extractors=["words", "phrases"])
    analysis = client.analyze(title)

    # Each noun phrase is recovered from the title by slicing from the start
    # offset of its first word to the end offset of its last word.
    query = ' '.join(
        title[phrase.words[0].input_start_offset:phrase.words[-1].input_end_offset]
        for phrase in analysis.noun_phrases()
    )
    print(query)

    news_parameters = {
        'q': query,
        'from': date_from,
        'to': date_to,
        'sortBy': 'popularity',
        'apiKey': 'a02790e5a3af4b5f8683318c276e702d',
    }
    # HTTPS keeps the API key out of cleartext traffic; the timeout stops a
    # stalled connection from blocking the caller indefinitely.
    response = requests.get(
        'https://newsapi.org/v2/everything',
        params=news_parameters,
        timeout=10,
    )
    return json.loads(response.text)