-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.py
127 lines (121 loc) · 4.15 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# config.py
import os
from dotenv import load_dotenv
from typing import Dict
# Pull variables from a .env file into the environment before any setting
# below is read.
load_dotenv()

# API Configuration
# The Groq API key is mandatory: importing this module fails with ValueError
# when the variable is unset or empty.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    raise ValueError("GROQ_API_KEY not found in environment variables")
# Website configurations
# Two date layouts are shared by every site below (strftime-style directives):
#   numeric year-month-day, e.g. 2024-01-31
_ISO_DATE_FORMAT = "%Y-%m-%d"
#   full month name, day, four-digit year, e.g. January 31, 2024
_LONG_DATE_FORMAT = "%B %d, %Y"

# One entry per news source, keyed by a short site identifier. Every entry
# carries the same seven string fields: a display name, the listing URL, four
# "*_selector" values (presumably CSS selectors used by the scraper -- confirm
# against the consumer) and the date layout expected for that site.
WEBSITE_CONFIGS: Dict[str, Dict[str, str]] = {
    "ai_magazine": {
        "name": "AI Magazine",
        "url": "https://aimagazine.com",
        "article_selector": "article.post",
        "title_selector": "h2.title",
        "content_selector": "div.content",
        "date_selector": "time.published",
        "date_format": _ISO_DATE_FORMAT,
    },
    "analytics_insight": {
        "name": "Analytics Insight",
        "url": "https://www.analyticsinsight.net",
        "article_selector": "div.td_module_10",
        "title_selector": "h3.entry-title",
        "content_selector": "div.td-post-content",
        "date_selector": "time.entry-date",
        "date_format": _LONG_DATE_FORMAT,
    },
    "ai_trends": {
        "name": "AI Trends",
        "url": "https://www.aitrends.com",
        "article_selector": "article.post",
        "title_selector": "h2.entry-title",
        "content_selector": "div.entry-content",
        "date_selector": "time.entry-date",
        "date_format": _ISO_DATE_FORMAT,
    },
    "mit_news": {
        "name": "MIT News - AI",
        "url": "https://news.mit.edu/topic/artificial-intelligence2",
        "article_selector": "article.article-item",
        "title_selector": "h3.title",
        "content_selector": "div.article-content",
        "date_selector": "time.article-date",
        "date_format": _LONG_DATE_FORMAT,
    },
    "wired": {
        "name": "Wired - AI",
        "url": "https://www.wired.com/tag/artificial-intelligence",
        "article_selector": "div.summary-item",
        "title_selector": "h3.summary-item__hed",
        "content_selector": "div.body__inner-container",
        "date_selector": "time.summary-item__timestamp",
        "date_format": _ISO_DATE_FORMAT,
    },
    "dataversity": {
        "name": "Dataversity",
        "url": "https://www.dataversity.net/category/artificial-intelligence",
        "article_selector": "article.post",
        "title_selector": "h2.entry-title",
        "content_selector": "div.entry-content",
        "date_selector": "time.entry-date",
        "date_format": _LONG_DATE_FORMAT,
    },
    "openai": {
        "name": "OpenAI Blog",
        "url": "https://openai.com/blog",
        "article_selector": "article.post",
        "title_selector": "h2.post-title",
        "content_selector": "div.post-content",
        "date_selector": "time.post-date",
        "date_format": _ISO_DATE_FORMAT,
    },
    "ai_news": {
        "name": "AI News",
        "url": "https://artificialintelligence-news.com",
        "article_selector": "article.type-post",
        "title_selector": "h2.entry-title",
        "content_selector": "div.entry-content",
        "date_selector": "time.entry-date",
        "date_format": _LONG_DATE_FORMAT,
    },
    "emerj": {
        "name": "Emerj",
        "url": "https://emerj.com/ai-sector-overviews",
        "article_selector": "article.post",
        "title_selector": "h2.entry-title",
        "content_selector": "div.entry-content",
        "date_selector": "time.entry-date",
        "date_format": _LONG_DATE_FORMAT,
    },
    "extreme_tech": {
        "name": "ExtremeTech",
        "url": "https://www.extremetech.com/tag/artificial-intelligence",
        "article_selector": "article.article",
        "title_selector": "h2.title",
        "content_selector": "div.entry-content",
        "date_selector": "time.date",
        "date_format": _ISO_DATE_FORMAT,
    },
}
# Additional configuration settings
# Request/retry knobs for scraping; the two durations are in seconds.
SCRAPING_SETTINGS = {
    # seconds
    "request_timeout": 30,
    "max_retries": 3,
    # seconds
    "retry_delay": 5,
    # Desktop Chrome-on-Windows user-agent string, split only for line length;
    # the adjacent literals concatenate into a single value.
    "user_agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
}
# Cache settings
CACHE_SETTINGS: Dict[str, int] = {
    # One hour, expressed in seconds.
    "cache_duration": 60 * 60,
    "max_cache_items": 1000,
}
# Article processing settings
ARTICLE_SETTINGS: Dict[str, int] = {
    # characters
    "max_summary_length": 500,
    # characters
    "min_article_length": 100,
    "max_keywords": 10,
}