forked from allanlepp/te_rss
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser_okidoki.py
21 lines (14 loc) · 1.01 KB
/
parser_okidoki.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import parsers_common
def fill_article_dict(articleDataDict, pageTree, domain):
    """Populate *articleDataDict* with offer-card data extracted from *pageTree*.

    Fills the "descriptions", "images", "titles" and "urls" entries via
    ``parsers_common.xpath_to``, passes the dict through
    ``parsers_common.article_data_dict_clean`` with a title filter, and
    returns the result of ``parsers_common.dict_reverse_order``.

    Args:
        articleDataDict: dict that receives the extracted lists.
        pageTree: parsed page handed to ``parsers_common.xpath_to``.
        domain: not used by this parser.

    Returns:
        The dict returned by ``parsers_common.dict_reverse_order``.
    """
    # Descriptions are the only query that is issued with parent=True.
    articleDataDict["descriptions"] = parsers_common.xpath_to(
        "list",
        pageTree,
        '//ul/li/div/div/div[@class="horiz-offer-card__content"]/div[@class="horiz-offer-card__desc"]',
        parent=True,
    )

    # The remaining fields are attributes read from the card's image link.
    attribute_queries = (
        ("images", '//ul/li/div/div/div[@class="horiz-offer-card__image"]/a/img/@data-src'),
        ("titles", '//ul/li/div/div/div[@class="horiz-offer-card__image"]/a/@title'),
        ("urls", '//ul/li/div/div/div[@class="horiz-offer-card__image"]/a/@href'),
    )
    for field_name, query in attribute_queries:
        articleDataDict[field_name] = parsers_common.xpath_to("list", pageTree, query)

    # Remove unwanted content, matched against titles.
    # NOTE(review): presumably "in" means substring matching -- confirm in parsers_common.
    unwanted_title_parts = ("radiaator",)
    cleaned = parsers_common.article_data_dict_clean(
        __file__, articleDataDict, unwanted_title_parts, "in", "titles"
    )

    # Reverse the order so the direction is the suitable one.
    return parsers_common.dict_reverse_order(cleaned)