-
Notifications
You must be signed in to change notification settings - Fork 0
/
arteparser.vala
337 lines (288 loc) · 10.9 KB
/
arteparser.vala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
/*
* Totem Arte Plugin allows you to watch streams from arte.tv
* Copyright (C) 2009, 2010, 2011, 2012 Simon Wenner <simon@wenner.ch>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
* The Totem Arte Plugin project hereby grants permission for non-GPL compatible
* GStreamer plugins to be used and distributed together with GStreamer, Totem
* and Totem Arte Plugin. This permission is above and beyond the permissions
* granted by the GPL license by which Totem Arte Plugin is covered.
* If you modify this code, you may extend this exception to your version of the
* code, but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version.
*
*/
using GLib;
using Soup;
using Json;
/**
 * Base class of the Arte feed parsers.
 *
 * The default {@link parse} implementation downloads the XML document
 * referenced by xml_fr / xml_de and feeds it to a GLib markup parser whose
 * callbacks ({@link open_tag}, {@link close_tag}, {@link process_text}) are
 * meant to be overridden by subclasses.
 */
public abstract class ArteParser : GLib.Object
{
    public bool has_data { get; protected set; default = false; } // more data available

    // URLs requested by parse () for the French and the German feed.
    protected string xml_fr;
    protected string xml_de;
    // Result list; replaced by a fresh list on every call to parse ().
    protected GLib.SList<Video> videos;

    // Callback table handed to MarkupParseContext; passthrough and error
    // callbacks are not used.
    private const MarkupParser parser = {
        open_tag,
        close_tag,
        process_text,
        null,
        null
    };

    public ArteParser () {}

    /** Puts the parser back into its initial state. No-op by default. */
    public virtual void reset () {}

    /** Whether the parsed results may contain duplicates. */
    public virtual bool has_duplicates () { return false; }

    /** Error threshold of this parser; 0 by default. */
    public virtual uint get_error_threshold () { return 0; }

    /**
     * Moves on to the next data source.
     *
     * @return the current value of has_data
     */
    public virtual bool advance ()
    {
        return has_data;
    }

    /**
     * Downloads and parses the feed for the given language.
     *
     * @param lang Language.GERMAN selects xml_de, anything else xml_fr
     * @return the internal video list (owned by the parser, replaced on the
     *         next call); empty when has_data is false
     * @throws IOError when the HTTP status is not OK
     * @throws MarkupError when the downloaded document cannot be parsed
     */
    public virtual unowned GLib.SList<Video> parse (Language lang) throws MarkupError, IOError
    {
        videos = new GLib.SList<Video> ();

        if (!has_data) {
            return videos;
        }

        Soup.Message msg;
        if (lang == Language.GERMAN) {
            msg = new Soup.Message ("GET", xml_de);
        } else {
            msg = new Soup.Message ("GET", xml_fr);
        }

        Soup.Session session = create_session ();
        session.send_message (msg);

        if (msg.status_code != Soup.Status.OK) {
            throw new IOError.HOST_NOT_FOUND ("videos.arte.tv could not be accessed.");
        }

        var context = new MarkupParseContext (parser,
                MarkupParseFlags.TREAT_CDATA_AS_TEXT, this, null);
        context.parse ((string) msg.response_body.flatten ().data,
                (ssize_t) msg.response_body.length);
        context.end_parse ();

        return videos;
    }

    /** Markup callback for an opening tag. No-op by default. */
    protected virtual void open_tag (MarkupParseContext ctx,
            string elem,
            string[] attribute_names,
            string[] attribute_values) throws MarkupError {}

    /** Markup callback for a closing tag. No-op by default. */
    protected virtual void close_tag (MarkupParseContext ctx,
            string elem) throws MarkupError {}

    /** Markup callback for character data. No-op by default. */
    protected virtual void process_text (MarkupParseContext ctx,
            string text,
            size_t text_len) throws MarkupError {}

    /**
     * Escapes ampersands so that the text can be embedded in markup.
     *
     * @param str text that may contain bare '&' characters
     * @return a copy of str with every '&' replaced by the '&amp;' entity
     */
    protected string sanitise_markup (string str)
    {
        // The previous replacement string was "&", which made this a no-op.
        return str.replace ("&", "&amp;");
    }
}
/**
 * Parser for the Arte+7 JSON guide.
 *
 * The complete video list is fetched with a single request, so has_data is
 * cleared after one successful call to {@link parse}.
 */
public class ArteJSONParser : ArteParser
{
    private string json_url_fr = "http://www.arte.tv/guide/fr/plus7.json";
    private string json_url_de = "http://www.arte.tv/guide/de/plus7.json";

    public ArteJSONParser ()
    {
        reset ();
    }

    /** Marks the JSON document as available again. */
    public override void reset ()
    {
        has_data = true;
    }

    public override uint get_error_threshold ()
    {
        return 1; // no errors are tolerated
    }

    /**
     * Downloads the JSON guide for the given language and converts every
     * entry of its "videos" array into a Video.
     *
     * @param lang Language.GERMAN selects the German guide, anything else
     *             the French one
     * @return the internal video list (owned by the parser)
     * @throws IOError when the HTTP status is not OK
     * @throws MarkupError when the response is not valid JSON or does not
     *         have the expected object / "videos" array structure
     */
    public override unowned GLib.SList<Video> parse (Language lang) throws MarkupError, IOError
    {
        videos = new GLib.SList<Video> ();

        Soup.Message msg;
        if (lang == Language.GERMAN) {
            msg = new Soup.Message ("GET", json_url_de);
        } else {
            msg = new Soup.Message ("GET", json_url_fr);
        }

        Soup.Session session = create_session ();
        session.send_message (msg);

        if (msg.status_code != Soup.Status.OK) {
            throw new IOError.HOST_NOT_FOUND ("videos.arte.tv could not be accessed.");
        }

        var parser = new Json.Parser ();
        try {
            parser.load_from_data ((string) msg.response_body.flatten ().data, -1);
        } catch (GLib.Error e) {
            throw new GLib.MarkupError.PARSE ("Json parsing failed: '%s'", e.message);
        }

        // An empty or unexpected document has no usable root; report it
        // instead of dereferencing a missing node.
        unowned Json.Node? root = parser.get_root ();
        if (root == null || root.get_node_type () != Json.NodeType.OBJECT) {
            throw new GLib.MarkupError.PARSE ("Json parsing failed: '%s'",
                    "root element is not an object");
        }
        var root_object = root.get_object ();

        unowned Json.Node? videos_node = root_object.get_member ("videos");
        if (videos_node == null || videos_node.get_node_type () != Json.NodeType.ARRAY) {
            throw new GLib.MarkupError.PARSE ("Json parsing failed: '%s'",
                    "'videos' array is missing");
        }
        var video_array = videos_node.get_array ();

        // Compile the pattern once instead of once per video.
        Regex? remaining_regex = null;
        try {
            remaining_regex = new Regex ("([0-9]+)[:h]([0-9]+)");
        } catch (GLib.RegexError e) {
            // Reported per video below, as before.
            remaining_regex = null;
        }

        foreach (var video in video_array.get_elements ()) {
            if (video.get_node_type () != Json.NodeType.OBJECT) {
                GLib.warning ("Skipping malformed video entry.");
                continue;
            }
            var v = video.get_object ();

            var current_video = new Video ();
            current_video.title = v.get_string_member ("title");
            current_video.page_url = v.get_string_member ("url");
            if (!current_video.page_url.has_prefix ("http://")
                    && !current_video.page_url.has_prefix ("https://")) {
                // this URL used to be relative, check just in case
                current_video.page_url = "http://www.arte.tv" + current_video.page_url;
            }
            current_video.image_url = v.get_string_member ("image_url");
            current_video.desc = v.get_string_member ("desc");
            // TODO current_video.publication_date

            string? end_time_str = null;
            if (v.has_member ("video_rights_until")) {
                end_time_str = v.get_string_member ("video_rights_until");
            }

            // The value is added to the current time as an hours/minutes offset.
            int remaining = parse_remaining_seconds (remaining_regex, end_time_str);
            if (remaining >= 0) {
                current_video.offline_date = GLib.TimeVal ();
                current_video.offline_date.get_current_time ();
                current_video.offline_date.tv_sec += remaining;
            } else {
                GLib.warning ("Offline date parsing failed.");
            }

            videos.append (current_video);
        }

        has_data = false;
        return videos;
    }

    /**
     * Converts a "<hours>:<minutes>" or "<hours>h<minutes>" fragment found in
     * text into seconds.
     *
     * @return the number of seconds, or -1 when regex or text is null or
     *         the text does not match
     */
    private static int parse_remaining_seconds (Regex? regex, string? text)
    {
        if (regex == null || text == null) {
            return -1;
        }

        MatchInfo match;
        if (!regex.match (text, 0, out match)) {
            return -1;
        }

        int hours = int.parse (match.fetch (1));
        int minutes = int.parse (match.fetch (2));
        return hours * 60 * 60 + minutes * 60;
    }
}
/**
 * Parser for the official Arte+7 RSS feeds.
 *
 * Walks through the feeds listed in feeds_fr / feeds_de one at a time (see
 * {@link advance}) and builds a Video from every "item" element.
 */
public class ArteRSSParser : ArteParser
{
    // Video being assembled while inside an "item" element, null otherwise.
    private Video current_video = null;
    // Name of the element whose text is currently being read, or null.
    private string current_data = null;

    /* official RSS feeds, may contain duplicates */
    private const string[] feeds_fr = {
        "http://www.arte.tv/papi/tvguide-flow/feeds/videos/fr.xml?type=ARTE_PLUS_SEVEN"
    };
    private const string[] feeds_de = {
        "http://www.arte.tv/papi/tvguide-flow/feeds/videos/de.xml?type=ARTE_PLUS_SEVEN"
    };
    private const uint feed_count = feeds_fr.length;
    private uint feed_idx = 0;

    public ArteRSSParser ()
    {
        xml_fr = feeds_fr[0];
        xml_de = feeds_de[0];
        reset ();
    }

    /** Marks data as available and restarts at the first feed index. */
    public override void reset ()
    {
        has_data = true;
        feed_idx = 0;
    }

    public override bool has_duplicates () { return true; }

    /** Half of the number of feeds, rounded down. */
    public override uint get_error_threshold ()
    {
        return (uint)(feed_count * 0.5);
    }

    /**
     * Switches to the next feed, if there is one.
     *
     * @return true when another feed has been selected
     */
    public override bool advance ()
    {
        feed_idx++;
        has_data = feed_idx < feed_count;
        if (has_data) {
            set_feed (feed_idx);
        }
        return has_data;
    }

    /** Selects the feed pair at position idx as the one to download. */
    private void set_feed (uint idx)
    {
        xml_de = feeds_de[idx];
        xml_fr = feeds_fr[idx];
        feed_idx = idx;
    }

    /**
     * Starts a new Video on "item", reads the thumbnail URL from
     * "media:thumbnail" and remembers any other element name so that
     * process_text () knows which field the following text belongs to.
     */
    protected override void open_tag (MarkupParseContext ctx,
            string elem,
            string[] attribute_names,
            string[] attribute_values) throws MarkupError
    {
        switch (elem) {
            case "item":
                current_video = new Video ();
                break;
            case "media:thumbnail":
                if (current_video != null) {
                    for (int i = 0; i < attribute_names.length; i++) {
                        if (attribute_names[i] == "url") {
                            current_video.image_url = attribute_values[i];
                            break;
                        }
                    }
                }
                break;
            default:
                current_data = elem;
                break;
        }
    }

    /** Stores the finished Video on "item"; otherwise clears current_data. */
    protected override void close_tag (MarkupParseContext ctx,
            string elem) throws MarkupError
    {
        switch (elem) {
            case "item":
                if (current_video != null) {
                    videos.append (current_video);
                    current_video = null;
                }
                break;
            default:
                current_data = null;
                break;
        }
    }

    /** Copies the text of the current element into the matching Video field. */
    protected override void process_text (MarkupParseContext ctx,
            string text,
            size_t text_len) throws MarkupError
    {
        if (current_video == null || text_len == 0) {
            return;
        }

        var my_text = text;
        if (text.has_suffix ("]]>")) {
            // FIXME Why is the end of the CDATA tag kept?
            // We do use MarkupParseFlags.TREAT_CDATA_AS_TEXT...
            my_text = text.slice (0, -3);
        }

        switch (current_data) {
            case "title":
                current_video.title = my_text;
                break;
            case "link":
                current_video.page_url = my_text;
                break;
            case "description":
                current_video.desc = sanitise_markup (my_text);
                break;
            case "dcterms:valid":
                apply_validity_period (my_text);
                break;
        }
    }

    /**
     * Extracts the publication and offline dates from a "dcterms:valid"
     * value and stores them in current_video (which must not be null).
     *
     * Values that do not match the expected pattern are reported with a
     * warning and leave the dates untouched.
     */
    private void apply_validity_period (string value)
    {
        MatchInfo match;
        // example value:
        // start=2014-11-13T06:44+00:00;end=2014-11-20T06:44+00:00;scheme=W3C-DTF
        try {
            var regex = new Regex ("start=([0-9T\\-:+]+);end=([0-9T\\-:+]+);");
            if (!regex.match (value, 0, out match)) {
                // Without a match, fetch () would hand back null below.
                GLib.warning ("Date parsing failed.");
                return;
            }
        } catch (GLib.RegexError e) {
            GLib.warning ("Date parsing failed.");
            return;
        }

        // Results are already in the ISO8601 format, but GLib requires seconds...
        var pub_date = match.fetch (1).replace ("+00:00", ":00+00:00");
        var off_date = match.fetch (2).replace ("+00:00", ":00+00:00");

        if (!current_video.publication_date.from_iso8601 (pub_date)) {
            GLib.warning ("Publication date '%s' parsing failed.", pub_date);
        }
        if (!current_video.offline_date.from_iso8601 (off_date)) {
            GLib.warning ("Offline date '%s' parsing failed.", off_date);
        }
    }
}