2
* arch-tag: Implementation of podcast parse
4
* Copyright (C) 2005 Renato Araujo Oliveira Filho - INdT <renato.filho@indt.org.br>
6
* This program is free software; you can redistribute it and/or modify
7
* it under the terms of the GNU General Public License as published by
8
* the Free Software Foundation; either version 2 of the License, or
9
* (at your option) any later version.
11
* This program is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU General Public License for more details.
16
* You should have received a copy of the GNU General Public License
17
* along with this program; if not, write to the Free Software
18
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24
#include <libxml/entities.h>
25
#include <libxml/SAX.h>
26
#include <libxml/parserInternals.h>
27
#include <libgnomevfs/gnome-vfs.h>
31
#include "rb-podcast-parse.h"
33
#define BUFFER_SIZE 256
35
/*
 * Per-parse state handed to the SAX callbacks in this file (it is installed
 * as the libxml parser's userData by rb_podcast_parse_load_feed()).
 * NOTE(review): other members used by the callbacks (in_unknown_elt,
 * prop_value, state) are declared on lines not visible in this view.
 */
struct RBPoadcastLoadContext
38
xmlParserCtxtPtr xmlctx;
40
/* Channel record being filled in; assigned by rb_podcast_parse_load_feed(). */
RBPodcastChannel *channel_data;
41
/* Item currently being built; allocated when an "item" element opens. */
RBPodcastItem *item_data;
44
/*
 * Parser states. The *_PROPERTY states mean "inside a recognised child
 * element whose text is being accumulated"; the end-element callback moves
 * back to the corresponding parent state.
 */
RB_PODCAST_PARSER_STATE_START,
45
RB_PODCAST_PARSER_STATE_RSS,
46
RB_PODCAST_PARSER_STATE_CHANNEL,
47
RB_PODCAST_PARSER_STATE_CHANNEL_PROPERTY,
48
RB_PODCAST_PARSER_STATE_IMG,
49
RB_PODCAST_PARSER_STATE_IMG_PROPERTY,
50
RB_PODCAST_PARSER_STATE_ITEM,
51
RB_PODCAST_PARSER_STATE_ITEM_PROPERTY,
52
RB_PODCAST_PARSER_STATE_END,
56
/* Forward declarations of the file-local helpers and SAX callbacks. */

/* Element-name filters: is `name` a channel/item child this parser handles? */
static gboolean rb_validate_channel_propert (const char *name);
57
static gboolean rb_validate_item_propert (const char *name);
58
/* Text-to-number converters for pubDate and itunes:duration values. */
static uintmax_t rb_podcast_parse_date (const char* date_str);
59
static gulong rb_podcast_parse_time (const char *time_str);
60
/* SAX callbacks; `ctx` is the parser's userData. */
static void rb_podcast_parser_start_element (struct RBPoadcastLoadContext* ctx, const char *name, const char **attrs);
61
static void rb_podcast_parser_end_element (struct RBPoadcastLoadContext* ctx, const char *name);
62
static void rb_podcast_parser_characters (struct RBPoadcastLoadContext* ctx, const char *data, guint len);
63
/* Store one named value into the current channel / current item. */
static void rb_set_channel_value (struct RBPoadcastLoadContext* ctx, const char* name, const char* value);
64
static void rb_set_item_value (struct RBPoadcastLoadContext* ctx, const char* name, const char* value);
66
/*
 * Allocate a new RBPodcastItem with every field zeroed (g_new0).
 * Called when an "item" element opens inside a channel.
 */
static RBPodcastItem *
67
rb_podcast_initializa_item ()
69
RBPodcastItem *data = g_new0 (RBPodcastItem, 1);
74
/*
 * Store `value` into the channel field selected by the element name `name`.
 *
 * ctx:   load context; ctx->channel_data is the record written to.
 * name:  element name ("title", "language", "itunes:subtitle", ...), or the
 *        internal key "img" used by the end-element callback for image URLs.
 * value: text of the element.
 *
 * The text is duplicated and stripped of leading/trailing whitespace; string
 * fields take ownership of that copy.
 * NOTE(review): previous field values are overwritten without being freed,
 * and branches that do not store the copy (pubDate) must release it; the
 * lines that would show this are not visible here, so confirm.
 */
rb_set_channel_value (struct RBPoadcastLoadContext* ctx, const char* name, const char* value)
80
dvalue = xmlCharStrdup (value);
81
/* Strip surrounding whitespace in place. */
g_strstrip ((char *)dvalue);
83
if (!strcmp (name, "title")) {
84
ctx->channel_data->title = dvalue;
85
} else if (!strcmp (name, "language")) {
86
ctx->channel_data->lang = dvalue;
87
} else if (!strcmp (name, "itunes:subtitle")) {
88
ctx->channel_data->subtitle = dvalue;
89
} else if (!strcmp (name, "itunes:summary")) {
90
ctx->channel_data->summary = dvalue;
91
} else if (!strcmp (name, "description")) {
92
ctx->channel_data->description = dvalue;
93
/* Both element names feed the same author field; the last one seen wins. */
} else if (!strcmp (name, "generator") ||
94
!strcmp (name, "itunes:author")) {
95
ctx->channel_data->author = dvalue;
96
} else if (!strcmp (name, "webMaster")) {
97
ctx->channel_data->contact = dvalue;
98
} else if (!strcmp (name, "pubDate")) {
99
/* Stored as a number, not as the string copy. */
ctx->channel_data->pub_date = rb_podcast_parse_date ((char *)dvalue);
101
} else if (!strcmp (name, "copyright")) {
102
ctx->channel_data->copyright = dvalue;
103
} else if (!strcmp (name, "img")) {
104
ctx->channel_data->img = dvalue;
112
/*
 * Store `value` into the field of the item under construction
 * (ctx->item_data) selected by `name`.
 *
 * `name` is either a child element name of the item or one of the
 * attribute names "url" / "length" forwarded from an "enclosure" element by
 * the start-element callback.
 *
 * The text is duplicated and stripped of surrounding whitespace; string
 * fields take ownership of the copy, numeric fields are parsed from it.
 * NOTE(review): the numeric branches (pubDate, itunes:duration, length) do
 * not store the copy, so it has to be freed there; those lines are not
 * visible here, so confirm.
 */
rb_set_item_value (struct RBPoadcastLoadContext* ctx, const char* name, const char* value)
115
dvalue = xmlCharStrdup (value);
116
g_strstrip ((char *)dvalue);
118
if (!strcmp (name, "title")) {
119
ctx->item_data->title = dvalue;
120
} else if (!strcmp (name, "url")) {
121
ctx->item_data->url = dvalue;
122
} else if (!strcmp (name, "pubDate")) {
123
ctx->item_data->pub_date = rb_podcast_parse_date ((char *)dvalue);
125
} else if (!strcmp (name, "description")) {
126
ctx->item_data->description = dvalue;
127
} else if (!strcmp (name, "author")) {
128
ctx->item_data->author = dvalue;
129
} else if (!strcmp (name, "itunes:duration")) {
130
/* Duration is converted to a number of seconds. */
ctx->item_data->duration = rb_podcast_parse_time ((char*)dvalue);
132
} else if (!strcmp (name, "length")) {
133
/* Enclosure length attribute, parsed as a base-10 unsigned integer. */
ctx->item_data->filesize = g_ascii_strtoull ((char*)dvalue, NULL, 10);
141
/*
 * Add the item under construction (ctx->item_data) to the channel's list of
 * posts. Items are prepended, so the list is in reverse document order until
 * rb_podcast_parse_load_feed() reverses it after parsing.
 * NOTE(review): the statements between fetching `data` and the prepend are
 * not visible in this view.
 */
rb_insert_item (struct RBPoadcastLoadContext* ctx)
143
RBPodcastItem *data = ctx->item_data;
148
ctx->channel_data->posts = g_list_prepend (ctx->channel_data->posts, (void *) ctx->item_data);
151
/*
 * Test whether `name` is one of the channel child elements this parser
 * recognises. The start-element callback treats a false result as an
 * unknown element to be skipped.
 * NOTE(review): "lastBuildDate" is accepted here but has no visible branch
 * in rb_set_channel_value(); confirm whether its text is meant to be
 * discarded.
 */
static gboolean rb_validate_channel_propert (const char *name)
153
if (!strcmp(name, "title") ||
154
!strcmp(name, "language") ||
155
!strcmp(name, "itunes:subtitle") ||
156
!strcmp(name, "itunes:summary") ||
157
!strcmp(name, "description") ||
158
!strcmp(name, "generator") ||
159
!strcmp(name, "itunes:author") ||
160
!strcmp(name, "webMaster") ||
161
!strcmp(name, "lastBuildDate") ||
162
!strcmp(name, "pubDate") ||
163
!strcmp(name, "copyright"))
170
/*
 * Test whether `name` is one of the item child elements this parser
 * recognises. The start-element callback treats a false result as an
 * unknown element to be skipped. "enclosure" is not listed because the
 * start-element callback handles it separately, from its attributes.
 */
static gboolean rb_validate_item_propert (const char *name)
172
if (!strcmp(name, "title") ||
173
!strcmp(name, "url") ||
174
!strcmp(name, "pubDate") ||
175
!strcmp(name, "description") ||
176
!strcmp(name, "author") ||
177
!strcmp(name, "itunes:duration") )
186
/*
 * SAX start-element callback: advance the state machine for an opening tag.
 *
 * ctx:   load context (the parser's userData).
 * name:  element name.
 * attrs: name/value pairs, terminated by a NULL name; only read for
 *        "enclosure" elements.
 *
 * Elements that are not expected in the current state increment
 * ctx->in_unknown_elt; the end-element callback decrements it again.
 * NOTE(review): the switch header, the else/break lines and any early-out
 * for nested unknown elements are not visible in this view.
 */
rb_podcast_parser_start_element (struct RBPoadcastLoadContext* ctx, const char *name, const char **attrs)
191
/* Document root: only "rss" is accepted. */
case RB_PODCAST_PARSER_STATE_START:
193
if (!strcmp(name, "rss")) {
194
ctx->state = RB_PODCAST_PARSER_STATE_RSS;
197
ctx->in_unknown_elt++;
201
/* Inside "rss": only "channel" is accepted. */
case RB_PODCAST_PARSER_STATE_RSS:
203
if (!strcmp(name, "channel")) {
204
ctx->state = RB_PODCAST_PARSER_STATE_CHANNEL;
207
ctx->in_unknown_elt++;
211
/* Inside "channel": an image, a new item, or a recognised property. */
case RB_PODCAST_PARSER_STATE_CHANNEL:
214
if (!strcmp(name, "image")) {
215
ctx->state = RB_PODCAST_PARSER_STATE_IMG;
217
} else if (!strcmp(name, "item")) {
218
ctx->item_data = rb_podcast_initializa_item(); // g_new0(RBPodcastItem, 1);
219
ctx->state = RB_PODCAST_PARSER_STATE_ITEM;
221
} else if (!rb_validate_channel_propert (name)) {
222
ctx->in_unknown_elt++;
226
ctx->state = RB_PODCAST_PARSER_STATE_CHANNEL_PROPERTY;
230
/* Inside "item": enclosure attributes are consumed here directly. */
case RB_PODCAST_PARSER_STATE_ITEM:
232
if (!strcmp(name, "enclosure")) {
233
/* attrs is a flat array of name, value, name, value, ... */
for (; *attrs; attrs +=2) {
234
if (!strcmp (*attrs, "url")) {
235
const char *url_value = *(attrs+1);
236
rb_set_item_value(ctx, "url", url_value);
237
} else if (!strcmp (*attrs, "length")) {
238
const char *length_value = *(attrs+1);
239
rb_set_item_value(ctx, "length", length_value);
242
} else if (!rb_validate_item_propert (name)) {
243
ctx->in_unknown_elt++;
248
ctx->state = RB_PODCAST_PARSER_STATE_ITEM_PROPERTY;
252
/* Inside "image": only the "url" child is of interest. */
case RB_PODCAST_PARSER_STATE_IMG:
254
if (strcmp(name, "url") != 0) {
255
ctx->in_unknown_elt++;
259
ctx->state = RB_PODCAST_PARSER_STATE_IMG_PROPERTY;
263
/* Property and end states share one case body (not visible in this view). */
case RB_PODCAST_PARSER_STATE_CHANNEL_PROPERTY:
264
case RB_PODCAST_PARSER_STATE_ITEM_PROPERTY:
265
case RB_PODCAST_PARSER_STATE_IMG_PROPERTY:
266
case RB_PODCAST_PARSER_STATE_END:
273
/*
 * SAX end-element callback: commit accumulated text and move the state
 * machine back to the parent state.
 *
 * If an unknown element is open (ctx->in_unknown_elt > 0) the counter is
 * decremented. For the *_PROPERTY states the text gathered in
 * ctx->prop_value is stored through rb_set_channel_value() or
 * rb_set_item_value(), and the buffer is then emptied for the next property.
 * NOTE(review): the return after the counter decrement, the switch header,
 * the break statements and the body of the ITEM case before the state change
 * (presumably where the finished item is inserted) are not visible here.
 */
rb_podcast_parser_end_element (struct RBPoadcastLoadContext* ctx,
276
// if (*ctx->die == TRUE) {
277
// xmlStopParser (ctx->xmlctx);
281
if (ctx->in_unknown_elt > 0) {
282
ctx->in_unknown_elt--;
288
case RB_PODCAST_PARSER_STATE_START:
289
ctx->state = RB_PODCAST_PARSER_STATE_END;
292
case RB_PODCAST_PARSER_STATE_RSS:
293
ctx->state = RB_PODCAST_PARSER_STATE_START;
296
case RB_PODCAST_PARSER_STATE_CHANNEL:
297
ctx->state = RB_PODCAST_PARSER_STATE_RSS;
300
case RB_PODCAST_PARSER_STATE_CHANNEL_PROPERTY:
302
/* The closing tag's own name selects the channel field. */
rb_set_channel_value(ctx, name, ctx->prop_value->str);
303
ctx->state = RB_PODCAST_PARSER_STATE_CHANNEL;
304
g_string_truncate (ctx->prop_value, 0);
308
case RB_PODCAST_PARSER_STATE_ITEM:
311
ctx->state = RB_PODCAST_PARSER_STATE_CHANNEL;
315
case RB_PODCAST_PARSER_STATE_ITEM_PROPERTY:
317
rb_set_item_value(ctx, name, ctx->prop_value->str);
318
ctx->state = RB_PODCAST_PARSER_STATE_ITEM;
319
g_string_truncate (ctx->prop_value, 0);
323
case RB_PODCAST_PARSER_STATE_IMG_PROPERTY:
325
/* The image "url" text is stored under the internal key "img". */
rb_set_channel_value(ctx, "img", ctx->prop_value->str);
326
ctx->state = RB_PODCAST_PARSER_STATE_IMG;
327
g_string_truncate (ctx->prop_value, 0);
331
case RB_PODCAST_PARSER_STATE_IMG:
332
ctx->state = RB_PODCAST_PARSER_STATE_CHANNEL;
335
case RB_PODCAST_PARSER_STATE_END:
342
/*
 * SAX characters callback: collect element text.
 *
 * Text is appended to ctx->prop_value only while the parser is inside a
 * recognised property element; `data` is appended with an explicit length
 * (`len`), so it need not be NUL-terminated. The remaining states are listed
 * as a separate case group whose body is not visible in this view.
 * NOTE(review): whether text inside unknown nested elements is filtered out
 * first depends on lines not visible here.
 */
rb_podcast_parser_characters (struct RBPoadcastLoadContext* ctx, const char *data,
347
case RB_PODCAST_PARSER_STATE_CHANNEL_PROPERTY:
348
case RB_PODCAST_PARSER_STATE_ITEM_PROPERTY:
349
case RB_PODCAST_PARSER_STATE_IMG_PROPERTY:
350
g_string_append_len (ctx->prop_value, data, len);
352
case RB_PODCAST_PARSER_STATE_START:
353
case RB_PODCAST_PARSER_STATE_IMG:
354
case RB_PODCAST_PARSER_STATE_RSS:
355
case RB_PODCAST_PARSER_STATE_CHANNEL:
356
case RB_PODCAST_PARSER_STATE_ITEM:
357
case RB_PODCAST_PARSER_STATE_END:
364
/*
 * Fetch the feed at `file_name` and fill `data` with its contents.
 *
 * data:      channel record to populate; its url field is set to a copy of
 *            `file_name`, and its posts list ends up in document order.
 * file_name: location of the feed, as understood by GnomeVFS.
 *
 * Steps: check through GnomeVFS that the target looks like XML/RSS (by
 * mime type or a ".rss" substring in the name), read the whole file into
 * memory, then run a libxml SAX parse over the buffer with the three
 * callbacks defined in this file.
 * NOTE(review): return type and values, the early returns on failure, and
 * the release of `buffer`, `ctx` and `sax_handler` are on lines not visible
 * in this view, so confirm that the error paths free what was allocated.
 */
rb_podcast_parse_load_feed(RBPodcastChannel *data, const char *file_name) {
366
xmlParserCtxtPtr ctxt;
367
xmlSAXHandlerPtr sax_handler = g_new0 (xmlSAXHandler, 1);
368
GnomeVFSResult result;
369
GnomeVFSFileInfo *info;
373
struct RBPoadcastLoadContext *ctx = g_new0 (struct RBPoadcastLoadContext, 1);
375
data->url = xmlCharStrdup (file_name);
377
ctx->in_unknown_elt = 0;
378
ctx->channel_data = data;
380
if (!gnome_vfs_initialized ()) {
384
info = gnome_vfs_file_info_new();
385
result = gnome_vfs_get_file_info (file_name, info, GNOME_VFS_FILE_INFO_DEFAULT);
386
/*
 * Reject the target unless the mime type mentions "xml" or "rss", or the
 * name contains ".rss".
 * NOTE(review): this branch is also taken when info->mime_type is NULL,
 * and that NULL is then passed to a %s conversion in rb_debug() below.
 */
if ((result != GNOME_VFS_OK) ||
387
(info->mime_type == NULL) ||
388
((strstr (info->mime_type, "xml") == NULL) &&
389
(strstr (info->mime_type, "rss") == NULL) &&
390
(strstr (file_name, ".rss") == NULL))) {
391
rb_debug ("Invalid mime-type in podcatst feed %s\n", info->mime_type);
392
gnome_vfs_file_info_unref (info);
396
// First download the file through gnome_vfs so that the GNOME network configuration is used.
397
result = gnome_vfs_read_entire_file (file_name, &file_size, &buffer);
398
if (result != GNOME_VFS_OK)
404
/* Only these three SAX events are handled; the rest stay NULL (g_new0). */
sax_handler->startElement = (startElementSAXFunc) rb_podcast_parser_start_element;
405
sax_handler->endElement = (endElementSAXFunc) rb_podcast_parser_end_element;
406
sax_handler->characters = (charactersSAXFunc) rb_podcast_parser_characters;
407
xmlSubstituteEntitiesDefault (1);
409
/* Scratch buffer for the text of the property element being read. */
ctx->prop_value = g_string_sized_new(512);
411
ctxt = xmlCreateMemoryParserCtxt (buffer, file_size);
414
/* The callbacks receive ctx as their first argument via userData. */
ctxt->userData = ctx;
415
ctxt->sax = sax_handler;
416
xmlParseDocument (ctxt);
418
xmlFreeParserCtxt (ctxt);
421
g_string_free(ctx->prop_value, TRUE);
422
/* Items were prepended while parsing; restore document order. */
ctx->channel_data->posts = g_list_reverse (ctx->channel_data->posts);
430
/*
 * Convert a feed date string to the value returned by mktime().
 *
 * Two layouts are tried in order: "%a, %d %b %Y %T" (with a leading weekday)
 * and "%d %b %Y %T" (without). If neither matches, the failure is logged and
 * the conversion runs on a zeroed struct tm.
 * NOTE(review): neither format consumes a trailing time-zone field, and
 * mktime() interprets the broken-down time as local time, so confirm that
 * this is the intended handling of zone suffixes.
 */
rb_podcast_parse_date(const char* date_str)
435
result = strptime (date_str, "%a, %d %b %Y %T", &tm);
436
if (result == NULL) {
437
/* Clear any fields the failed attempt may have filled in before retrying. */
memset (&tm, 0, sizeof (struct tm));
438
result = strptime (date_str, "%d %b %Y %T", &tm);
440
if (result == NULL) {
441
memset (&tm, 0, sizeof (struct tm));
442
rb_debug ("unable to convert date string %s", date_str);
445
return (uintmax_t) mktime (&tm);
449
/*
 * Convert a duration string to a number of seconds.
 *
 * "%H:%M:%S" is tried first, then "%M:%S". If neither matches, the failure
 * is logged and the result is computed from a zeroed struct tm, i.e. 0.
 */
rb_podcast_parse_time (const char *time_str)
454
memset (&tm, 0, sizeof (struct tm));
455
result = strptime (time_str, "%H:%M:%S", &tm);
456
if (result == NULL) {
457
/* Reset before the fallback so partial fields from the first try are dropped. */
memset (&tm, 0, sizeof (struct tm));
458
result = strptime (time_str, "%M:%S", &tm);
460
if (result == NULL) {
461
memset (&tm, 0, sizeof (struct tm));
462
rb_debug ("unable to convert duration string %s", time_str);
465
/* hours -> minutes -> seconds */
return ((tm.tm_hour * 60 + tm.tm_min) * 60 + tm.tm_sec);
469
/*
 * Release a channel record's contents: every item in data->posts (through
 * rb_podcast_parse_item_free()), the list itself, and the string fields.
 * A NULL `data` is rejected by g_return_if_fail().
 * NOTE(review): the statements that free url, lang and img, and whatever
 * happens to `data` itself, are on lines not visible in this view.
 */
rb_podcast_parse_channel_free (RBPodcastChannel *data)
471
g_return_if_fail (data != NULL);
473
/* Free each item first, then the list nodes that pointed at them. */
g_list_foreach (data->posts, (GFunc) rb_podcast_parse_item_free, NULL);
474
g_list_free (data->posts);
476
if (data->url != NULL)
479
if (data->title != NULL)
480
g_free (data->title);
482
if (data->lang != NULL)
485
if (data->subtitle != NULL)
486
g_free (data->subtitle);
488
if (data->summary != NULL)
489
g_free (data->summary);
491
if (data->description != NULL)
492
g_free (data->description);
494
if (data->author != NULL)
495
g_free (data->author);
497
if (data->contact != NULL)
498
g_free (data->contact);
500
if (data->img != NULL)
503
if (data->copyright != NULL)
504
g_free (data->copyright);
510
rb_podcast_parse_item_free (RBPodcastItem *item)
512
g_return_if_fail (item != NULL);
514
if (item->title != NULL)
515
g_free (item->title);
517
if (item->url != NULL)
520
if (item->description != NULL)
521
g_free (item->description);