~ubuntu-branches/ubuntu/maverick/newsbeuter/maverick

« back to all changes in this revision

Viewing changes to rss/atom_parser.cpp

  • Committer: Bazaar Package Importer
  • Author(s): Nico Golde
  • Date: 2009-04-21 14:06:18 UTC
  • mfrom: (4.1.7 sid)
  • Revision ID: james.westby@ubuntu.com-20090421140618-osnjk19bgkebyg9h
Tags: 2.0-1
* New upstream release.
  - Remove dependency on mrss and nxml and add libxml2, newsbeuter
    now comes with its own parser.
  - Remove debian/patches and quilt dependency.
* Bump to policy 3.8.1, no changes needed.
* debian/copyright: adjust years.
* Remove dh_clean -k call as it is deprecated -> dh_prep.
* Change newsbeuter homepage, it now has an official one.
* Fix watch file url, new upstream code location.
* Add bookmark-scuttle.sh to installed contrib files.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/* rsspp - Copyright (C) 2008-2009 Andreas Krennmair <ak@newsbeuter.org>
 
2
 * Licensed under the MIT/X Consortium License. See file LICENSE
 
3
 * for more information.
 
4
 */
 
5
 
 
6
#include <config.h>
 
7
#include <rsspp_internal.h>
 
8
#include <utils.h>
 
9
#include <cstring>
 
10
 
 
11
namespace rsspp {
 
12
 
 
13
 
 
14
void atom_parser::parse_feed(feed& f, xmlNode * rootNode) {
 
15
        if (!rootNode)
 
16
                throw exception(_("XML root node is NULL"));
 
17
 
 
18
        f.language = get_prop(rootNode, "lang");
 
19
        globalbase = get_prop(rootNode, "base", XML_URI);
 
20
 
 
21
        for (xmlNode * node = rootNode->children; node != NULL; node = node->next) {
 
22
                if (node_is(node, "title")) {
 
23
                        f.title = get_content(node);
 
24
                        f.title_type = get_prop(node, "type");
 
25
                        if (f.title_type == "")
 
26
                                f.title_type = "text";
 
27
                } else if (node_is(node, "subtitle")) {
 
28
                        f.description = get_content(node);
 
29
                } else if (node_is(node, "link")) {
 
30
                        std::string rel = get_prop(node, "rel");
 
31
                        if (rel == "alternate") {
 
32
                                f.link = newsbeuter::utils::absolute_url(globalbase, get_prop(node, "href"));
 
33
                        }
 
34
                } else if (node_is(node, "updated")) {
 
35
                        f.pubDate = w3cdtf_to_rfc822(get_content(node));
 
36
                } else if (node_is(node, "entry")) {
 
37
                        f.items.push_back(parse_entry(node));
 
38
                }
 
39
        }
 
40
 
 
41
}
 
42
 
 
43
item atom_parser::parse_entry(xmlNode * entryNode) {
 
44
        item it;
 
45
        std::string summary;
 
46
        std::string summary_type;
 
47
        std::string updated;
 
48
 
 
49
        std::string base = get_prop(entryNode, "base", XML_URI);
 
50
        if (base == "")
 
51
                base = globalbase;
 
52
 
 
53
        for (xmlNode * node = entryNode->children; node != NULL; node = node->next) {
 
54
                if (node_is(node, "author")) {
 
55
                        for (xmlNode * authornode = node->children; authornode != NULL; authornode = authornode->next) {
 
56
                                if (node_is(authornode, "name")) {
 
57
                                        it.author = get_content(authornode);
 
58
                                } // TODO: is there more?
 
59
                        }
 
60
                } else if (node_is(node, "title")) {
 
61
                        it.title = get_content(node);
 
62
                        it.title_type = get_prop(node, "type");
 
63
                        if (it.title_type == "")
 
64
                                it.title_type = "text";
 
65
                } else if (node_is(node, "content")) {
 
66
                        std::string mode = get_prop(node, "mode");
 
67
                        std::string type = get_prop(node, "type");
 
68
                        if (mode == "xml" || mode == "") {
 
69
                                if (type == "html" || type == "text") {
 
70
                                        it.description = get_content(node);
 
71
                                } else {
 
72
                                        it.description = get_xml_content(node);
 
73
                                }
 
74
                        } else if (mode == "escaped") {
 
75
                                it.description = get_content(node);
 
76
                        }
 
77
                        it.description_type = type;
 
78
                        if (it.description_type == "")
 
79
                                it.description_type = "text";
 
80
                } else if (node_is(node, "id")) {
 
81
                        it.guid = get_content(node);
 
82
                        it.guid_isPermaLink = false;
 
83
                } else if (node_is(node, "published")) {
 
84
                        it.pubDate = w3cdtf_to_rfc822(get_content(node));
 
85
                } else if (node_is(node, "updated")) {
 
86
                        updated = w3cdtf_to_rfc822(get_content(node));
 
87
                } else if (node_is(node, "link")) {
 
88
                        std::string rel = get_prop(node, "rel");
 
89
                        if (rel == "" || rel == "alternate") {
 
90
                                it.link = newsbeuter::utils::absolute_url(base, get_prop(node, "href"));
 
91
                        } else if (rel == "enclosure") {
 
92
                                it.enclosure_url = get_prop(node, "href");
 
93
                                it.enclosure_type = get_prop(node, "type");
 
94
                        }
 
95
                } else if (node_is(node, "summary")) {
 
96
                        std::string mode = get_prop(node, "mode");
 
97
                        summary_type = get_prop(node, "type");
 
98
                        if (mode == "xml" || mode == "") {
 
99
                                if (summary_type == "html" || summary_type == "text") {
 
100
                                        summary = get_content(node);
 
101
                                } else {
 
102
                                        summary = get_xml_content(node);
 
103
                                }
 
104
                        } else if (mode == "escaped") {
 
105
                                summary = get_content(node);
 
106
                        }
 
107
                        if (summary_type == "")
 
108
                                summary_type = "text";
 
109
                }
 
110
        } // for
 
111
 
 
112
        if (it.description == "") {
 
113
                it.description = summary;
 
114
                it.description_type = summary_type;
 
115
        }
 
116
 
 
117
        if (it.pubDate == "") {
 
118
                it.pubDate = updated;
 
119
        }
 
120
 
 
121
        return it;
 
122
}
 
123
 
 
124
}