~autopilot/xpathselect/1.3

« back to all changes in this revision

Viewing changes to lib/xpathselect.cpp

  • Committer: Tarmac
  • Author(s): Thomi Richards
  • Date: 2013-01-18 03:25:00 UTC
  • mfrom: (25.1.9 trunk)
  • Revision ID: tarmac-20130118032500-2bhgu0n9r1tl0a62
Add capability for searching sub-trees with '//'.

Approved by PS Jenkins bot, Christopher Lee.

Show diffs side-by-side

added added

removed removed

Lines of Context:
20
20
#include <boost/algorithm/string.hpp>
21
21
#include <boost/algorithm/string/split.hpp>
22
22
#include <boost/algorithm/string/classification.hpp>
 
23
#include <boost/algorithm/string/predicate.hpp>
23
24
#include <boost/bind.hpp>
24
25
 
25
26
#include "xpathselect.h"
26
 
#include "utils.h"
27
27
 
28
28
namespace xpathselect
29
29
{
34
34
        class XPathQueryPart
35
35
        {
36
36
        public:
 
37
            enum class QueryPartType {Normal, Search};
37
38
            XPathQueryPart(std::string const& query_part)
38
39
            {
 
40
                type_ = (query_part == "") ? QueryPartType::Search :  QueryPartType::Normal;
 
41
 
39
42
                std::vector<std::string> part_pieces;
40
43
                boost::algorithm::split(part_pieces,
41
44
                    query_part,
67
70
                }
68
71
            }
69
72
 
70
 
            XPathQueryPart(XPathQueryPart const& other)
71
 
            : node_name_(other.node_name_)
72
 
            , param_name_(other.param_name_)
73
 
            , param_value_(other.param_value_)
74
 
            {}
75
 
 
76
73
            bool Matches(Node::Ptr const& node) const
77
74
            {
78
75
                bool matches = (node_name_ == "*" || node->GetName() == node_name_);
83
80
 
84
81
                return matches;
85
82
            }
 
83
 
 
84
            QueryPartType Type() const { return type_; }
 
85
 
86
86
        private:
87
87
            std::string node_name_;
88
88
            std::string param_name_;
89
89
            std::string param_value_;
 
90
            QueryPartType type_;
90
91
        };
91
92
 
92
93
        typedef std::vector<XPathQueryPart> QueryList;
100
101
            boost::algorithm::split(query_strings,
101
102
                query,
102
103
                boost::algorithm::is_any_of("/"),
103
 
                boost::algorithm::token_compress_on);
104
 
 
105
 
            // Boost's split() implementation does not match it's documentation! According to the
106
 
            // docs, it's not supposed to add empty strings, but it does, which is a PITA. This
107
 
            // next line removes them:
108
 
            query_strings.erase( std::remove_if( query_strings.begin(),
109
 
                query_strings.end(),
110
 
                boost::bind( &std::string::empty, _1 ) ),
111
 
              query_strings.end());
 
104
                boost::algorithm::token_compress_off);
112
105
 
113
106
            for(std::string part : query_strings)
114
107
            {
115
108
                query_parts.push_back(XPathQueryPart(part));
116
109
            }
117
110
 
 
111
            // bost::split leaves the initial token in the output list, so we ignore the
 
112
            // first search token:
 
113
            if (query_parts.front().Type() == XPathQueryPart::QueryPartType::Search)
 
114
                query_parts.erase(query_parts.begin());
 
115
 
118
116
            return query_parts;
119
117
        }
120
118
 
121
 
        NodeList MatchAbsoluteQuery(Node::Ptr const& root, QueryList const& query_parts)
122
 
        {
123
 
            NodeList matches;
124
 
 
125
 
            if (query_parts.front().Matches(root))
126
 
            {
127
 
                matches.push_back(root);
128
 
            }
129
 
            return matches;
130
 
        }
131
 
 
132
 
        NodeList MatchRelativeQuery(Node::Ptr const& root, QueryList const& query_parts)
133
 
        {
134
 
            NodeList matches;
135
 
            // non-recursive BFS traversal to find starting points:
136
 
            std::queue<Node::Ptr> queue;
137
 
            queue.push(root);
138
 
            while (!queue.empty())
139
 
            {
140
 
                Node::Ptr node = queue.front();
141
 
                queue.pop();
142
 
                if (query_parts.front().Matches(node))
143
 
                {
144
 
                    // found one. We keep going deeper, as there may be another node beneath this one
145
 
                    // with the same node name.
146
 
                    matches.push_back(node);
147
 
                }
148
 
                // Add all children of current node to queue.
149
 
                for(Node::Ptr child : node->Children())
150
 
                {
151
 
                    queue.push(child);
152
 
                }
153
 
            }
154
 
            return matches;
155
 
        }
156
 
 
157
 
        NodeList ProcessStartPoints(NodeList start_points, QueryList query_parts)
158
 
        {
159
 
            query_parts.erase(query_parts.begin());
160
 
            typedef std::pair<Node::Ptr, QueryList::iterator> node_match_pair;
161
 
 
162
 
            std::queue<node_match_pair> traverse_queue;
163
 
            for(Node::Ptr node : start_points)
164
 
            {
165
 
                traverse_queue.push(node_match_pair(node, query_parts.begin()));
166
 
            }
167
 
            start_points.clear();
168
 
 
169
 
            while (!traverse_queue.empty())
170
 
            {
171
 
                node_match_pair p = traverse_queue.front();
172
 
                traverse_queue.pop();
173
 
 
174
 
                Node::Ptr node = p.first;
175
 
                auto query_it = p.second;
176
 
 
177
 
                if (query_it == query_parts.end())
178
 
                {
179
 
                    // found a match:
180
 
                    start_points.push_back(node);
181
 
                }
182
 
                else
183
 
                {
184
 
                    // push all children of current node to start of queue, advance search iterator, and loop again.
185
 
                    for (Node::Ptr child : node->Children())
186
 
                    {
187
 
                        if (query_it->Matches(child))
188
 
                        {
189
 
                            auto it_copy(query_it);
190
 
                            ++it_copy;
191
 
                            traverse_queue.push(node_match_pair(child, it_copy));
192
 
                        }
193
 
                    }
194
 
                }
195
 
            }
196
 
            return start_points;
197
 
        }
198
 
    }
 
119
        // Starting at each node listed in 'start_points', search the tree for nodes that match
 
120
        // 'next_match'. next_match *must* be a normal query part object, not a search token.
 
121
        NodeList SearchTreeForNode(NodeList const& start_points, XPathQueryPart const& next_match)
 
122
        {
 
123
            NodeList matches;
 
124
            for (auto root: start_points)
 
125
            {
 
126
                // non-recursive BFS traversal to find starting points:
 
127
                std::queue<Node::Ptr> queue;
 
128
                queue.push(root);
 
129
                while (!queue.empty())
 
130
                {
 
131
                    Node::Ptr node = queue.front();
 
132
                    queue.pop();
 
133
                    if (next_match.Matches(node))
 
134
                    {
 
135
                        // found one. We keep going deeper, as there may be another node beneath this one
 
136
                        // with the same node name.
 
137
                        matches.push_back(node);
 
138
                    }
 
139
                    // Add all children of current node to queue.
 
140
                    for(Node::Ptr child : node->Children())
 
141
                    {
 
142
                        queue.push(child);
 
143
                    }
 
144
                }
 
145
            }
 
146
            return matches;
 
147
        }
 
148
    } // end of anonymous namespace
199
149
 
200
150
    NodeList SelectNodes(Node::Ptr const& root, std::string query)
201
151
    {
204
154
        {
205
155
            query = "/" + root->GetName();
206
156
        }
 
157
        // sanity checking some obvious invalid queries:
 
158
        if (boost::algorithm::ends_with(query, "//"))
 
159
            return NodeList();
207
160
 
208
 
        bool is_absolute = utils::IsQueryAbsolute(query);
209
161
        QueryList query_parts = GetQueryPartsFromQuery(query);
210
162
 
211
 
        NodeList start_nodes;
212
 
        if (is_absolute)
213
 
        {
214
 
            start_nodes = MatchAbsoluteQuery(root, query_parts);
215
 
        }
216
 
        else
217
 
        {
218
 
            start_nodes = MatchRelativeQuery(root, query_parts);
219
 
        }
220
 
 
221
 
        return ProcessStartPoints(start_nodes, query_parts);
 
163
        auto query_part = query_parts.cbegin();
 
164
        NodeList start_nodes { root };
 
165
        while (query_part != query_parts.cend())
 
166
        {
 
167
            // If the current query piece is a recursive search token ('//')...
 
168
            if (query_part->Type() == XPathQueryPart::QueryPartType::Search)
 
169
            {
 
170
                // advance to look at the next piece.
 
171
                ++query_part;
 
172
                // do some sanity checking...
 
173
                if (query_part->Type() == XPathQueryPart::QueryPartType::Search)
 
174
                    // invalid query - cannot specify multiple search sequences in a row.
 
175
                    return NodeList();
 
176
                // then find all the nodes that match the new query part, and store them as
 
177
                // the new start nodes. We pass in 'start_nodes' rather than 'root' since
 
178
                // there's a chance we'll be doing more than one search in different parts of the tree.
 
179
                start_nodes = SearchTreeForNode(start_nodes, *query_part);
 
180
            }
 
181
            else
 
182
            {
 
183
                // this isn't a search token. Look at each node in the start_nodes list,
 
184
                // and discard any that don't match the current query part.
 
185
                // C++11 is the shit:
 
186
                start_nodes.erase(
 
187
                    std::remove_if(
 
188
                        start_nodes.begin(),
 
189
                        start_nodes.end(),
 
190
                        [query_part](Node::Ptr n) -> bool {
 
191
                            return ! query_part->Matches(n);
 
192
                        }
 
193
                        ),
 
194
                    start_nodes.end()
 
195
                    );
 
196
            }
 
197
            // then replace each node still in the list with all it's children.
 
198
            // ... but only if we're not on the last query part:
 
199
            if (query_part + 1 != query_parts.cend())
 
200
            {
 
201
                NodeList new_start_nodes;
 
202
                for (auto node: start_nodes)
 
203
                {
 
204
                    auto children = node->Children();
 
205
                    if (children.size())
 
206
                    {
 
207
                        new_start_nodes.insert(
 
208
                            new_start_nodes.end(),
 
209
                            children.begin(),
 
210
                            children.end());
 
211
                    }
 
212
                }
 
213
            start_nodes = new_start_nodes;
 
214
            }
 
215
            ++query_part;
 
216
        }
 
217
        return start_nodes;
222
218
    }
223
219
}