~ben-hutchings/ensoft-sextant/upload-perf

1.2.146 by James Harkin
Made copyright consistent throughout modules
1
# -----------------------------------------
2
# Sextant
3
# Copyright 2014, Ensoft Ltd.
4
# Author: Patrick Stevens, using work from Patrick Stevens and James Harkin
5
# -----------------------------------------
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
6
# API to interact with a Neo4J server: upload, query and delete programs in a DB
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
7
1.8.1 by James Harkin
altered styling of code in javascript and html
8
# Names exported by `from <this module> import *`.  set_common_cutoff is part
# of the public interface (it is the only way to change COMMON_CUTOFF), so it
# is listed alongside the classes.
__all__ = ("Validator", "AddToDatabase", "FunctionQueryResult", "Function",
           "SextantConnection", "set_common_cutoff")
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
10
11
import re  # for validation of function/program names
1.2.106 by patrickas at co
Add db_api to Sextant in preparation for deleting validator, neo4j_input_api and neo4j_output_api
12
import logging
8.1.1 by patrickas at co
Add uploader IDs and dates to program nodes
13
from datetime import datetime
14
import os
15
import getpass
8.1.2 by patrickas at co
Add sextant audit command
16
from collections import namedtuple
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
17
18
from neo4jrestclient.client import GraphDatabase
19
import neo4jrestclient.client as client
20
1.2.151 by patrickas at co
Update config file to be more self-documenting; restore COMMON_CUTOFF global variable in src/sextant/db_api.py so that it is obvious that the global does exist.
21
# A function is deemed 'common' if it has more than this many connections.
COMMON_CUTOFF = 10
24
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
25
26
class Validator():
    """Sanitises/checks strings, to prevent Cypher injection attacks."""

    # Whitelist of characters allowed in a string that will be interpolated
    # into a Cypher query.  The pattern is anchored with \Z rather than $:
    # $ also matches just before a trailing newline, which would let
    # "name\n" through the check.
    _ALLOWED = re.compile(r'^[A-Za-z0-9\-:\.\$_@\*\(\)%\+,]+\Z')

    # Runs of characters that sanitise() removes (everything except
    # letters, digits, '.', '-' and '_').
    _DISALLOWED = re.compile(r'[^\.\-_a-zA-Z0-9]+')

    @staticmethod
    def validate(input_):
        """
        Checks whether we can allow a string to be passed into a Cypher query.

        The string must be non-empty and consist solely of whitelisted
        characters; nothing (not even a trailing newline) may follow them.

        :param input_: the string we wish to validate
        :return: bool(the string is allowed)
        """
        return bool(Validator._ALLOWED.match(input_))

    @staticmethod
    def sanitise(input_):
        """
        Strips harmful characters from the given string.

        Note that the set of characters kept here is narrower than the set
        accepted by validate().

        :param input_: string to sanitise
        :return: the sanitised string
        """
        return Validator._DISALLOWED.sub('', input_)
47
48
1.8.1 by James Harkin
altered styling of code in javascript and html
49
class AddToDatabase():
    """Updates the database, adding functions/calls to a given program"""

    def __init__(self, program_name='', sextant_connection=None,
                 uploader='', uploader_id='', date=None):
        """
        Object which can be used to add functions and calls to a new program.

        If program_name does not pass Validator.validate, initialisation
        stops straight away and none of the attributes are set.  If no
        connection is given, no program node and no transactions are created.

        :param program_name: the name of the new program to be created
          (must already be validated against Validator)
        :param sextant_connection: the SextantConnection to use for connections
        :param uploader: string identifier of user who is uploading
        :param uploader_id: string Unix user-id of logged-in user
        :param date: string date of today
        """
        # the name ends up inside Cypher queries, so refuse anything unsafe
        if not Validator.validate(program_name):
            return

        self.program_name = program_name
        self.parent_database_connection = sextant_connection

        self._funcs_tx = None  # transaction for uploading functions
        self._calls_tx = None  # transaction for uploading relationships

        # maps each known function name to the set of names it calls
        self._calldict = {}

        if not self.parent_database_connection:
            return

        database = self.parent_database_connection._db

        program_node = database.nodes.create(name=program_name,
                                             type='program',
                                             uploader=uploader,
                                             uploader_id=uploader_id,
                                             date=date)
        self._parent_id = program_node.id

        self._funcs_tx = database.transaction(using_globals=False,
                                              for_query=True)
        self._calls_tx = database.transaction(using_globals=False,
                                              for_query=True)

    @staticmethod
    def _get_display_name(function_name):
        """
        Gets the name we will display to the user for this function name.

        For instance, if function_name were __libc_start_main@plt, we would
        return ("__libc_start_main", "plt_stub"). The returned function type is
        currently one of "plt_stub", "function_pointer" or "normal".

        :param function_name: the name straight from objdump of a function
        :return: ("display name", "function type")
        """
        if function_name.endswith("@plt"):
            # drop the four-character "@plt" suffix for display
            return function_name[:-4], "plt_stub"
        if function_name.startswith("_._function_pointer_"):
            return function_name, "function_pointer"
        return function_name, "normal"

    def add_function(self, function_name):
        """
        Adds a function to the program, ready to be sent to the remote database.
        If the function name is already in use, this method effectively does
          nothing and returns True.

        :param function_name: a string which must pass Validator.validate
        :return: True if the request succeeded, False otherwise
        """
        if not Validator.validate(function_name):
            return False

        if function_name not in self._calldict:
            display_name, function_group = \
                self._get_display_name(function_name)

            template = ('START n = node({}) '
                        'CREATE (n)-[:subject]->(m:func {{type: "{}", name: "{}"}}) '
                        'RETURN m.name, id(m)')
            self._funcs_tx.append(template.format(self._parent_id,
                                                  function_group,
                                                  display_name))

            # no calls recorded for this function yet
            self._calldict[function_name] = set()

        return True

    def add_function_call(self, fn_calling, fn_called):
        """
        Adds a function call to the program, ready to be sent to the database.
        Effectively does nothing if there is already a function call between
          these two functions.
        Function names must pass Validator.validate for easy security purposes;
          returns False if they fail validation.

        :param fn_calling: the name of the calling-function as a string.
          It should already exist in the AddToDatabase; if it does not,
          this method will create a stub for it.
        :param fn_called: name of the function called by fn_calling.
          If it does not exist, we create a stub representation for it.
        :return: True if successful, False otherwise
        """
        calling_ok = Validator.validate(fn_calling)
        called_ok = Validator.validate(fn_called)
        if not (calling_ok and called_ok):
            return False

        # make sure both ends of the call exist, callee first
        for name in (fn_called, fn_calling):
            if name not in self._calldict:
                self.add_function(name)

        self._calldict[fn_calling].add(fn_called)

        return True

    def commit(self):
        """
        Call this when you are finished with the object.
        Changes are not synced to the remote database until this is called.
        """
        # send off the function names; the first row of each query result is
        # a (name, id) pair, so we end up with id_funcs['name'] == id
        uploaded = self._funcs_tx.commit()
        id_funcs = dict(row[0] for row in uploaded)

        logging.info('Functions uploaded.')

        for caller, callees in self._calldict.items():
            if not callees:
                continue

            # add all the connections for this caller in one query
            caller_id = id_funcs[self._get_display_name(caller)[0]]
            callee_ids = ','.join(
                str(id_funcs[self._get_display_name(fn)[0]])
                for fn in callees)

            template = (' MATCH n WHERE id(n) = {}'
                        ' UNWIND[{}] as called_id'
                        ' MATCH m WHERE id(m) = called_id'
                        ' CREATE (n)-[:calls]->(m)')

            self._calls_tx.append(template.format(caller_id, callee_ids))

        self._calls_tx.commit()
        logging.info('Calls uploaded')
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
200
201
202
class FunctionQueryResult:
    """A graph of function calls arising as the result of a Neo4J query."""

    def __init__(self, parent_db, program_name='', rest_output=None):
        """
        Build the function graph from the raw output of a query.

        :param parent_db: object whose .transaction(...) is used to batch the
          follow-up caller/callee queries; if None, the relationships are read
          directly from the nodes in rest_output instead
        :param program_name: name of the program the functions belong to
        :param rest_output: output of the REST call (see
          _rest_node_output_to_graph); None gives an empty result
        """
        self.program_name = program_name
        self._parent_db_connection = parent_db
        self.functions = self._rest_node_output_to_graph(rest_output)
        self._update_common_functions()

    def __eq__(self, other):
        """
        Two results are equal if they have the same program name and the same
        functions (matched up by name, compared with Function equality).

        Objects lacking the attributes of a query result are not comparable:
        NotImplemented is returned so that Python falls back to its default,
        rather than raising AttributeError.
        """
        try:
            other_program = other.program_name
            # we make a dictionary so that we can perform easy comparison
            otherdict = {func.name: func for func in other.functions}
        except AttributeError:
            return NotImplemented

        selfdict = {func.name: func for func in self.functions}

        return self.program_name == other_program and selfdict == otherdict

    def __ne__(self, other):
        """
        Inverse of __eq__, defined explicitly so that != agrees with == on
        interpreters which do not derive it automatically (Python 2).
        """
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def _update_common_functions(self):
        """
        Loop over all functions: increment the called-by count of their callees.
        """
        for func in self.functions:
            for called in func.functions_i_call:
                called.number_calling_me += 1

    def _rest_node_output_to_graph(self, rest_output):
        """
        Convert the output of a REST API query into our internal representation.

        :param rest_output: output of the REST call as a Neo4j QuerySequence;
          its .elements is read as a list of rows, each row a list of nodes
        :return: list of <Function>s ready to initialise self.functions.
        """

        if rest_output is None or not rest_output.elements:
            return []

        # how we store this is: a dict
        #   with keys  'functionname'
        #   and values [the function object we will use,
        #               and a set of (function names this function calls),
        #               and numeric ID of this node in the Neo4J database]

        result = {}

        # initial pass for names of functions

        # if the following check fires, we've probably called db.query
        # to get it to not return client.Node objects, which is wrong.
        # we attempt to handle this a bit later; this should never arise, but
        # we can cope with it happening in some cases, like the test suite

        if type(rest_output.elements) is not list:
            logging.warning('Not a list: {}'.format(type(rest_output.elements)))

        for node_list in rest_output.elements:
            assert(isinstance(node_list, list))
            for node in node_list:
                if isinstance(node, client.Node):
                    name = node.properties['name']
                    node_id = node.id
                    node_type = node.properties['type']
                else:  # this is the handling we mentioned earlier;
                    # we are a dictionary instead of a Node, as for some
                    # reason we've returned Raw rather than Node data.
                    # We should never reach this code, but just in case.
                    name = node['data']['name']
                    # hacky workaround to get the id: it is the last
                    # component of the node's 'self' URL
                    node_id = node['self'].split('/')[-1]
                    node_type = node['data']['type']

                result[name] = [Function(self.program_name,
                                         function_name=name,
                                         function_type=node_type),
                                set(),
                                node_id]

        # end initialisation of names-dictionary

        if self._parent_db_connection is not None:
            # This is the normal case, of extracting results from a server.
            # We leave the other case in because it is useful for unit testing.

            # We collect the name-name pairs of caller-callee, batched for speed
            new_tx = self._parent_db_connection.transaction(using_globals=False,
                                                            for_query=True)
            for index in result:
                q = ("START n=node({})"
                     "MATCH n-[calls:calls]->(m)"
                     "RETURN n.name, m.name").format(result[index][2])
                new_tx.append(q)

            logging.debug('exec')
            results = new_tx.execute()

            # results is a list of query results, each of those being a list of
            # calls.

            for call_list in results:
                if call_list:
                    # call_list has element 0 being an arbitrary call this
                    # function makes; element 0 of that call is the name of the
                    # function itself. Think [['orig', 'b'], ['orig', 'c']].
                    orig = call_list[0][0]
                    # zip(*rows) transposes the rows, so index 1 is the tuple
                    # of all callee names; merge them into orig's callee set
                    result[orig][1] |= set(list(zip(*call_list.elements))[1])

        else:
            # we don't have a parent database connection.
            # This has probably arisen because we created this object from a
            # test suite, or something like that.
            for node in rest_output.elements:
                node_name = node[0].properties['name']
                result[node_name][1] |= {relationship.end.properties['name']
                                         for relationship in node[0].relationships.outgoing()}

        logging.debug('Relationships complete.')

        # Replace callee names by the corresponding Function objects; callees
        # which are not themselves part of this result are dropped.
        for function, calls, node_id in result.values():
            function.functions_i_call = [result[name][0]
                                         for name in calls
                                         if name in result]

        return [list_element[0]
                for list_element in result.values()
                if list_element[0]]

    def get_functions(self):
        """
        :return: a list of Function objects present in the query result
        """
        return self.functions

    def get_function(self, name):
        """
        Given a function name, returns the Function object which has that name.
        If several functions share the name, the first one is returned.
        If no function with that name exists, returns None.
        """
        for func in self.functions:
            if func.name == name:
                return func
        return None
345
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
346
1.2.148 by James Harkin
common cutoff definition in config file
347
def set_common_cutoff(common_def):
    """
    Change the module-wide threshold used to decide that a function is 'common'.

    A function counts as common once it has more than this number of
    incoming connections. If this is never called, the default of 10 applies.

    :param common_def: number of incoming connections
    """
    global COMMON_CUTOFF
    COMMON_CUTOFF = common_def
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
355
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
356
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
357
class Function(object):
    """Represents a function which might appear in a FunctionQueryResult."""

    def __init__(self, program_name='', function_name='', function_type=''):
        """
        :param program_name: name of the program this function belongs to
        :param function_name: name of the function
        :param function_type: type string stored on the function's node
        """
        self.parent_program = program_name
        self.attributes = []
        self.type = function_type
        self.functions_i_call = []
        self.name = function_name
        self.is_common = False
        self._number_calling_me = 0
        # care: _number_calling_me is not automatically updated, except by
        # any invocation of FunctionQueryResult._update_common_functions.

    def __eq__(self, other):
        """
        Functions are equal if they agree on parent program, name, attributes
        and the set of names of the functions they call.

        Objects lacking those attributes are not comparable: NotImplemented
        is returned so that Python falls back to its default, rather than
        raising AttributeError.
        """
        try:
            other_program = other.parent_program
            other_name = other.name
            other_attributes = other.attributes
            funcs_other_calls = {func.name for func in other.functions_i_call}
        except AttributeError:
            return NotImplemented

        # callees are compared by name only, which also keeps the comparison
        # from recursing through the call graph
        funcs_i_call = {func.name for func in self.functions_i_call}

        return (self.parent_program == other_program
                and self.name == other_name
                and funcs_i_call == funcs_other_calls
                and self.attributes == other_attributes)

    def __ne__(self, other):
        """
        Inverse of __eq__, defined explicitly so that != agrees with == on
        interpreters which do not derive it automatically (Python 2).
        """
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    @property
    def number_calling_me(self):
        """Number of functions recorded as calling this one."""
        return self._number_calling_me

    @number_calling_me.setter
    def number_calling_me(self, value):
        # keep is_common in step with the count: a function is common when
        # strictly more than COMMON_CUTOFF functions call it
        self._number_calling_me = value
        self.is_common = (self._number_calling_me > COMMON_CUTOFF)
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
388
389
390
class SextantConnection:
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
391
    """
392
    RESTful connection to a remote database.
393
    It can be used to create/delete/query programs.
394
    """
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
395
8.1.2 by patrickas at co
Add sextant audit command
396
    # Record type used by programs_with_metadata: one instance per program.
    ProgramWithMetadata = namedtuple(
        'ProgramWithMetadata',
        ['uploader', 'uploader_id', 'program_name', 'date', 'number_of_funcs'])
8.1.2 by patrickas at co
Add sextant audit command
400
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
401
    def __init__(self, url):
        """
        Connect to the database found at the given URL.
        :param url: location of the database; handed unchanged to GraphDatabase
        """
        self._db = GraphDatabase(url)
        self.url = url
404
405
    def new_program(self, name_of_program):
        """
        Begin a new program with the given name in the remote database.
        The program is manipulated through the AddToDatabase object returned.
        It is stamped with the current user's name and uid, and the current
          date and time.
        The name may already be present in the database, but this is not
          recommended because delete_program will not then know which one to
          delete. Check first using self.check_program_exists.
        The name must be accepted by Validator.validate; this is a measure
          to prevent Cypher injection attacks.
        :param name_of_program: string program name
        :return: AddToDatabase instance if successful
        :raise ValueError: if name_of_program fails validation
        """
        name_is_valid = Validator.validate(name_of_program)
        if not name_is_valid:
            message = "{} is not a valid program name".format(name_of_program)
            raise ValueError(message)

        # who is uploading, and when
        upload_details = {
            'uploader': getpass.getuser(),
            'uploader_id': os.getuid(),
            'date': str(datetime.now()),
        }

        return AddToDatabase(sextant_connection=self,
                             program_name=name_of_program,
                             **upload_details)
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
430
431
    def delete_program(self, name_of_program):
        """
        Ask the remote database to remove the program with the given name.
        A name which fails Validator.validate is refused without any query
          being sent.
        :param name_of_program: string name, which must pass validation
        :return: False if the name failed validation; True once the delete
          query has been sent
        """
        name_is_safe = Validator.validate(name_of_program)
        if not name_is_safe:
            return False

        # removes the program node, its neighbours, and their relationships
        delete_template = """MATCH (n) WHERE n.name= "{}" AND n.type="program"
        OPTIONAL MATCH (n)-[r]-(b) OPTIONAL MATCH (b)-[rel]-()
        DELETE  b,rel DELETE n, r"""

        self._db.query(delete_template.format(name_of_program))
        return True
447
448
    def _execute_query(self, prog_name='', query=''):
        """
        Run a Cypher query remotely and wrap the nodes which come back.
        The result is always a FunctionQueryResult, so this is unsuitable for
          queries with any other kind of output (such as lists of names);
          for those, call self._db.query directly.
        Intended only for non-updating queries
          (such as "get functions" rather than "create").
        :param prog_name: name of the program the result object will reflect
        :param query: verbatim query we wish the server to execute
        :return: a FunctionQueryResult corresponding to the server's output
        """
        database = self._db
        nodes_returned = database.query(query, returns=client.Node)

        return FunctionQueryResult(parent_db=database,
                                   program_name=prog_name,
                                   rest_output=nodes_returned)
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
466
467
    def get_program_names(self):
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
468
        """
469
        Execute query to retrieve a list of all programs in the database.
470
        Any name in this list can be used verbatim in any SextantConnection
471
          method which requires a program-name input.
472
        :return: a list of function-name strings.
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
473
        """
474
        q = """MATCH (n) WHERE n.type = "program" RETURN n.name"""
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
475
        program_names = self._db.query(q, returns=str).elements
476
477
        result = [el[0] for el in program_names]
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
478
479
        return set(result)
480
8.1.2 by patrickas at co
Add sextant audit command
481
    def programs_with_metadata(self):
482
        """
483
        Returns a set of namedtuples which represent the current database.
3.2.9 by patrickas at co
Style fix: newlines in docstring
484
        
8.1.3 by patrickas at co
Add sextant audit command
485
        The namedtuples have .uploader, .uploader_id, .program_name, .date,
3.2.8 by patrickas at co
ProgramWithMetadata.number renamed to number_of_funcs
486
        .number_of_funcs.
8.1.2 by patrickas at co
Add sextant audit command
487
        :return: set of namedtuples
3.2.9 by patrickas at co
Style fix: newlines in docstring
488
       
8.1.2 by patrickas at co
Add sextant audit command
489
        """
3.2.9 by patrickas at co
Style fix: newlines in docstring
490
        
8.1.2 by patrickas at co
Add sextant audit command
491
        q = ("MATCH (base) WHERE base.type = 'program' "
8.1.3 by patrickas at co
Add sextant audit command
492
             "MATCH (base)-[:subject]->(n)"
493
             "RETURN base.uploader, base.uploader_id, base.name, base.date, count(n)")
8.1.2 by patrickas at co
Add sextant audit command
494
        result = self._db.query(q)
495
        return {self.ProgramWithMetadata(*res) for res in result}
496
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
497
    def check_program_exists(self, program_name):
        """
        Ask the database whether any program node carries the given name.
        A name which fails Validator.validate is reported as not existing,
          without any query being sent.
        :param program_name: string name of the program to look for
        :return: bool(the program exists in the database).
        """

        name_is_safe = Validator.validate(program_name)
        if not name_is_safe:
            return False

        count_template = ("MATCH (base) WHERE base.name = '{}' AND base.type = 'program' "
                          "RETURN count(base)")
        response = self._db.query(count_template.format(program_name),
                                  returns=int)

        # one row, one column: how many program nodes matched
        number_found = response.elements[0][0]
        return number_found > 0
512
513
    def check_function_exists(self, program_name, function_name):
        """
        Execute query to check whether a function with the given name exists.
        We only check for functions which are children of a program with the
          given program_name.
        Both names are validated before being placed in the query text, as a
          measure against Cypher injection; a name which fails validation is
          reported as not existing.
        :param program_name: string name of the program within which to check
        :param function_name: string name of the function to check for existence
        :return: bool(names validate correctly, and function exists in program)
        """
        # function_name is interpolated into the query below, so it needs the
        # same validation as program_name
        if not Validator.validate(function_name):
            return False

        # check_program_exists validates program_name itself
        if not self.check_program_exists(program_name):
            return False

        q = ("MATCH (base) WHERE base.name = '{}' AND base.type = 'program' "
             "MATCH (base)-[r:subject]->(m) WHERE m.name = '{}' "
             "RETURN count(m)").format(program_name, function_name)

        result = self._db.query(q, returns=int)
        # one row, one column: how many matching function nodes were found
        return result.elements[0][0] > 0
534
535
    def get_function_names(self, program_name):
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
536
        """
537
        Execute query to retrieve a list of all functions in the program.
538
        Any of the output names can be used verbatim in any SextantConnection
539
          method which requires a function-name input.
540
        :param program_name: name of the program whose functions to retrieve
541
        :return: None if program_name doesn't exist in the remote database,
542
          a set of function-name strings otherwise.
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
543
        """
544
545
        if not self.check_program_exists(program_name):
546
            return None
547
1.8.2 by patrickas at co
Stopping code-review; all tests pass.
548
        q = ("MATCH (base) WHERE base.name = '{}' AND base.type = 'program' "
549
             "MATCH (base)-[r:subject]->(m) "
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
550
             "RETURN  m.name").format(program_name)
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
551
        return {func[0] for func in self._db.query(q)}
552
553
    def get_all_functions_called(self, program_name, function_calling):
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
554
        """
555
        Execute query to find all functions called by a function (indirectly).
556
        If the given function is not present in the program, returns None;
557
          likewise if the program_name does not exist.
558
        :param program_name: a string name of the program we wish to query under
559
        :param function_calling: string name of a function whose children to find
560
        :return: FunctionQueryResult, maximal subgraph rooted at function_calling
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
561
        """
562
563
        if not self.check_program_exists(program_name):
564
            return None
565
566
        if not self.check_function_exists(program_name, function_calling):
567
            return None
568
29 by Ben Hutchings
further tidy-up of create_objects
569
        # @@@ type in query - does it matter?
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
570
        q = """MATCH (base) WHERE base.name = '{}' ANd base.type = 'program'
571
            MATCH (base)-[:subject]->(m) WHERE m.name='{}'
572
            MATCH (m)-[:calls*]->(n)
573
            RETURN distinct n, m""".format(program_name, function_calling)
574
575
        return self._execute_query(program_name, q)
576
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
577
    def get_all_functions_calling(self, program_name, function_called):
578
        """
579
        Execute query to find all functions which call a function (indirectly).
580
        If the given function is not present in the program, returns None;
581
          likewise if the program_name does not exist.
582
        :param program_name: a string name of the program we wish to query
583
        :param function_called: string name of a function whose parents to find
584
        :return: FunctionQueryResult, maximal connected subgraph with leaf function_called
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
585
        """
586
587
        if not self.check_program_exists(program_name):
588
            return None
589
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
590
        if not self.check_function_exists(program_name, function_called):
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
591
            return None
592
593
        q = """MATCH (base) WHERE base.name = '{}' AND base.type = 'program'
594
            MATCH (base)-[r:subject]->(m) WHERE m.name='{}'
595
            MATCH (n)-[:calls*]->(m) WHERE n.name <> '{}'
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
596
            RETURN distinct n , m"""
597
        q = q.format(program_name, function_called, program_name)
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
598
599
        return self._execute_query(program_name, q)
600
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
601
    def get_call_paths(self, program_name, function_calling, function_called):
602
        """
603
        Execute query to find all possible routes between two specific nodes.
604
        If the given functions are not present in the program, returns None;
605
          ditto if the program_name does not exist.
606
        :param program_name: string program name
607
        :param function_calling: string
608
        :param function_called: string
609
        :return: FunctionQueryResult, the union of all subgraphs reachable by
610
          adding a source at function_calling and a sink at function_called.
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
611
        """
612
613
        if not self.check_program_exists(program_name):
614
            return None
615
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
616
        if not self.check_function_exists(program_name, function_called):
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
617
            return None
618
619
        if not self.check_function_exists(program_name, function_calling):
620
            return None
621
622
        q = r"""MATCH (pr) WHERE pr.name = '{}' AND pr.type = 'program'
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
623
                MATCH p=(start {{name: "{}" }})-[:calls*]->(end {{name:"{}"}})
624
                  WHERE (pr)-[:subject]->(start)
625
                WITH DISTINCT nodes(p) AS result
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
626
                UNWIND result AS answer
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
627
                RETURN answer"""
628
        q = q.format(program_name, function_calling, function_called)
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
629
630
        return self._execute_query(program_name, q)
631
632
    def get_whole_program(self, program_name):
633
        """Execute query to find the entire program with a given name.
634
        If the program is not present in the remote database, returns None.
635
        :param: program_name: a string name of the program we wish to return.
636
        :return: a FunctionQueryResult consisting of the program graph.
637
        """
638
639
        if not self.check_program_exists(program_name):
640
            return None
641
642
        query = """MATCH (base) WHERE base.name = '{}' AND base.type = 'program'
643
                MATCH (base)-[subject:subject]->(m)
644
                RETURN DISTINCT (m)""".format(program_name)
645
646
        return self._execute_query(program_name, query)
647
648
    def get_shortest_path_between_functions(self, program_name, func1, func2):
649
        """
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
650
        Execute query to get a single, shortest, path between two functions.
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
651
        :param program_name: string name of the program we wish to search under
652
        :param func1: the name of the originating function of our shortest path
1.2.155 by patrickas at co
Many style changes, extra documentation, nowhere near done
653
        :param func2: the name of the function at which to terminate the path
654
        :return: FunctionQueryResult shortest path between func1 and func2.
1.2.145 by patrickas at co
Config file ~/.sextant/sextant.conf is now used for Neo4j location and port number.
655
        """
656
        if not self.check_program_exists(program_name):
657
            return None
658
659
        if not self.check_function_exists(program_name, func1):
660
            return None
661
662
        if not self.check_function_exists(program_name, func2):
663
            return None
664
665
        q = """MATCH (func1 {{ name:"{}" }}),(func2 {{ name:"{}" }}),
666
            p = shortestPath((func1)-[:calls*]->(func2))
667
            UNWIND nodes(p) AS ans
668
            RETURN ans""".format(func1, func2)
669
670
        return self._execute_query(program_name, q)