# Copyright 2007-2009 Owen Taylor
#
# This file is part of Reinteract and distributed under the terms
# of the BSD license. See the file COPYING in the Reinteract
# distribution for full details.
########################################################################

import inspect
import re

from retokenize import *
# These are keywords where completion doesn't make sense afterwards, for
# example because they introduce new names or change the completion space.
NO_COMPLETION_KEYWORDS = set([
    'as', 'class', 'def', 'for', 'lambda', # introduce new names
    'from', 'import', # different completion space
    'break', 'continue', 'finally', 'pass', 'try' # nothing allowed after keyword
])
# These are tokens that the Python grammar won't allow a TOKEN_NAME after; by
# preventing completion here we prevent auto-complete from popping up completions
# to a name when the user was trying to enter a keyword (like 'for x <in>')
NO_COMPLETION_TOKENS = set([
    TOKEN_NAME, TOKEN_STRING, TOKEN_NUMBER, TOKEN_JUNK, TOKEN_RPAREN,
    TOKEN_RSQB, TOKEN_BUILTIN_CONSTANT
])
def get_prefixes(items):
    # Return the set of all non-empty prefixes of the given strings
    prefixes = set()
    for s in items:
        for l in xrange(1, len(s) + 1):
            prefixes.add(s[0:l])
    return prefixes
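
# For example (illustrative): get_prefixes(['for']) == set(['f', 'fo', 'for'])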
# We don't complete to keywords currently, so when in spontaneous completion
# mode it's annoying when you're trying to type a keyword and we offer to
# complete to random variables. Even if we supported keyword completion (a
# nice addition) it would still be annoying for common keywords.
KEYWORD_PREFIXES = get_prefixes([
    'and', 'as', 'assert', 'break', 'class', 'continue', 'def', 'del',
    'elif', 'else', 'except', 'exec', 'finally', 'for', 'from',
    'if', 'import', 'in', 'is', 'global', 'lambda', 'not', 'or',
    'pass', 'print', 'raise', 'return', 'try', 'with', 'while', 'yield'
])
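
# For example, 'i' and 'in' are both in KEYWORD_PREFIXES, so spontaneous
# completion of the bare text "in" is suppressed (see the tests below).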
class _TokenIter(object):
    def __init__(self, statement, line, i):
        self.statement = statement
        self.line = line
        self.i = i
        self.__update()

    def __update(self):
        self.token_type, self.start, self.end, self.flags = self.statement.tokens[self.line][self.i]

    def prev(self):
        # Move to the previous token, skipping lines with no tokens
        if self.i > 0:
            self.i -= 1
        else:
            l = self.line - 1
            while True:
                if l < 0:
                    raise StopIteration("Already at beginning")
                if len(self.statement.tokens[l]) > 0:
                    break
                l -= 1
            self.line = l
            self.i = len(self.statement.tokens[l]) - 1
        self.__update()

    def next(self):
        # Move to the next token, skipping lines with no tokens
        if self.i + 1 < len(self.statement.tokens[self.line]):
            self.i += 1
        else:
            l = self.line + 1
            while True:
                if l >= len(self.statement.tokens):
                    raise StopIteration("Already at end")
                if len(self.statement.tokens[l]) > 0:
                    break
                l += 1
            self.line = l
            self.i = 0
        self.__update()

    def is_open(self):
        return self.flags & FLAG_OPEN != 0

    def is_close(self):
        return self.flags & FLAG_CLOSE != 0
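
# Illustrative usage: given a TokenizedStatement ts (defined below), its
# tokens can be walked like:
#
#   it = ts._get_start_iter()
#   if it is not None:
#       while True:
#           ...  # use it.token_type, it.start, it.end
#           try:
#               it.next()
#           except StopIteration:
#               break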

class TokenizedStatement(object):
    def __init__(self):
        self.lines = []
        self.tokens = []
        self.stacks = []

    def set_lines(self, lines):
        """Set the lines in the TokenizedStatement

        Returns None if nothing changed, otherwise returns a range
        (start, end) of lines that were added or changed. A return of
        an empty range means that some lines were deleted, but none
        were changed.

        """

        # We want to avoid retokenizing everything on pure insertions
        # to make editing not egregiously O(n^2); we don't care much
        # if we have to retokenize in other cases.

        old_lines = self.lines
        old_tokens = self.tokens
        old_stacks = self.stacks

        self.lines = lines
        tokens = self.tokens = [None] * len(lines)
        stacks = self.stacks = [None] * len(lines)

        change_start = -1
        change_end = -1
        # Iterate forward, find an unchanged segment of lines at the front

        m = min(len(lines), len(old_lines))

        i = 0
        while i < m:
            if lines[i] != old_lines[i]:
                break
            tokens[i] = old_tokens[i]
            stacks[i] = old_stacks[i]
            i += 1

        if i == len(lines) and i == len(old_lines): # Nothing to do
            return None

        # Iterate backwards, find an unchanged segment of lines at the end

        m = min(len(lines) - i, len(old_lines) - i)

        j = 0
        new_pos = len(lines) - 1
        old_pos = len(old_lines) - 1
        while j < m:
            if lines[new_pos] != old_lines[old_pos]:
                break
            tokens[new_pos] = old_tokens[old_pos]
            stacks[new_pos] = old_stacks[old_pos]
            new_pos -= 1
            old_pos -= 1
            j += 1

        # Start tokenizing at the first changed line

        if i > 0:
            stack = stacks[i - 1]
        else:
            stack = []

        while i < len(lines):
            if i > new_pos:
                # Once we are in the trailing section of identical
                # lines, and the stack is the same as it was before,
                # the old tokens and stacks copied above are valid
                old_i = old_pos + i - new_pos - 1
                old_stack = old_stacks[old_i]
                if stack == old_stack:
                    break

            if change_start == -1:
                change_start = i
            change_end = i + 1

            tokens[i], stack = tokenize_line(lines[i], stack)
            stacks[i] = stack

            i += 1

        return (change_start, change_end)
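
    # For example (illustrative; see the tests at the bottom of this file):
    # changing only the second line of a two-line statement yields the range
    # (1, 2), and calling set_lines() again with identical lines yields None.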

    def get_text(self):
        return "\n".join(self.lines)

    def get_tokens(self, line):
        return self.tokens[line]

    def _get_iter(self, line, index):
        # Get an iterator pointing to the token containing the specified
        # position. Return None if there is no such token
        for i, (_, start, end, _) in enumerate(self.tokens[line]):
            if start > index:
                return None
            if start <= index and end > index:
                return _TokenIter(self, line, i)

        return None

    def _get_iter_before(self, line, index):
        # Get an iterator pointing to the last token that is not completely after
        # the specified position. Returns None if the position is before any tokens

        tokens = self.tokens[line]
        if len(tokens) == 0 or index <= tokens[0][1]:
            # Look on previous lines
            while line > 0:
                line -= 1
                if len(self.tokens[line]) > 0:
                    return _TokenIter(self, line, len(self.tokens[line]) - 1)

            return None

        for i, (_, start, end, _) in enumerate(tokens):
            if start >= index:
                return _TokenIter(self, line, i - 1)

        return _TokenIter(self, line, len(tokens) - 1)

    def _get_start_iter(self):
        # Get an iterator pointing to the first token, or None if the statement
        # is empty
        line = 0
        while line < len(self.lines) and len(self.tokens[line]) == 0:
            line += 1

        if line == len(self.lines) or len(self.tokens[line]) == 0:
            return None

        return _TokenIter(self, line, 0)

    def get_pair_location(self, line, index):
        iter = self._get_iter(line, index)
        if iter == None:
            return None, None

        # We don't do pair matching on strings; it's obvious from the
        # fontification, even though strings can participate in the stack
        if iter.token_type == TOKEN_STRING:
            return None, None
        elif iter.is_close():
            # Scan backwards for the matching open
            level = 1
            try:
                while True:
                    iter.prev()
                    if iter.is_close():
                        level += 1
                    elif iter.is_open():
                        level -= 1
                        if level == 0:
                            return iter.line, iter.start
            except StopIteration:
                pass
        elif iter.is_open():
            # Scan forwards for the matching close
            level = 1
            try:
                while True:
                    iter.next()
                    if iter.is_open():
                        level += 1
                    elif iter.is_close():
                        level -= 1
                        if level == 0:
                            return iter.line, iter.start
            except StopIteration:
                pass

        return None, None

    def get_next_line_indent(self, line):
        # Find the first line of the logical statement containing 'line'
        base_line = line
        while base_line > 0:
            prev_line = base_line - 1
            if (len(self.stacks[prev_line]) == 0 and
                (len(self.tokens[prev_line]) == 0 or self.tokens[prev_line][-1][0] != TOKEN_CONTINUATION)):
                break

            base_line = prev_line

        indent_text = re.match(r"^[\t ]*", self.lines[base_line]).group(0)

        extra_indent = 0

        tokens = self.tokens[line]
        if (len(tokens) > 0 and tokens[-1][0] == TOKEN_COLON or
            len(tokens) > 1 and tokens[-1][0] == TOKEN_COMMENT and tokens[-2][0] == TOKEN_COLON):
            extra_indent = 4
        elif len(self.stacks[line]) > 0:
            extra_indent = 4
        elif len(tokens) > 0 and tokens[-1][0] == TOKEN_CONTINUATION:
            extra_indent = 4

        if extra_indent != 0:
            indent_text += " " * extra_indent

        return indent_text
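
    # For example (illustrative): for the single line "if (True):" the
    # suggested indent for the next line is four spaces (see tests below).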

    def __statement_is_import(self):
        iter = self._get_start_iter()
        if iter == None:
            return False

        while iter.token_type == TOKEN_CONTINUATION:
            try:
                iter.next()
            except StopIteration:
                return False

        if iter.token_type == TOKEN_KEYWORD:
            keyword = self.lines[iter.line][iter.start:iter.end]
            if keyword == 'import' or keyword == 'from':
                return True

        return False

    # See if the iter points to a place where completion on the next
    # word doesn't make sense.
    def __check_no_completion_after(self, iter):
        if iter.token_type == TOKEN_KEYWORD:
            keyword = self.lines[iter.line][iter.start:iter.end]
            # example: no completion after 'for'
            return keyword in NO_COMPLETION_KEYWORDS

        if iter.token_type in NO_COMPLETION_TOKENS:
            # example: no completion after 'for x '
            return True

        return False

    def __resolve_names(self, names, scope):
        # First name is resolved against the scope
        try:
            obj = scope[names[0]]
        except KeyError:
            return None

        # Subsequent names are resolved as attributes of the previous object
        for name in names[1:]:
            try:
                obj = getattr(obj, name)
            except AttributeError:
                return None

        return obj

    def __sort_completions(self, completions):
        # Sort a set of completions with _ and __ names at the end.
        # (modifies completions and then returns it for convenience)

        def compare_completions(a, b):
            n_a = a[0]
            n_b = b[0]
            if n_a.startswith("__") and not n_b.startswith("__"):
                return 1
            elif n_b.startswith("__") and not n_a.startswith("__"):
                return -1
            elif n_a.startswith("_") and not n_b.startswith("_"):
                return 1
            elif n_b.startswith("_") and not n_a.startswith("_"):
                return -1
            else:
                return cmp(n_a, n_b)

        completions.sort(compare_completions)

        return completions
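
    # For example (illustrative): completions sort as
    #   ['method', '_private', '__doc__', '__module__']
    # -- ordinary names first, then _names, then __names.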

    def __list_scope(self, scope):
        # List possible completions given a scope dictionary

        possible = scope.items()
        if '__builtins__' in scope:
            builtins = scope['__builtins__']
            if not isinstance(builtins, dict):
                # __builtins__ may be a module rather than a dict
                builtins = builtins.__dict__
            for k in builtins:
                possible.append((k, builtins[k]))

        return possible

    def __find_no_symbol_completions(self, scope):
        # Return the completions to offer when we aren't starting at a symbol

        result = []
        for completion, obj in self.__list_scope(scope):
            result.append((completion, completion, obj))

        return self.__sort_completions(result)

    def find_completions(self, line, index, scope, min_length=0):
        """Returns a list of possible completions at the given line and index.

        Scope is the scope to start calculating the completions from. Each element
        in the returned list is a tuple of (display_form, text_to_insert, object_completed_to)
        where object_completed_to can be used to determine the type of the completion
        or get docs about it.

        @param min_length: if supplied, the minimum length to require for an isolated
        name before we complete against the scope. This is useful if we are suggesting
        completions without the user explicitly requesting it.

        """
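
        # Illustrative example (see the tests at the bottom of this file):
        # with scope = {'abcd': 1}, find_completions(0, 2, scope) on the single
        # line "ab" returns [('abcd', 'cd', 1)].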

        # We turn off completion within an import statement, since it's less
        # than useful to complete to symbols in the current scope. Better would
        # be to actually examine the path and complete to real imports.
        if self.__statement_is_import():
            return []

        # We can offer completions if we are at a position of the form:
        #  ([TOKEN_NAME|TOKEN_BUILTIN_CONSTANT] TOKEN_DOT)* (TOKEN_NAME|TOKEN_KEYWORD|TOKEN_BUILTIN_CONSTANT)?
        #
        # We work backwards from the last name, build a list of names, then resolve
        # that list of names against the scope.

        # Look for a token right before the specified position. index - 1 is OK here
        # even though that byte may not be a character start, since we are just
        # interested in a position inside the token
        iter = self._get_iter(line, index - 1)
        if iter != None and (iter.token_type == TOKEN_KEYWORD or
                             iter.token_type == TOKEN_NAME or
                             iter.token_type == TOKEN_BUILTIN_CONSTANT):
            end = min(iter.end, index)
            names = [self.lines[iter.line][iter.start:end]]
            try:
                iter.prev()
            except StopIteration:
                iter = None
        else:
            # For a TOKEN_DOT, we can be more forgiving and accept white space
            # between the token and the current position
            iter = self._get_iter_before(line, index)
            if iter != None and iter.token_type == TOKEN_DOT:
                names = ['']
            # This is a non-exhaustive list of places where we know that we
            # shouldn't complete to the scope. (We could do better by special
            # casing actual completions for TOKEN_RSQB, TOKEN_RBRACE, and so on.)
            elif iter != None and iter.token_type in (TOKEN_NAME, TOKEN_BUILTIN_CONSTANT, TOKEN_RPAREN, TOKEN_RSQB, TOKEN_RBRACE,
                                                      TOKEN_STRING, TOKEN_NUMBER):
                return []
            elif iter != None and self.__check_no_completion_after(iter):
                return []
            else:
                return self.__find_no_symbol_completions(scope)

        while iter and iter.token_type == TOKEN_DOT:
            try:
                iter.prev()
            except StopIteration:
                return []
            if iter.token_type != TOKEN_NAME and iter.token_type != TOKEN_BUILTIN_CONSTANT:
                return []
            names.insert(0, self.lines[iter.line][iter.start:iter.end])
            try:
                iter.prev()
            except StopIteration:
                iter = None

        if iter and self.__check_no_completion_after(iter):
            return []

        # We resolve the leading portion of the name path against the scope
        object = None
        if len(names) > 1:
            object = self.__resolve_names(names[0:-1], scope)
            if object == None:
                return []

        if len(names[0]) < min_length:
            return []

        # When we are in "spontaneous mode" (slightly hackish to use min_length
        # for this), we don't want to complete if the user might be typing a keyword
        if min_length > 0 and names[0] in KEYWORD_PREFIXES:
            return []

        # Then we complete the last element of the name path against what we
        # resolved to, or against the scope (if there was just one name)
        result = []
        to_complete = names[-1]
        if object == None:
            for completion, obj in self.__list_scope(scope):
                if completion.startswith(to_complete):
                    result.append((completion, completion[len(to_complete):], obj))
        else:
            for completion in dir(object):
                if completion.startswith(to_complete):
                    if inspect.ismodule(object):
                        object_completed_to = getattr(object, completion, None)
                    # We special case these because obj.__class__.__module__/__doc__
                    # are also strings, not a method/property
                    elif completion != '__module__' and completion != '__doc__':
                        # Using the attribute of the class over the attribute of
                        # the object gives us better docs on properties
                        try:
                            klass = getattr(object, '__class__')
                            object_completed_to = getattr(klass, completion)
                        except AttributeError:
                            object_completed_to = getattr(object, completion)
                    else:
                        object_completed_to = None

                    result.append((completion, completion[len(to_complete):], object_completed_to))

        return self.__sort_completions(result)

    def get_object_at_location(self, line, index, scope, result_scope=None, include_adjacent=False):
        """Find the object at a particular location within the statement.

        Returns a tuple of (object, token_start_line, token_start_index, token_end_line, token_end_index)
        or (None, None, None, None, None) if there is no object.

        @param scope: scope dictionary to start resolving names from.
        @param result_scope: scope to resolve names from on the left side of an assignment
        @param include_adjacent: if False, then line/index identifies a character in the buffer. If True,
           then line/index identifies a position between characters, and symbols before or after that
           position are included.

        """
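
        # Illustrative example (see the tests at the bottom of this file):
        # for the single line "obj.method", get_object_at_location(0, 4, scope)
        # resolves to scope['obj'].method together with the token bounds.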

        NO_RESULT = None, None, None, None, None

        # Names within an import statement don't refer to objects in the scope
        if self.__statement_is_import():
            return NO_RESULT

        # We can resolve the object if we are inside the final token of a sequence of the form:
        #  ([TOKEN_NAME|TOKEN_BUILTIN_CONSTANT] TOKEN_DOT)* (TOKEN_NAME|TOKEN_KEYWORD|TOKEN_BUILTIN_CONSTANT)
        #
        # We work backwards from the last name, build a list of names, then resolve
        # that list of names against the scope

        iter = self._get_iter(line, index)
        if iter != None and not (iter.token_type == TOKEN_KEYWORD or
                                 iter.token_type == TOKEN_NAME or
                                 iter.token_type == TOKEN_BUILTIN_CONSTANT):
            iter = None

        if iter == None and include_adjacent and index > 0:
            iter = self._get_iter(line, index - 1)

            if iter != None and not (iter.token_type == TOKEN_KEYWORD or
                                     iter.token_type == TOKEN_NAME or
                                     iter.token_type == TOKEN_BUILTIN_CONSTANT):
                iter = None

        if iter == None:
            return NO_RESULT

        start_index = iter.start
        end_index = iter.end

        names = [self.lines[iter.line][iter.start:iter.end]]
        try:
            iter.prev()
        except StopIteration:
            iter = None

        if iter != None:
            while iter.token_type == TOKEN_DOT:
                try:
                    iter.prev()
                except StopIteration:
                    return NO_RESULT
                if iter.token_type != TOKEN_NAME and iter.token_type != TOKEN_BUILTIN_CONSTANT:
                    return NO_RESULT
                names.insert(0, self.lines[iter.line][iter.start:iter.end])
                try:
                    iter.prev()
                except StopIteration:
                    break

        if result_scope != None:
            # If the name is the target of an assignment, the object may only
            # exist in the result scope, so resolve against that instead
            iter = self._get_iter(line, start_index)
            try:
                iter.next()
            except StopIteration:
                iter = None

            if iter != None:
                if iter.token_type == TOKEN_EQUAL or iter.token_type == TOKEN_AUGEQUAL:
                    scope = result_scope

        obj = self.__resolve_names(names, scope)

        return obj, line, start_index, line, end_index

    def __repr__(self):
        return "TokenizedStatement" + repr([([(t[0], line[t[1]:t[2]]) for t in tokens], stack) for line, tokens, stack in zip(self.lines, self.tokens, self.stacks)])

if __name__ == '__main__':

    ### Tests of set_lines()

    def expect(ts, expected):
        result = []
        for line, tokens, stack in zip(ts.lines, ts.tokens, ts.stacks):
            elements = [ line[t[1]:t[2]] for t in tokens ]
            if stack != []:
                elements.append(stack)
            result.append(elements)

        if result != expected:
            print "For:\n%s\nGot:\n%s\nExpected:\n%s\n" % (
                "\n".join(ts.lines),
                "\n".join([repr(l) for l in result]),
                "\n".join([repr(l) for l in expected]))

    ts = TokenizedStatement()
    assert ts.set_lines(["1"]) == (0, 1)
    expect(ts, [['1']])

    ts = TokenizedStatement()
    assert ts.set_lines(['"""a','b"""']) == (0, 2)
    expect(ts, [['"""a',['"""']],['b"""']])

    ts = TokenizedStatement()
    assert ts.set_lines(['(1 + 2','+ 3 + 4)']) == (0, 2)
    expect(ts, [['(', '1', '+', '2', ['(']], ['+', '3', '+', '4', ')']])

    assert ts.set_lines(['(1 + 2','+ 3 + 4)']) == None
    expect(ts, [['(', '1', '+', '2', ['(']], ['+', '3', '+', '4', ')']])

    assert ts.set_lines(['(1 + 2','+ 5 + 6)']) == (1, 2)
    expect(ts, [['(', '1', '+', '2', ['(']], ['+', '5', '+', '6', ')']])

    assert ts.set_lines(['(3 + 4','+ 5 + 6)']) == (0, 1)
    expect(ts, [['(', '3', '+', '4', ['(']], ['+', '5', '+', '6', ')']])

    assert ts.set_lines(['((1 + 2','+ 5 + 6)']) == (0, 2)
    expect(ts, [['(', '(', '1', '+', '2', ['(', '(']], ['+', '5', '+', '6', ')', ['(']]])

    assert ts.set_lines(['((1 + 2', '+ 3 + 4)', '+ 5 + 6)']) == (1, 3)
    expect(ts, [['(', '(', '1', '+', '2', ['(', '(']], ['+', '3', '+', '4', ')', ['(']], ['+', '5', '+', '6', ')']])

    assert ts.set_lines(['((1 + 2', '+ 3 + 4)']) == (-1, -1) # truncation

    ### Tests of iterator functionality

    ts = TokenizedStatement()
    ts.set_lines(['(1 + ','2)'])
    assert ts._get_iter(0, 2) == None
    assert ts._get_iter(1, 2) == None

    i = ts._get_iter(0, 3)
    assert i.token_type == TOKEN_PUNCTUATION

    i.prev()
    assert i.token_type == TOKEN_NUMBER
    i.prev()
    assert i.token_type == TOKEN_LPAREN

    try:
        i.prev()
        assert False, "Expected StopIteration"
    except StopIteration:
        pass

    i = ts._get_iter(0, 3)
    i.next()
    assert i.token_type == TOKEN_NUMBER
    i.next()
    assert i.token_type == TOKEN_RPAREN

    try:
        i.next()
        assert False, "Expected StopIteration"
    except StopIteration:
        pass

    ### Tests of paired punctuation

    ts = TokenizedStatement()
    ts.set_lines(['a = ([(1 + ',
                  '2) +(3 * (4 + 5))',
                  ']]'])

    # Pair location is None at a random position
    assert ts.get_pair_location(1, 2) == (None, None)
    # Pair location is None for an unpaired close (which isn't a close at all)
    assert ts.get_pair_location(2, 1) == (None, None)
    # Pair location is None for an unpaired open
    assert ts.get_pair_location(0, 4) == (None, None)

    # Matching pairs, looked up from the open
    assert ts.get_pair_location(0, 5) == (2, 0)
    assert ts.get_pair_location(1, 4) == (1, 16)

    # The same pairs, looked up from the close
    assert ts.get_pair_location(2, 0) == (0, 5)
    assert ts.get_pair_location(1, 16) == (1, 4)

    ### Tests of get_next_line_indent()

    ts = TokenizedStatement()

    lines = ([('if (True):', 4),
              ('elif (False):', 4),
              ('if (True): # a true statement', 4),
              ('while True:', 4),
              ('x = (1 +', 4),
              ('2)', 0),
              ('y = 1 + \\', 4),
              ('2', 0),
              ('"""string start', 4),
              (' string finish"""', 0)])

    ts.set_lines([text for text, _ in lines])
    for i, (text, expected) in enumerate(lines):
        next_line_indent = ts.get_next_line_indent(i).count(" ")
        if next_line_indent != expected:
            print "For %s, got next_line_indent=%d, expected %d" % (text, next_line_indent, expected)

    ### Tests of find_completions()

    # A simple scope to complete against; 'obj' gives us a dotted path
    # to resolve and a method to complete to
    class MyObject:
        def method(self):
            pass

    scope = {
        'a': 1,
        'abcd': 2,
        'bcde': 3,
        'indecent': 4,
        'len': len,
        'obj': MyObject(),
        '__builtins__': {}
    }

    def test_completion(line, expected, index = -1, min_length=0):
        if index == -1:
            index = len(line)

        ts = TokenizedStatement()
        ts.set_lines([line])
        completions = [n for n, _, _ in ts.find_completions(0, index, scope, min_length=min_length)]
        if completions != expected:
            print "For %s/%d, got %s, expected %s" % (line,index,completions,expected)

    def test_multiline_completion(lines, line, index, expected):
        ts = TokenizedStatement()
        ts.set_lines(lines)
        completions = [n for n, _, _ in ts.find_completions(line, index, scope)]
        if completions != expected:
            print "For %s/%d/%d, got %s, expected %s" % (lines,line,index,completions,expected)

    test_completion("a", ['a', 'abcd'])
    test_completion("ab", ['abcd'])
    test_completion("ab", ['abcd'], min_length=2)
    test_completion("ab", [], min_length=3)
    test_completion("ab", ['a', 'abcd'], index=1)
    test_completion("foo.", [])
    test_completion("(a + b)", [])
    test_completion("", ['a', 'abcd', 'bcde', 'indecent', 'len', 'obj', "__builtins__"])
    test_completion("foo + ", ['a', 'abcd', 'bcde', 'indecent', 'len', 'obj', "__builtins__"])
    test_completion("l", ['len'])
    test_completion("obj.", ['method', '__doc__', '__module__'])
    test_completion("obj.m", ['method', '__doc__', '__module__'], index=4)
    test_completion("obj.m", ['method'])
    test_completion("obj.m().n", [])
    test_completion("import b, a", [])
    test_completion("from foo import a", [])
    test_completion("for a", []) # No completion to existing variables
    test_completion("for a in", []) # Don't complete to 'indecent', syntax doesn't allow it
    test_completion("in", [], min_length=2) # Don't complete to 'indecent', because we have a keyword prefix

    test_multiline_completion(["(obj.", "m"], 1, 0, ['method', '__doc__', '__module__'])
    test_multiline_completion(["(obj.", "m"], 1, 1, ['method'])

    ### Tests of get_object_at_location()

    def test_object_at_location(line, index, expected, include_adjacent=False):
        ts = TokenizedStatement()
        ts.set_lines([line])
        obj, _, _, _, _ = ts.get_object_at_location(0, index, scope, include_adjacent=include_adjacent)
        if obj != expected:
            print "For %s/%d, got %s, expected %s" % (line,index,obj,expected)

    test_object_at_location("a", 0, 1)
    test_object_at_location("a", 1, None)
    test_object_at_location("obj.method", 0, scope['obj'])
    test_object_at_location("obj.method", 1, scope['obj'])
    test_object_at_location("obj.method", 4, scope['obj'].method)
    test_object_at_location("obj.met", 4, None)

    test_object_at_location("c a b", 2, 1, include_adjacent=True)
    test_object_at_location("c a b", 3, None, include_adjacent=False)
    test_object_at_location("c a b", 3, 1, include_adjacent=True)