"""A lexical analyzer class for simple shell-like syntaxes."""

# Module and documentation by Eric S. Raymond, 21 Dec 1998
# Input stacking and error message cleanup added by ESR, March 2000
# push_source() and pop_source() made explicit by ESR, January 2001.
# Posix compliance, split(), string arguments, and
# iterator interface by Gustavo Niemeyer, April 2003.
12
import os
import re
import sys
from collections import deque

from io import StringIO
16
__all__ = ["shlex", "split", "quote"]
19
class shlex:
    """A lexical analyzer class for simple shell-like syntaxes.

    The lexer reads characters one at a time from ``instream`` and groups
    them into tokens.  Its behaviour is tuned through plain attributes
    that callers may change after construction:

    commenters        characters that start a comment running to end of line
    wordchars         characters that accumulate into multi-character tokens
    whitespace        characters that separate tokens
    whitespace_split  if true, split tokens on whitespace only
    quotes            characters that open/close a quoted string
    escape            escape characters (honoured in POSIX mode only)
    escapedquotes     quote characters inside which escapes are honoured
    source            if not None, the token that triggers file inclusion
    eof               value returned at end of input (None in POSIX mode,
                      '' otherwise)
    debug             trace level; 0 is silent, higher values print more
    """

    def __init__(self, instream=None, infile=None, posix=False):
        """Create a lexer.

        instream -- a string (wrapped in a StringIO), an object providing
            read() and readline(), or None to read from sys.stdin.
        infile -- name of the input, used in error leaders and to resolve
            relative inclusions; ignored when instream is None.
        posix -- if true, apply POSIX-shell quoting and escaping rules.
        """
        if isinstance(instream, str):
            instream = StringIO(instream)
        if instream is not None:
            self.instream = instream
            self.infile = infile
        else:
            self.instream = sys.stdin
            self.infile = None
        self.posix = posix
        # The end-of-input marker must be distinguishable from a
        # legitimately empty token, which only POSIX mode can produce ('').
        if posix:
            self.eof = None
        else:
            self.eof = ''
        self.commenters = '#'
        self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
        if self.posix:
            # Latin-1 letters (U+00C0-U+00FF without the two math signs).
            self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
                               'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
        self.whitespace = ' \t\r\n'
        self.whitespace_split = False
        self.quotes = '\'"'
        self.escape = '\\'
        self.escapedquotes = '"'
        # Lexer state: ' ' between tokens, 'a' inside a word, a quote or
        # escape character while inside that construct, None after EOF.
        self.state = ' '
        self.pushback = deque()
        self.lineno = 1
        self.debug = 0
        self.token = ''
        self.filestack = deque()
        self.source = None
        # Only reachable if the debug default above is changed.
        if self.debug:
            print('shlex: reading from %s, line %d' \
                  % (self.instream, self.lineno))

    def push_token(self, tok):
        """Push a token onto the stack popped by the get_token method."""
        if self.debug >= 1:
            print("shlex: pushing token " + repr(tok))
        self.pushback.appendleft(tok)

    def push_source(self, newstream, newfile=None):
        """Push an input source onto the lexer's input source stack.

        The current (infile, instream, lineno) triple is saved so that
        pop_source() can resume it; reading continues from *newstream*
        (a string is wrapped in a StringIO) at line 1.
        """
        if isinstance(newstream, str):
            newstream = StringIO(newstream)
        self.filestack.appendleft((self.infile, self.instream, self.lineno))
        self.infile = newfile
        self.instream = newstream
        self.lineno = 1
        if self.debug:
            if newfile is not None:
                print('shlex: pushing to file %s' % (self.infile,))
            else:
                print('shlex: pushing to stream %s' % (self.instream,))

    def pop_source(self):
        """Pop the input source stack.

        Closes the exhausted stream and resumes the most recently saved
        source.  Raises IndexError if the stack is empty.
        """
        # The finished stream may be a file opened by sourcehook().
        self.instream.close()
        (self.infile, self.instream, self.lineno) = self.filestack.popleft()
        if self.debug:
            print('shlex: popping to %s, line %d' \
                  % (self.instream, self.lineno))
        # Hitting EOF left the state at None; restart between tokens.
        self.state = ' '

    def get_token(self):
        """Get a token from the input stream (or from stack if it's nonempty).

        Handles pushed-back tokens, source inclusion and popping back to
        the including source at end of file.  Returns self.eof once all
        input is exhausted.
        """
        if self.pushback:
            tok = self.pushback.popleft()
            if self.debug >= 1:
                print("shlex: popping token " + repr(tok))
            return tok
        # No pushback.  Get a token.
        raw = self.read_token()
        # Handle inclusions
        if self.source is not None:
            while raw == self.source:
                spec = self.sourcehook(self.read_token())
                # A falsy result from the hook skips the inclusion.
                if spec:
                    (newfile, newstream) = spec
                    self.push_source(newstream, newfile)
                raw = self.get_token()
        # Maybe we got EOF instead?
        while raw == self.eof:
            if not self.filestack:
                return self.eof
            self.pop_source()
            raw = self.get_token()
        # Neither inclusion nor EOF
        if self.debug >= 1:
            if raw != self.eof:
                print("shlex: token=" + repr(raw))
            else:
                print("shlex: token=EOF")
        return raw

    def read_token(self):
        """Read one raw token from the current stream.

        Unlike get_token(), this ignores the pushback stack and performs
        no inclusion handling.  Returns '' (or None in POSIX mode) at end
        of input.  Raises ValueError on an unterminated quotation or a
        dangling escape character.
        """
        quoted = False
        escapedstate = ' '
        while True:
            nextchar = self.instream.read(1)
            if nextchar == '\n':
                self.lineno = self.lineno + 1
            if self.debug >= 3:
                print("shlex: in state", repr(self.state), \
                      "I see character:", repr(nextchar))
            if self.state is None:
                self.token = ''        # past end of file
                break
            elif self.state == ' ':
                if not nextchar:
                    self.state = None  # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in whitespace state")
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                elif nextchar in self.commenters:
                    # readline() swallows the newline, so count it here.
                    self.instream.readline()
                    self.lineno = self.lineno + 1
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars:
                    self.token = nextchar
                    self.state = 'a'
                elif nextchar in self.quotes:
                    # Non-POSIX mode keeps the quotes in the token.
                    if not self.posix:
                        self.token = nextchar
                    self.state = nextchar
                elif self.whitespace_split:
                    self.token = nextchar
                    self.state = 'a'
                else:
                    # Punctuation is a single-character token.
                    self.token = nextchar
                    if self.token or (self.posix and quoted):
                        break   # emit current token
            elif self.state in self.quotes:
                quoted = True
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in quotes state")
                    # XXX what error should be raised here?
                    raise ValueError("No closing quotation")
                if nextchar == self.state:
                    if not self.posix:
                        self.token = self.token + nextchar
                        self.state = ' '
                        break
                    else:
                        # POSIX: the word may continue after the quote.
                        self.state = 'a'
                elif self.posix and nextchar in self.escape and \
                     self.state in self.escapedquotes:
                    escapedstate = self.state
                    self.state = nextchar
                else:
                    self.token = self.token + nextchar
            elif self.state in self.escape:
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in escape state")
                    # XXX what error should be raised here?
                    raise ValueError("No escaped character")
                # In posix shells, only the quote itself or the escape
                # character may be escaped within quotes.
                if escapedstate in self.quotes and \
                   nextchar != self.state and nextchar != escapedstate:
                    self.token = self.token + self.state
                self.token = self.token + nextchar
                self.state = escapedstate
            elif self.state == 'a':
                if not nextchar:
                    self.state = None   # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                elif nextchar in self.commenters:
                    self.instream.readline()
                    self.lineno = self.lineno + 1
                    if self.posix:
                        self.state = ' '
                        if self.token or (self.posix and quoted):
                            break   # emit current token
                elif self.posix and nextchar in self.quotes:
                    self.state = nextchar
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars or nextchar in self.quotes \
                    or self.whitespace_split:
                    self.token = self.token + nextchar
                else:
                    # Punctuation ends the word; hand it back so that the
                    # next get_token() call returns it as its own token.
                    self.pushback.appendleft(nextchar)
                    if self.debug >= 2:
                        print("shlex: I see punctuation in word state")
                    self.state = ' '
                    if self.token:
                        break   # emit current token
        result = self.token
        self.token = ''
        # In POSIX mode '' is a valid token only if it was quoted.
        if self.posix and not quoted and result == '':
            result = None
        if self.debug > 1:
            if result:
                print("shlex: raw token=" + repr(result))
            else:
                print("shlex: raw token=EOF")
        return result

    def sourcehook(self, newfile):
        """Hook called on a filename to be sourced.

        Returns a (filename, open file object) pair for push_source().
        """
        if newfile[0] == '"':
            newfile = newfile[1:-1]
        # This implements cpp-like semantics for relative-path inclusion.
        if isinstance(self.infile, str) and not os.path.isabs(newfile):
            newfile = os.path.join(os.path.dirname(self.infile), newfile)
        return (newfile, open(newfile, "r"))

    def error_leader(self, infile=None, lineno=None):
        """Emit a C-compiler-like, Emacs-friendly error-message leader.

        Arguments left as None default to the lexer's current input name
        and line number.
        """
        if infile is None:
            infile = self.infile
        if lineno is None:
            lineno = self.lineno
        return "\"%s\", line %d: " % (infile, lineno)

    def __iter__(self):
        """Return the lexer itself; it yields tokens until end of input."""
        return self

    def __next__(self):
        """Return the next token, raising StopIteration at end of input."""
        token = self.get_token()
        if token == self.eof:
            raise StopIteration
        return token
271
def split(s, comments=False, posix=True):
    """Split the string *s* into a list of tokens using shell-like syntax.

    s -- the text to split (passed to the shlex constructor).
    comments -- if false (the default), no character starts a comment,
        so the lexer's comment characters are kept as ordinary text.
    posix -- if true (the default), use POSIX quoting and escaping rules.
    """
    lex = shlex(s, posix=posix)
    lex.whitespace_split = True
    if not comments:
        lex.commenters = ''
    # Pull tokens until the lexer returns its end-of-input marker.
    return list(iter(lex.get_token, lex.eof))
279
# Finds the first character that is not safe to leave unquoted.
_find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search


def quote(s):
    """Return a shell-escaped version of the string *s*.

    Strings made only of safe characters are returned unchanged.  Any
    other string is wrapped in single quotes.  The empty string becomes
    '' so that it still counts as an argument.
    """
    if not s:
        return "''"
    if _find_unsafe(s) is None:
        return s

    # use single quotes, and put single quotes into double quotes
    # the string $'b is then quoted as '$'"'"'b'
    return "'" + s.replace("'", "'\"'\"'") + "'"
293
if __name__ == '__main__':
    # Demo driver: tokenize the file named by the first command-line
    # argument, or standard input when none is given, printing each token.
    def _print_tokens(lexer):
        while True:
            tt = lexer.get_token()
            if not tt:
                break
            print("Token: " + repr(tt))

    if len(sys.argv) == 1:
        _print_tokens(shlex())
    else:
        file = sys.argv[1]
        # Close the input file even if lexing raises.
        with open(file) as stream:
            _print_tokens(shlex(stream, file))