3
Parse a string into words like a (POSIX) shell does.
5
License: Python Software Foundation License
6
http://www.opensource.org/licenses/PythonSoftFoundation.html
8
This module parses a string into words according to the parsing rules
9
of a POSIX shell. These parsing rules are (quoted after 'man bash'):
11
1) Words are split at whitespace characters; these are Space, Tab,
12
Newline, Carriage-Return, Vertical-Tab (0B) and Form-Feed (0C).
14
NB: Quotes do _not_ separate words! Thus
16
will be parsed into a single word:
19
2) A non-quoted backslash (\) is the escape character. It preserves
20
the literal value of the next character that follows.
22
3) Enclosing characters in single quotes preserves the literal value
23
of each character within the quotes. A single quote may not occur
24
between single quotes, even when preceded by a backslash.
26
This means: backslash (\) has no special meaning within single
27
quotes. All characters within single quotes are taken as-is.
29
4) Enclosing characters in double quotes preserves the literal value
30
of all characters within the quotes, with the exception of \. The
31
backslash retains its special meaning only when followed by " or \. A
32
double quote may be quoted within double quotes by preceding it
35
http://www.crazy-compilers.com/py-lib/#shellwords
39
__author__ = "Hartmut Goebel <h.goebel@crazy-compilers.com>"
41
__copyright__ = "(C) Copyright 2002 by Hartmut Goebel"
42
__license__ = "Python Software Foundation License"
43
__url__ = 'http://www.crazy-compilers.com/py-lib/#shellwords'
46
from types import ListType, TupleType
49
__all__ = ['shellwords', 'EOFError', 'UnmatchedQuoteError',
50
'UnmatchedSingleQuoteError', 'UnmatchedDoubleQuoteError']
53
## w/o quotes: \ escapes everything
54
## w/i d-quotes: \ escapes only d-quotes and back-slash
55
## w/i s-quotes: no escaping is done at all
57
# Token patterns for the word splitter.  re_dquote, re_squote, re_escaped
# and re_outside are matched at the current scan position; re_esc_quote is
# applied with .sub() to the body of a double-quoted section.

# Double-quoted section; group(1) is the text between the quotes.  A
# backslash is consumed together with the character after it, so an
# escaped double quote does not terminate the section.
re_dquote = re.compile(r'"(([^"\\]|\\.)*)"')

# Single-quoted section; group(1) is the text between the quotes, taken
# verbatim.  DOTALL is required so that a newline between the quotes is
# accepted like any other character; without it a properly closed quote
# that spans a newline fails to match and is misreported as unmatched.
re_squote = re.compile(r"'(.*?)'", re.DOTALL)

# A backslash plus the single character it escapes (group 1).
re_escaped = re.compile(r'\\(.)')

# Escaped backslash or escaped double quote; group(1) is the bare character.
re_esc_quote = re.compile(r'\\([\\"])')

# A run of characters that are neither whitespace, backslash nor a quote.
re_outside = re.compile(r"""([^\s\\'"]+)""") # " emacs happy
64
class EOFError(ValueError):
65
def __init__(self, line):
67
class UnmatchedQuoteError(EOFError):
    """Common parent of the unmatched single- and double-quote errors."""
68
class UnmatchedSingleQuoteError(UnmatchedQuoteError):
70
return "Unmatched single quote: %s" % self.line
71
class UnmatchedDoubleQuoteError(UnmatchedQuoteError):
73
return "Unmatched double quote: %s" % self.line
78
Simple helper class for a string-like type which
79
distinguishes between 'empty' and 'undefined'.
84
def __ne__(self, other): return self.arg != other
85
#def __eq__(self, other): return self.arg == other # unused
86
#def __repr__(self): return repr(self.arg) # unused
87
def __str__(self): return str(self.arg)
89
def append(self, text):
90
if self.arg is None: self.arg = text
91
else: self.arg += text
96
i = 0; start = 0; arg = Arg()
99
if c == '"': # handle double quote:
100
match = re_dquote.match(line, i)
102
raise UnmatchedDoubleQuoteError(line)
104
snippet = match.group(1)
105
arg.append( re_esc_quote.sub(r'\1', snippet))
107
elif c == "'": # handle single quote:
108
match = re_squote.match(line, i)
110
raise UnmatchedSingleQuoteError(line)
112
arg.append(match.group(1))
113
# there is _no_ escape character within single quotes!
115
elif c == "\\": # handle backslash = escape-charakter
116
match = re_escaped.match(line, i)
120
arg.append(match.group(1))
122
elif c.isspace(): # handle whitespace
124
arg_list.append(str(arg))
126
while i < len(line) and line[i].isspace():
129
match = re_outside.match(line, i)
132
arg.append(match.group())
134
if arg != None: arg_list.append(str(arg))