1
#! /usr/local/bin/python
3
# NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is
4
# intentionally NOT "/usr/bin/env python". On many systems
5
# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
6
# scripts, and /usr/local/bin is the default directory where Python is
7
# installed, so /usr/bin/env would be unable to find python. Granted,
8
# binary installations by Linux vendors often install Python in
9
# /usr/bin. So let those vendors patch cgi.py to match their choice
12
"""Support module for CGI (Common Gateway Interface) scripts.
14
This module defines a number of utilities for use by CGI scripts
21
# Michael McLay started this module. Steve Majewski changed the
22
# interface to SvFormContentDict and FormContentDict. The multipart
23
# parsing was inspired by code submitted by Andreas Paepcke. Guido van
24
# Rossum rewrote, reformatted and documented the module and is currently
25
# responsible for its maintenance.
34
from operator import attrgetter
35
from io import StringIO
40
from warnings import warn
42
__all__ = ["MiniFieldStorage", "FieldStorage",
43
"parse", "parse_qs", "parse_qsl", "parse_multipart",
44
"parse_header", "print_exception", "print_environ",
45
"print_form", "print_directory", "print_arguments",
46
"print_environ_usage", "escape"]
51
logfile = "" # Filename to log to, if not empty
52
logfp = None # File object to log to, if not None
54
def initlog(*allargs):
55
"""Write a log message, if there is a log file.
57
Even though this function is called initlog(), you should always
58
use log(); log is a variable that is set either to initlog
59
(initially), to dolog (once the log file has been opened), or to
60
nolog (when logging is disabled).
62
The first argument is a format string; the remaining arguments (if
63
any) are arguments to the % operator, so e.g.
64
log("%s: %s", "a", "b")
65
will write "a: b" to the log file, followed by a newline.
67
If the global logfp is not None, it should be a file object to
68
which log data is written.
70
If the global logfp is None, the global logfile may be a string
71
giving a filename to open, in append mode. This file should be
72
world writable!!! If the file can't be opened, logging is
73
silently disabled (since there is no safe place where we could
74
send an error message).
78
if logfile and not logfp:
80
logfp = open(logfile, "a")
89
def dolog(fmt, *args):
    """Write one formatted record to the log file.

    fmt is a %-style format string and args are its operands; the
    formatted text is written to the global logfp followed by a
    newline.  See initlog() for the full logging protocol.
    """
    text = fmt % args
    logfp.write(text + "\n")
94
"""Dummy function, assigned to log when logging is disabled."""
97
log = initlog # The current logging function
103
# Maximum input we will accept when REQUEST_METHOD is POST
104
# 0 ==> unlimited input
107
def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
108
"""Parse a query in the environment or from a file (default stdin)
110
Arguments, all optional:
112
fp : file pointer; default: sys.stdin
114
environ : environment dictionary; default: os.environ
116
keep_blank_values: flag indicating whether blank values in
117
URL encoded forms should be treated as blank strings.
118
A true value indicates that blanks should be retained as
119
blank strings. The default false value indicates that
120
blank values are to be ignored and treated as if they were
123
strict_parsing: flag indicating what to do with parsing errors.
124
If false (the default), errors are silently ignored.
125
If true, errors raise a ValueError exception.
129
if not 'REQUEST_METHOD' in environ:
130
environ['REQUEST_METHOD'] = 'GET' # For testing stand-alone
131
if environ['REQUEST_METHOD'] == 'POST':
132
ctype, pdict = parse_header(environ['CONTENT_TYPE'])
133
if ctype == 'multipart/form-data':
134
return parse_multipart(fp, pdict)
135
elif ctype == 'application/x-www-form-urlencoded':
136
clength = int(environ['CONTENT_LENGTH'])
137
if maxlen and clength > maxlen:
138
raise ValueError('Maximum content length exceeded')
139
qs = fp.read(clength)
141
qs = '' # Unknown content-type
142
if 'QUERY_STRING' in environ:
144
qs = qs + environ['QUERY_STRING']
147
qs = qs + sys.argv[1]
148
environ['QUERY_STRING'] = qs # XXX Shouldn't, really
149
elif 'QUERY_STRING' in environ:
150
qs = environ['QUERY_STRING']
156
environ['QUERY_STRING'] = qs # XXX Shouldn't, really
157
return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing)
160
# parse query string function called from urlparse,
161
# this is done in order to maintain backward compatibility.
163
def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
164
"""Parse a query given as a string argument."""
165
warn("cgi.parse_qs is deprecated, use urllib.parse.parse_qs instead",
167
return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing)
169
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
170
"""Parse a query given as a string argument."""
171
warn("cgi.parse_qsl is deprecated, use urllib.parse.parse_qsl instead",
173
return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
175
def parse_multipart(fp, pdict):
176
"""Parse multipart input.
180
pdict: dictionary containing other parameters of content-type header
182
Returns a dictionary just like parse_qs(): keys are the field names, each
183
value is a list of values for that field. This is easy to use but not
184
much good if you are expecting megabytes to be uploaded -- in that case,
185
use the FieldStorage class instead which is much more flexible. Note
186
that content-type is the raw, unparsed contents of the content-type
189
XXX This does not parse nested multipart parts -- use FieldStorage for
192
XXX This should really be subsumed by FieldStorage altogether -- no
193
point in having two implementations of the same parsing algorithm.
194
Also, FieldStorage protects itself better against certain DoS attacks
195
by limiting the size of the data read in one chunk. The API here
196
does not support that kind of protection. This also affects parse()
197
since it can call parse_multipart().
203
if 'boundary' in pdict:
204
boundary = pdict['boundary']
205
if not valid_boundary(boundary):
206
raise ValueError('Invalid boundary in multipart form: %r'
209
nextpart = "--" + boundary
210
lastpart = "--" + boundary + "--"
214
while terminator != lastpart:
218
# At start of next part. Read headers first.
219
headers = http.client.parse_headers(fp)
220
clength = headers.get('content-length')
227
if maxlen and bytes > maxlen:
228
raise ValueError('Maximum content length exceeded')
229
data = fp.read(bytes)
232
# Read lines until end of part.
237
terminator = lastpart # End outer loop
240
terminator = line.strip()
241
if terminator in (nextpart, lastpart):
249
# Strip final line terminator
251
if line[-2:] == "\r\n":
253
elif line[-1:] == "\n":
256
data = "".join(lines)
257
line = headers['content-disposition']
260
key, params = parse_header(line)
261
if key != 'form-data':
264
name = params['name']
268
partdict[name].append(data)
270
partdict[name] = [data]
279
while end > 0 and s.count('"', 0, end) % 2:
280
end = s.find(';', end + 1)
287
def parse_header(line):
288
"""Parse a Content-type like header.
290
Return the main content-type and a dictionary of options.
293
parts = _parseparam(';' + line)
294
key = parts.__next__()
299
name = p[:i].strip().lower()
300
value = p[i+1:].strip()
301
if len(value) >= 2 and value[0] == value[-1] == '"':
303
value = value.replace('\\\\', '\\').replace('\\"', '"')
308
# Classes for field storage
309
# =========================
311
class MiniFieldStorage:
313
"""Like FieldStorage, for use when no file uploads are possible."""
322
disposition_options = {}
325
def __init__(self, name, value):
326
"""Constructor from field name and value."""
329
# self.file = StringIO(value)
332
"""Return printable representation."""
333
return "MiniFieldStorage(%r, %r)" % (self.name, self.value)
338
"""Store a sequence of fields, reading multipart/form-data.
340
This class provides naming, typing, files stored on disk, and
341
more. At the top level, it is accessible like a dictionary, whose
342
keys are the field names. (Note: None can occur as a field name.)
343
The items are either a Python list (if there are multiple values) or
344
another FieldStorage or MiniFieldStorage object. If it's a single
345
object, it has the following attributes:
347
name: the field name, if specified; otherwise None
349
filename: the filename, if specified; otherwise None; this is the
350
client side filename, *not* the file name on which it is
351
stored (that's a temporary file you don't deal with)
353
value: the value as a *string*; for file uploads, this
354
transparently reads the file every time you request the value
356
file: the file(-like) object from which you can read the data;
357
None if the data is stored as a simple string
359
type: the content-type, or None if not specified
361
type_options: dictionary of options specified on the content-type
364
disposition: content-disposition, or None if not specified
366
disposition_options: dictionary of corresponding options
368
headers: a dictionary(-like) object (sometimes email.message.Message or a
369
subclass thereof) containing *all* headers
371
The class is subclassable, mostly for the purpose of overriding
372
the make_file() method, which is called internally to come up with
373
a file open for reading and writing. This makes it possible to
374
override the default choice of storing all files in a temporary
375
directory and unlinking them as soon as they have been opened.
379
def __init__(self, fp=None, headers=None, outerboundary="",
380
environ=os.environ, keep_blank_values=0, strict_parsing=0):
381
"""Constructor. Read multipart/* until last part.
383
Arguments, all optional:
385
fp : file pointer; default: sys.stdin
386
(not used when the request method is GET)
388
headers : header dictionary-like object; default:
389
taken from environ as per CGI spec
391
outerboundary : terminating multipart boundary
392
(for internal use only)
394
environ : environment dictionary; default: os.environ
396
keep_blank_values: flag indicating whether blank values in
397
URL encoded forms should be treated as blank strings.
398
A true value indicates that blanks should be retained as
399
blank strings. The default false value indicates that
400
blank values are to be ignored and treated as if they were
403
strict_parsing: flag indicating what to do with parsing errors.
404
If false (the default), errors are silently ignored.
405
If true, errors raise a ValueError exception.
409
self.keep_blank_values = keep_blank_values
410
self.strict_parsing = strict_parsing
411
if 'REQUEST_METHOD' in environ:
412
method = environ['REQUEST_METHOD'].upper()
413
self.qs_on_post = None
414
if method == 'GET' or method == 'HEAD':
415
if 'QUERY_STRING' in environ:
416
qs = environ['QUERY_STRING']
423
headers = {'content-type':
424
"application/x-www-form-urlencoded"}
428
# Set default content-type for POST to what's traditional
429
headers['content-type'] = "application/x-www-form-urlencoded"
430
if 'CONTENT_TYPE' in environ:
431
headers['content-type'] = environ['CONTENT_TYPE']
432
if 'QUERY_STRING' in environ:
433
self.qs_on_post = environ['QUERY_STRING']
434
if 'CONTENT_LENGTH' in environ:
435
headers['content-length'] = environ['CONTENT_LENGTH']
436
self.fp = fp or sys.stdin
437
self.headers = headers
438
self.outerboundary = outerboundary
440
# Process content-disposition header
441
cdisp, pdict = "", {}
442
if 'content-disposition' in self.headers:
443
cdisp, pdict = parse_header(self.headers['content-disposition'])
444
self.disposition = cdisp
445
self.disposition_options = pdict
448
self.name = pdict['name']
450
if 'filename' in pdict:
451
self.filename = pdict['filename']
453
# Process content-type header
455
# Honor any existing content-type header. But if there is no
456
# content-type header, use some sensible defaults. Assume
457
# outerboundary is "" at the outer level, but something non-false
458
# inside a multi-part. The default for an inner part is text/plain,
459
# but for an outer part it should be urlencoded. This should catch
460
# bogus clients which erroneously forget to include a content-type
463
# See below for what we do if there does exist a content-type header,
464
# but it happens to be something we don't understand.
465
if 'content-type' in self.headers:
466
ctype, pdict = parse_header(self.headers['content-type'])
467
elif self.outerboundary or method != 'POST':
468
ctype, pdict = "text/plain", {}
470
ctype, pdict = 'application/x-www-form-urlencoded', {}
472
self.type_options = pdict
473
self.innerboundary = ""
474
if 'boundary' in pdict:
475
self.innerboundary = pdict['boundary']
477
if 'content-length' in self.headers:
479
clen = int(self.headers['content-length'])
482
if maxlen and clen > maxlen:
483
raise ValueError('Maximum content length exceeded')
486
self.list = self.file = None
488
if ctype == 'application/x-www-form-urlencoded':
489
self.read_urlencoded()
490
elif ctype[:10] == 'multipart/':
491
self.read_multi(environ, keep_blank_values, strict_parsing)
496
"""Return a printable representation."""
497
return "FieldStorage(%r, %r, %r)" % (
498
self.name, self.filename, self.value)
501
return iter(self.keys())
503
def __getattr__(self, name):
505
raise AttributeError(name)
508
value = self.file.read()
510
elif self.list is not None:
516
def __getitem__(self, key):
517
"""Dictionary style indexing."""
518
if self.list is None:
519
raise TypeError("not indexable")
521
for item in self.list:
522
if item.name == key: found.append(item)
530
def getvalue(self, key, default=None):
531
"""Dictionary style get() method, including 'value' lookup."""
534
if type(value) is type([]):
535
return [x.value for x in value]
541
def getfirst(self, key, default=None):
542
""" Return the first value received."""
545
if type(value) is type([]):
546
return value[0].value
552
def getlist(self, key):
553
""" Return list of received values."""
556
if type(value) is type([]):
557
return [x.value for x in value]
564
"""Dictionary style keys() method."""
565
if self.list is None:
566
raise TypeError("not indexable")
567
return list(set(item.name for item in self.list))
569
def __contains__(self, key):
    """Dictionary style membership test: is any field named key?

    Raises TypeError("not indexable") when self.list is None.
    """
    fields = self.list
    if fields is None:
        raise TypeError("not indexable")
    for field in fields:
        if field.name == key:
            return True
    return False
576
"""Dictionary style len(x) support."""
577
return len(self.keys())
579
def __bool__(self):
    """Truth value of the storage.

    Returns True when self.list is a non-empty list and False when it
    is empty or None.

    Python 3 consults __bool__ for truth testing and never calls
    __nonzero__; without this method ``if form:`` silently falls back
    to __len__() instead of running the logic below.
    """
    return bool(self.list)

# Python 2 spelling, kept so explicit __nonzero__() callers still work.
__nonzero__ = __bool__
582
def read_urlencoded(self):
583
"""Internal: read data in query string format."""
584
qs = self.fp.read(self.length)
586
qs += '&' + self.qs_on_post
587
self.list = list = []
588
for key, value in urllib.parse.parse_qsl(qs, self.keep_blank_values,
589
self.strict_parsing):
590
list.append(MiniFieldStorage(key, value))
593
FieldStorageClass = None
595
def read_multi(self, environ, keep_blank_values, strict_parsing):
596
"""Internal: read a part that is itself multipart."""
597
ib = self.innerboundary
598
if not valid_boundary(ib):
599
raise ValueError('Invalid boundary in multipart form: %r' % (ib,))
602
for key, value in urllib.parse.parse_qsl(self.qs_on_post,
603
self.keep_blank_values, self.strict_parsing):
604
self.list.append(MiniFieldStorage(key, value))
605
FieldStorageClass = None
607
klass = self.FieldStorageClass or self.__class__
608
parser = email.parser.FeedParser()
609
# Create bogus content-type header for proper multipart parsing
610
parser.feed('Content-Type: %s; boundary=%s\r\n\r\n' % (self.type, ib))
611
parser.feed(self.fp.read())
612
full_msg = parser.close()
614
msgs = full_msg.get_payload()
616
fp = StringIO(msg.get_payload())
617
part = klass(fp, msg, ib, environ, keep_blank_values,
619
self.list.append(part)
622
def read_single(self):
623
"""Internal: read an atomic part."""
631
bufsize = 8*1024 # I/O buffering size for copy to file
633
def read_binary(self):
634
"""Internal: read binary data."""
635
self.file = self.make_file()
639
data = self.fp.read(min(todo, self.bufsize))
643
self.file.write(data)
644
todo = todo - len(data)
646
def read_lines(self):
647
"""Internal: read lines until EOF or outerboundary."""
648
self.file = self.__file = StringIO()
649
if self.outerboundary:
650
self.read_lines_to_outerboundary()
652
self.read_lines_to_eof()
654
def __write(self, line):
655
if self.__file is not None:
656
if self.__file.tell() + len(line) > 1000:
657
self.file = self.make_file()
658
data = self.__file.getvalue()
659
self.file.write(data)
661
self.file.write(line)
663
def read_lines_to_eof(self):
664
"""Internal: read lines until EOF."""
666
line = self.fp.readline(1<<16)
672
def read_lines_to_outerboundary(self):
673
"""Internal: read lines until outerboundary."""
674
next = "--" + self.outerboundary
677
last_line_lfend = True
679
line = self.fp.readline(1<<16)
683
if line[:2] == "--" and last_line_lfend:
684
strippedline = line.strip()
685
if strippedline == next:
687
if strippedline == last:
691
if line[-2:] == "\r\n":
694
last_line_lfend = True
695
elif line[-1] == "\n":
698
last_line_lfend = True
701
last_line_lfend = False
702
self.__write(odelim + line)
704
def skip_lines(self):
705
"""Internal: skip lines until outer boundary if defined."""
706
if not self.outerboundary or self.done:
708
next = "--" + self.outerboundary
710
last_line_lfend = True
712
line = self.fp.readline(1<<16)
716
if line[:2] == "--" and last_line_lfend:
717
strippedline = line.strip()
718
if strippedline == next:
720
if strippedline == last:
723
last_line_lfend = line.endswith('\n')
726
"""Overridable: return a readable & writable file.
728
The file will be used as follows:
729
- data is written to it
731
- data is read from it
733
The file is always opened in text mode.
735
This version opens a temporary file for reading and writing,
736
and immediately deletes (unlinks) it. The trick (on Unix!) is
737
that the file can still be used, but it can't be opened by
738
another process, and it will automatically be deleted when it
739
is closed or when the current process terminates.
741
If you want a more permanent file, you derive a class which
742
overrides this method. If you want a visible temporary file
743
that is nevertheless automatically deleted when the script
744
terminates, try defining a __del__ method in a derived class
745
which unlinks the temporary files you have created.
749
return tempfile.TemporaryFile("w+", encoding="utf-8", newline="\n")
755
def test(environ=os.environ):
756
"""Robust test CGI script, usable as main program.
758
Write minimal HTTP headers and dump all information provided to
759
the script in HTML form.
762
print("Content-type: text/html")
764
sys.stderr = sys.stdout
766
form = FieldStorage() # Replace with other classes to test those
770
print_environ(environ)
771
print_environ_usage()
773
exec("testing print_exception() -- <I>italics?</I>")
776
print("<H3>What follows is a test, not an actual exception:</H3>")
781
print("<H1>Second try with a small maxlen...</H1>")
786
form = FieldStorage() # Replace with other classes to test those
790
print_environ(environ)
794
def print_exception(type=None, value=None, tb=None, limit=None):
796
type, value, tb = sys.exc_info()
799
print("<H3>Traceback (most recent call last):</H3>")
800
list = traceback.format_tb(tb, limit) + \
801
traceback.format_exception_only(type, value)
802
print("<PRE>%s<B>%s</B></PRE>" % (
803
escape("".join(list[:-1])),
808
def print_environ(environ=os.environ):
809
"""Dump the shell environment as HTML."""
810
keys = sorted(environ.keys())
812
print("<H3>Shell Environment:</H3>")
815
print("<DT>", escape(key), "<DD>", escape(environ[key]))
819
def print_form(form):
820
"""Dump the contents of a form as HTML."""
821
keys = sorted(form.keys())
823
print("<H3>Form Contents:</H3>")
825
print("<P>No form fields.")
828
print("<DT>" + escape(key) + ":", end=' ')
830
print("<i>" + escape(repr(type(value))) + "</i>")
831
print("<DD>" + escape(repr(value)))
835
def print_directory():
836
"""Dump the current directory as HTML."""
838
print("<H3>Current Working Directory:</H3>")
841
except os.error as msg:
842
print("os.error:", escape(str(msg)))
847
def print_arguments():
849
print("<H3>Command Line Arguments:</H3>")
854
def print_environ_usage():
855
"""Dump a list of environment variables used by CGI as HTML."""
857
<H3>These environment variables could have been set:</H3>
867
<LI>GATEWAY_INTERFACE
885
In addition, HTTP headers sent by the server may be passed in the
886
environment as well. Here are some common variable names:
901
def escape(s, quote=None):
    '''Replace special characters "&", "<" and ">" with HTML-safe sequences.

    If the optional flag quote is true, the quotation mark character (")
    is also translated.  Returns the escaped copy of s.
    '''
    # "&" must be handled first; otherwise the ampersands introduced by
    # the later replacements would themselves be escaped again.
    s = s.replace("&", "&amp;")
    s = s.replace("<", "&lt;")
    s = s.replace(">", "&gt;")
    if quote:
        s = s.replace('"', "&quot;")
    return s
912
def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"):
    """Check whether s is an acceptable multipart boundary string.

    The default pattern accepts 1 to 201 printable ASCII characters
    (space through "~") whose last character is not a space.  Returns
    the match object (truthy) on success and None otherwise, so callers
    should only rely on the truth value.
    """
    # Function-scope import: no module-level import of `re` is visible,
    # and this keeps the helper self-contained.
    import re
    # NOTE: "$" also matches just before a single trailing newline.
    return re.match(_vb_pattern, s)
919
# Call test() when this file is run as a script (not imported as a module)
920
if __name__ == '__main__':