3
from email.mime.multipart import MIMEMultipart
4
from email.mime.text import MIMEText
8
'#include' : 'text/x-include-url',
9
'#!' : 'text/x-shellscript',
10
'#cloud-config' : 'text/cloud-config',
11
'#upstart-job' : 'text/upstart-job',
12
'#part-handler' : 'text/part-handler'
15
# if 'str' is compressed return decompressed otherwise return it
20
uncomp = gzip.GzipFile(None,"rb",1,StringIO.StringIO(str)).read()
25
def do_include(str,parts):
    """Fetch every URL listed in an '#include' payload and collect its parts.

    str   -- the include payload: a list of URLs, one per line.  Lines
             starting with '#' (including the '#include' marker itself)
             and blank lines are ignored.  The parameter name shadows the
             builtin; it is kept so existing callers are unaffected.
    parts -- dictionary of lists that process_includes() appends to.

    Each fetched document is passed through decomp_str(), parsed as an
    email message and handed to process_includes(), so included documents
    may themselves contain further includes.
    """
    import urllib

    # is just a list of urls, one per line
    for line in str.splitlines():
        url = line.strip()
        # An empty line is not a URL; '#...' covers both the '#include'
        # marker and ordinary comment lines.
        if not url or url.startswith("#"):
            continue

        handle = urllib.urlopen(url)
        try:
            content = handle.read()
        finally:
            # release the connection even if the read fails
            handle.close()

        process_includes(email.message_from_string(decomp_str(content)), parts)
34
def process_includes(msg,parts):
    """Flatten the message *msg* into the parallel lists held in *parts*.

    msg   -- a parsed email message; it is traversed with msg.walk().
    parts -- a dictionary of arrays.  The keys 'content', 'names' and
             'types' are created as empty lists when absent, and one
             entry is appended to each of them per collected part.

    A payload that begins with one of the markers in starts_with_mappings
    gets the content type mapped to that marker; otherwise the type
    declared by the part is used.  Parts of type 'text/x-include-url' are
    not stored: they are expanded through do_include() instead.
    """
    # parts is a dictionary of arrays
    for key in ('content', 'names', 'types'):
        if key not in parts:
            parts[key] = []

    for part in msg.walk():
        # multipart/* are just containers
        if part.get_content_maintype() == 'multipart':
            continue

        payload = part.get_payload()

        # A recognised leading marker takes precedence over the declared type.
        ctype = None
        for prefix, gtype in starts_with_mappings.items():
            if payload.startswith(prefix):
                ctype = gtype
                break
        if ctype is None:
            ctype = part.get_content_type()

        if ctype == 'text/x-include-url':
            do_include(payload, parts)
            continue

        filename = part.get_filename()
        if not filename:
            # unnamed parts are numbered by their position in the output
            filename = 'part-%03d' % len(parts['content'])

        parts['content'].append(payload)
        parts['types'].append(ctype)
        parts['names'].append(filename)
70
def parts2mime(parts):
    """Build a multipart MIME document from *parts* and return it as a string.

    parts -- dictionary holding three parallel lists: 'content' (payloads),
             'types' (content types, or None when unknown) and 'names'
             (file names).  Entry i of each list describes part i.

    Parts whose main type is 'text' become MIMEText sub-messages; anything
    else is wrapped in a MIMEBase container and Base64 encoded.  Every part
    carries a 'Content-Disposition: attachment' header with its file name.
    An IndexError is raised if 'types' or 'names' is shorter than 'content'.
    """
    # Imported here because neither name is imported in the portion of the
    # module visible alongside this function; without them the non-text
    # branch below fails with NameError.
    from email import encoders
    from email.mime.base import MIMEBase

    outer = MIMEMultipart()

    for i, content in enumerate(parts['content']):
        ctype = parts['types'][i]
        if ctype is None:
            # No guess could be made, or the file is encoded (compressed), so
            # use a generic bag-of-bits type.
            ctype = 'application/octet-stream'

        maintype, subtype = ctype.split('/', 1)
        if maintype == 'text':
            msg = MIMEText(content, _subtype=subtype)
        else:
            msg = MIMEBase(maintype, subtype)
            msg.set_payload(content)
            # Encode the payload using Base64
            encoders.encode_base64(msg)

        # Set the filename parameter
        msg.add_header('Content-Disposition', 'attachment',
                       filename=parts['names'][i])
        outer.attach(msg)

    return outer.as_string()
96
# this is heavily wasteful, reads through userdata string input
97
def preprocess_userdata(data):
    """Expand *data* into a single multipart MIME string.

    data -- raw user-data; it is passed through decomp_str() before being
            parsed as an email message.

    The parsed message is flattened by process_includes() into a fresh
    parts dictionary, which parts2mime() then renders as one multipart
    document.  That string is returned.
    """
    # a new dictionary per call, so results never leak between invocations
    parts = {}
    process_includes(email.message_from_string(decomp_str(data)), parts)
    return parts2mime(parts)
102
# callbacks is a dictionary with:
103
# { 'content-type': handler(data,content_type,filename,payload) }
104
def walk_userdata(str, callbacks, data = None):
    """Parse *str* as an email message and dispatch each part to a handler.

    str       -- the message text.  The parameter name shadows the builtin;
                 it is kept so existing callers are unaffected.
    callbacks -- dictionary mapping a content type to a callable invoked as
                 handler(data, content_type, filename, payload).
    data      -- opaque value passed through unchanged to every handler.

    Container (multipart/*) parts are skipped.  Parts without a file name
    are named 'part-NNN', where NNN counts the non-container parts seen so
    far.  Parts whose content type has no entry in *callbacks* are ignored.
    """
    partnum = 0
    for part in email.message_from_string(str).walk():
        # multipart/* are just containers
        if part.get_content_maintype() == 'multipart':
            continue

        ctype = part.get_content_type()
        if ctype is None:
            # defensive fallback for a part that reports no type
            ctype = 'application/octet-stream'

        filename = part.get_filename()
        if not filename:
            filename = 'part-%03d' % partnum

        if ctype in callbacks:
            callbacks[ctype](data, ctype, filename, part.get_payload())

        partnum += 1
124
if __name__ == "__main__":
    # Command-line use: read the file named by the first argument, expand
    # it into its parts and print the resulting multipart document.
    import sys

    infile = open(sys.argv[1])
    try:
        data = decomp_str(infile.read())
    finally:
        # close the input even if reading or decompressing fails
        infile.close()

    parts = {}
    process_includes(email.message_from_string(data), parts)
    # single-argument print(...) behaves the same as the print statement
    print("#found %s parts" % len(parts['content']))
    print(parts2mime(parts))