# Copyright (C) 2012 Canonical Ltd.
# Copyright (C) 2012 Hewlett-Packard Development Company, L.P.
# Copyright (C) 2012 Yahoo! Inc.
#
# Author: Scott Moser <scott.moser@canonical.com>
# Author: Juerg Haefliger <juerg.haefliger@hp.com>
# Author: Joshua Harlow <harlowja@yahoo-inc.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3, as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import os

from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.nonmultipart import MIMENonMultipart
from email.mime.text import MIMEText

import six

from cloudinit import handlers
from cloudinit import log as logging
from cloudinit import util

LOG = logging.getLogger(__name__)

# Constants copied in from the handler module
NOT_MULTIPART_TYPE = handlers.NOT_MULTIPART_TYPE
PART_FN_TPL = handlers.PART_FN_TPL
OCTET_TYPE = handlers.OCTET_TYPE

CONTENT_TYPE = 'Content-Type'

# Various special content types that cause special actions
TYPE_NEEDED = ["text/plain", "text/x-not-multipart"]
INCLUDE_TYPES = ['text/x-include-url', 'text/x-include-once-url']
ARCHIVE_TYPES = ["text/cloud-config-archive"]
UNDEF_TYPE = "text/plain"
ARCHIVE_UNDEF_TYPE = "text/cloud-config"
ARCHIVE_UNDEF_BINARY_TYPE = "application/octet-stream"

# This seems to hit most of the gzip possible content types.
DECOMP_TYPES = [
    'application/gzip',
    'application/gzip-compressed',
    'application/gzipped',
    'application/x-compress',
    'application/x-compressed',
    'application/x-gunzip',
    'application/x-gzip',
    'application/x-gzip-compressed',
]

# Msg header used to track attachments
ATTACHMENT_FIELD = 'Number-Attachments'

# Only the following content types can have their launch index examined
# in their payload; every other content type can still provide a header.
EXAMINE_FOR_LAUNCH_INDEX = ["text/cloud-config"]
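# For example (illustrative only): a "text/cloud-config" part whose YAML
# payload contains 'launch-index: 2', or a part of any type carrying a
# 'Launch-Index: 2' header, ends up tagged with that index when it is
# attached by the processor below.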


def _replace_header(msg, key, value):
    del msg[key]
    msg[key] = value


def _set_filename(msg, filename):
    del msg['Content-Disposition']
    msg.add_header('Content-Disposition',
                   'attachment', filename=str(filename))
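

# UserDataProcessor combines one or more raw user-data blobs (plain text,
# gzip-compressed data, or full MIME multiparts) into a single MIMEMultipart
# whose parts carry a detected Content-Type, a filename and a Launch-Index.
# A minimal usage sketch (the 'paths' object and the blob variable are
# assumed to come from the calling cloud-init code, not from this module):
#
#     processor = UserDataProcessor(paths)
#     combined = processor.process(raw_user_data)
#     for part in combined.walk():
#         print(part.get_content_type(), part.get_filename())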
class UserDataProcessor(object):
    def __init__(self, paths):
        self.paths = paths
        self.ssl_details = util.fetch_ssl_details(paths)

    def process(self, blob):
        accumulating_msg = MIMEMultipart()
        if isinstance(blob, list):
            for b in blob:
                self._process_msg(convert_string(b), accumulating_msg)
        else:
            self._process_msg(convert_string(blob), accumulating_msg)
        return accumulating_msg

    def _process_msg(self, base_msg, append_msg):

        def find_ctype(payload):
            return handlers.type_from_starts_with(payload)

        for part in base_msg.walk():
            if is_skippable(part):
                continue

            ctype = None
            ctype_orig = part.get_content_type()
            payload = util.fully_decoded_payload(part)
            was_compressed = False

            # When the message states it is of a gzipped content type ensure
            # that we attempt to decode said payload so that the decompressed
            # data can be examined (instead of the compressed data).
            if ctype_orig in DECOMP_TYPES:
                try:
                    payload = util.decomp_gzip(payload, quiet=False)
                    # At this point we don't know what the content-type is
                    # since we just decompressed it.
                    ctype_orig = None
                    was_compressed = True
                except util.DecompressionError as e:
                    LOG.warn("Failed decompressing payload from %s of length"
                             " %s due to: %s", ctype_orig, len(payload), e)
                    continue

            # Attempt to figure out the payload's content-type
            if not ctype_orig:
                ctype_orig = UNDEF_TYPE
            if ctype_orig in TYPE_NEEDED:
                ctype = find_ctype(payload)
            if ctype is None:
                ctype = ctype_orig

            # In the case where the data was compressed, we want to make sure
            # that we create a new message that contains the found content
            # type with the uncompressed content since later traversals of the
            # messages will expect a part not compressed.
            if was_compressed:
                maintype, subtype = ctype.split("/", 1)
                n_part = MIMENonMultipart(maintype, subtype)
                n_part.set_payload(payload)
                # Copy various headers from the old part to the new one,
                # but don't include all the headers since some are not useful
                # after decoding and decompression.
                if part.get_filename():
                    _set_filename(n_part, part.get_filename())
                for h in ('Launch-Index',):
                    if h in part:
                        _replace_header(n_part, h, str(part[h]))
                part = n_part

            if ctype != ctype_orig:
                _replace_header(part, CONTENT_TYPE, ctype)

            if ctype in INCLUDE_TYPES:
                self._do_include(payload, append_msg)
                continue

            if ctype in ARCHIVE_TYPES:
                self._explode_archive(payload, append_msg)
                continue

            # TODO(harlowja): Should this be happening, shouldn't
            # the part header be modified and not the base?
            _replace_header(base_msg, CONTENT_TYPE, ctype)

            self._attach_part(append_msg, part)

    def _attach_launch_index(self, msg):
        header_idx = msg.get('Launch-Index', None)
        payload_idx = None
        if msg.get_content_type() in EXAMINE_FOR_LAUNCH_INDEX:
            try:
                # See if it has a launch-index field
                # that might affect the final header
                payload = util.load_yaml(msg.get_payload(decode=True))
                if payload:
                    payload_idx = payload.get('launch-index')
            except Exception:
                pass
        # Header overrides contents, for now (?) or the other way around?
        if header_idx is not None:
            payload_idx = header_idx
        # Nothing found in payload, use header (if anything there)
        if payload_idx is None:
            payload_idx = header_idx
        if payload_idx is not None:
            try:
                msg.add_header('Launch-Index', str(int(payload_idx)))
            except (ValueError, TypeError):
                pass

    def _get_include_once_filename(self, entry):
        entry_fn = util.hash_blob(entry, 'md5', 64)
        return os.path.join(self.paths.get_ipath_cur('data'),
                            'urlcache', entry_fn)

    def _process_before_attach(self, msg, attached_id):
        if not msg.get_filename():
            _set_filename(msg, PART_FN_TPL % (attached_id))
        self._attach_launch_index(msg)

    def _do_include(self, content, append_msg):
        # Include a list of urls, one per line
        # also support '#include <url here>'
        # or #include-once '<url here>'
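        # For example (illustrative urls only), user-data such as:
        #
        #     #include
        #     http://example.com/common-config
        #     #include-once
        #     http://example.com/one-time-config
        #
        # fetches both urls and processes their contents as user-data, with
        # the second one cached locally so it is only downloaded once.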
        include_once_on = False
        for line in content.splitlines():
            lc_line = line.lower()
            if lc_line.startswith("#include-once"):
                line = line[len("#include-once"):].lstrip()
                # Every following include will now
                # not be refetched.... but will be
                # re-read from a local urlcache (if it worked)
                include_once_on = True
            elif lc_line.startswith("#include"):
                line = line[len("#include"):].lstrip()
                # Disable the include once if it was on
                # if it wasn't, then this has no effect.
                include_once_on = False
            if line.startswith("#"):
                continue
            include_url = line.strip()
            if not include_url:
                continue

            include_once_fn = None
            content = None
            if include_once_on:
                include_once_fn = self._get_include_once_filename(include_url)
            if include_once_on and os.path.isfile(include_once_fn):
                content = util.load_file(include_once_fn)
            else:
                resp = util.read_file_or_url(include_url,
                                             ssl_details=self.ssl_details)
                if include_once_on and resp.ok():
                    util.write_file(include_once_fn, resp.contents,
                                    mode=0o600)
                if resp.ok():
                    content = resp.contents
                else:
                    LOG.warn(("Fetching from %s resulted in"
                              " an invalid http code of %s"),
                             include_url, resp.code)

            if content is not None:
                new_msg = convert_string(content)
                self._process_msg(new_msg, append_msg)

    def _explode_archive(self, archive, append_msg):
        entries = util.load_yaml(archive, default=[], allowed=(list, set))
        for ent in entries:
            # ent can be one of:
            #  dict { 'filename' : 'value', 'content' :
            #       'value', 'type' : 'value' }
            #    filename and type may not be present
            # or
            #  scalar(payload)
            if isinstance(ent, six.string_types):
                ent = {'content': ent}
            if not isinstance(ent, (dict)):
                # TODO(harlowja) raise?
                continue

            content = ent.get('content', '')
            mtype = ent.get('type')
            if not mtype:
                default = ARCHIVE_UNDEF_TYPE
                if isinstance(content, six.binary_type):
                    default = ARCHIVE_UNDEF_BINARY_TYPE
                mtype = handlers.type_from_starts_with(content, default)

            maintype, subtype = mtype.split('/', 1)
            if maintype == "text":
                if isinstance(content, six.binary_type):
                    content = content.decode()
                msg = MIMEText(content, _subtype=subtype)
            else:
                msg = MIMEBase(maintype, subtype)
                msg.set_payload(content)

            if 'filename' in ent:
                _set_filename(msg, ent['filename'])
            if 'launch-index' in ent:
                msg.add_header('Launch-Index', str(ent['launch-index']))

            for header in list(ent.keys()):
                if header.lower() in ('content', 'filename', 'type',
                                      'launch-index', 'content-disposition',
                                      ATTACHMENT_FIELD.lower(),
                                      CONTENT_TYPE.lower()):
                    continue
                msg.add_header(header, ent[header])

            self._attach_part(append_msg, msg)

    def _multi_part_count(self, outer_msg, new_count=None):
        """
        Return the number of attachments to this MIMEMultipart by looking
        at its 'Number-Attachments' header.
        """
        if ATTACHMENT_FIELD not in outer_msg:
            outer_msg[ATTACHMENT_FIELD] = '0'

        if new_count is not None:
            _replace_header(outer_msg, ATTACHMENT_FIELD, str(new_count))

        fetched_count = 0
        try:
            fetched_count = int(outer_msg.get(ATTACHMENT_FIELD))
        except (ValueError, TypeError):
            _replace_header(outer_msg, ATTACHMENT_FIELD, str(fetched_count))
        return fetched_count

    def _attach_part(self, outer_msg, part):
        """
        Attach a message to an outer message. 'outer_msg' must be a
        MIMEMultipart. Modifies a header in the outer message to keep
        track of the number of attachments.
        """
        part_count = self._multi_part_count(outer_msg)
        self._process_before_attach(part, part_count + 1)
        outer_msg.attach(part)
        self._multi_part_count(outer_msg, part_count + 1)


def is_skippable(part):
    # multipart/* are just containers
    part_maintype = part.get_content_maintype() or ''
    if part_maintype.lower() == 'multipart':
        return True
    return False


# Converts a raw string into a mime message
def convert_string(raw_data, content_type=NOT_MULTIPART_TYPE):
    if not raw_data:
        raw_data = ''

    def create_binmsg(data, content_type):
        maintype, subtype = content_type.split("/", 1)
        msg = MIMEBase(maintype, subtype)
        msg.set_payload(data)
        return msg

    try:
        data = util.decode_binary(util.decomp_gzip(raw_data))
        if "mime-version:" in data[0:4096].lower():
            msg = util.message_from_string(data)
        else:
            msg = create_binmsg(data, content_type)
    except UnicodeDecodeError:
        msg = create_binmsg(raw_data, content_type)

    return msg
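

# A small, illustrative use of convert_string (example data only): plain text
# that does not already look like a MIME message is wrapped in a single part
# of the given content type.
#
#     msg = convert_string("#include\nhttp://example.com/conf")
#     msg.get_content_type()   # "text/x-not-multipart" (NOT_MULTIPART_TYPE)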