1
# -*- coding: latin-1 -*-
4
This module parses and generates contentlines as defined in RFC 2445
5
(iCalendar), but will probably work for other MIME types with similar syntax.
8
It is stupid in the sense that it treats the content purely as strings. No type
9
conversion is attempted.
11
Copyright, 2005: Max M <maxm@mxm.dk>
12
License: GPL (Just contact me if and why you would like it changed)
16
from types import TupleType, ListType
17
SequenceTypes = [TupleType, ListType]
20
from icalendar.caselessdict import CaselessDict
23
#################################################################
24
# Property parameter stuff
27
"Returns a parameter value"
28
if type(val) in SequenceTypes:
33
NAME = re.compile('[\w-]+')
34
UNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7F",:;]')
35
QUNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7F"]')
36
FOLD = re.compile('([\r]?\n)+[ \t]{1}')
38
def validate_token(name):
39
match = NAME.findall(name)
40
if len(match) == 1 and name == match[0]:
42
raise ValueError, name
44
def validate_param_value(value, quoted=True):
45
validator = UNSAFE_CHAR
47
validator = QUNSAFE_CHAR
48
if validator.findall(value):
49
raise ValueError, value
51
QUOTABLE = re.compile('[,;:].')
54
Parameter values containing [,;:] must be double quoted
57
>>> dQuote('Rasmussen, Max')
59
>>> dQuote('name:value')
62
if QUOTABLE.search(val):
67
def q_split(st, sep=','):
69
Splits a string on char, taking double (q)uotes into consideration
70
>>> q_split('Max,Moller,"Rasmussen, Max"')
71
['Max', 'Moller', '"Rasmussen, Max"']
77
for i in range(length):
81
if not inquote and ch == sep:
82
result.append(st[cursor:i])
85
result.append(st[cursor:])
88
def q_join(lst, sep=','):
90
Joins a list on sep, quoting strings with QUOTABLE chars
91
>>> s = ['Max', 'Moller', 'Rasmussen, Max']
93
'Max,Moller,"Rasmussen, Max"'
95
return sep.join([dQuote(itm) for itm in lst])
97
class Parameters(CaselessDict):
99
Parser and generator of Property parameter strings. It knows nothing of
100
datatypes. Its main concern is textual structure.
103
Simple parameter:value pair
104
>>> p = Parameters(parameter1='Value1')
109
keys are converted to upper
114
Parameters are case insensitive
121
Parameter with list of values must be separated by comma
122
>>> p = Parameters({'parameter1':['Value1', 'Value2']})
124
'PARAMETER1=Value1,Value2'
127
Multiple parameters must be separated by a semicolon
128
>>> p = Parameters({'RSVP':'TRUE', 'ROLE':'REQ-PARTICIPANT'})
130
'ROLE=REQ-PARTICIPANT;RSVP=TRUE'
133
Parameter values containing ',;:' must be double quoted
134
>>> p = Parameters({'ALTREP':'http://www.wiz.org'})
136
'ALTREP="http://www.wiz.org"'
139
list items must be quoted separately
140
>>> p = Parameters({'MEMBER':['MAILTO:projectA@host.com', 'MAILTO:projectB@host.com', ]})
142
'MEMBER="MAILTO:projectA@host.com","MAILTO:projectB@host.com"'
144
Now the whole shebang
145
>>> p = Parameters({'parameter1':'Value1', 'parameter2':['Value2', 'Value3'],\
146
'ALTREP':['http://www.wiz.org', 'value4']})
148
'ALTREP="http://www.wiz.org",value4;PARAMETER1=Value1;PARAMETER2=Value2,Value3'
150
We can also parse parameter strings
151
>>> Parameters.from_string('PARAMETER1=Value 1;param2=Value 2')
152
Parameters({'PARAMETER1': 'Value 1', 'PARAM2': 'Value 2'})
154
Including empty strings
155
>>> Parameters.from_string('param=')
156
Parameters({'PARAM': ''})
158
We can also parse parameter strings
159
>>> Parameters.from_string('MEMBER="MAILTO:projectA@host.com","MAILTO:projectB@host.com"')
160
Parameters({'MEMBER': ['MAILTO:projectA@host.com', 'MAILTO:projectB@host.com']})
162
We can also parse parameter strings
163
>>> Parameters.from_string('ALTREP="http://www.wiz.org",value4;PARAMETER1=Value1;PARAMETER2=Value2,Value3')
164
Parameters({'PARAMETER1': 'Value1', 'ALTREP': ['http://www.wiz.org', 'value4'], 'PARAMETER2': ['Value2', 'Value3']})
170
in rfc2445 keys are called parameters, so this is to be consistent with
171
the naming conventions
175
### Later, when I get more time... need to finish this off now. The last major thing missing.
176
### def _encode(self, name, value, cond=1):
177
### # internal, for conditional conversion of values.
179
### klass = types_factory.for_property(name)
180
### return klass(value)
183
### def add(self, name, value, encode=0):
184
### "Add a parameter value and optionally encode it."
186
### value = self._encode(name, value, encode)
187
### self[name] = value
189
### def decoded(self, name):
190
### "returns a decoded value, or list of same"
193
return 'Parameters(' + dict.__repr__(self) + ')'
199
items.sort() # To make doctests work
200
for key, value in items:
201
value = paramVal(value)
202
result.append('%s=%s' % (key.upper(), value))
203
return ';'.join(result)
206
def from_string(st, strict=False):
207
"Parses the parameter format from ical text format"
210
result = Parameters()
211
for param in q_split(st, ';'):
212
key, val = q_split(param, '=')
214
param_values = [v for v in q_split(val, ',')]
215
# Property parameter values that are not in quoted
216
# strings are case insensitive.
218
for v in param_values:
219
if v.startswith('"') and v.endswith('"'):
221
validate_param_value(v, quoted=True)
224
validate_param_value(v, quoted=False)
226
vals.append(v.upper())
233
result[key] = vals[0]
238
raise ValueError, 'Not a valid parameter string'
239
from_string = staticmethod(from_string)
242
#########################################
243
# parsing and generation of content lines
245
class Contentline(str):
247
A content line is basically a string that can be folded and parsed into
250
>>> c = Contentline('Si meliora dies, ut vina, poemata reddit')
252
'Si meliora dies, ut vina, poemata reddit'
254
A long line gets folded
255
>>> c = Contentline(''.join(['123456789 ']*10))
257
'123456789 123456789 123456789 123456789 123456789 123456789 123456789 1234\\r\\n 56789 123456789 123456789 '
259
A folded line gets unfolded
260
>>> c = Contentline.from_string(str(c))
262
'123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 '
264
We do not fold within a UTF-8 character:
265
>>> c = Contentline('This line has a UTF-8 character where it should be folded. Make sure it g\xc3\xabts folded before that character.')
266
>>> '\xc3\xab' in str(c)
269
Don't fail if we fold a line that is exactly X times 74 characters long:
270
>>> c = str(Contentline(''.join(['x']*148)))
272
It can parse itself into parts. Which is a tuple of (name, params, vals)
274
>>> c = Contentline('dtstart:20050101T120000')
276
('dtstart', Parameters({}), '20050101T120000')
278
>>> c = Contentline('dtstart;value=datetime:20050101T120000')
280
('dtstart', Parameters({'VALUE': 'datetime'}), '20050101T120000')
282
>>> c = Contentline('ATTENDEE;CN=Max Rasmussen;ROLE=REQ-PARTICIPANT:MAILTO:maxm@example.com')
284
('ATTENDEE', Parameters({'ROLE': 'REQ-PARTICIPANT', 'CN': 'Max Rasmussen'}), 'MAILTO:maxm@example.com')
286
'ATTENDEE;CN=Max Rasmussen;ROLE=REQ-PARTICIPANT:MAILTO:maxm@example.com'
289
>>> parts = ('ATTENDEE', Parameters({'ROLE': 'REQ-PARTICIPANT', 'CN': 'Max Rasmussen'}), 'MAILTO:maxm@example.com')
290
>>> Contentline.from_parts(parts)
291
'ATTENDEE;CN=Max Rasmussen;ROLE=REQ-PARTICIPANT:MAILTO:maxm@example.com'
294
>>> parts = ('ATTENDEE', Parameters(), 'MAILTO:maxm@example.com')
295
>>> Contentline.from_parts(parts)
296
'ATTENDEE:MAILTO:maxm@example.com'
298
A value can also be any of the types defined in PropertyValues
299
>>> from icalendar.prop import vText
300
>>> parts = ('ATTENDEE', Parameters(), vText('MAILTO:test@example.com'))
301
>>> Contentline.from_parts(parts)
302
'ATTENDEE:MAILTO:test@example.com'
304
A value can also be unicode
305
>>> from icalendar.prop import vText
306
>>> parts = ('SUMMARY', Parameters(), vText(u'INternational char � � �'))
307
>>> Contentline.from_parts(parts)
308
'SUMMARY:INternational char \\xc3\\xa6 \\xc3\\xb8 \\xc3\\xa5'
310
Traversing could look like this.
311
>>> name, params, vals = c.parts()
315
'MAILTO:maxm@example.com'
316
>>> for key, val in params.items():
318
('ROLE', 'REQ-PARTICIPANT')
319
('CN', 'Max Rasmussen')
321
And the traditional failure
322
>>> c = Contentline('ATTENDEE;maxm@example.com')
324
Traceback (most recent call last):
326
ValueError: Content line could not be parsed into parts
329
>>> c = Contentline(':maxm@example.com')
331
Traceback (most recent call last):
333
ValueError: Content line could not be parsed into parts
335
>>> c = Contentline('key;param=:value')
337
('key', Parameters({'PARAM': ''}), 'value')
339
>>> c = Contentline('key;param="pvalue":value')
341
('key', Parameters({'PARAM': 'pvalue'}), 'value')
343
Should bomb on missing param:
344
>>> c = Contentline.from_string("k;:no param")
346
Traceback (most recent call last):
348
ValueError: Content line could not be parsed into parts
350
>>> c = Contentline('key;param=pvalue:value', strict=False)
352
('key', Parameters({'PARAM': 'pvalue'}), 'value')
354
If strict is set to True, uppercase param values that are not
355
double-quoted, this is because the spec says non-quoted params are
358
>>> c = Contentline('key;param=pvalue:value', strict=True)
360
('key', Parameters({'PARAM': 'PVALUE'}), 'value')
362
>>> c = Contentline('key;param="pValue":value', strict=True)
364
('key', Parameters({'PARAM': 'pValue'}), 'value')
368
def __new__(cls, st, strict=False):
369
self = str.__new__(cls, st)
370
setattr(self, 'strict', strict)
373
def from_parts(parts):
374
"Turns a tuple of parts into a content line"
375
(name, params, values) = [str(p) for p in parts]
378
return Contentline('%s;%s:%s' % (name, params, values))
379
return Contentline('%s:%s' % (name, values))
382
'Property: %s Wrong values "%s" or "%s"' % (repr(name),
385
from_parts = staticmethod(from_parts)
388
""" Splits the content line up into (name, parameters, values) parts
394
for i in range(len(self)):
397
if ch in ':;' and not name_split:
399
if ch == ':' and not value_split:
402
inquotes = not inquotes
403
name = self[:name_split]
405
raise ValueError, 'Key name is required'
407
if name_split+1 == value_split:
408
raise ValueError, 'Invalid content line'
409
params = Parameters.from_string(self[name_split+1:value_split],
411
values = self[value_split+1:]
412
return (name, params, values)
414
raise ValueError, 'Content line could not be parsed into parts'
416
def from_string(st, strict=False):
417
"Unfolds the content lines in an iCalendar into long content lines"
419
# a fold is a line break (CRLF or a bare LF) followed by a single space or tab
420
return Contentline(FOLD.sub('', st), strict=strict)
422
raise ValueError, 'Expected StringType with content line'
423
from_string = staticmethod(from_string)
426
"Long content lines are folded so they are less than 75 characters wide"
435
# Check that we don't fold in the middle of a UTF-8 character:
436
# http://lists.osafoundation.org/pipermail/ietf-calsify/2006-August/001126.html
438
char_value = ord(self[end])
439
if char_value < 128 or char_value >= 192:
440
# This is not in the middle of a UTF-8 character, so we
446
new_lines.append(self[start:end])
452
return '\r\n '.join(new_lines)
456
class Contentlines(list):
458
I assume that iCalendar files generally are a few kilobytes in size. Then
459
this should be efficient. For huge files, an iterator should probably be
462
>>> c = Contentlines([Contentline('BEGIN:VEVENT\\r\\n')])
466
Let's try appending it with a 100 character wide string
467
>>> c.append(Contentline(''.join(['123456789 ']*10)+'\\r\\n'))
469
'BEGIN:VEVENT\\r\\n\\r\\n123456789 123456789 123456789 123456789 123456789 123456789 123456789 1234\\r\\n 56789 123456789 123456789 \\r\\n'
471
Notice that there is an extra empty string at the end of the content lines.
472
That is so they can be easily joined with: '\r\n'.join(contentlines).
473
>>> Contentlines.from_string('A short line\\r\\n')
475
>>> Contentlines.from_string('A faked\\r\\n long line\\r\\n')
476
['A faked long line', '']
477
>>> Contentlines.from_string('A faked\\r\\n long line\\r\\nAnd another lin\\r\\n\\te that is folded\\r\\n')
478
['A faked long line', 'And another line that is folded', '']
483
return '\r\n'.join(map(str, self))
486
"Parses a string into content lines"
488
# a fold is a line break (CRLF or a bare LF) followed by a single space or tab
489
unfolded = FOLD.sub('', st)
490
lines = [Contentline(line) for line in unfolded.splitlines() if line]
491
lines.append('') # we need a '\r\n' in the end of every content line
492
return Contentlines(lines)
494
raise ValueError, 'Expected StringType with content lines'
495
from_string = staticmethod(from_string)
499
# sample = open('./samples/test.ics', 'rb').read() # binary file in windows!
500
# lines = Contentlines.from_string(sample)
501
# for line in lines[:-1]:
505
#('BEGIN', Parameters({}), 'VCALENDAR')
506
#('METHOD', Parameters({}), 'Request')
507
#('PRODID', Parameters({}), '-//My product//mxm.dk/')
508
#('VERSION', Parameters({}), '2.0')
509
#('BEGIN', Parameters({}), 'VEVENT')
510
#('DESCRIPTION', Parameters({}), 'This is a very long description that ...')
511
#('PARTICIPANT', Parameters({'CN': 'Max M'}), 'MAILTO:maxm@mxm.dk')
512
#('DTEND', Parameters({}), '20050107T160000')
513
#('DTSTART', Parameters({}), '20050107T120000')
514
#('SUMMARY', Parameters({}), 'A second event')
515
#('END', Parameters({}), 'VEVENT')
516
#('BEGIN', Parameters({}), 'VEVENT')
517
#('DTEND', Parameters({}), '20050108T235900')
518
#('DTSTART', Parameters({}), '20050108T230000')
519
#('SUMMARY', Parameters({}), 'A single event')
520
#('UID', Parameters({}), '42')
521
#('END', Parameters({}), 'VEVENT')
522
#('END', Parameters({}), 'VCALENDAR')