1
"""base classes and helper functions for css and stylesheets packages
4
__docformat__ = 'restructuredtext'
5
__version__ = '$Id: util.py 1453 2008-09-08 20:57:19Z cthedot $'
8
from itertools import ifilter
13
from helper import normalize
20
Base class for most CSS and StyleSheets classes
22
**Superseded by Base2 which is used for new seq handling class.**
23
See cssutils.util.Base2
25
Contains helper methods that support parsing in inheriting classes
27
``_normalize`` is static as used by Preferences.
29
__tokenizer2 = tokenize2.Tokenizer()
32
_prods = tokenize2.CSSProductions
34
# for more on shorthand properties see
35
# http://www.dustindiaz.com/css-shorthand/
36
# format: shorthand: [(propname, mandatorycheck?)*]
37
_SHORTHANDPROPERTIES = {
39
u'background-position': [],
45
#u'border-color': [], # list or single but same values
46
#u'border-style': [], # list or single but same values
47
#u'border-width': [], # list or single but same values
51
#u'margin': [], # list or single but same values
53
#u'padding': [], # list or single but same values
62
- remove any \ before non unicode sequences (0-9a-zA-Z) so for
63
x=="c\olor\" return "color" (unicode escape sequences should have
64
been resolved by the tokenizer already)
69
def _checkReadonly(self):
70
"raises xml.dom.NoModificationAllowedErr if rule/... is readonly"
71
if hasattr(self, '_readonly') and self._readonly:
72
raise xml.dom.NoModificationAllowedErr(
73
u'%s is readonly.' % self.__class__)
77
def _splitNamespacesOff(self, text_namespaces_tuple):
79
returns tuple (text, dict-of-namespaces) or if no namespaces are
80
in cssText returns (cssText, {})
82
used in Selector, SelectorList, CSSStyleRule, CSSMediaRule and
85
if isinstance(text_namespaces_tuple, tuple):
86
return text_namespaces_tuple[0], _SimpleNamespaces(self._log,
87
text_namespaces_tuple[1])
89
return text_namespaces_tuple, _SimpleNamespaces(log=self._log)
91
def _tokenize2(self, textortokens):
93
returns tokens of textortokens which may already be tokens in which
94
case simply returns input
98
elif isinstance(textortokens, basestring):
99
# needs to be tokenized
100
return self.__tokenizer2.tokenize(
102
elif types.GeneratorType == type(textortokens):
105
elif isinstance(textortokens, tuple):
106
# a single token (like a comment)
107
return [textortokens]
109
# already tokenized but return generator
110
return (x for x in textortokens)
112
def _nexttoken(self, tokenizer, default=None):
113
"returns next token in generator tokenizer or the default value"
115
return tokenizer.next()
116
except (StopIteration, AttributeError):
119
def _type(self, token):
120
"returns type of Tokenizer token"
126
def _tokenvalue(self, token, normalize=False):
127
"returns value of Tokenizer token"
128
if token and normalize:
129
return Base._normalize(token[1])
135
def _stringtokenvalue(self, token):
137
for STRING returns the actual content without surrounding "" or ''
138
and without respective escapes, e.g.::
140
"with \" char" => with " char
144
return value.replace('\\'+value[0], value[0])[1:-1]
148
def _uritokenvalue(self, token):
150
for URI returns the actual content without surrounding url()
151
or url(""), url('') and without respective escapes, e.g.::
156
value = token[1][4:-1].strip()
157
if value and (value[0] in '\'"') and (value[0] == value[-1]):
158
# a string "..." or '...'
159
value = value.replace('\\'+value[0], value[0])[1:-1]
164
def _tokensupto2(self,
167
blockstartonly=False, # {
168
blockendonly=False, # }
170
importmediaqueryendonly=False, # ; or STRING
171
mediaqueryendonly=False, # { or STRING
173
propertynameendonly=False, # :
174
propertyvalueendonly=False, # ! ; }
175
propertypriorityendonly=False, # ; }
176
selectorattendonly=False, # ]
177
funcendonly=False, # )
178
listseponly=False, # ,
179
separateEnd=False # returns (resulttokens, endtoken)
182
returns tokens up to end of atrule and end index
183
end is defined by parameters, might be ; } ) or other
185
default looks for ending "}" and ";"
189
brace = bracket = parant = 0 # {}, [], ()
191
if blockstartonly: # {
193
brace = -1 # set to 0 with first {
194
elif blockendonly: # }
197
elif mediaendonly: # }
199
brace = 1 # rules } and mediarules }
200
elif importmediaqueryendonly:
201
# end of mediaquery which may be ; or STRING
203
endtypes = ('STRING',)
204
elif mediaqueryendonly:
205
# end of mediaquery which may be { or STRING
206
# special case, see below
208
brace = -1 # set to 0 with first {
209
endtypes = ('STRING',)
212
elif propertynameendonly: # : and ; in case of an error
214
elif propertyvalueendonly: # ; or !important
216
elif propertypriorityendonly: # ;
218
elif selectorattendonly: # ]
220
if starttoken and self._tokenvalue(starttoken) == u'[':
222
elif funcendonly: # )
225
elif listseponly: # ,
230
resulttokens.append(starttoken)
232
for token in tokenizer:
233
typ, val, line, col = token
235
resulttokens.append(token)
245
# function( or single (
246
elif u'(' == val or \
247
Base._prods.FUNCTION == typ:
252
resulttokens.append(token)
254
if (brace == bracket == parant == 0) and (
255
val in ends or typ in endtypes):
257
elif mediaqueryendonly and brace == -1 and (
258
bracket == parant == 0) and typ in endtypes:
259
# mediaqueryendonly with STRING
263
# TODO: use this method as generator, then this makes sense
265
return resulttokens[:-1], resulttokens[-1]
267
return resulttokens, None
271
def _valuestr(self, t):
273
returns string value of t (t may be a string, a list of token tuples
274
or a single tuple in format (type, value, line, col).
275
Mainly used to get a string value of t for error messages.
279
elif isinstance(t, basestring):
282
return u''.join([x[1] for x in t])
284
def _adddefaultproductions(self, productions, new=None):
286
adds default productions if not already present, used by
289
each production should return the next expected token
290
normally a name like "uri" or "EOF"
291
some have no expectation like S or COMMENT, so simply return
292
the current value of self.__expected
294
def ATKEYWORD(expected, seq, token, tokenizer=None):
295
"default impl for unexpected @rule"
296
if expected != 'EOF':
297
# TODO: parentStyleSheet=self
298
rule = cssutils.css.CSSUnknownRule()
299
rule.cssText = self._tokensupto2(tokenizer, token)
304
new['wellformed'] = False
305
self._log.error(u'Expected EOF.', token=token)
308
def COMMENT(expected, seq, token, tokenizer=None):
309
"default implementation for COMMENT token adds CSSCommentRule"
310
seq.append(cssutils.css.CSSComment([token]))
313
def S(expected, seq, token, tokenizer=None):
314
"default implementation for S token, does nothing"
317
def EOF(expected=None, seq=None, token=None, tokenizer=None):
318
"default implementation for EOF token"
321
p = {'ATKEYWORD': ATKEYWORD,
324
'EOF': EOF # only available if fullsheet
326
p.update(productions)
329
def _parse(self, expected, seq, tokenizer, productions, default=None,
330
new=None, initialtoken=None):
332
puts parsed tokens in seq by calling a production with
333
(expected, seq, token, tokenizer)
336
the name of the token or value which is expected next, e.g. 'uri'
340
call tokenizer.next() to get next token
342
callbacks {tokentype: callback}
344
default callback if tokentype not in productions
346
used to init default productions
348
will be used together with tokenizer running 1st this token
349
and then all tokens in tokenizer
351
returns (wellformed, expected) which the last prod might have set
356
# add initialtoken to tokenizer
358
"Build new tokenizer including initialtoken"
360
for item in tokenizer:
362
fulltokenizer = (t for t in tokens())
364
fulltokenizer = tokenizer
367
prods = self._adddefaultproductions(productions, new)
368
for token in fulltokenizer:
369
p = prods.get(token[0], default)
371
expected = p(expected, seq, token, tokenizer)
374
self._log.error(u'Unexpected token (%s, %s, %s, %s)' % token)
375
return wellformed, expected
380
Base class for new seq handling, used by Selector for now only
385
def _setSeq(self, newseq):
387
sets newseq and makes it readonly
389
newseq._readonly = True
392
seq = property(lambda self: self._seq, doc="seq for most classes")
394
def _tempSeq(self, readonly=False):
395
"get a writeable Seq() which is added later"
396
return Seq(readonly=readonly)
398
def _adddefaultproductions(self, productions, new=None):
400
adds default productions if not already present, used by
403
each production should return the next expected token
404
normally a name like "uri" or "EOF"
405
some have no expectation like S or COMMENT, so simply return
406
the current value of self.__expected
408
def ATKEYWORD(expected, seq, token, tokenizer=None):
409
"default impl for unexpected @rule"
410
if expected != 'EOF':
411
# TODO: parentStyleSheet=self
412
rule = cssutils.css.CSSUnknownRule()
413
rule.cssText = self._tokensupto2(tokenizer, token)
415
seq.append(rule, cssutils.css.CSSRule.UNKNOWN_RULE,
416
line=token[2], col=token[3])
419
new['wellformed'] = False
420
self._log.error(u'Expected EOF.', token=token)
423
def COMMENT(expected, seq, token, tokenizer=None):
424
"default impl, adds CSSCommentRule if not token == EOF"
425
if expected == 'EOF':
426
new['wellformed'] = False
427
self._log.error(u'Expected EOF but found comment.', token=token)
428
seq.append(cssutils.css.CSSComment([token]), 'COMMENT')
431
def S(expected, seq, token, tokenizer=None):
432
"default impl, does nothing if not token == EOF"
433
if expected == 'EOF':
434
new['wellformed'] = False
435
self._log.error(u'Expected EOF but found whitespace.', token=token)
438
def EOF(expected=None, seq=None, token=None, tokenizer=None):
439
"default implementation for EOF token"
442
defaultproductions = {'ATKEYWORD': ATKEYWORD,
445
'EOF': EOF # only available if fullsheet
447
defaultproductions.update(productions)
448
return defaultproductions
453
property seq of Base2 inheriting classes, holds a list of Item objects.
455
used only by Selector for now
457
is normally readonly, only writable during parsing
459
def __init__(self, readonly=True):
461
only way to write to a Seq is to initialize it with new items
462
each itemtuple has (value, type, line) where line is optional
465
self._readonly = readonly
467
def __delitem__(self, i):
470
def __getitem__(self, i):
473
def __setitem__(self, i, (val, typ, line, col)):
474
self._seq[i] = Item(val, typ, line, col)
477
return iter(self._seq)
480
return len(self._seq)
482
def append(self, val, typ, line=None, col=None):
483
"if not readonly add new Item()"
485
raise AttributeError('Seq is readonly.')
487
self._seq.append(Item(val, typ, line, col))
489
def appendItem(self, item):
490
"if not readonly add item which must be an Item"
492
raise AttributeError('Seq is readonly.')
494
self._seq.append(item)
496
def replace(self, index=-1, val=None, typ=None, line=None, col=None):
498
if not readonly replace Item at index with new Item or
499
simply replace value or type
502
raise AttributeError('Seq is readonly.')
504
self._seq[index] = Item(val, typ, line, col)
506
def appendToVal(self, val=None, index=-1):
508
if not readonly append to Item's value at index
511
raise AttributeError('Seq is readonly.')
513
old = self._seq[index]
514
self._seq[index] = Item(old.value + val, old.type,
518
"returns a repr same as a list of tuples of (value, type)"
519
return u'cssutils.%s.%s([\n %s])' % (self.__module__,
520
self.__class__.__name__,
521
u',\n '.join([u'%r' % item for item in self._seq]
526
if isinstance(v.value, basestring):
528
elif type(v) == tuple:
529
vals.append(v.value[1])
533
return "<cssutils.%s.%s object length=%r valuestring=%r at 0x%x>" % (
534
self.__module__, self.__class__.__name__, len(self),
535
u''.join(vals), id(self))
539
an item in the seq list of classes (successor to tuple items in old seq)
541
each item has attributes:
544
a semantic type like "element", "attribute"
546
the actual value which may be a string, number etc or an instance
549
**NOT IMPLEMENTED YET, may contain the line in the source later**
551
def __init__(self, value, type, line=None, col=None):
557
type = property(lambda self: self.__type)
558
value = property(lambda self: self.__value)
559
line = property(lambda self: self.__line)
560
col = property(lambda self: self.__col)
563
return "%s.%s(value=%r, type=%r, line=%r, col=%r)" % (
564
self.__module__, self.__class__.__name__,
565
self.__value, self.__type, self.__line, self.__col)
568
class ListSeq(object):
571
A base class used for list classes like css.SelectorList or
572
stylesheets.MediaList
574
adds list-like behaviour running on the inheriting class' property ``seq``
578
- get, set and del x[i]
582
some methods must be overwritten in inheriting class
585
self.seq = [] # does not need to use ``Seq`` as simple list only
587
def __contains__(self, item):
588
return item in self.seq
590
def __delitem__(self, index):
593
def __getitem__(self, index):
594
return self.seq[index]
605
def __setitem__(self, index, item):
606
"must be overwritten"
607
raise NotImplementedError
609
def append(self, item):
610
"must be overwritten"
611
raise NotImplementedError
614
class _Namespaces(object):
616
A dictionary like wrapper for @namespace rules used in a CSSStyleSheet.
617
Works on effective namespaces, so e.g. if::
622
only the second rule is effective and kept.
625
a dictionary {prefix: namespaceURI} containing the effective namespaces
626
only. These are the latest set in the CSSStyleSheet.
628
the parent CSSStyleSheet
630
def __init__(self, parentStyleSheet, log=None, *args):
631
"no initial values are set, only the relevant sheet is"
632
self.parentStyleSheet = parentStyleSheet
635
def __contains__(self, prefix):
636
return prefix in self.namespaces
638
def __delitem__(self, prefix):
639
"""deletes CSSNamespaceRule(s) with rule.prefix == prefix
641
prefix '' and None are handled the same
645
delrule = self.__findrule(prefix)
646
for i, rule in enumerate(ifilter(lambda r: r.type == r.NAMESPACE_RULE,
647
self.parentStyleSheet.cssRules)):
649
self.parentStyleSheet.deleteRule(i)
652
self._log.error('Prefix %r not found.' % prefix,
653
error=xml.dom.NamespaceErr)
655
def __getitem__(self, prefix):
657
return self.namespaces[prefix]
659
self._log.error('Prefix %r not found.' % prefix,
660
error=xml.dom.NamespaceErr)
663
return self.namespaces.__iter__()
666
return len(self.namespaces)
668
def __setitem__(self, prefix, namespaceURI):
669
"replaces prefix or sets new rule, may raise NoModificationAllowedErr"
671
prefix = u'' # None or ''
672
rule = self.__findrule(prefix)
674
self.parentStyleSheet.insertRule(cssutils.css.CSSNamespaceRule(
676
namespaceURI=namespaceURI),
679
if prefix in self.namespaces:
680
rule.namespaceURI = namespaceURI # raises NoModificationAllowedErr
681
if namespaceURI in self.namespaces.values():
684
def __findrule(self, prefix):
685
# returns namespace rule where prefix == key
686
for rule in ifilter(lambda r: r.type == r.NAMESPACE_RULE,
687
reversed(self.parentStyleSheet.cssRules)):
688
if rule.prefix == prefix:
691
def __getNamespaces(self):
693
for rule in ifilter(lambda r: r.type == r.NAMESPACE_RULE,
694
reversed(self.parentStyleSheet.cssRules)):
695
if rule.namespaceURI not in namespaces.values():
696
namespaces[rule.prefix] = rule.namespaceURI
699
namespaces = property(__getNamespaces,
700
doc=u'Holds only effective @namespace rules in self.parentStyleSheets'
703
def get(self, prefix, default):
704
return self.namespaces.get(prefix, default)
707
return self.namespaces.items()
710
return self.namespaces.keys()
713
return self.namespaces.values()
715
def prefixForNamespaceURI(self, namespaceURI):
717
returns effective prefix for given namespaceURI or raises IndexError
718
if this cannot be found"""
719
for prefix, uri in self.namespaces.items():
720
if uri == namespaceURI:
722
raise IndexError(u'NamespaceURI %r not found.' % namespaceURI)
725
return u"<cssutils.util.%s object parentStyleSheet=%r at 0x%x>" % (
726
self.__class__.__name__, str(self.parentStyleSheet), id(self))
729
class _SimpleNamespaces(_Namespaces):
731
namespaces used in objects like Selector as long as they are not connected
734
def __init__(self, log=None, *args):
736
super(_SimpleNamespaces, self).__init__(parentStyleSheet=None, log=log)
737
self.__namespaces = dict(*args)
739
def __setitem__(self, prefix, namespaceURI):
740
self.__namespaces[prefix] = namespaceURI
742
namespaces = property(lambda self: self.__namespaces,
743
doc=u'Dict Wrapper for self.sheets @namespace rules.')
746
return u"<cssutils.util.%s object namespaces=%r at 0x%x>" % (
747
self.__class__.__name__, self.namespaces, id(self))
750
return u"cssutils.util.%s(%r)" % (self.__class__.__name__,
754
def _defaultFetcher(url):
755
"""Retrieve data from ``url``. cssutils default implementation of fetch
758
Returns ``(encoding, string)`` or ``None``
761
res = urllib2.urlopen(url)
763
# e.g. if file URL and not found
764
cssutils.log.warn(e, error=OSError)
765
except (OSError, ValueError), e:
766
# invalid url, e.g. "1"
767
cssutils.log.warn(u'ValueError, %s' % e.message, error=ValueError)
768
except urllib2.HTTPError, e:
769
# http error, e.g. 404, e can be raised
770
cssutils.log.warn(u'HTTPError opening url=%r: %s %s' %
771
(url, e.code, e.msg), error=e)
772
except urllib2.URLError, e:
773
# URLError like mailto: or other IO errors, e can be raised
774
cssutils.log.warn(u'URLError, %s' % e.reason, error=e)
777
mimeType, encoding = encutils.getHTTPInfo(res)
778
if mimeType != u'text/css':
779
cssutils.log.error(u'Expected "text/css" mime type for url=%r but found: %r' %
780
(url, mimeType), error=ValueError)
781
return encoding, res.read()
783
def _readUrl(url, fetcher=None, overrideEncoding=None, parentEncoding=None):
785
Read cssText from url and decode it using all relevant methods (HTTP
786
header, BOM, @charset). Returns
788
- encoding used to decode text (which is needed to set encoding of
790
- type of encoding (how it was retrieved, see list below)
794
see cssutils.registerFetchUrl for details
796
If given this encoding is used and all other encoding information is
797
ignored (HTTP, BOM etc)
799
Encoding of parent stylesheet (while e.g. reading @import referenced sheets)
800
or document if available.
802
Priority of encoding information
803
--------------------------------
804
**cssutils only**: 0. overrideEncoding
806
1. An HTTP "charset" parameter in a "Content-Type" field (or similar parameters in other protocols)
807
2. BOM and/or @charset (see below)
808
3. <link charset=""> or other metadata from the linking mechanism (if any)
809
4. charset of referring style sheet or document (if any)
816
fetcher = _defaultFetcher
818
if r and len(r) == 2 and r[1] is not None:
819
httpEncoding, content = r
822
enctype = 0 # 0. override encoding
823
encoding = overrideEncoding
825
enctype = 1 # 1. HTTP
826
encoding = httpEncoding
829
contentEncoding, explicit = cssutils.codec.detectencoding_str(content)
831
enctype = 2 # 2. BOM/@charset: explicitly
832
encoding = contentEncoding
834
enctype = 4 # 4. parent stylesheet or document
835
# may also be None in which case 5. is used in next step anyway
836
encoding = parentEncoding
838
enctype = 5 # 5. assume UTF-8
842
# encoding may still be wrong if encoding *is lying*!
843
if content is not None:
844
decodedCssText = codecs.lookup("css")[1](content, encoding=encoding)[0]
846
decodedCssText = None
847
except UnicodeDecodeError, e:
848
cssutils.log.warn(e, neverraise=True)
849
decodedCssText = None
851
return encoding, enctype, decodedCssText
853
return None, None, None