2
2
** ________ ___ / / ___ Scala API **
3
** / __/ __// _ | / / / _ | (c) 2003-2011, LAMP/EPFL **
3
** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
4
4
** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
5
5
** /____/\___/_/ |_/____/_/ | | **
13
import collection.mutable
14
import mutable.{ Set, HashSet }
11
import scala.collection.mutable
15
12
import parsing.XhtmlEntities
13
import scala.language.implicitConversions
18
* The <code>Utility</code> object provides utility functions for processing
19
* instances of bound and not bound XML classes, as well as escaping text nodes.
16
* The `Utility` object provides utility functions for processing instances
17
* of bound and not bound XML classes, as well as escaping text nodes.
21
19
* @author Burak Emir
23
object Utility extends AnyRef with parsing.TokenTests
21
object Utility extends AnyRef with parsing.TokenTests {
25
22
/** The character U+001A (ASCII SUB, "substitute").
 *
 *  NOTE(review): presumably serves as an end-of-input sentinel for the
 *  parsing code — confirm against callers.
 */
final val SU = '\u001A'
24
// [Martin] This looks dubious. We don't convert StringBuilders to
25
// Strings anywhere else, why do it here?
27
26
/** Implicitly converts a `StringBuilder` into the `String` it currently holds.
 *
 *  The result type is written out explicitly so that the signature of this
 *  public implicit conversion does not depend on type inference.
 *
 *  @param sb the builder whose contents are returned
 *  @return the result of `sb.toString()`
 */
implicit def implicitSbToString(sb: StringBuilder): String = sb.toString()
29
28
// helper for the extremely oft-repeated sequence of creating a
36
35
/** Tests whether `x` reports itself as an atom without being a `Text` node. */
private[xml] def isAtomAndNotText(x: Node) = x match {
  // `isAtom` is consulted first, exactly as in a left-to-right `&&`.
  case _ if !x.isAtom => false
  case _: Text        => false
  case _              => true
}
38
/** trims an element - call this method, when you know that it is an
37
/** Trims an element - call this method, when you know that it is an
39
38
* element (and not a text node) so you know that it will not be trimmed
40
* away. With this assumption, the function can return a <code>Node</code>,
41
* rather than a <code>Seq[Node]</code>. If you don't know, call
42
* <code>trimProper</code> and account for the fact that you may get back
43
* an empty sequence of nodes.
39
* away. With this assumption, the function can return a `Node`, rather
40
* than a `Seq[Node]`. If you don't know, call `trimProper` and account
41
* for the fact that you may get back an empty sequence of nodes.
45
* precondition: node is not a text node (it might be trimmed)
43
* Precondition: node is not a text node (it might be trimmed)
47
45
def trim(x: Node): Node = x match {
48
46
case Elem(pre, lab, md, scp, child@_*) =>
49
47
Elem(pre, lab, md, scp, (child flatMap trimProper):_*)
52
/** trim a child of an element. <code>Attribute</code> values and
53
* <code>Atom</code> nodes that are not <code>Text</code> nodes are unaffected.
50
/** Trims a child of an element. `Attribute` values and `Atom` nodes that
51
* are not `Text` nodes are unaffected.
55
53
def trimProper(x:Node): Seq[Node] = x match {
56
54
case Elem(pre,lab,md,scp,child@_*) =>
63
62
/** Returns a sorted attribute list. */
64
63
def sort(md: MetaData): MetaData = if((md eq Null) || (md.next eq Null)) md else {
66
65
val smaller = sort(md.filter { m => m.key < key })
67
66
val greater = sort(md.filter { m => m.key > key })
68
smaller.append( Null ).append(md.copy ( greater ))
67
smaller.foldRight (md copy greater) ((x, xs) => x copy xs)
71
/** returns the node with its attribute list sorted alphabetically (prefixes are ignored) */
70
/** Returns the node with its attribute list sorted alphabetically
71
* (prefixes are ignored) */
72
72
def sort(n:Node): Node = n match {
73
case Elem(pre,lab,md,scp,child@_*) =>
74
Elem(pre,lab,sort(md),scp, (child map sort):_*)
73
case Elem(pre,lab,md,scp,child@_*) =>
74
Elem(pre,lab,sort(md),scp, (child map sort):_*)
79
79
* Escapes the characters < > & and " from string.
84
81
// Convenience overload: hands the builder supplied by `sbToString` to the
// two-argument `escape` and returns whatever `sbToString` produces.
final def escape(text: String): String =
  sbToString(sb => escape(text, sb))
135
* Appends unescaped string to <code>s</code>, amp becomes &
136
* lt becomes < etc..
128
* Appends unescaped string to `s`, `amp` becomes `&`,
129
* `lt` becomes `<`, etc.
140
* @return <code>null</code> if <code>ref</code> was not a predefined
131
* @return `null` if `ref` was not a predefined entity.
143
133
final def unescape(ref: String, s: StringBuilder): StringBuilder =
144
(unescMap get ref) map (s append _) orNull
134
((unescMap get ref) map (s append _)).orNull
147
137
* Returns a set of all namespaces used in a sequence of nodes
148
138
* and all their descendants, including the empty namespaces.
153
140
def collectNamespaces(nodes: Seq[Node]): mutable.Set[String] =
154
nodes.foldLeft(new HashSet[String]) { (set, x) => collectNamespaces(x, set) ; set }
141
nodes.foldLeft(new mutable.HashSet[String]) { (set, x) => collectNamespaces(x, set) ; set }
157
144
* Adds all namespaces in node to set.
162
146
def collectNamespaces(n: Node, set: mutable.Set[String]) {
163
147
if (n.doCollectNamespaces) {
194
185
preserveWhitespace: Boolean = false,
195
186
minimizeTags: Boolean = false): StringBuilder =
188
serialize(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, if (minimizeTags) MinimizeMode.Always else MinimizeMode.Never)
192
* Serialize an XML Node to a StringBuilder.
194
* This is essentially a minor rework of `toXML` that can't have the same name due to an unfortunate
195
* combination of named/default arguments and overloading.
197
* @todo use a Writer instead
201
pscope: NamespaceBinding = TopScope,
202
sb: StringBuilder = new StringBuilder,
203
stripComments: Boolean = false,
204
decodeEntities: Boolean = true,
205
preserveWhitespace: Boolean = false,
206
minimizeTags: MinimizeMode.Value = MinimizeMode.Default): StringBuilder =
198
case c: Comment => if (!stripComments) c buildString sb else sb
199
case x: SpecialNode => x buildString sb
201
g.nodes foreach {toXML(_, x.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)}
209
case c: Comment if !stripComments => c buildString sb
210
case s: SpecialNode => s buildString sb
211
case g: Group => for (c <- g.nodes) serialize(c, g.scope, sb, minimizeTags = minimizeTags) ; sb
204
213
// print tag with namespace declarations
207
if (x.attributes ne null) x.attributes.buildString(sb)
208
x.scope.buildString(sb, pscope)
209
if (x.child.isEmpty && minimizeTags) {
216
if (el.attributes ne null) el.attributes.buildString(sb)
217
el.scope.buildString(sb, pscope)
218
if (el.child.isEmpty &&
219
(minimizeTags == MinimizeMode.Always ||
220
(minimizeTags == MinimizeMode.Default && el.minimizeEmpty)))
210
222
// no children, so use short form: <xyz .../>
213
225
// children, so use long form: <xyz ...>...</xyz>
215
sequenceToXML(x.child, x.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
227
sequenceToXML(el.child, el.scope, sb, stripComments)
232
case _ => throw new IllegalArgumentException("Don't know how to serialize a " + x.getClass.getName)
227
240
stripComments: Boolean = false,
228
241
decodeEntities: Boolean = true,
229
242
preserveWhitespace: Boolean = false,
230
minimizeTags: Boolean = false): Unit =
243
minimizeTags: MinimizeMode.Value = MinimizeMode.Default): Unit =
232
245
if (children.isEmpty) return
233
246
else if (children forall isAtomAndNotText) { // add space
234
247
val it = children.iterator
236
toXML(f, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
249
serialize(f, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
237
250
while (it.hasNext) {
240
toXML(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
253
serialize(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
243
else children foreach { toXML(_, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) }
256
else children foreach { serialize(_, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) }
247
260
* Returns prefix of qualified name if any.
252
262
final def prefix(name: String): Option[String] = (name indexOf ':') match {
258
268
* Returns a hashcode for the given constituents of a node
262
* @param attribHashCode
265
def hashCode(pre: String, label: String, attribHashCode: Int, scpeHash: Int, children: Seq[Node]) = {
266
val h = new util.MurmurHash[Node](pre.##)
268
h.append(attribHashCode)
270
def hashCode(pre: String, label: String, attribHashCode: Int, scpeHash: Int, children: Seq[Node]) = {
  // Hash input, in order: the label, both precomputed hashes, then every child.
  val constituents = label +: attribHashCode +: scpeHash +: children
  // The prefix contributes only through the seed.
  scala.util.hashing.MurmurHash3.orderedHash(constituents, pre.##)
}
274
273
/** Convenience overload: hands the builder supplied by `sbToString` to the
 *  `StringBuilder`-based `appendQuoted` and returns what `sbToString` produces.
 */
def appendQuoted(s: String): String =
  sbToString(sb => appendQuoted(s, sb))
277
* Appends "s" if string <code>s</code> does not contain ",
276
* Appends "s" if string `s` does not contain ",
278
277
* 's' otherwise.
284
279
def appendQuoted(s: String, sb: StringBuilder) = {
285
280
val ch = if (s contains '"') '\'' else '"'
403
380
* CharRef ::= "&#" '0'..'9' {'0'..'9'} ";"
404
381
* | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
412
* @param reportSyntaxError ...
415
385
def parseCharRef(ch: () => Char, nextch: () => Unit, reportSyntaxError: String => Unit, reportTruncatedError: String => Unit): String = {
416
386
val hex = (ch() == 'x') && { nextch(); true }