1
package com.thaiopensource.util;
/**
 * Selectively percent-encodes characters in a URI.
 */
public class UriEncoder {
8
* Flag to include U+0000 - U+001F.
10
static private final int C0_CONTROL = 0x01;
12
* Flag to include U+0020
14
static private final int SPACE = 0x02;
16
* Flag to include '<', '>', '"'
18
static private final int DELIM = 0x04;
20
* Flag to include '{', '}', '|', '\\', '^', U+007E
22
static private final int UNWISE = 0x08;
24
* Flag to include U+007F
26
static private final int DELETE = 0x10;
28
* Flag to include U+0080 - U+009F
30
static private final int C1_CONTROL = 0x20;
32
* Flag to include any non-ASCII character with category Zs, Zl, and Zp
34
static private final int NON_ASCII_SEPARATOR = 0x40;
36
* Flag to include any other character with code-point >= U+0080
38
static private final int OTHER_NON_ASCII = 0x80;
40
static private final int ASCII_CONTROL = C0_CONTROL|DELETE;
41
static private final int CONTROL = ASCII_CONTROL|C1_CONTROL;
42
static private final int SEPARATOR = NON_ASCII_SEPARATOR|SPACE;
43
static private final int ASCII_GRAPHIC_FORBIDDEN = DELIM|UNWISE;
44
static private final int ASCII_PRINTABLE_FORBIDDEN = ASCII_GRAPHIC_FORBIDDEN|SPACE;
45
static private final int ASCII_FORBIDDEN = ASCII_CONTROL|ASCII_PRINTABLE_FORBIDDEN;
46
static private final int NON_ASCII = C1_CONTROL|NON_ASCII_SEPARATOR|OTHER_NON_ASCII;
47
static private final int JAVA_URI_FORBIDDEN = CONTROL|SEPARATOR|ASCII_PRINTABLE_FORBIDDEN;
48
static private final int URI_FORBIDDEN = ASCII_FORBIDDEN|NON_ASCII;
50
static public String encode(String s) {
51
return encode(s, JAVA_URI_FORBIDDEN);
54
static public String encodeAsAscii(String s) {
55
return encode(s, URI_FORBIDDEN);
58
/**
 * Percent-encodes the characters of s whose character class is selected
 * by flags.
 *
 * NOTE(review): this text of the method has gaps. The declarations of c,
 * mustEncode and codePoint, the case labels, the closing braces and any
 * return other than the one shown are not present, and bare numeric lines
 * are interleaved with the statements. Restore the method from the
 * authoritative source; the comments below describe only what is visible.
 *
 * @param s the string to encode
 * @param flags bit mask built from the flag constants of this class
 *        (C0_CONTROL ... OTHER_NON_ASCII)
 * @return the contents of the output buffer (the only return visible here)
 */
static private String encode(String s, int flags) {
59
// Output buffer; starts out null and is only assigned further down.
StringBuffer encoded = null;
60
final int len = s.length();
61
for (int i = 0; i < len; i++) {
68
// Each assignment below tests a single flag bit; the case labels that
// pick the assignment for the current character are not visible here.
mustEncode = ((flags & DELIM) != 0);
76
mustEncode = ((flags & UNWISE) != 0);
79
mustEncode = ((flags & SPACE) != 0);
82
mustEncode = ((flags & DELETE) != 0);
86
mustEncode = ((flags & C0_CONTROL) != 0);
90
// NON_ASCII is the union of the three non-ASCII flags, so this switches
// on which of them the caller requested.
switch (flags & NON_ASCII) {
92
// all non-ASCII chars need to be escaped
96
// no non-ASCII chars need to be escaped
100
// Otherwise decide per character class: control, separator, or other.
if (Character.isISOControl(c))
101
mustEncode = ((flags & C1_CONTROL) != 0);
102
else if (Character.isSpaceChar(c))
103
mustEncode = ((flags & NON_ASCII_SEPARATOR) != 0);
105
mustEncode = ((flags & OTHER_NON_ASCII) != 0);
112
// The buffer is seeded with the part of s that precedes index i.
encoded = new StringBuffer(s.substring(0, i));
114
// Presumably detects a surrogate pair (Utf16 is defined elsewhere); when
// the test succeeds, ++i also consumes the second char of the pair.
// NOTE(review): no bounds check on i + 1 is visible; confirm one exists in
// the missing line between the two conditions, otherwise s.charAt(i + 1)
// throws IndexOutOfBoundsException when c is the last char of s.
if (Utf16.isSurrogate1(c)
116
&& Utf16.isSurrogate2(s.charAt(i + 1)))
117
codePoint = Utf16.scalarValue(c, s.charAt(++i));
120
// Appends the percent-encoded form of the bytes Utf8.encode returns for
// the code point.
encoded.append(percentEncode(Utf8.encode(codePoint)));
122
else if (encoded != null)
126
return encoded.toString();
130
// Upper-case hexadecimal digits; percentEncode indexes this string with a
// 4-bit value to obtain the digit character.
static private final String hexDigits = "0123456789ABCDEF";
132
static char[] percentEncode(byte[] bytes) {
133
char[] buf = new char[bytes.length * 3];
135
for (int i = 0; i < bytes.length; i++) {
138
buf[j++] = hexDigits.charAt((b >> 4) & 0xF);
139
buf[j++] = hexDigits.charAt(b & 0xF);