2
// Mork.cs: A parser for mork files (used by software such as Firefox and Thunderbird)
4
// Copyright (C) 2006 Pierre Östlund
8
// Permission is hereby granted, free of charge, to any person obtaining a copy
9
// of this software and associated documentation files (the "Software"), to deal
10
// in the Software without restriction, including without limitation the rights
11
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
// copies of the Software, and to permit persons to whom the Software is
13
// furnished to do so, subject to the following conditions:
15
// The above copyright notice and this permission notice shall be included in all
16
// copies or substantial portions of the Software.
18
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30
using System.Collections;
31
using System.Text.RegularExpressions;
35
public class MorkDatabase : IEnumerable {
36
// Path of the mork file; assigned by the constructor and exposed through Filename.
protected string mork_file;
37
// Backing field of EnumNamespace; GetEnumerator resolves it to pick the rows to enumerate.
protected string enum_namespace;
38
// Version string captured from the header line via the out parameter of IsValid.
protected string mork_version;
40
// Dictionary entries (id -> value) filled by ParseDict and used by Compile to
// resolve "^id" cell values.
protected Hashtable dicts;
41
// Meta-dictionary entries (id -> value) filled by ParseMetaDict; used by Compile
// as column keys, by ParseNamespace for namespace lookup and by Encoding (key "f").
protected Hashtable metadicts;
42
// Resolved namespace -> Hashtable (row id -> raw cell string), see AddRow.
protected Hashtable rows;
43
// Keyed by row id; Compile and GetRowCount read the value as the row's table.
// NOTE(review): the statement that stores into it is not visible in this excerpt.
protected Hashtable tables;
45
// Matches one row: optional action prefix (-, ! or +), the row id (optionally
// "id:namespace") and the raw cell text between the brackets. Used by ParseRows.
protected string regex_row = @"(?<action>[-!+]?)\[(-|)(?<roid>[0-9A-Za-z:\^]+)(?<cells>(?>[^\[\]]+)?)\]";
46
// Matches one cell: "^key" followed by either "^pvalue" (dictionary reference)
// or "=value" (literal). Used by Compile.
protected string regex_cell = @"\^(?<key>[0-9A-Fa-f]+)(\^(?<pvalue>[0-9A-Fa-f]+)|=(?<value>[0-9A-Fa-f]+))";
47
// Extracts the namespace ("ns") and table id ("tbl") from a table header. Used by ParseTable.
protected string regex_table = @"{.*?:(?<ns>[0-9A-Fa-f\^]+) {\(k\^(?<tbl>[0-9A-Fa-f]+):c\)";
49
// Creates a database object for the given mork file and allocates the empty
// lookup tables (dicts, metadicts, rows, tables).
//
// NOTE(review): the embedded line numbering jumps from 55 to 61 here, so lines
// are missing from this excerpt; the reader statements below may belong to a
// separate method whose header is not visible.
public MorkDatabase (string mork_file)
51
this.mork_file = mork_file;
52
this.dicts = new Hashtable ();
53
this.metadicts = new Hashtable ();
54
this.rows = new Hashtable ();
55
this.tables = new Hashtable ();
61
// NOTE(review): no Close/Dispose of `reader` is visible in this excerpt; confirm it
// is released on both the success path and the exception path below.
StreamReader reader = new StreamReader (mork_file);;
63
// Check if this is a mork file and save database version if it is. We assume the first line will tell us this.
64
if (!IsValid (reader.ReadLine (), out mork_version)) {
66
throw new InvalidMorkDatabaseException ("This file is missing a valid mork header");
69
// Everything after the header line is read in one go.
// NOTE(review): the declaration of `content` and what consumes it are not visible here.
content = reader.ReadToEnd ();
76
// Checks the first line of a file against the mork header pattern
// <!-- <mdb:mork:z v="..."/> --> and extracts the version string.
//
// header:  the first line of the file; null or empty is tested explicitly.
// version: receives the text captured by the "version" group.
//
// NOTE(review): the return statements, the handling of a non-matching header and
// the assignment of `version` on the failure paths are not visible in this excerpt.
protected bool IsValid (string header, out string version)
79
Regex reg = new Regex (@"<!-- <mdb:mork:z v=\""(?<version>(.*))\""/> -->");
81
if (header == null || header == string.Empty)
84
Match m = reg.Match (header);
88
version = m.Result ("${version}");
92
// Walks the raw mork text one character at a time and hands every top-level
// construct to its parser:
//   "//"    comment, skipped up to the end of the line
//   "< <"   meta-dictionary  -> ParseMetaDict
//   "<"     dictionary       -> ParseDict
//   "{"     table            -> ParseTable
//   "["     row              -> ParseRows
//   "@$"    group            -> ParseGroups
//
// content: the mork text to scan; null or empty content is ignored.
//
// The helpers Read (content, ref position, ...) and FindStartIndex advance
// `position` to the end of the construct they consumed, so the scan resumes
// right after it.
protected void Read (string content)
{
	if (content == null || content.Length == 0)
		return;

	int position = -1;

	while (++position < content.Length) {
		char current = content [position];

		if (current == '/' && position+1 < content.Length && content [position+1] == '/') {
			// Ignore comments. The second slash has to be checked at position+1;
			// testing the same index twice made any lone '/' look like a comment.
			int end_of_line = content.IndexOf ('\n', position);

			// A comment on the last line has no terminating newline. IndexOf
			// then returns -1, which would restart the scan from index 0.
			if (end_of_line < 0)
				break;

			position = end_of_line;
		} else if (current == '<' && position+2 < content.Length && content [position+2] == '<')
			// Parse metadict information. FindStartIndex runs first (arguments are
			// evaluated left to right), so `position` already points at the end.
			ParseMetaDict (FindStartIndex (content, ref position, "<(", ")>"), position, content);
		else if (current == '<')
			// Parse dict information
			ParseDict (FindStartIndex (content, ref position, "<(", ")>"), position, content);
		else if (current == '{')
			// Parse table information
			ParseTable (Read (content, ref position, "{", "}"));
		else if (current == '[')
			// Parse rows that live outside of any table
			ParseRows (Read (content, ref position, "[", "]"), null, null);
		else if (current == '@' && position+1 < content.Length && content [position+1] == '$')
			// Parse groups
			ParseGroups (Read (content, ref position, "@$${", "@$$}"));
	}
}
119
// Extracts one delimited construct starting at `position` and returns it as a
// new string; `position` is moved to the index where the matching `end`
// marker was found.
//
// content:  text being scanned.
// position: in: index of the construct's first character; out: index of `end`.
// start:    opening marker, searched for again to account for nesting.
// end:      closing marker.
//
// NOTE(review): the `do {` line and the body of the `if` below are missing from
// this excerpt, so the exact nesting logic cannot be confirmed here.
protected string Read (string content, ref int position, string start, string end)
121
int tmp = position, start_position = position;
124
// Next candidate closing marker after the current position.
position = content.IndexOf (end, position+1);
125
// Next opening marker; a negative result means there is none left.
if ((tmp = content.IndexOf (start, tmp+1)) < 0)
127
// Keep going while another opening marker sits before the closing one found.
} while (tmp < position);
129
// The length includes the character at `position`, i.e. only the first
// character of a multi-character `end` marker.
// NOTE(review): if `end` was never found, position is -1 and this length is invalid.
return content.Substring (start_position, position-start_position+1);
131
// This method is complex, and quite hacky, but it basically returns the index of the beginning
132
// of the substring, and points position to the end of the substring. Which I use in ParseDict
133
// and ParseMetaDict to significantly reduce the number of string allocations we are making.
//
// It runs the same marker search as Read (content, ref position, start, end)
// but returns the start index instead of allocating the substring.
//
// NOTE(review): the `do {` line and the body of the `if` below are missing from
// this excerpt.
134
protected int FindStartIndex (string content, ref int position, string start, string end)
136
int tmp = position, start_position = position;
139
// Next candidate closing marker after the current position.
position = content.IndexOf (end, position+1);
140
// Next opening marker; a negative result means there is none left.
if ((tmp = content.IndexOf (start, tmp+1)) < 0)
142
} while (tmp < position);
144
// The caller reads the end of the construct from `position`.
return start_position;
147
// Parses the dictionary that spans dict[start..end] and stores every
// "id=value" entry in `dicts`.
//
// start: index of the dictionary's first character inside `dict`.
// end:   index where the dictionary ends inside `dict`.
// dict:  the complete content string (not just the dictionary text).
protected virtual void ParseDict (int start, int end, string dict)
149
Regex reg = new Regex (@"(?<id>[0-9A-Fa-f]+)\s*=(?<value>(.*))", RegexOptions.Compiled);
151
// This is a crude approach, but it is an easy solution that works: a single regex over
152
// the whole dictionary seems to fail on large amounts of data, so the text is split first.
// The slice drops the first two and the last character of the construct, line
// continuations and newlines are removed, and every ")(" boundary becomes a
// newline so that each entry can be matched on its own.
153
foreach (string t in Regex.Replace (dict.Substring (start+2,(end-start)-3).Replace ("\\\n", "").
154
Replace ("\n", ""), @"\)\s*\(", "\n").Split ('\n')) {
156
Match m = reg.Match (t);
158
// NOTE(review): no check of m.Success is visible in this excerpt; confirm that
// entries which do not match are skipped.
dicts [m.Result ("${id}")] = m.Result ("${value}");
162
// Reads every "id=value" pair found in content[start..end] (both inclusive)
// and records it in `metadicts`, overwriting an existing entry with the same id.
//
// start:   index of the first character of the meta-dictionary.
// end:     index of its last character.
// content: the complete content string.
protected virtual void ParseMetaDict (int start, int end, string content)
{
	Regex pair_pattern = new Regex (@"(?<id>[0-9A-Fa-f]+)=(?<value>[^()]+)", RegexOptions.Compiled);
	string section = content.Substring (start, end-start+1);
	MatchCollection pairs = pair_pattern.Matches (section);

	for (int i = 0; i < pairs.Count; i++) {
		Match pair = pairs [i];
		string id = pair.Result ("${id}");
		metadicts [id] = pair.Result ("${value}");
	}
}
170
// Parses one table construct: the namespace and table id are taken from the
// header via regex_table, and everything after the first '}' (minus the last
// character) is handed to ParseRows.
//
// table: the text of the table construct, as returned by Read.
protected virtual void ParseTable (string table)
172
// Index just past the first '}' in the table text.
int start = table.IndexOf ('}')+1;
173
Match m = new Regex (regex_table, RegexOptions.Compiled).Match (table);
175
// NOTE(review): no check of m.Success is visible in this excerpt.
ParseRows (table.Substring (start, table.Length-start-1), m.Result ("${ns}"), m.Result ("${tbl}"));
178
// Parses every row found in `rows` (after Clean has stripped spaces and
// newlines) and either removes or adds it.
//
// rows:  text containing zero or more row constructs.
// ns:    namespace used when a row id carries no ":namespace" suffix; may be null.
// table: table id passed on to AddRow; may be null.
protected virtual void ParseRows (string rows, string ns, string table)
180
Regex reg = new Regex (regex_row, RegexOptions.Compiled);
182
foreach (Match m in reg.Matches (Clean (rows))) {
183
// tmp [0] == id, tmp [1] == ns
184
string[] tmp = m.Result ("${roid}").Split (':');
186
// A "-" action or a row without cells means the row is removed.
if (m.Result ("${action}") == "-" || m.Result ("${cells}") == string.Empty)
187
RemoveRow (tmp [0], (tmp.Length > 1 ? tmp [1] : ns));
189
// NOTE(review): the `else` that presumably guards this call is missing from the excerpt.
AddRow (tmp [0], (tmp.Length > 1 ? tmp [1] : ns), table, m.Result ("${cells}"));
193
// Handles one group construct: the text after the first "{@" is kept, with
// the last character of the construct dropped.
//
// groups: the text of the group, as returned by Read.
//
// NOTE(review): the statement that consumes the trimmed text is missing from
// this excerpt. IndexOf returns -1 when "{@" is absent, which makes start == 1.
protected virtual void ParseGroups (string groups)
195
int start = groups.IndexOf ("{@")+2;
196
groups =groups.Substring (start, groups.Length-start-1);
200
// Returns a copy of str with every newline ("\n") and every space removed.
// Other whitespace, such as tabs or carriage returns, is left untouched.
//
// str: the text to strip; must not be null.
protected string Clean (string str)
{
	string without_newlines = str.Replace ("\n", "");
	string without_spaces = without_newlines.Replace (" ", "");

	return without_spaces;
}
205
// Resolves a namespace to its "^id" form: namespaces given by name are looked
// up among the values of `metadicts` and the matching key is returned with a
// "^" prefix.
//
// ns: a namespace, either already in "^id" form or as a name.
//
// NOTE(review): the return statements for the null/empty case, for the
// "already starts with ^" case and for the "no match" case are missing from
// this excerpt. Callers in this file compare the result against null.
public string ParseNamespace (string ns)
207
if (ns == null || ns == string.Empty)
209
if (ns.StartsWith ("^"))
212
// Linear search over the meta-dictionary for a value equal to the name.
foreach (string key in metadicts.Keys)
213
if ((metadicts [key] as string) == ns)
214
return String.Format ("^{0}", key);
220
// Stores the cells of a row under its resolved namespace. When the row
// already exists, the new cells are placed in front of the stored ones.
//
// id:    row id.
// ns:    namespace, resolved through ParseNamespace.
// table: table the row belongs to.
// cells: raw cell text of the row.
//
// NOTE(review): the bodies of the first `if` and of the final `if` are missing
// from this excerpt. ParseRows passes null for `table` (and possibly a null
// namespace) for rows outside a table; the guard below only tests for empty
// strings, so confirm how null values are meant to be handled.
public void AddRow (string id, string ns, string table, string cells)
222
string ns2 = ParseNamespace (ns);
224
if (id == string.Empty || ns2 == string.Empty || table == string.Empty || cells == string.Empty)
226
// First row seen in this namespace: create its row table.
else if (!rows.ContainsKey (ns2))
227
rows [ns2] = new Hashtable ();
229
(rows [ns2] as Hashtable) [id] = (Exists (id, ns2) ? String.Concat (cells, GetCells (id, ns2)) : cells);
231
if (!tables.ContainsKey (id))
235
// Removes the row with the given id from its namespace.
//
// id: row id.
// ns: namespace, resolved through ParseNamespace.
//
// NOTE(review): the body of the `if` is missing from this excerpt (presumably an
// early return). Hashtable.ContainsKey throws on a null key, so confirm that
// ParseNamespace cannot yield null here.
public void RemoveRow (string id, string ns)
237
string ns2 = ParseNamespace (ns);
239
if (!rows.ContainsKey (ns2))
242
(rows [ns2] as Hashtable).Remove (id);
246
// Returns the raw cell text stored for a row, or null when the row cannot be
// found.
//
// id: row id; null yields null.
// ns: namespace, resolved through ParseNamespace.
//
// A namespace that resolves but has no row table no longer raises a
// NullReferenceException; it is reported as "not found" like an unknown row.
public string GetCells (string id, string ns)
{
	string ns2 = ParseNamespace (ns);

	// Hashtable rejects null keys, so bail out before indexing.
	if (id == null || ns2 == null)
		return null;

	Hashtable ns_rows = rows [ns2] as Hashtable;

	return (ns_rows != null ? ns_rows [id] as string : null);
}
253
// Builds a table of decoded column values for one row: each cell's key is
// translated through `metadicts`, its value is either looked up in `dicts`
// ("^id" form) or taken literally ("=value" form), and the result is decoded
// with Decode using the database Encoding. The entry "table" holds the row's
// table as stored in `tables`.
//
// id: row id.
// ns: namespace, resolved through ParseNamespace.
//
// NOTE(review): the return statements (for a missing row and for the finished
// table) are not visible in this excerpt.
public Hashtable Compile (string id, string ns)
255
string ns2 = ParseNamespace (ns);
257
if (!Exists (id, ns2))
260
Hashtable tbl = new Hashtable ();
261
Regex reg = new Regex (regex_cell, RegexOptions.Compiled);
263
foreach (Match m in reg.Matches (GetCells (id, ns2))) {
264
// A non-empty "pvalue" group is a reference into the dictionary; otherwise
// the literal "value" group is used.
string value = (string) (m.Result ("${pvalue}") != string.Empty ?
265
dicts [m.Result("${pvalue}")] : m.Result ("${value}"));
266
// NOTE(review): metadicts [key] is null for an unknown key, and Hashtable does
// not accept null keys; confirm every cell key is present in metadicts.
tbl [metadicts [m.Result ("${key}")]] = Decode (value, Encoding);
270
tbl ["table"] = tables [id];
275
// Tells whether a row with the given id is stored in the given namespace.
//
// id: row id; null yields false.
// ns: namespace, resolved through ParseNamespace.
//
// The lookup uses the resolved namespace (ns2). Indexing `rows` with the raw
// `ns` missed every namespace passed by name and dereferenced null when the
// namespace had no row table.
public bool Exists (string id, string ns)
{
	string ns2 = ParseNamespace (ns);

	// Hashtable rejects null keys, so bail out before indexing.
	if (id == null || ns2 == null)
		return false;

	Hashtable ns_rows = rows [ns2] as Hashtable;

	return (ns_rows != null && ns_rows.ContainsKey (id));
}
282
// Returns the number of rows stored in a namespace.
//
// ns: namespace, resolved through ParseNamespace.
//
// NOTE(review): the value returned when the namespace is unknown or has no rows
// is not visible in this excerpt.
public int GetRowCount (string ns)
284
string ns2 = ParseNamespace (ns);
286
if (ns2 == null || rows [ns2] == null)
289
return (rows [ns2] as Hashtable).Count;
292
// Counts the rows of a namespace that belong to a given table, by comparing
// each row id's entry in `tables` with `table`.
//
// ns:    namespace, resolved through ParseNamespace.
// table: table id to match.
//
// NOTE(review): the counter, its increment and the return statements are not
// visible in this excerpt.
public int GetRowCount (string ns, string table)
295
string ns2 = ParseNamespace (ns);
297
if (ns2 == null || rows [ns2] == null)
300
foreach (string id in (rows [ns2] as Hashtable).Keys) {
301
if ((string) tables [id] == table)
308
// Enumerates the row ids stored in the namespace selected by EnumNamespace.
//
// NOTE(review): what is returned when the namespace is unknown, has no rows or
// the database is Empty is not visible in this excerpt.
public IEnumerator GetEnumerator ()
310
string ns = ParseNamespace (EnumNamespace);
312
if (ns == null || (rows [ns] as Hashtable) == null || Empty)
315
return (rows [ns] as Hashtable).Keys.GetEnumerator ();
324
mork_version = string.Empty;
327
// Converts one or two byte values into a string in the requested encoding.
// The bytes are interpreted either as UTF-7 (one byte, char1 only) or as
// UTF-8 (two bytes, char1 followed by char2) and re-encoded into to_encoding.
//
// char1:       first byte value.
// char2:       second byte value; Decode passes -1 when there is none.
// to_encoding: target encoding of the returned string.
//
// NOTE(review): the condition that selects between the two branches and the
// declaration of `bytes` are missing from this excerpt. System.Convert.ToByte
// throws OverflowException for values outside 0..255.
public static string Convert (int char1, int char2, System.Text.Encoding to_encoding)
330
System.Text.Encoding from;
333
from = System.Text.Encoding.UTF7;
334
bytes = new byte[] { System.Convert.ToByte (char1) };
336
from = System.Text.Encoding.UTF8;
337
bytes = new byte[] { System.Convert.ToByte (char1), System.Convert.ToByte (char2) };
340
return to_encoding.GetString (System.Text.Encoding.Convert (from, to_encoding, bytes));
343
// Replaces "$XX" and "$XX$YY" escape sequences (XX, YY being two upper-case
// hex digits) in str with the characters they encode, using Convert.
//
// str:         text that may contain escape sequences.
// to_encoding: target encoding handed to Convert.
//
// NOTE(review): this excerpt is missing several lines. The value returned by the
// early-exit test and at the end is not visible, and two alternative
// Convert (...) argument lists (Thunderbird.Hex2Dec versus int.Parse) appear
// back to back, so one of them is presumably disabled in the original file.
public static string Decode (string str, System.Text.Encoding to_encoding)
345
// Nothing to decode without input, a target encoding or a '$' marker.
if (str == null || str == string.Empty || to_encoding == null || str.IndexOf ('$') == -1)
348
// Groups 1 and 2 capture a two-byte sequence, group 3 a single byte.
foreach (Match m in Regex.Matches (str, @"\$(?<1>[0-9A-F]{2})\$(?<2>[0-9A-F]{2})|\$(?<3>[0-9A-F]{2})")) {
349
string char1 = m.Result ("${1}"), char2 = m.Result ("${2}"), char3 = m.Result ("${3}");
351
if (char1 != string.Empty) {
352
str = str.Replace (String.Format (@"${0}${1}", char1, char2),
354
Convert (Thunderbird.Hex2Dec (char1),
355
Thunderbird.Hex2Dec (char2),
358
Convert (int.Parse (char1, System.Globalization.NumberStyles.HexNumber),
359
int.Parse (char2, System.Globalization.NumberStyles.HexNumber),
364
str = str.Replace (String.Format (@"${0}", char3),
365
Convert (int.Parse (char3, System.Globalization.NumberStyles.HexNumber), -1, to_encoding));
375
foreach (Hashtable r in rows.Values)
382
// Namespace whose row ids GetEnumerator walks over; stored as given and
// resolved through ParseNamespace only when the enumeration starts.
public string EnumNamespace {
	get {
		return this.enum_namespace;
	}
	set {
		this.enum_namespace = value;
	}
}
387
// Path of the mork file this database was created for (read-only).
public string Filename {
	get {
		return this.mork_file;
	}
}
391
// Version string taken from the header of the mork file (read-only).
public string Version {
	get {
		return this.mork_version;
	}
}
395
// There will always exist an item with id 1 in namespace 80, which means
396
// that when there are fewer than two items in the database, it's empty.
// NOTE(review): `rows` is keyed by namespace (see AddRow), so rows.Count is the
// number of namespaces that hold rows, not the number of rows; confirm this is
// the intended measure. The property header is missing from this excerpt;
// GetEnumerator refers to this member as `Empty`.
398
get { return (rows.Count > 1 ? false : true); }
401
// Text encoding used when decoding cell values (see Compile). The encoding
// name is read from the meta-dictionary entry "f"; "iso-8859-1" also appears
// as an alternative.
//
// NOTE(review): the getter scaffolding is missing from this excerpt, so when the
// "iso-8859-1" assignment applies (presumably as a fallback when the first
// lookup fails) and what is returned cannot be confirmed here.
public System.Text.Encoding Encoding {
403
System.Text.Encoding encoding;
406
encoding = System.Text.Encoding.GetEncoding ((string) metadicts ["f"]);
408
encoding = System.Text.Encoding.GetEncoding ("iso-8859-1");
416
// Thrown when a file does not start with a valid mork header (see the
// IsValid check performed before the file content is read).
public class InvalidMorkDatabaseException : System.Exception {
418
// message: description of the problem, passed on to System.Exception.
public InvalidMorkDatabaseException (string message) : base (message)