2
// Copyright (C) 2009,2010,2011 Bartek thindil Jasicki
4
// This file is part of Grubng
6
// Grubng is free software: you can redistribute it and/or modify
7
// it under the terms of the GNU General Public License as published by
8
// the Free Software Foundation, either version 3 of the License, or
9
// (at your option) any later version.
11
// This program is distributed in the hope that it will be useful,
12
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
// GNU General Public License for more details.
16
// You should have received a copy of the GNU General Public License
17
// along with this program. If not, see <http://www.gnu.org/licenses/>.
22
using System.Text.RegularExpressions;
28
/// Provides functions for manipulating URLs: normalizing them (ParseURL) and hashing them (GetHash).
30
internal sealed class ParseURLs : IDisposable
33
/// Hash algorithm used to hash URLs. The field type is SHA1Managed, so the digest is SHA-1 (not SHA-256).
35
System.Security.Cryptography.SHA1Managed SHhash;
37
/// SHA-1 hash of URL
// NOTE(review): the field this comment documents is not visible here - presumably HashValue; confirm.
41
/// Used to generate SHA-1 hash of URL
// NOTE(review): the field this comment documents is not visible here - presumably PureHash; confirm.
45
/// Used to generate SHA-1 hash of URL
// NOTE(review): the field this comment documents is not visible here - presumably key; confirm.
49
/// If true, all disposable fields are disposed.
54
/// Standard class constructor: creates the SHA-1 hasher and the StringBuilder that GetHash fills with the hex digest.
58
this.SHhash = new System.Security.Cryptography.SHA1Managed();
59
this.key = new StringBuilder();
63
/// <summary>
/// Normalizes a URL: strips everything up to and including the first "//", validates the
/// remainder, lower-cases the host, removes session-id parameters from the path and
/// URL-encodes the result while keeping common URL punctuation readable.
/// </summary>
65
/// <param name="url">A <see cref="System.String"/> URL to parse.</param>
66
/// <returns>
69
/// A <see cref="System.String"/> parsed URL or String.Empty if URL is invalid.
/// </returns>
71
public static string ParseURL(string url)
73
// Remove http(s):// from the URL: cut everything up to and including the first "//".
// NOTE(review): IndexOf returns -1 when "//" is absent, which would make Remove(0, 1) drop
// the first character; a guard is presumably on a line not visible here - confirm.
74
int index = url.IndexOf("//");
77
url = url.Remove(0, (index + 2));
79
// Check correctness of the URL: it must start with a letter or digit and contain only the
// characters listed in the pattern (case-insensitive).
// NOTE(review): the pattern allows neither ':' nor ';' nor '@', so e.g. "host:8080" does not match.
80
if (!Regex.IsMatch(url, @"^[a-z0-9]+([a-z0-9\=\-\.\?\,\'\/\\\+&%\$#_~]*)?$", RegexOptions.IgnoreCase))
84
// Convert the host name to lower case: split at the first '/' into host and path.
85
index = url.IndexOf('/');
89
host = url.Substring(0, index);
90
path = url.Substring(index);
97
// NOTE(review): ToLower() uses the current culture; ToLowerInvariant() is the
// culture-independent alternative for machine-readable data such as host names.
host = host.ToLower();
99
// The host may only contain letters, digits, '-' and '.', and must start with a letter or digit.
if (!Regex.IsMatch(host, @"^[a-z0-9]+([a-z0-9\-\.]*)?$", RegexOptions.IgnoreCase))
103
// Remove session ids from the path: a PHPSESSID=, s= or ses= parameter with a 32-character
// alphanumeric value, plus any "&" separators that directly follow it.
// NOTE(review): both alternatives in "(&|&)" are identical - presumably one was "&amp;"
// before the text was HTML-decoded; confirm against the original file.
106
path = Regex.Replace(path, @"(PHPSESSID|s|ses)=[a-z0-9]{32}(&|&)*", String.Empty, RegexOptions.IgnoreCase);
107
// Drop separators left dangling at the end of the path.
path = path.TrimEnd(new char[] {'&', '?'});
108
// If the last "&" sits exactly five characters before the end, cut the path at that "&".
// NOTE(review): the offset 5 equals the length of "&amp;" - presumably the literal here was
// "&amp;" (removing a trailing HTML-escaped separator); confirm.
if ((path.LastIndexOf("&") > -1) && (path.LastIndexOf("&") == (path.Length - 5)))
110
path = path.Remove(path.LastIndexOf("&"));
114
// Encode host + path, then restore selected characters from their percent-escapes.
// The escapes below are written with lowercase hex digits, so this relies on UrlEncode
// emitting lowercase hex.
url = System.Web.HttpUtility.UrlEncode(host + path);
115
url = url.Replace("%2f", "/");
116
url = url.Replace("%2c", ",");
117
url = url.Replace("%3d", "=");
118
url = url.Replace("%3f", "?");
119
url = url.Replace("%26", "&");
120
// NOTE(review): ';', ':' and '@' are not accepted by the validation pattern above, so these
// three restores look unreachable for URLs that passed validation - confirm.
url = url.Replace("%3b", ";");
121
url = url.Replace("%3a", ":");
122
url = url.Replace("%40", "@");
123
url = url.Replace("%23", "#");
124
url = url.Replace("%7e", "~");
125
// '%' itself is encoded as "%25", so a "%2f" that was already present in the input arrives
// here as "%252f"; it is turned into '/' as well.
url = url.Replace("%252f", "/");
130
/// <summary>
/// Returns the SHA-1 hash of a URL as a lowercase hexadecimal string.
/// Intermediate results are stored in the instance fields PureHash, HashValue and key.
/// </summary>
132
/// <param name="url">A <see cref="System.String"/> URL to hash.</param>
133
/// <returns>
136
/// A <see cref="System.String"/> SHA1 hash of URL, two lowercase hex digits per hash byte.
/// </returns>
/// <exception cref="System.ObjectDisposedException">
/// Thrown by the statement at the top of the method; presumably only when this instance has
/// already been disposed (the guarding condition is not visible here - confirm).
/// </exception>
138
public string GetHash(string url)
142
throw new ObjectDisposedException(GetType().Name);
144
// Empty the shared StringBuilder so output from a previous call does not leak into this one.
if (this.key.Length > 0)
146
this.key.Remove(0, key.Length);
148
// Hash the UTF-8 bytes of the URL with the SHhash instance.
this.PureHash = Encoding.UTF8.GetBytes(url);
149
this.HashValue = SHhash.ComputeHash(this.PureHash);
150
// Render every hash byte as two lowercase hexadecimal digits.
foreach(byte b in this.HashValue)
152
this.key.Append(String.Format("{0:x2}", b));
154
return this.key.ToString();
158
/// Disposes unmanaged resources. Public entry point of the IDisposable implementation.
// NOTE(review): the method body is not visible here; presumably it forwards to Dispose(bool) - confirm.
160
public void Dispose()
166
/// Disposes unmanaged resources. Private function.
168
/// <param name="disposing">A <see cref="System.Boolean"/>; if true, dispose all unmanaged resources.</param>
169
// NOTE(review): only the tail of the body is visible here; whether SHhash is disposed and how
// "disposing" is used cannot be confirmed from these lines.
171
void Dispose(bool disposing)
176
// Drop the references to the hash buffers and mark the instance as disposed.
this.HashValue = null;
177
this.PureHash = null;
178
this.disposed = true;