~grubng-dev/grubng/tools-urlsdb

« back to all changes in this revision

Viewing changes to Files.cs

  • Committer: thindil
  • Date: 2010-04-04 08:42:41 UTC
  • Revision ID: thindil2@gmail.com-20100404084241-p0z5fqho81xwtjes
added ToDo list

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
//  
2
 
//  Copyright (C) 2011 Bartek thindil Jasicki
3
 
// 
4
 
//  This file is part of Grubng
5
 
// 
6
 
//  Grubng is free software: you can redistribute it and/or modify
7
 
//  it under the terms of the GNU General Public License as published by
8
 
//  the Free Software Foundation, either version 3 of the License, or
9
 
//  (at your option) any later version.
10
 
// 
11
 
//  This program is distributed in the hope that it will be useful,
12
 
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 
//  GNU General Public License for more details.
15
 
// 
16
 
//  You should have received a copy of the GNU General Public License
17
 
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
 
// 
19
 
using System;
20
 
using Grubng;
21
 
using System.Text;
22
 
using System.IO;
23
 
using System.IO.Compression;
24
 
 
25
 
namespace urlsdb
26
 
{
27
 
        /// <summary>
28
 
        /// Provide function for manipulate files.
29
 
        /// </summary>
30
 
        internal sealed class Files : IDisposable
31
 
        {
32
 
                /// <summary>
33
 
                /// Object for database operations.
34
 
                /// </summary>
35
 
                Database db;
36
 
                /// <summary>
37
 
                /// Object for show messages to user.
38
 
                /// </summary>
39
 
                Utils util;
40
 
                /// <summary>
41
 
                /// If true, all disposable fields are disposed.
42
 
                /// </summary>
43
 
                bool disposed;
44
 
                
45
 
                /// <summary>
46
 
                /// Standard class constructor.
47
 
                /// </summary>
48
 
                /// <param name="quiet">
49
 
                /// A <see cref="System.Boolean"/> if true, don't show messages to user.
50
 
                /// </param>
51
 
                public Files (bool quiet)
52
 
                {
53
 
                        this.db = new Database();
54
 
                        this.util = new Utils(quiet);
55
 
                }
56
 
                
57
 
                /// <summary>
58
 
                /// Function create database backup to selected file.
59
 
                /// </summary>
60
 
                /// <param name="filename">
61
 
                /// A <see cref="System.String"/> name of backup file.
62
 
                /// </param>
63
 
                public void Backup(string filename)
64
 
                {
65
 
                        if (this.disposed)
66
 
                        {
67
 
                                throw new ObjectDisposedException(GetType().Name);
68
 
                        }
69
 
                        this.util.Message("Creating database backup ... ", false);
70
 
                        using (StreamWriter writer = new StreamWriter(filename))
71
 
                        {
72
 
                                StringBuilder urls2 = new StringBuilder();
73
 
                                this.util.ProgressStart(0, "URL's was saved");
74
 
                                while (true)
75
 
                                {
76
 
                                        urls2.Append(this.db.SelectURLs(this.util.Curamount));
77
 
                                        if (urls2.Length == 0)
78
 
                                        {
79
 
                                                break;
80
 
                                        }
81
 
                                        urls2.Append(Environment.NewLine);
82
 
                                        writer.Write(urls2);
83
 
                                        writer.Flush();
84
 
                                        urls2.Remove(0, urls2.Length);
85
 
                                        this.util.Curamount += 100000;
86
 
                                        GC.GetTotalMemory(true);
87
 
                                }
88
 
                                this.util.ProgressStop();
89
 
                                writer.Close();
90
 
                        }
91
 
                        this.util.Message(" done.", true);
92
 
                        this.util.Message("Compressing backup file ... ", false);
93
 
                        using (FileStream file = File.OpenRead(filename))
94
 
                        {
95
 
                                using (FileStream workunit = File.Create(filename + ".lzma"))
96
 
                                {
97
 
                                        long length = file.Length;
98
 
                                        if (length > 4194304)
99
 
                                        {
100
 
                                                length = 4194304;
101
 
                                        }
102
 
                                        object[] properties = { (int)(length),
103
 
                                                (int)(2),
104
 
                                                (int)(3),
105
 
                                                (int)(0),
106
 
                                                (int)(2),
107
 
                                                (int)(128),
108
 
                                                "bt4",
109
 
                                                false
110
 
                                        };
111
 
                                        using (Grubng.Encoder encoder = new Grubng.Encoder())
112
 
                                        {
113
 
                                                CoderPropID[] propIDs = {CoderPropID.DictionarySize,
114
 
                                                        CoderPropID.PosStateBits,
115
 
                                                        CoderPropID.LitContextBits,
116
 
                                                        CoderPropID.LitPosBits,
117
 
                                                        CoderPropID.Algorithm,
118
 
                                                        CoderPropID.NumFastBytes,
119
 
                                                        CoderPropID.MatchFinder,
120
 
                                                        CoderPropID.EndMarker
121
 
                                                };
122
 
                                                encoder.SetCoderProperties(propIDs, properties);
123
 
                                                encoder.WriteCoderProperties(workunit);
124
 
                                                for (int i = 0; i < 8; i++)
125
 
                                                {
126
 
                                                        workunit.WriteByte((Byte)(length >> (8 * i)));
127
 
                                                }
128
 
                                                encoder.Code(file, workunit);
129
 
                                                properties = null;
130
 
                                        }
131
 
                                }
132
 
                        }
133
 
                        this.util.Message("done", true);
134
 
                }
135
 
                
136
 
                /// <summary>
137
 
                /// Function read list of URL's from file and send they to database.
138
 
                /// </summary>
139
 
                /// <param name="filename">
140
 
                /// A <see cref="System.String"/> filename with URL's.
141
 
                /// </param>
142
 
                /// <param name="fast">
143
 
                /// A <see cref="System.Boolean"/> if true, do fast insert URL's to database.
144
 
                /// </param>
145
 
                public void AddURLs(string filename, bool fast)
146
 
                {
147
 
                        if (this.disposed)
148
 
                        {
149
 
                                throw new ObjectDisposedException(GetType().Name);
150
 
                        }
151
 
                        FileInfo finfo = new FileInfo(filename);
152
 
                        if (finfo.Extension == ".gz")
153
 
                        {
154
 
                                this.util.Message("Decompressing file ... ", false);
155
 
                                string newfilename = filename.Remove(filename.Length - 3);
156
 
                                using (FileStream file2 = File.OpenWrite(newfilename))
157
 
                                {
158
 
                                        using (FileStream file = File.OpenRead(filename))
159
 
                                        {
160
 
                                                using (GZipStream gzstream = new GZipStream(file, CompressionMode.Decompress))
161
 
                                                {
162
 
                                                        byte[] buffer = new byte[1024];
163
 
                                                        int bytesRead = 0;
164
 
                                                        while ((bytesRead = gzstream.Read(buffer, 0, buffer.Length)) != 0)
165
 
                                                        {
166
 
                                                                file2.Write(buffer, 0, bytesRead);
167
 
                                                                file2.Flush();
168
 
                                                        }
169
 
                                                }
170
 
                                        }
171
 
                                }
172
 
                                filename = newfilename;
173
 
                                finfo = new FileInfo(filename);
174
 
                                this.util.Message("done", true);
175
 
                        }
176
 
                        this.util.Message("Uploading URL's to database ... ", false);
177
 
                        int length = (int)finfo.Length;
178
 
                        finfo = null;
179
 
                        this.util.ProgressStart(length, String.Empty);
180
 
                        int i = 0;
181
 
                        System.Collections.Generic.List<string> records = new System.Collections.Generic.List<string>();
182
 
                        System.Collections.Generic.Dictionary<string, string[]> urls = new System.Collections.Generic.Dictionary<string, string[]>();
183
 
                        int amount = 0;
184
 
                        string record, hash1, hash2, record2 = String.Empty;
185
 
                        using (StreamReader reader = new StreamReader(filename))
186
 
                        {
187
 
                                using (ParseURLs parseurl = new ParseURLs())
188
 
                                {
189
 
                                        while (!reader.EndOfStream)
190
 
                                        {
191
 
                                                for (int j = 0; j < 25000; j++)
192
 
                                                {
193
 
                                                        record2 = reader.ReadLine();
194
 
                                                        if (record2 == null)
195
 
                                                        {
196
 
                                                                break;
197
 
                                                        }
198
 
                                                        if (records.Contains(record2))
199
 
                                                        {
200
 
                                                                continue;
201
 
                                                        }
202
 
                                                        if ((record2.StartsWith("www.")) && (records.Contains(record2.Substring(4))))
203
 
                                                        {
204
 
                                                                continue;
205
 
                                                        }
206
 
                                                        if ((!record2.StartsWith("www.")) && (record2.Contains("www." + record2)))
207
 
                                                        {
208
 
                                                                continue;
209
 
                                                        }
210
 
                                                        records.Add(record2);
211
 
                                                }
212
 
                                                //Remove bad URL's and create hash for each
213
 
                                                foreach (string record3 in records)
214
 
                                                {
215
 
                                                        record = ParseURLs.ParseURL(record3);
216
 
                                                        i ++;
217
 
                                                        if (record.Length == 0)
218
 
                                                        {
219
 
                                                                continue;
220
 
                                                        }
221
 
                                                        hash1 = parseurl.GetHash(record);
222
 
                                                        if (!record.StartsWith("www."))
223
 
                                                        {
224
 
                                                                hash2 = parseurl.GetHash("www." + record);
225
 
                                                        }
226
 
                                                        else
227
 
                                                        {
228
 
                                                                hash2 = parseurl.GetHash(record.Substring(4));
229
 
                                                        }
230
 
                                                        if (!urls.ContainsKey(record))
231
 
                                                        {
232
 
                                                                urls.Add(record, new string[] {hash1, hash2});
233
 
                                                        }
234
 
                                                        this.util.Curamount += record3.Length;
235
 
                                                }
236
 
                                                if (!fast)
237
 
                                                {
238
 
                                                        amount += this.db.InsertURLs(urls, false);
239
 
                                                }
240
 
                                                else
241
 
                                                {
242
 
                                                        amount += this.db.InsertURLs(urls, true);
243
 
                                                }
244
 
                                                urls.Clear();
245
 
                                                records.Clear();
246
 
                                                records.TrimExcess();
247
 
                                        }
248
 
                                }
249
 
                        }
250
 
                        records = null;
251
 
                        this.util.ProgressStop();
252
 
                        this.util.Message(String.Empty, true);
253
 
                        this.util.Message(amount.ToString() + " URL's was inserted from " + i.ToString() + " URL's.", true);
254
 
                }
255
 
                
256
 
                /// <summary>
257
 
                /// Function dispose unmanaged resources.
258
 
                /// </summary>
259
 
                public void Dispose()
260
 
                {
261
 
                        if (this.util != null)
262
 
                        {
263
 
                                this.util.Dispose();
264
 
                        }
265
 
                        this.disposed = true;
266
 
                }
267
 
        }
268
 
}
269