1
//Please refer to http://dansguardian.org/?page=copyright
2
//for the license for this code.
3
//Written by Daniel Barron (daniel@//jadeb/.com).
4
//For support go to http://groups.yahoo.com/group/dansguardian
6
// This program is free software; you can redistribute it and/or modify
7
// it under the terms of the GNU General Public License as published by
8
// the Free Software Foundation; either version 2 of the License, or
9
// (at your option) any later version.
11
// This program is distributed in the hope that it will be useful,
12
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
// GNU General Public License for more details.
16
// You should have received a copy of the GNU General Public License
17
// along with this program; if not, write to the Free Software
18
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
#include "HTTPHeader.hpp"
26
#include "OptionContainer.hpp"
46
// Header-sending mode constants (0, 1 and 2). Their users are not visible in this
// part of the file; presumably they are values for the `headersent` flag that
// DataBuffer::in() receives - TODO confirm.
// NOTE(review): identifiers containing a double underscore are reserved for the
// implementation in C++; renaming them would need a coordinated project-wide change.
#define __DGHEADER_SENDALL 0
47
#define __DGHEADER_SENDFIRSTLINE 1
48
#define __DGHEADER_SENDREST 2
53
// Global option container, declared here and defined outside this part of the file.
// The code below reads download_dir, the dmplugins_begin/dmplugins_end iterators,
// max_content_filter_size and the per-filter-group array fg from it.
extern OptionContainer o;
58
// Default constructor: allocates a one-byte data block and records a buffer length
// of 0. There is no compressed copy (compresseddata is NULL) and no temp file
// (tempfilefd is -1); timeout starts at 20 (unit not stated here - presumably
// seconds, TODO confirm).
DataBuffer::DataBuffer():data(new char[1]), buffer_length(0), compresseddata(NULL), compressed_buffer_length(0),
59
tempfilesize(0), dontsendbody(false), tempfilefd(-1), timeout(20), bytesalreadysent(0), preservetemp(false)
64
// Construct a buffer that holds its own copy of `length` bytes taken from `indata`.
// indata - source bytes to copy
// length - number of bytes to allocate and copy; also becomes buffer_length
// The remaining members get the same initial values as in the default constructor.
// NOTE(review): the lines shown here do not check indata for NULL or length for a
// negative value - confirm callers guarantee both.
DataBuffer::DataBuffer(const void* indata, off_t length):data(new char[length]), buffer_length(length), compresseddata(NULL), compressed_buffer_length(0),
65
tempfilesize(0), dontsendbody(false), tempfilefd(-1), timeout(20), bytesalreadysent(0), preservetemp(false)
67
memcpy(data, indata, length);
70
// Resets the buffer. The statements visible here free the compressed copy, zero its
// recorded length and, when a temp file descriptor is held (tempfilefd > -1),
// unlink the temp file named by tempfilepath.
void DataBuffer::reset()
75
// delete[] on a NULL pointer is a no-op, so no guard is needed before this
delete[]compresseddata;
76
compresseddata = NULL;
78
compressed_buffer_length = 0;
79
if (tempfilefd > -1) {
82
unlink(tempfilepath.toCharArray());
93
// delete the memory block when the class is destroyed
// The statements visible here free the compressed copy when one exists and, when a
// temp file descriptor is held (tempfilefd > -1), unlink the temp file named by
// tempfilepath.
DataBuffer::~DataBuffer()
97
if (compresseddata != NULL) {
98
delete[]compresseddata;
99
compresseddata = NULL;
101
if (tempfilefd > -1) {
104
unlink(tempfilepath.toCharArray());
111
// swap back to a compressed version of the data, if one exists
// also delete uncompressed version
// if body was decompressed but not modified, this can save bandwidth
// Does nothing unless a compressed copy with a non-zero length is being held.
void DataBuffer::swapbacktocompressed()
116
if (compresseddata != NULL && compressed_buffer_length > 0) {
118
// the compressed block becomes the main buffer
buffer_length = compressed_buffer_length;
119
data = compresseddata;
120
// the block is now referenced only through `data`, so clear the compressed
// pointer and length
compresseddata = NULL;
121
compressed_buffer_length = 0;
125
// a much more efficient reader that does not assume the contents of
// the buffer gets filled thus reducing memcpy()ing and new()ing
//
// sock          - socket to read from
// buffer        - destination; each read is placed at buffer[pos] onwards
// size          - capacity of buffer; each read asks for at most size - pos bytes
// sockettimeout - passed through to sock->readFromSocket() for each read
// Returns size when the buffer was filled. When a read yields nothing or fails,
// returns the number of bytes gathered so far if there are any, otherwise the
// read's own return code.
int DataBuffer::bufferReadFromSocket(Socket * sock, char *buffer, int size, int sockettimeout)
132
rc = sock->readFromSocket(&buffer[pos], size - pos, 0, sockettimeout);
134
// none received or an error
136
return pos; // some was received previously into buffer
138
return rc; // just return with the return code
142
return size; // full buffer
145
// a much more efficient reader that does not assume the contents of
// the buffer gets filled thus reducing memcpy()ing and new()ing.
// in addition to the actual socket timeout, used for each individual read, this version
// incorporates a "global" timeout within which all reads must complete.
//
// sock          - socket to read from
// buffer        - destination; each read is placed at buffer[pos] onwards
// size          - capacity of buffer; each read asks for at most size - pos bytes
// sockettimeout - passed through to sock->readFromSocket() for each read
// timeout       - overall limit, compared against elapsed whole seconds (tv_sec)
// Returns size when the buffer was filled. When a read yields nothing or fails,
// returns the number of bytes gathered so far if there are any, otherwise the
// read's own return code. When the overall limit is exceeded, returns the number
// of bytes gathered so far.
int DataBuffer::bufferReadFromSocket(Socket * sock, char *buffer, int size, int sockettimeout, int timeout)
154
struct timeval starttime;
155
struct timeval nowadays;
156
// remember when reading began so the overall limit can be enforced
gettimeofday(&starttime, NULL);
158
rc = sock->readFromSocket(&buffer[pos], size - pos, 0, sockettimeout, false);
160
// none received or an error
162
return pos; // some was received previously into buffer
164
return rc; // just return with the return code
167
gettimeofday(&nowadays, NULL);
168
// only the seconds fields are compared, so the limit has one-second granularity
if (nowadays.tv_sec - starttime.tv_sec > timeout) {
170
std::cout << "buffered socket read more than timeout" << std::endl;
172
return pos; // just return how much got so far then
175
return size; // full buffer
178
// make a temp file and return its FD. only currently used in DM plugins.
// The file is created with mkstemp() from the template "<o.download_dir>/tfXXXXXX".
// A failure to create it is reported on std::cerr and through syslog (LOG_ERR).
// What happens when a descriptor is already held (tempfilefd > -1) is decided on a
// line not shown here - presumably the existing descriptor is returned, TODO confirm.
int DataBuffer::getTempFileFD()
181
if (tempfilefd > -1) {
184
tempfilepath = o.download_dir.c_str();
185
tempfilepath += "/tfXXXXXX";
186
// mkstemp() rewrites its template argument in place, so it needs a writable copy
char *tempfilepatharray = new char[tempfilepath.length() + 1];
187
strcpy(tempfilepatharray, tempfilepath.toCharArray());
188
if ((tempfilefd = mkstemp(tempfilepatharray)) < 0) {
190
std::cerr << "error creating temp " << tempfilepath << ": " << strerror(errno) << std::endl;
192
syslog(LOG_ERR, "Could not create temp file to store download for scanning: %s", strerror(errno));
196
// keep the name mkstemp() generated, for the later unlink() calls
tempfilepath = tempfilepatharray;
198
delete[]tempfilepatharray;
202
// check the client's user agent, see if we have a DM plugin compatible with it, and use it to download the body of the given request
// The plugins between o.dmplugins_begin and o.dmplugins_end are tried in order; the
// first one whose willHandle() accepts the request/response headers performs the
// download, and the last plugin in the list is used without asking.
bool DataBuffer::in(Socket * sock, Socket * peersock, HTTPHeader * requestheader, HTTPHeader * docheader, bool runav, int *headersent)
205
//Socket *sock = where to read from
206
//Socket *peersock = browser to send stuff to for keeping it alive
207
//HTTPHeader *requestheader = header client used to request
208
//HTTPHeader *docheader = header used for sending first line of reply
209
//bool runav = to determine if limit is av or not
210
//int *headersent = to use to send the first line of header if needed
211
// or to mark that the header has already been sent
213
// so we know if we only partially downloaded from
214
// squid so later, if allowed, we can send the rest
217
// match request to download manager so browsers potentially can have a prettier version
218
// and software updates, stream clients, etc. can have a compatible version.
223
// NOTE(review): the list holds Plugin* and each entry is C-style cast to DMPlugin*;
// this assumes every entry really is a DMPlugin - confirm.
for (std::deque<Plugin *>::iterator i = o.dmplugins_begin; i != o.dmplugins_end; i++) {
224
// last plugin in the list: used unconditionally, willHandle() is not consulted
if ((i + 1) == o.dmplugins_end) {
226
std::cerr << "Got to final download manager so defaulting to always match." << std::endl;
228
dm_plugin = (DMPlugin*)(*i);
229
rc = dm_plugin->in(this, sock, peersock, requestheader, docheader, runav, headersent, &toobig);
232
// any earlier plugin is used only if it says it will handle these headers
if (((DMPlugin*)(*i))->willHandle(requestheader, docheader)) {
234
std::cerr << "Matching download manager number: " << j << std::endl;
236
dm_plugin = (DMPlugin*)(*i);
237
rc = dm_plugin->in(this, sock, peersock, requestheader, docheader, runav, headersent, &toobig);
245
// we should check rc and log on error/warn
246
// note for later - Tue 16th November 2004
250
// send the request body to the client after having been handled by a DM plugin
//
// sock - socket the body is written to.
// The body is taken from the temp file when one is open (tempfilefd > -1) and from
// the in-memory `data` block otherwise; in both cases sending starts
// bytesalreadysent bytes in.
// NOTE(review): the dynamic exception specification `throw(exception)` was
// deprecated in C++11 and removed in C++17, so this signature only builds in older
// language modes.
void DataBuffer::out(Socket * sock) throw(exception)
255
std::cout << "dontsendbody true; not sending" << std::endl;
259
(*sock).readyForOutput(timeout); // exceptions on timeout or error
261
if (tempfilefd > -1) {
262
// must have been too big for ram so stream from disk in blocks
264
std::cout << "Sending " << tempfilesize - bytesalreadysent << " bytes from temp file (" << bytesalreadysent << " already sent)" << std::endl;
266
off_t sent = bytesalreadysent;
268
// skip the part of the temp file that was already delivered
// NOTE(review): the result of lseek() is not checked
lseek(tempfilefd, bytesalreadysent, SEEK_SET);
269
while (sent < tempfilesize) {
270
// `data` doubles as the block buffer, so each read is at most buffer_length bytes
rc = readEINTR(tempfilefd, data, buffer_length);
272
std::cout << "reading temp file rc:" << rc << std::endl;
276
std::cout << "error reading temp file so throwing exception" << std::endl;
282
std::cout << "got zero bytes reading temp file" << std::endl;
284
break; // should never happen
286
// as it's cached to disk the buffer must be reasonably big
287
if (!(*sock).writeToSocket(data, rc, 0, timeout)) {
292
std::cout << "total sent from temp:" << sent << std::endl;
298
unlink(tempfilepath.toCharArray());
301
std::cout << "Sending " << buffer_length - bytesalreadysent << " bytes from RAM (" << buffer_length << " in buffer; " << bytesalreadysent << " already sent)" << std::endl;
303
// it's in RAM, so just send it, no streaming from disk
304
if (buffer_length != 0) {
305
if (!(*sock).writeToSocket(data + bytesalreadysent, buffer_length - bytesalreadysent, 0, timeout))
308
if (!sock->writeToSocket("\r\n\r\n", 4, 0, timeout))
314
// zlib decompression
//
// Inflates the contents of `data` into a separately allocated output block that
// starts at five times the compressed size and is enlarged while inflating when it
// runs out of room. The final statements keep the original bytes as
// compresseddata/compressed_buffer_length, set buffer_length to the inflated size
// and replace `data` with a NUL-terminated copy of the inflated bytes.
// header - selects between the two inflateInit2() calls below; which value picks
//          which call is decided on lines not shown here - TODO confirm.
// Buffers shorter than 12 bytes are left untouched. Inflation is abandoned when
// zlib reports an error or when the output exceeds o.max_content_filter_size.
void DataBuffer::zlibinflate(bool header)
317
if (buffer_length < 12) {
318
return; // it can't possibly be zlib'd
321
std::cout << "compressed size:" << buffer_length << std::endl;
324
#if ZLIB_VERNUM < 0x1210
325
#warning ************************************
326
#warning For gzip support you need zlib 1.2.1
327
#warning or later to be installed.
328
#warning You can ignore this warning but
329
#warning internet bandwidth may be wasted.
330
#warning ************************************
336
int newsize = buffer_length * 5; // good estimate of inflated (decompressed) HTML
338
char *block = new char[newsize];
344
// NULL allocator hooks make zlib use its default allocation routines
d_stream.zalloc = (alloc_func) 0;
345
d_stream.zfree = (free_func) 0;
346
d_stream.opaque = (voidpf) 0;
347
d_stream.next_in = (Bytef *) data;
348
d_stream.avail_in = buffer_length;
349
d_stream.next_out = (Bytef *) block;
350
d_stream.avail_out = newsize;
352
// inflate either raw zlib, or possibly gzip with a header
354
// windowBits 15 + 32: per the zlib manual, adding 32 enables zlib and gzip
// decoding with automatic header detection
err = inflateInit2(&d_stream, 15 + 32);
356
// negative windowBits (-15): raw deflate data with no zlib or gzip header
err = inflateInit2(&d_stream, -15);
359
if (err != Z_OK) { // was a problem so just return
360
delete[]block; // don't forget to free claimed memory
362
std::cerr << "bad init inflate: " << err << std::endl;
368
std::cerr << "inflate loop" << std::endl;
370
err = inflate(&d_stream, Z_SYNC_FLUSH);
371
// total_out is the running count of bytes zlib has produced so far
bytesgot = d_stream.total_out;
372
if (err == Z_STREAM_END) {
373
err = inflateEnd(&d_stream);
377
std::cerr << "bad inflateEnd: " << d_stream.msg << std::endl;
383
if (err != Z_OK) { // was a problem so just return
384
delete[]block; // don't forget to free claimed memory
386
std::cerr << "bad inflate: " << d_stream.msg << std::endl;
388
err = inflateEnd(&d_stream);
391
std::cerr << "bad inflateEnd: " << d_stream.msg << std::endl;
396
// give up once the output has grown past the configured content filter limit
if (bytesgot > o.max_content_filter_size) {
397
delete[]block; // don't forget to free claimed memory
399
std::cerr << "inflated file larger than maxcontentfiltersize, not inflating further" << std::endl;
401
err = inflateEnd(&d_stream);
404
std::cerr << "bad inflateEnd: " << d_stream.msg << std::endl;
410
// inflation is going ok, but we don't have enough room in the output buffer
411
// move what has been produced into a buffer twice that size and let zlib
// continue writing directly after it
newsize = bytesgot * 2;
412
temp = new char[newsize];
413
memcpy(temp, block, bytesgot);
418
d_stream.next_out = (Bytef *) (block + bytesgot);
419
d_stream.avail_out = newsize - bytesgot;
421
// keep the original compressed bytes alongside the inflated ones
compresseddata = data;
422
compressed_buffer_length = buffer_length;
423
buffer_length = bytesgot;
425
std::cout << "decompressed size: " << buffer_length << std::endl;
427
// one extra byte so the inflated content can be NUL-terminated
data = new char[bytesgot+1];
428
data[bytesgot] = '\0';
429
memcpy(data, block, bytesgot);
433
// Does a regexp search and replace.
// Record describing one queued replacement for DataBuffer::contentRegExp, which
// accesses it through its `match` and `replacement` members.
typedef struct newreplacement
439
// Applies the content regular expressions configured for `filtergroup`
// (content_regexp_list_comp, with their replacement strings in
// content_regexp_list_rep) to the buffer. For each expression that matches, the
// replacement text for every match is built first, then a new block of the
// resulting size is allocated and filled from the unmatched stretches of the
// original plus the replacements.
// Replacement strings may refer to sub-matches as \1..\9 or $1..$9; \\ and \$ are
// unescaped.
// filtergroup - index into o.fg selecting which group's expression lists to use
// Returns true if the content was modified.
bool DataBuffer::contentRegExp(int filtergroup)
443
std::cout << "Starting content reg exp replace" << std::endl;
445
bool contentmodified = false;
447
unsigned int j, k, m;
448
unsigned int s = (*o.fg[filtergroup]).content_regexp_list_comp.size();
449
unsigned int matches;
450
unsigned int submatch, submatches;
458
unsigned int nextoffset;
459
unsigned int matchlen;
461
std::queue<newreplacement*> matchqueue;
463
for (i = 0; i < s; i++) {
464
re = &((*o.fg[filtergroup]).content_regexp_list_comp[i]);
465
// NOTE(review): data is passed as a plain char*, so matching presumably relies
// on the buffer being NUL-terminated - confirm.
if (re->match(data)) {
466
replacement = &((*o.fg[filtergroup]).content_regexp_list_rep[i]);
467
//replen = replacement->length();
468
matches = re->numberOfMatches();
472
for (j = 0; j < matches; j++) {
473
srcoff = re->offset(j);
474
matchlen = re->length(j);
476
// Count matches for ()'s
477
for (submatches = 0; j+submatches+1 < matches; submatches++)
478
if (re->offset(j+submatches+1) + re->length(j+submatches+1) > srcoff + matchlen)
481
// \1 and $1 replacement
483
// store match no. and default (empty) replacement string
484
// NOTE(review): allocated with new and handed to matchqueue; where these objects
// are released is not visible in the lines shown here - confirm they are deleted.
newreplacement* newrep = new newreplacement;
486
newrep->replacement = "";
487
// iterate over regex's replacement string
488
for (k = 0; k < replacement->length(); k++) {
489
// look for \1..\9 and $1..$9
490
// NOTE(review): when k is the last index, [k+1] reads one position past the final
// character; whether that is safe depends on the string class's operator[] - confirm.
if (((*replacement)[k] == '\\' || (*replacement)[k] == '$') && (*replacement)[k+1] >= '1' && (*replacement)[k+1] <= '9') {
491
// determine match number
492
submatch = (*replacement)[++k] - '0';
493
// add submatch contents to replacement string
494
if (submatch <= submatches) {
495
newrep->replacement += re->result(j + submatch).c_str();
498
// unescape \\ and \$, and add other non-backreference characters
499
if ((*replacement)[k] == '\\' && ((*replacement)[k+1] == '\\' || (*replacement)[k+1] == '$'))
501
newrep->replacement += replacement->subString(k, 1);
504
matchqueue.push(newrep);
506
// update size difference between original and modified content
507
sizediff -= re->length(j);
508
sizediff += newrep->replacement.length();
509
// skip submatches to next top level match
514
// now we know eventual size of content-replaced block, allocate memory for it
515
// one extra byte beyond the new content length holds the terminating NUL
newblock = new char[buffer_length + sizediff + 1];
516
newblock[buffer_length + sizediff] = '\0';
522
std::cout << "content matches:" << matches << std::endl;
524
// replace top-level matches using filled-out replacement strings
525
newreplacement* newrep;
526
for (j = 0; j < matches; j++) {
527
newrep = matchqueue.front();
528
nextoffset = re->offset(newrep->match);
529
// copy the untouched text lying between the previous match and this one
if (nextoffset > srcoff) {
530
memcpy(dstpos, data + srcoff, nextoffset - srcoff);
531
dstpos += nextoffset - srcoff;
534
replen = newrep->replacement.length();
535
memcpy(dstpos, newrep->replacement.toCharArray(), replen);
537
srcoff += re->length(newrep->match);
541
// copy whatever follows the last match
if (srcoff < buffer_length) {
542
memcpy(dstpos, data + srcoff, buffer_length - srcoff);
546
buffer_length = buffer_length + sizediff;
547
contentmodified = true;
550
return contentmodified;