1
/* $Id: esort.h,v 1.9 2003/07/01 14:05:52 terpstra Exp $
3
* esort.h - Public interface to libesort
5
* Copyright (C) 2002 - Wesley W. Terpstra
9
* Authors: 'Wesley W. Terpstra' <wesley@terpstra.ca>
11
* This program is free software; you can redistribute it and/or modify
12
* it under the terms of the GNU General Public License as published by
13
* the Free Software Foundation; version 2.1.
15
* This program is distributed in the hope that it will be useful,
16
* but WITHOUT ANY WARRANTY; without even the implied warranty of
17
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
* GNU General Public License for more details.
20
* You should have received a copy of the GNU General Public License
21
* along with this program; if not, write to the Free Software
22
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31
/* What is ESort? -- An Online External Sort
33
* ESort provides a very primitive database API: An insert-only set of keys.
35
* There are very different trade-offs when compared with a standard database.
37
* N = number of keys in the set, M = number of operations
38
* The seeks needed to for each repeated operation in one transaction:
41
* Insertion O(1) O(M*log(N)) O(M)
42
* Read in sequence O(1) O(M) impossible
43
* Seek to key O(M*log^2(N)) O(M*log(N)) O(M)
44
* Delete impossible O(M*log(N)) O(M)
46
* From this table, one can conclude that ESort allows blindly fast insertion
47
* and sequential read, but pays in the cost to seek. This is what makes it
48
* highly appropriate for keyword indexes.
50
* An additional restriction is that ESort only supports a single-writer.
51
* An additional advantage is that ESort readers get snap-shots.
60
/** These parameters specify minimum values required for a database.
61
* If an existing database has too small a value, an error is returned.
62
* The unique flag toggles whether the database is a set or multiset.
67
unsigned int version_;
68
unsigned long blockSize_;
69
unsigned long keySize_;
72
unsigned int keyWidth_;
75
// keySize & blockSize are the number of bytes maximum
79
unsigned int version = 1,
80
unsigned long blockSize = 8192,
81
unsigned long keySize = 255);
83
bool isWider(const Parameters& o);
85
unsigned int version () const { return version_; }
86
unsigned long blockSize() const { return blockSize_; }
87
unsigned long keySize () const { return keySize_; }
88
bool unique () const { return unique_; }
89
bool synced () const { return synced_; }
90
unsigned int keyWidth () const { return keyWidth_; }
92
static Parameters minimize(const Parameters& x)
94
return Parameters(x.synced(), x.unique(), 1, 8, 1);
98
/** The direction in which the Walker walks the database.
100
* A Forward Query will find the first key >= the request
101
* A Backward Query will find the last key < the request
103
* The Forward Query moves the iterator so the key increases
104
* The Backward Query moves the iterator so the key decreases
106
* In this manner Forward + Backward patition the database's keys
114
/** The Walker class walks the database in sorted order.
115
* As from the chart above, each invokation of advance() pays 0 seeks.
120
/** The current key which the walker points to.
121
* This is not valid until advance is called once.
125
/** Advance to the next key in the database.
126
* Returned is the number of bytes in common with the last key.
127
* -1 is returned on error, errno = 0 -> reached EOF.
129
virtual int advance() = 0;
134
/** A Reader object allows you to obtain Walkers.
136
* A Reader is atomic. When you obtain a Reader, the contents of the set
137
* remain fixed for as long as you keep a hold of the handle. Furthermore,
138
* a Reader will take snapshots of the database only between Writer commits.
145
/** Obtain a Walker as described in Direction.
146
* This costs O(log^2(N)) seeks, so try not to call it too often.
148
* This operation never fails; you must call advance() before use.
150
virtual auto_ptr<Walker> seek(const string& k, Direction dir) = 0;
152
/** Obtain a record Walker over only those records which are
153
* prefixed by 'prefix'. When the last such is passed, eof is
156
* This operation never failes; you must call advance() before use.
158
virtual auto_ptr<Walker> seek(const string& prefix, const string& k, Direction dir) = 0;
160
/** Open an existing database for reading.
161
* 0 is returned and errno set on error.
163
static auto_ptr<Reader> opendb(
165
const Parameters& p = Parameters::minimize(Parameters()));
168
/** The Writer object allows you to insert keys.
170
* The Writer is atomic. When a key is inserted, it immediately becomes
171
* available in all Walkers & Readers obtained from the Writer. However,
172
* all open Readers never see the inserted keys, and new Readers do not
173
* see them until the Writer calls commit.
175
class Writer : public Reader
178
/** Insert a key into the database.
179
* If an error occures, -1 is returned and errno set, else 0.
180
* Be sure the key is smaller than keySize on pain of EINVAL.
182
virtual int insert(const string& k) = 0;
184
/** Commit the changes to the database.
185
* If an error occures, -1 is returned and errno set, else 0.
187
virtual int commit() = 0;
189
/** Open a database for writing.
190
* It is created if it did not exist.
192
* The mode is identical to open(2).
194
* 0 is returned and errno set on error.
196
static auto_ptr<Writer> opendb(
197
const string& db, const Parameters& p = Parameters(),