1
# DB_File.pm -- Perl 5 interface to Berkeley DB
3
# written by Paul Marquess (Paul.Marquess@btinternet.com)
4
# last modified 1st September 2002
7
# Copyright (c) 1995-2002 Paul Marquess. All rights reserved.
8
# This program is free software; you can redistribute it and/or
9
# modify it under the same terms as Perl itself.
12
package DB_File::HASHINFO ;
20
@DB_File::HASHINFO::ISA = qw(Tie::Hash);
53
return $self->{GOT}{$key} if exists $self->{VALID}{$key} ;
56
croak "${pkg}::FETCH - Unknown element '$key'" ;
66
my $type = $self->{VALID}{$key};
70
croak "Key '$key' not associated with a code reference"
71
if $type == 2 && !ref $value && ref $value ne 'CODE';
72
$self->{GOT}{$key} = $value ;
77
croak "${pkg}::STORE - Unknown element '$key'" ;
85
if ( exists $self->{VALID}{$key} )
87
delete $self->{GOT}{$key} ;
92
croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ;
100
exists $self->{VALID}{$key} ;
108
croak ref($self) . " does not define the method ${method}" ;
111
sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
112
sub NEXTKEY { my $self = shift ; $self->NotHere("NEXTKEY") }
113
sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") }
115
package DB_File::RECNOINFO ;
120
@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
126
bless { VALID => { map {$_, 1}
127
qw( bval cachesize psize flags lorder reclen bfname )
133
package DB_File::BTREEINFO ;
138
@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
163
our ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO);
164
our ($db_version, $use_XSLoader, $splice_end_array);
171
local $SIG{__WARN__} = sub {$splice_end_array = "@_";};
172
my @a =(1); splice(@a, 3);
174
($splice_end_array =~ /^splice\(\) offset past end of array at /);
177
#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
178
$DB_BTREE = new DB_File::BTREEINFO ;
179
$DB_HASH = new DB_File::HASHINFO ;
180
$DB_RECNO = new DB_File::RECNOINFO ;
187
{ local $SIG{__DIE__} ; eval { require XSLoader } ; }
192
@ISA = qw(DynaLoader);
196
push @ISA, qw(Tie::Hash Exporter);
198
$DB_BTREE $DB_HASH $DB_RECNO
233
($constname = $AUTOLOAD) =~ s/.*:://;
234
my ($error, $val) = constant($constname);
235
Carp::croak $error if $error;
237
*{$AUTOLOAD} = sub { $val };
243
# Make all Fcntl O_XXX constants available for importing
245
my @O = grep /^O_/, @Fcntl::EXPORT;
246
Fcntl->import(@O); # first we import what we want to export
251
{ XSLoader::load("DB_File", $VERSION)}
253
{ bootstrap DB_File $VERSION }
255
# Preloaded methods go here. Autoload methods go after __END__, and are
256
# processed by the autosplit program.
258
sub tie_hash_or_array
261
my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;
263
$arg[4] = tied %{ $arg[4] }
264
if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;
266
$arg[2] = O_CREAT()|O_RDWR() if @arg >=3 && ! defined $arg[2];
267
$arg[3] = 0666 if @arg >=4 && ! defined $arg[3];
269
# make recno in Berkeley DB version 2 work like recno in version 1.
270
if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
271
$arg[1] and ! -e $arg[1]) {
272
open(FH, ">$arg[1]") or return undef ;
274
chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
277
DoTie_($tieHASH, @arg) ;
282
tie_hash_or_array(@_) ;
287
tie_hash_or_array(@_) ;
295
my $status = $self->seq($key, $value, R_FIRST());
298
while ($status == 0) {
300
$status = $self->seq($key, $value, R_NEXT());
302
foreach $key (reverse @keys) {
303
my $s = $self->del($key);
313
my $current_length = $self->length() ;
315
if ($length < $current_length) {
317
for ($key = $current_length - 1 ; $key >= $length ; -- $key)
320
elsif ($length > $current_length) {
321
$self->put($length-1, "") ;
330
if (not defined $offset) {
331
warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
335
my $length = @_ ? shift : 0;
336
# Carping about definedness comes _after_ the OFFSET sanity check.
337
# This is so we get the same error messages as Perl's splice().
342
my $size = $self->FETCHSIZE();
344
# 'If OFFSET is negative then it start that far from the end of
348
my $new_offset = $size + $offset;
349
if ($new_offset < 0) {
350
die "Modification of non-creatable array value attempted, "
351
. "subscript $offset";
353
$offset = $new_offset;
356
if (not defined $length) {
357
warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
361
if ($offset > $size) {
363
warnings::warnif('misc', 'splice() offset past end of array')
364
if $splice_end_array;
367
# 'If LENGTH is omitted, removes everything from OFFSET onward.'
368
if (not defined $length) {
369
$length = $size - $offset;
372
# 'If LENGTH is negative, leave that many elements off the end of
376
$length = $size - $offset + $length;
379
# The user must have specified a length bigger than the
380
# length of the array passed in. But perl's splice()
381
# doesn't catch this, it just behaves as for length=0.
387
if ($length > $size - $offset) {
388
$length = $size - $offset;
391
# $num_elems holds the current number of elements in the database.
392
my $num_elems = $size;
394
# 'Removes the elements designated by OFFSET and LENGTH from an
398
foreach (0 .. $length - 1) {
400
my $status = $self->get($offset, $old);
402
my $msg = "error from Berkeley DB on get($offset, \$old)";
404
$msg .= ' (no such element?)';
407
$msg .= ": error status $status";
408
if (defined $! and $! ne '') {
409
$msg .= ", message $!";
416
$status = $self->del($offset);
418
my $msg = "error from Berkeley DB on del($offset)";
420
$msg .= ' (no such element?)';
423
$msg .= ": error status $status";
424
if (defined $! and $! ne '') {
425
$msg .= ", message $!";
434
# ...'and replaces them with the elements of LIST, if any.'
436
while (defined (my $elem = shift @list)) {
439
if ($pos >= $num_elems) {
440
$status = $self->put($pos, $elem);
443
$status = $self->put($pos, $elem, $self->R_IBEFORE);
447
my $msg = "error from Berkeley DB on put($pos, $elem, ...)";
449
$msg .= ' (no such element?)';
452
$msg .= ", error status $status";
453
if (defined $! and $! ne '') {
454
$msg .= ", message $!";
460
die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE"
468
# 'In list context, returns the elements removed from the
473
elsif (defined wantarray and not wantarray) {
474
# 'In scalar context, returns the last element removed, or
475
# undef if no elements are removed.'
478
my $last = pop @removed;
485
elsif (not defined wantarray) {
490
sub ::DB_File::splice { &SPLICE }
494
croak "Usage: \$db->find_dup(key,value)\n"
498
my ($origkey, $value_wanted) = @_ ;
499
my ($key, $value) = ($origkey, 0);
502
for ($status = $db->seq($key, $value, R_CURSOR() ) ;
504
$status = $db->seq($key, $value, R_NEXT() ) ) {
506
return 0 if $key eq $origkey and $value eq $value_wanted ;
514
croak "Usage: \$db->del_dup(key,value)\n"
518
my ($key, $value) = @_ ;
519
my ($status) = $db->find_dup($key, $value) ;
520
return $status if $status != 0 ;
522
$status = $db->del($key, R_CURSOR() ) ;
528
croak "Usage: \$db->get_dup(key [,flag])\n"
529
unless @_ == 2 or @_ == 3 ;
536
my $wantarray = wantarray ;
542
# iterate through the database until either EOF ($status == 0)
543
# or a different key is encountered ($key ne $origkey).
544
for ($status = $db->seq($key, $value, R_CURSOR()) ;
545
$status == 0 and $key eq $origkey ;
546
$status = $db->seq($key, $value, R_NEXT()) ) {
548
# save the value or count number of matches
551
{ ++ $values{$value} }
553
{ push (@values, $value) }
560
return ($wantarray ? ($flag ? %values : @values) : $counter) ;
569
DB_File - Perl5 access to Berkeley DB version 1.x
575
[$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
576
[$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
577
[$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
579
$status = $X->del($key [, $flags]) ;
580
$status = $X->put($key, $value [, $flags]) ;
581
$status = $X->get($key, $value [, $flags]) ;
582
$status = $X->seq($key, $value, $flags) ;
583
$status = $X->sync([$flags]) ;
587
$count = $X->get_dup($key) ;
588
@list = $X->get_dup($key) ;
589
%list = $X->get_dup($key, 1) ;
590
$status = $X->find_dup($key, $value) ;
591
$status = $X->del_dup($key, $value) ;
599
@r = $X->splice(offset, length, elements);
602
$old_filter = $db->filter_store_key ( sub { ... } ) ;
603
$old_filter = $db->filter_store_value( sub { ... } ) ;
604
$old_filter = $db->filter_fetch_key ( sub { ... } ) ;
605
$old_filter = $db->filter_fetch_value( sub { ... } ) ;
612
B<DB_File> is a module which allows Perl programs to make use of the
613
facilities provided by Berkeley DB version 1.x (if you have a newer
614
version of DB, see L<Using DB_File with Berkeley DB version 2 or greater>).
615
It is assumed that you have a copy of the Berkeley DB manual pages at
616
hand when reading this documentation. The interface defined here
617
mirrors the Berkeley DB interface closely.
619
Berkeley DB is a C library which provides a consistent interface to a
620
number of database formats. B<DB_File> provides an interface to all
621
three of the database types currently supported by Berkeley DB.
629
This database type allows arbitrary key/value pairs to be stored in data
630
files. This is equivalent to the functionality provided by other
631
hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
632
the files created using DB_HASH are not compatible with any of the
633
other packages mentioned.
635
A default hashing algorithm, which will be adequate for most
636
applications, is built into Berkeley DB. If you do need to use your own
637
hashing algorithm it is possible to write your own in Perl and have
638
B<DB_File> use it instead.
642
The btree format allows arbitrary key/value pairs to be stored in a
643
sorted, balanced binary tree.
645
As with the DB_HASH format, it is possible to provide a user defined
646
Perl routine to perform the comparison of keys. By default, though, the
647
keys are stored in lexical order.
651
DB_RECNO allows both fixed-length and variable-length flat text files
652
to be manipulated using the same key/value pair interface as in DB_HASH
653
and DB_BTREE. In this case the key will consist of a record (line)
658
=head2 Using DB_File with Berkeley DB version 2 or greater
660
Although B<DB_File> is intended to be used with Berkeley DB version 1,
661
it can also be used with version 2, 3 or 4. In this case the interface is
662
limited to the functionality provided by Berkeley DB 1.x. Anywhere the
663
version 2 or greater interface differs, B<DB_File> arranges for it to work
664
like version 1. This feature allows B<DB_File> scripts that were built
665
with version 1 to be migrated to version 2 or greater without any changes.
667
If you want to make use of the new features available in Berkeley DB
668
2.x or greater, use the Perl module B<BerkeleyDB> instead.
670
B<Note:> The database file format has changed multiple times in Berkeley
671
DB version 2, 3 and 4. If you cannot recreate your databases, you
672
must dump any existing databases with either the C<db_dump> or the
673
C<db_dump185> utility that comes with Berkeley DB.
674
Once you have rebuilt DB_File to use Berkeley DB version 2 or greater,
675
your databases can be recreated using C<db_load>. Refer to the Berkeley DB
676
documentation for further details.
678
Please read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley
681
=head2 Interface to Berkeley DB
683
B<DB_File> allows access to Berkeley DB files using the tie() mechanism
684
in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
685
allows B<DB_File> to access Berkeley DB files using either an
686
associative array (for DB_HASH & DB_BTREE file types) or an ordinary
687
array (for the DB_RECNO file type).
689
In addition to the tie() interface, it is also possible to access most
690
of the functions provided in the Berkeley DB API directly.
691
See L<THE API INTERFACE>.
693
=head2 Opening a Berkeley DB Database File
695
Berkeley DB uses the function dbopen() to open or create a database.
696
Here is the C prototype for dbopen():
699
dbopen (const char * file, int flags, int mode,
700
DBTYPE type, const void * openinfo)
702
The parameter C<type> is an enumeration which specifies which of the 3
703
interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
704
Depending on which of these is actually chosen, the final parameter,
705
I<openinfo> points to a data structure which allows tailoring of the
706
specific interface method.
708
This interface is handled slightly differently in B<DB_File>. Here is
709
an equivalent call using B<DB_File>:
711
tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
713
The C<filename>, C<flags> and C<mode> parameters are the direct
714
equivalent of their dbopen() counterparts. The final parameter $DB_HASH
715
performs the function of both the C<type> and C<openinfo> parameters in
718
In the example above $DB_HASH is actually a pre-defined reference to a
719
hash object. B<DB_File> has three of these pre-defined references.
720
Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
722
The keys allowed in each of these pre-defined references is limited to
723
the names used in the equivalent C structure. So, for example, the
724
$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
725
C<ffactor>, C<hash>, C<lorder> and C<nelem>.
727
To change one of these elements, just assign to it like this:
729
$DB_HASH->{'cachesize'} = 10000 ;
731
The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
732
usually adequate for most applications. If you do need to create extra
733
instances of these objects, constructors are available for each file
736
Here are examples of the constructors and the valid options available
737
for DB_HASH, DB_BTREE and DB_RECNO respectively.
739
$a = new DB_File::HASHINFO ;
747
$b = new DB_File::BTREEINFO ;
757
$c = new DB_File::RECNOINFO ;
766
The values stored in the hashes above are mostly the direct equivalent
767
of their C counterpart. Like their C counterparts, all are set to a
768
default values - that means you don't have to set I<all> of the
769
values when you only want to change one. Here is an example:
771
$a = new DB_File::HASHINFO ;
772
$a->{'cachesize'} = 12345 ;
773
tie %y, 'DB_File', "filename", $flags, 0777, $a ;
775
A few of the options need extra discussion here. When used, the C
776
equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
777
to C functions. In B<DB_File> these keys are used to store references
778
to Perl subs. Below are templates for each of the subs:
784
# return the hash value for $data
790
my ($key, $key2) = @_ ;
792
# return 0 if $key1 eq $key2
793
# -1 if $key1 lt $key2
794
# 1 if $key1 gt $key2
795
return (-1 , 0 or 1) ;
800
my ($key, $key2) = @_ ;
802
# return number of bytes of $key2 which are
803
# necessary to determine that it is greater than $key1
807
See L<Changing the BTREE sort order> for an example of using the
810
If you are using the DB_RECNO interface and you intend making use of
811
C<bval>, you should check out L<The 'bval' Option>.
813
=head2 Default Parameters
815
It is possible to omit some or all of the final 4 parameters in the
816
call to C<tie> and let them take default values. As DB_HASH is the most
817
common file format used, the call:
819
tie %A, "DB_File", "filename" ;
823
tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
825
It is also possible to omit the filename parameter as well, so the
832
tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
834
See L<In Memory Databases> for a discussion on the use of C<undef>
835
in place of a filename.
837
=head2 In Memory Databases
839
Berkeley DB allows the creation of in-memory databases by using NULL
840
(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
841
uses C<undef> instead of NULL to provide this functionality.
845
The DB_HASH file format is probably the most commonly used of the three
846
file formats that B<DB_File> supports. It is also very straightforward
849
=head2 A Simple Example
851
This example shows how to create a database, add key/value pairs to the
852
database, delete keys/value pairs and finally how to enumerate the
853
contents of the database.
861
tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH
862
or die "Cannot open file 'fruit': $!\n";
864
# Add a few key/value pairs to the file
865
$h{"apple"} = "red" ;
866
$h{"orange"} = "orange" ;
867
$h{"banana"} = "yellow" ;
868
$h{"tomato"} = "red" ;
870
# Check for existence of a key
871
print "Banana Exists\n\n" if $h{"banana"} ;
873
# Delete a key/value pair.
876
# print the contents of the file
877
while (($k, $v) = each %h)
878
{ print "$k -> $v\n" }
890
Note that the like ordinary associative arrays, the order of the keys
891
retrieved is in an apparently random order.
895
The DB_BTREE format is useful when you want to store data in a given
896
order. By default the keys will be stored in lexical order, but as you
897
will see from the example shown in the next section, it is very easy to
898
define your own sorting function.
900
=head2 Changing the BTREE sort order
902
This script shows how to override the default sorting algorithm that
903
BTREE uses. Instead of using the normal lexical ordering, a case
904
insensitive compare function will be used.
914
my ($key1, $key2) = @_ ;
915
"\L$key1" cmp "\L$key2" ;
918
# specify the Perl sub that will do the comparison
919
$DB_BTREE->{'compare'} = \&Compare ;
922
tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE
923
or die "Cannot open file 'tree': $!\n" ;
925
# Add a key/value pair to the file
926
$h{'Wall'} = 'Larry' ;
927
$h{'Smith'} = 'John' ;
928
$h{'mouse'} = 'mickey' ;
929
$h{'duck'} = 'donald' ;
934
# Cycle through the keys printing them in order.
935
# Note it is not necessary to sort the keys as
936
# the btree will have kept them in order automatically.
942
Here is the output from the code above.
948
There are a few point to bear in mind if you want to change the
949
ordering in a BTREE database:
955
The new compare function must be specified when you create the database.
959
You cannot change the ordering once the database has been created. Thus
960
you must use the same compare function every time you access the
965
Duplicate keys are entirely defined by the comparison function.
966
In the case-insensitive example above, the keys: 'KEY' and 'key'
967
would be considered duplicates, and assigning to the second one
968
would overwrite the first. If duplicates are allowed for (with the
969
R_DUPS flag discussed below), only a single copy of duplicate keys
970
is stored in the database --- so (again with example above) assigning
971
three values to the keys: 'KEY', 'Key', and 'key' would leave just
972
the first key: 'KEY' in the database with three values. For some
973
situations this results in information loss, so care should be taken
974
to provide fully qualified comparison functions when necessary.
975
For example, the above comparison routine could be modified to
976
additionally compare case-sensitively if two keys are equal in the
977
case insensitive comparison:
980
my($key1, $key2) = @_;
981
lc $key1 cmp lc $key2 ||
985
And now you will only have duplicates when the keys themselves
986
are truly the same. (note: in versions of the db library prior to
987
about November 1996, such duplicate keys were retained so it was
988
possible to recover the original keys in sets of keys that
994
=head2 Handling Duplicate Keys
996
The BTREE file type optionally allows a single key to be associated
997
with an arbitrary number of values. This option is enabled by setting
998
the flags element of C<$DB_BTREE> to R_DUP when creating the database.
1000
There are some difficulties in using the tied hash interface if you
1001
want to manipulate a BTREE database with duplicate keys. Consider this
1008
my ($filename, %h) ;
1010
$filename = "tree" ;
1013
# Enable duplicate records
1014
$DB_BTREE->{'flags'} = R_DUP ;
1016
tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
1017
or die "Cannot open $filename: $!\n";
1019
# Add some key/value pairs to the file
1020
$h{'Wall'} = 'Larry' ;
1021
$h{'Wall'} = 'Brick' ; # Note the duplicate key
1022
$h{'Wall'} = 'Brick' ; # Note the duplicate key and value
1023
$h{'Smith'} = 'John' ;
1024
$h{'mouse'} = 'mickey' ;
1026
# iterate through the associative array
1027
# and print each key/value pair.
1028
foreach (sort keys %h)
1029
{ print "$_ -> $h{$_}\n" }
1041
As you can see 3 records have been successfully created with key C<Wall>
1042
- the only thing is, when they are retrieved from the database they
1043
I<seem> to have the same value, namely C<Larry>. The problem is caused
1044
by the way that the associative array interface works. Basically, when
1045
the associative array interface is used to fetch the value associated
1046
with a given key, it will only ever retrieve the first value.
1048
Although it may not be immediately obvious from the code above, the
1049
associative array interface can be used to write values with duplicate
1050
keys, but it cannot be used to read them back from the database.
1052
The way to get around this problem is to use the Berkeley DB API method
1053
called C<seq>. This method allows sequential access to key/value
1054
pairs. See L<THE API INTERFACE> for details of both the C<seq> method
1055
and the API in general.
1057
Here is the script above rewritten using the C<seq> API method.
1063
my ($filename, $x, %h, $status, $key, $value) ;
1065
$filename = "tree" ;
1068
# Enable duplicate records
1069
$DB_BTREE->{'flags'} = R_DUP ;
1071
$x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
1072
or die "Cannot open $filename: $!\n";
1074
# Add some key/value pairs to the file
1075
$h{'Wall'} = 'Larry' ;
1076
$h{'Wall'} = 'Brick' ; # Note the duplicate key
1077
$h{'Wall'} = 'Brick' ; # Note the duplicate key and value
1078
$h{'Smith'} = 'John' ;
1079
$h{'mouse'} = 'mickey' ;
1081
# iterate through the btree using seq
1082
# and print each key/value pair.
1084
for ($status = $x->seq($key, $value, R_FIRST) ;
1086
$status = $x->seq($key, $value, R_NEXT) )
1087
{ print "$key -> $value\n" }
1100
This time we have got all the key/value pairs, including the multiple
1101
values associated with the key C<Wall>.
1103
To make life easier when dealing with duplicate keys, B<DB_File> comes with
1104
a few utility methods.
1106
=head2 The get_dup() Method
1108
The C<get_dup> method assists in
1109
reading duplicate values from BTREE databases. The method can take the
1112
$count = $x->get_dup($key) ;
1113
@list = $x->get_dup($key) ;
1114
%list = $x->get_dup($key, 1) ;
1116
In a scalar context the method returns the number of values associated
1117
with the key, C<$key>.
1119
In list context, it returns all the values which match C<$key>. Note
1120
that the values will be returned in an apparently random order.
1122
In list context, if the second parameter is present and evaluates
1123
TRUE, the method returns an associative array. The keys of the
1124
associative array correspond to the values that matched in the BTREE
1125
and the values of the array are a count of the number of times that
1126
particular value occurred in the BTREE.
1128
So assuming the database created above, we can use C<get_dup> like
1135
my ($filename, $x, %h) ;
1137
$filename = "tree" ;
1139
# Enable duplicate records
1140
$DB_BTREE->{'flags'} = R_DUP ;
1142
$x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
1143
or die "Cannot open $filename: $!\n";
1145
my $cnt = $x->get_dup("Wall") ;
1146
print "Wall occurred $cnt times\n" ;
1148
my %hash = $x->get_dup("Wall", 1) ;
1149
print "Larry is there\n" if $hash{'Larry'} ;
1150
print "There are $hash{'Brick'} Brick Walls\n" ;
1152
my @list = sort $x->get_dup("Wall") ;
1153
print "Wall => [@list]\n" ;
1155
@list = $x->get_dup("Smith") ;
1156
print "Smith => [@list]\n" ;
1158
@list = $x->get_dup("Dog") ;
1159
print "Dog => [@list]\n" ;
1164
Wall occurred 3 times
1166
There are 2 Brick Walls
1167
Wall => [Brick Brick Larry]
1171
=head2 The find_dup() Method
1173
$status = $X->find_dup($key, $value) ;
1175
This method checks for the existence of a specific key/value pair. If the
1176
pair exists, the cursor is left pointing to the pair and the method
1177
returns 0. Otherwise the method returns a non-zero value.
1179
Assuming the database from the previous example:
1185
my ($filename, $x, %h, $found) ;
1187
$filename = "tree" ;
1189
# Enable duplicate records
1190
$DB_BTREE->{'flags'} = R_DUP ;
1192
$x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
1193
or die "Cannot open $filename: $!\n";
1195
$found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1196
print "Larry Wall is $found there\n" ;
1198
$found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
1199
print "Harry Wall is $found there\n" ;
1207
Harry Wall is not there
1210
=head2 The del_dup() Method
1212
$status = $X->del_dup($key, $value) ;
1214
This method deletes a specific key/value pair. It returns
1215
0 if they exist and have been deleted successfully.
1216
Otherwise the method returns a non-zero value.
1218
Again assuming the existence of the C<tree> database
1224
my ($filename, $x, %h, $found) ;
1226
$filename = "tree" ;
1228
# Enable duplicate records
1229
$DB_BTREE->{'flags'} = R_DUP ;
1231
$x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
1232
or die "Cannot open $filename: $!\n";
1234
$x->del_dup("Wall", "Larry") ;
1236
$found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1237
print "Larry Wall is $found there\n" ;
1244
Larry Wall is not there
1246
=head2 Matching Partial Keys
1248
The BTREE interface has a feature which allows partial keys to be
1249
matched. This functionality is I<only> available when the C<seq> method
1250
is used along with the R_CURSOR flag.
1252
$x->seq($key, $value, R_CURSOR) ;
1254
Here is the relevant quote from the dbopen man page where it defines
1255
the use of the R_CURSOR flag with seq:
1257
Note, for the DB_BTREE access method, the returned key is not
1258
necessarily an exact match for the specified key. The returned key
1259
is the smallest key greater than or equal to the specified key,
1260
permitting partial key matches and range searches.
1262
In the example script below, the C<match> sub uses this feature to find
1263
and print the first matching key/value pair given a partial key.
1270
my ($filename, $x, %h, $st, $key, $value) ;
1276
my $orig_key = $key ;
1277
$x->seq($key, $value, R_CURSOR) ;
1278
print "$orig_key\t-> $key\t-> $value\n" ;
1281
$filename = "tree" ;
1284
$x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
1285
or die "Cannot open $filename: $!\n";
1287
# Add some key/value pairs to the file
1288
$h{'mouse'} = 'mickey' ;
1289
$h{'Wall'} = 'Larry' ;
1290
$h{'Walls'} = 'Brick' ;
1291
$h{'Smith'} = 'John' ;
1295
print "IN ORDER\n" ;
1296
for ($st = $x->seq($key, $value, R_FIRST) ;
1298
$st = $x->seq($key, $value, R_NEXT) )
1300
{ print "$key -> $value\n" }
1302
print "\nPARTIAL MATCH\n" ;
1322
a -> mouse -> mickey
1326
DB_RECNO provides an interface to flat text files. Both variable and
1327
fixed length records are supported.
1329
In order to make RECNO more compatible with Perl, the array offset for
1330
all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
1332
As with normal Perl arrays, a RECNO array can be accessed using
1333
negative indexes. The index -1 refers to the last element of the array,
1334
-2 the second last, and so on. Attempting to access an element before
1335
the start of the array will raise a fatal run-time error.
1337
=head2 The 'bval' Option
1339
The operation of the bval option warrants some discussion. Here is the
1340
definition of bval from the Berkeley DB 1.85 recno manual page:
1342
The delimiting byte to be used to mark the end of a
1343
record for variable-length records, and the pad charac-
1344
ter for fixed-length records. If no value is speci-
1345
fied, newlines (``\n'') are used to mark the end of
1346
variable-length records and fixed-length records are
1349
The second sentence is wrong. In actual fact bval will only default to
1350
C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1351
openinfo parameter is used at all, the value that happens to be in bval
1352
will be used. That means you always have to specify bval when making
1353
use of any of the options in the openinfo parameter. This documentation
1354
error will be fixed in the next release of Berkeley DB.
1356
That clarifies the situation with regards Berkeley DB itself. What
1357
about B<DB_File>? Well, the behavior defined in the quote above is
1358
quite useful, so B<DB_File> conforms to it.
1360
That means that you can specify other options (e.g. cachesize) and
1361
still have bval default to C<"\n"> for variable length records, and
1362
space for fixed length records.
1364
Also note that the bval option only allows you to specify a single byte
1367
=head2 A Simple Example
1369
Here is a simple example that uses RECNO (if you are using a version
1370
of Perl earlier than 5.004_57 this example won't work -- see
1371
L<Extra RECNO Methods> for a workaround).
1377
my $filename = "text" ;
1381
tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO
1382
or die "Cannot open file 'text': $!\n" ;
1384
# Add a few key/value pairs to the file
1389
push @h, "green", "black" ;
1391
my $elements = scalar @h ;
1392
print "The array contains $elements entries\n" ;
1395
print "popped $last\n" ;
1397
unshift @h, "white" ;
1398
my $first = shift @h ;
1399
print "shifted $first\n" ;
1401
# Check for existence of a key
1402
print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1404
# use a negative index
1405
print "The last element is $h[-1]\n" ;
1406
print "The 2nd last element is $h[-2]\n" ;
1410
Here is the output from the script:
1412
The array contains 5 entries
1415
Element 1 Exists with value blue
1416
The last element is green
1417
The 2nd last element is yellow
1419
=head2 Extra RECNO Methods
1421
If you are using a version of Perl earlier than 5.004_57, the tied
1422
array interface is quite limited. In the example script above
1423
C<push>, C<pop>, C<shift>, C<unshift>
1424
or determining the array length will not work with a tied array.
1426
To make the interface more useful for older versions of Perl, a number
1427
of methods are supplied with B<DB_File> to simulate the missing array
1428
operations. All these methods are accessed via the object returned from
1431
Here are the methods:
1435
=item B<$X-E<gt>push(list) ;>
1437
Pushes the elements of C<list> to the end of the array.
1439
=item B<$value = $X-E<gt>pop ;>
1441
Removes and returns the last element of the array.
1443
=item B<$X-E<gt>shift>
1445
Removes and returns the first element of the array.
1447
=item B<$X-E<gt>unshift(list) ;>
1449
Pushes the elements of C<list> to the start of the array.
1451
=item B<$X-E<gt>length>
1453
Returns the number of elements in the array.
1455
=item B<$X-E<gt>splice(offset, length, elements);>
1457
Returns a splice of the the array.
1461
=head2 Another Example
1463
Here is a more complete example that makes use of some of the methods
1464
described above. It also makes use of the API interface directly (see
1465
L<THE API INTERFACE>).
1469
my (@h, $H, $file, $i) ;
1477
$H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO
1478
or die "Cannot open file $file: $!\n" ;
1480
# first create a text file to play with
1488
# Print the records in order.
1490
# The length method is needed here because evaluating a tied
1491
# array in a scalar context does not return the number of
1492
# elements in the array.
1494
print "\nORIGINAL\n" ;
1495
foreach $i (0 .. $H->length - 1) {
1496
print "$i: $h[$i]\n" ;
1499
# use the push & pop methods
1502
print "\nThe last record was [$a]\n" ;
1504
# and the shift & unshift methods
1506
$H->unshift("first") ;
1507
print "The first record was [$a]\n" ;
1509
# Use the API to add a new record after record 2.
1511
$H->put($i, "Newbie", R_IAFTER) ;
1513
# and a new record before record 1.
1515
$H->put($i, "New One", R_IBEFORE) ;
1520
# now print the records in reverse order
1521
print "\nREVERSE\n" ;
1522
for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1523
{ print "$i: $h[$i]\n" }
1525
# same again, but use the API functions instead
1526
print "\nREVERSE again\n" ;
1527
my ($s, $k, $v) = (0, 0, 0) ;
1528
for ($s = $H->seq($k, $v, R_LAST) ;
1530
$s = $H->seq($k, $v, R_PREV))
1531
{ print "$k: $v\n" }
1536
and this is what it outputs:
1545
The last record was [four]
1546
The first record was [zero]
1570
Rather than iterating through the array, C<@h> like this:
1574
it is necessary to use either this:
1576
foreach $i (0 .. $H->length - 1)
1580
for ($a = $H->get($k, $v, R_FIRST) ;
1582
$a = $H->get($k, $v, R_NEXT) )
1586
Notice that both times the C<put> method was used the record index was
1587
specified using a variable, C<$i>, rather than the literal value
1588
itself. This is because C<put> will return the record number of the
1589
inserted line via that parameter.
1593
=head1 THE API INTERFACE
1595
As well as accessing Berkeley DB using a tied hash or array, it is also
1596
possible to make direct use of most of the API functions defined in the
1597
Berkeley DB documentation.
1599
To do this you need to store a copy of the object returned from the tie.
1601
$db = tie %hash, "DB_File", "filename" ;
1603
Once you have done that, you can access the Berkeley DB API functions
1604
as B<DB_File> methods directly like this:
1606
$db->put($key, $value, R_NOOVERWRITE) ;
1608
B<Important:> If you have saved a copy of the object returned from
1609
C<tie>, the underlying database file will I<not> be closed until both
1610
the tied variable is untied and all copies of the saved object are
1614
$db = tie %hash, "DB_File", "filename"
1615
or die "Cannot tie filename: $!" ;
1620
See L<The untie() Gotcha> for more details.
1622
All the functions defined in L<dbopen> are available except for
1623
close() and dbopen() itself. The B<DB_File> method interface to the
1624
supported functions have been implemented to mirror the way Berkeley DB
1625
works whenever possible. In particular note that:
1631
The methods return a status value. All return 0 on success.
1632
All return -1 to signify an error and set C<$!> to the exact
1633
error code. The return code 1 generally (but not always) means that the
1634
key specified did not exist in the database.
1636
Other return codes are defined. See below and in the Berkeley DB
1637
documentation for details. The Berkeley DB documentation should be used
1638
as the definitive source.
1642
Whenever a Berkeley DB function returns data via one of its parameters,
1643
the equivalent B<DB_File> method does exactly the same.
1647
If you are careful, it is possible to mix API calls with the tied
1648
hash/array interface in the same piece of code. Although only a few of
1649
the methods used to implement the tied interface currently make use of
1650
the cursor, you should always assume that the cursor has been changed
1651
any time the tied hash/array interface is used. As an example, this
1652
code will probably not do what you expect:
1654
$X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1655
or die "Cannot tie $filename: $!" ;
1657
# Get the first key/value pair and set the cursor
1658
$X->seq($key, $value, R_FIRST) ;
1660
# this line will modify the cursor
1661
$count = scalar keys %x ;
1663
# Get the second key/value pair.
1664
# oops, it didn't, it got the last key/value pair!
1665
$X->seq($key, $value, R_NEXT) ;
1667
The code above can be rearranged to get around the problem, like this:
1669
$X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1670
or die "Cannot tie $filename: $!" ;
1672
# this line will modify the cursor
1673
$count = scalar keys %x ;
1675
# Get the first key/value pair and set the cursor
1676
$X->seq($key, $value, R_FIRST) ;
1678
# Get the second key/value pair.
1680
$X->seq($key, $value, R_NEXT) ;
1684
All the constants defined in L<dbopen> for use in the flags parameters
1685
in the methods defined below are also available. Refer to the Berkeley
1686
DB documentation for the precise meaning of the flags values.
1688
Below is a list of the methods available.
1692
=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
1694
Given a key (C<$key>) this method reads the value associated with it
1695
from the database. The value read from the database is returned in the
1696
C<$value> parameter.
1698
If the key does not exist the method returns 1.
1700
No flags are currently defined for this method.
1702
=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
1704
Stores the key/value pair in the database.
1706
If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
1707
will have the record number of the inserted key/value pair set.
1709
Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1712
=item B<$status = $X-E<gt>del($key [, $flags]) ;>
1714
Removes all key/value pairs with key C<$key> from the database.
1716
A return code of 1 means that the requested key was not in the
1719
R_CURSOR is the only valid flag at present.
1721
=item B<$status = $X-E<gt>fd ;>
1723
Returns the file descriptor for the underlying database.
1725
See L<Locking: The Trouble with fd> for an explanation for why you should
1726
not use C<fd> to lock your database.
1728
=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
1730
This interface allows sequential retrieval from the database. See
1731
L<dbopen> for full details.
1733
Both the C<$key> and C<$value> parameters will be set to the key/value
1734
pair read from the database.
1736
The flags parameter is mandatory. The valid flag values are R_CURSOR,
1737
R_FIRST, R_LAST, R_NEXT and R_PREV.
1739
=item B<$status = $X-E<gt>sync([$flags]) ;>
1741
Flushes any cached buffers to disk.
1743
R_RECNOSYNC is the only valid flag at present.
1749
A DBM Filter is a piece of code that is be used when you I<always>
1750
want to make the same transformation to all keys and/or values in a
1753
There are four methods associated with DBM Filters. All work identically,
1754
and each is used to install (or uninstall) a single DBM Filter. Each
1755
expects a single parameter, namely a reference to a sub. The only
1756
difference between them is the place that the filter is installed.
1762
=item B<filter_store_key>
1764
If a filter has been installed with this method, it will be invoked
1765
every time you write a key to a DBM database.
1767
=item B<filter_store_value>
1769
If a filter has been installed with this method, it will be invoked
1770
every time you write a value to a DBM database.
1773
=item B<filter_fetch_key>
1775
If a filter has been installed with this method, it will be invoked
1776
every time you read a key from a DBM database.
1778
=item B<filter_fetch_value>
1780
If a filter has been installed with this method, it will be invoked
1781
every time you read a value from a DBM database.
1785
You can use any combination of the methods, from none, to all four.
1787
All filter methods return the existing filter, if present, or C<undef>
1790
To delete a filter pass C<undef> to it.
1794
When each filter is called by Perl, a local copy of C<$_> will contain
1795
the key or value to be filtered. Filtering is achieved by modifying
1796
the contents of C<$_>. The return code from the filter is ignored.
1798
=head2 An Example -- the NULL termination problem.
1800
Consider the following scenario. You have a DBM database
1801
that you need to share with a third-party C application. The C application
1802
assumes that I<all> keys and values are NULL terminated. Unfortunately
1803
when Perl writes to DBM databases it doesn't use NULL termination, so
1804
your Perl application will have to manage NULL termination itself. When
1805
you write to the database you will have to use something like this:
1807
$hash{"$key\0"} = "$value\0" ;
1809
Similarly the NULL needs to be taken into account when you are considering
1810
the length of existing keys/values.
1812
It would be much better if you could ignore the NULL terminations issue
1813
in the main application code and have a mechanism that automatically
1814
added the terminating NULL to all keys and values whenever you write to
1815
the database and have them removed when you read from the database. As I'm
1816
sure you have already guessed, this is a problem that DBM Filters can
1824
my $filename = "/tmp/filt" ;
1827
my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1828
or die "Cannot open $filename: $!\n" ;
1830
# Install DBM Filters
1831
$db->filter_fetch_key ( sub { s/\0$// } ) ;
1832
$db->filter_store_key ( sub { $_ .= "\0" } ) ;
1833
$db->filter_fetch_value( sub { s/\0$// } ) ;
1834
$db->filter_store_value( sub { $_ .= "\0" } ) ;
1836
$hash{"abc"} = "def" ;
1837
my $a = $hash{"ABC"} ;
1842
Hopefully the contents of each of the filters should be
1843
self-explanatory. Both "fetch" filters remove the terminating NULL,
1844
and both "store" filters add a terminating NULL.
1847
=head2 Another Example -- Key is a C int.
1849
Here is another real-life example. By default, whenever Perl writes to
1850
a DBM database it always writes the key and value as strings. So when
1853
$hash{12345} = "soemthing" ;
1855
the key 12345 will get stored in the DBM database as the 5 byte string
1856
"12345". If you actually want the key to be stored in the DBM database
1857
as a C int, you will have to use C<pack> when writing, and C<unpack>
1860
Here is a DBM Filter that does it:
1866
my $filename = "/tmp/filt" ;
1870
my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1871
or die "Cannot open $filename: $!\n" ;
1873
$db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
1874
$db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
1875
$hash{123} = "def" ;
1880
This time only two filters have been used -- we only need to manipulate
1881
the contents of the key, so it wasn't necessary to install any value
1884
=head1 HINTS AND TIPS
1887
=head2 Locking: The Trouble with fd
1889
Until version 1.72 of this module, the recommended technique for locking
1890
B<DB_File> databases was to flock the filehandle returned from the "fd"
1891
function. Unfortunately this technique has been shown to be fundamentally
1892
flawed (Kudos to David Harris for tracking this down). Use it at your own
1895
The locking technique went like this.
1897
$db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0666)
1898
|| die "dbcreat /tmp/foo.db $!";
1900
open(DB_FH, "+<&=$fd") || die "dup $!";
1901
flock (DB_FH, LOCK_EX) || die "flock: $!";
1903
$db{"Tom"} = "Jerry" ;
1905
flock(DB_FH, LOCK_UN);
1910
In simple terms, this is what happens:
1916
Use "tie" to open the database.
1920
Lock the database with fd & flock.
1924
Read & Write to the database.
1928
Unlock and close the database.
1932
Here is the crux of the problem. A side-effect of opening the B<DB_File>
1933
database in step 2 is that an initial block from the database will get
1934
read from disk and cached in memory.
1936
To see why this is a problem, consider what can happen when two processes,
1937
say "A" and "B", both want to update the same B<DB_File> database
1938
using the locking steps outlined above. Assume process "A" has already
1939
opened the database and has a write lock, but it hasn't actually updated
1940
the database yet (it has finished step 2, but not started step 3 yet). Now
1941
process "B" tries to open the same database - step 1 will succeed,
1942
but it will block on step 2 until process "A" releases the lock. The
1943
important thing to notice here is that at this point in time both
1944
processes will have cached identical initial blocks from the database.
1946
Now process "A" updates the database and happens to change some of the
1947
data held in the initial buffer. Process "A" terminates, flushing
1948
all cached data to disk and releasing the database lock. At this point
1949
the database on disk will correctly reflect the changes made by process
1952
With the lock released, process "B" can now continue. It also updates the
1953
database and unfortunately it too modifies the data that was in its
1954
initial buffer. Once that data gets flushed to disk it will overwrite
1955
some/all of the changes process "A" made to the database.
1957
The result of this scenario is at best a database that doesn't contain
1958
what you expect. At worst the database will corrupt.
1960
The above won't happen every time competing process update the same
1961
B<DB_File> database, but it does illustrate why the technique should
1964
=head2 Safe ways to lock a database
1966
Starting with version 2.x, Berkeley DB has internal support for locking.
1967
The companion module to this one, B<BerkeleyDB>, provides an interface
1968
to this locking functionality. If you are serious about locking
1969
Berkeley DB databases, I strongly recommend using B<BerkeleyDB>.
1971
If using B<BerkeleyDB> isn't an option, there are a number of modules
1972
available on CPAN that can be used to implement locking. Each one
1973
implements locking differently and has different goals in mind. It is
1974
therefore worth knowing the difference, so that you can pick the right
1975
one for your application. Here are the three locking wrappers:
1979
=item B<Tie::DB_Lock>
1981
A B<DB_File> wrapper which creates copies of the database file for
1982
read access, so that you have a kind of a multiversioning concurrent read
1983
system. However, updates are still serial. Use for databases where reads
1984
may be lengthy and consistency problems may occur.
1986
=item B<Tie::DB_LockFile>
1988
A B<DB_File> wrapper that has the ability to lock and unlock the database
1989
while it is being used. Avoids the tie-before-flock problem by simply
1990
re-tie-ing the database when you get or drop a lock. Because of the
1991
flexibility in dropping and re-acquiring the lock in the middle of a
1992
session, this can be massaged into a system that will work with long
1993
updates and/or reads if the application follows the hints in the POD
1996
=item B<DB_File::Lock>
1998
An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile
1999
before tie-ing the database and drops the lock after the untie. Allows
2000
one to use the same lockfile for multiple databases to avoid deadlock
2001
problems, if desired. Use for databases where updates are reads are
2002
quick and simple flock locking semantics are enough.
2006
=head2 Sharing Databases With C Applications
2008
There is no technical reason why a Berkeley DB database cannot be
2009
shared by both a Perl and a C application.
2011
The vast majority of problems that are reported in this area boil down
2012
to the fact that C strings are NULL terminated, whilst Perl strings are
2013
not. See L<DBM FILTERS> for a generic way to work around this problem.
2015
Here is a real example. Netscape 2.0 keeps a record of the locations you
2016
visit along with the time you last visited them in a DB_HASH database.
2017
This is usually stored in the file F<~/.netscape/history.db>. The key
2018
field in the database is the location string and the value field is the
2019
time the location was last visited stored as a 4 byte binary value.
2021
If you haven't already guessed, the location string is stored with a
2022
terminating NULL. This means you need to be careful when accessing the
2025
Here is a snippet of code that is loosely based on Tom Christiansen's
2026
I<ggh> script (available from your nearest CPAN archive in
2027
F<authors/id/TOMC/scripts/nshist.gz>).
2034
my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ;
2035
$dotdir = $ENV{HOME} || $ENV{LOGNAME};
2037
$HISTORY = "$dotdir/.netscape/history.db";
2039
tie %hist_db, 'DB_File', $HISTORY
2040
or die "Cannot open $HISTORY: $!\n" ;;
2042
# Dump the complete database
2043
while ( ($href, $binary_time) = each %hist_db ) {
2045
# remove the terminating NULL
2046
$href =~ s/\x00$// ;
2048
# convert the binary time into a user friendly string
2049
$date = localtime unpack("V", $binary_time);
2050
print "$date $href\n" ;
2053
# check for the existence of a specific key
2054
# remember to add the NULL
2055
if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
2056
$date = localtime unpack("V", $binary_time) ;
2057
print "Last visited mox.perl.com on $date\n" ;
2060
print "Never visited mox.perl.com\n"
2065
=head2 The untie() Gotcha
2067
If you make use of the Berkeley DB API, it is I<very> strongly
2068
recommended that you read L<perltie/The untie Gotcha>.
2070
Even if you don't currently make use of the API interface, it is still
2073
Here is an example which illustrates the problem from a B<DB_File>
2082
$X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
2083
or die "Cannot tie first time: $!" ;
2089
tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2090
or die "Cannot tie second time: $!" ;
2094
When run, the script will produce this error message:
2096
Cannot tie second time: Invalid argument at bad.file line 14.
2098
Although the error message above refers to the second tie() statement
2099
in the script, the source of the problem is really with the untie()
2100
statement that precedes it.
2102
Having read L<perltie> you will probably have already guessed that the
2103
error is caused by the extra copy of the tied object stored in C<$X>.
2104
If you haven't, then the problem boils down to the fact that the
2105
B<DB_File> destructor, DESTROY, will not be called until I<all>
2106
references to the tied object are destroyed. Both the tied variable,
2107
C<%x>, and C<$X> above hold a reference to the object. The call to
2108
untie() will destroy the first, but C<$X> still holds a valid
2109
reference, so the destructor will not get called and the database file
2110
F<tst.fil> will remain open. The fact that Berkeley DB then reports the
2111
attempt to open a database that is already open via the catch-all
2112
"Invalid argument" doesn't help.
2114
If you run the script with the C<-w> flag the error message becomes:
2116
untie attempted while 1 inner references still exist at bad.file line 12.
2117
Cannot tie second time: Invalid argument at bad.file line 14.
2119
which pinpoints the real problem. Finally the script can now be
2120
modified to fix the original problem by destroying the API object
2129
$X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2133
=head1 COMMON QUESTIONS
2135
=head2 Why is there Perl source in my database?
2137
If you look at the contents of a database file created by DB_File,
2138
there can sometimes be part of a Perl script included in it.
2140
This happens because Berkeley DB uses dynamic memory to allocate
2141
buffers which will subsequently be written to the database file. Being
2142
dynamic, the memory could have been used for anything before DB
2143
malloced it. As Berkeley DB doesn't clear the memory once it has been
2144
allocated, the unused portions will contain random junk. In the case
2145
where a Perl script gets written to the database, the random junk will
2146
correspond to an area of dynamic memory that happened to be used during
2147
the compilation of the script.
2149
Unless you don't like the possibility of there being part of your Perl
2150
scripts embedded in a database file, this is nothing to worry about.
2152
=head2 How do I store complex data structures with DB_File?
2154
Although B<DB_File> cannot do this directly, there is a module which
2155
can layer transparently over B<DB_File> to accomplish this feat.
2157
Check out the MLDBM module, available on CPAN in the directory
2158
F<modules/by-module/MLDBM>.
2160
=head2 What does "Invalid Argument" mean?
2162
You will get this error message when one of the parameters in the
2163
C<tie> call is wrong. Unfortunately there are quite a few parameters to
2164
get wrong, so it can be difficult to figure out which one it is.
2166
Here are a couple of possibilities:
2172
Attempting to reopen a database without closing it.
2176
Using the O_WRONLY flag.
2180
=head2 What does "Bareword 'DB_File' not allowed" mean?
2182
You will encounter this particular error message when you have the
2183
C<strict 'subs'> pragma (or the full strict pragma) in your script.
2184
Consider this script:
2190
tie %x, DB_File, "filename" ;
2192
Running it produces the error in question:
2194
Bareword "DB_File" not allowed while "strict subs" in use
2196
To get around the error, place the word C<DB_File> in either single or
2197
double quotes, like this:
2199
tie %x, "DB_File", "filename" ;
2201
Although it might seem like a real pain, it is really worth the effort
2202
of having a C<use strict> in all your scripts.
2206
Articles that are either about B<DB_File> or make use of it.
2212
I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
2213
Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
2219
Moved to the Changes file.
2223
Some older versions of Berkeley DB had problems with fixed length
2224
records using the RECNO file format. This problem has been fixed since
2225
version 1.85 of Berkeley DB.
2227
I am sure there are bugs in the code. If you do find any, or can
2228
suggest any enhancements, I would welcome your comments.
2232
B<DB_File> comes with the standard Perl source distribution. Look in
2233
the directory F<ext/DB_File>. Given the amount of time between releases
2234
of Perl the version that ships with Perl is quite likely to be out of
2235
date, so the most recent version can always be found on CPAN (see
2236
L<perlmod/CPAN> for details), in the directory
2237
F<modules/by-module/DB_File>.
2239
This version of B<DB_File> will work with either version 1.x, 2.x or
2240
3.x of Berkeley DB, but is limited to the functionality provided by
2243
The official web site for Berkeley DB is F<http://www.sleepycat.com>.
2244
All versions of Berkeley DB are available there.
2246
Alternatively, Berkeley DB version 1 is available at your nearest CPAN
2247
archive in F<src/misc/db.1.85.tar.gz>.
2249
If you are running IRIX, then get Berkeley DB version 1 from
2250
F<http://reality.sgi.com/ariel>. It has the patches necessary to
2251
compile properly on IRIX 5.3.
2255
Copyright (c) 1995-2002 Paul Marquess. All rights reserved. This program
2256
is free software; you can redistribute it and/or modify it under the
2257
same terms as Perl itself.
2259
Although B<DB_File> is covered by the Perl license, the library it
2260
makes use of, namely Berkeley DB, is not. Berkeley DB has its own
2261
copyright and its own license. Please take the time to read it.
2263
Here are are few words taken from the Berkeley DB FAQ (at
2264
F<http://www.sleepycat.com>) regarding the license:
2266
Do I have to license DB to use it in Perl scripts?
2268
No. The Berkeley DB license requires that software that uses
2269
Berkeley DB be freely redistributable. In the case of Perl, that
2270
software is Perl, and not your scripts. Any Perl scripts that you
2271
write are your property, including scripts that make use of
2272
Berkeley DB. Neither the Perl license nor the Berkeley DB license
2273
place any restriction on what you may do with them.
2275
If you are in any doubt about the license situation, contact either the
2276
Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
2281
L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
2286
The DB_File interface was written by Paul Marquess
2287
E<lt>Paul.Marquess@btinternet.comE<gt>.
2288
Questions about the DB system itself may be addressed to
2289
E<lt>db@sleepycat.com<gt>.