3
# DBD::CSV - A DBI driver for CSV and similar structured files
5
# This module is currently maintained by
8
# <jeff@vpservices.com>
10
# The original author is Jochen Wiedmann.
12
# Copyright (C) 1998 by Jochen Wiedmann
14
# All rights reserved.
16
# You may distribute this module under the terms of either the GNU
17
# General Public License or the Artistic License, as specified in
18
# the Perl README file.
32
use vars qw(@ISA $VERSION $drh $err $errstr $sqlstate);
38
$err = 0; # holds error code for DBI::err
39
$errstr = ""; # holds error string for DBI::errstr
40
$sqlstate = ""; # holds error state for DBI::state
41
$drh = undef; # holds driver handle once initialised
44
package DBD::CSV::dr; # ====== DRIVER ======
48
use vars qw(@ISA @CSV_TYPES);
51
Text::CSV_XS::IV(), # SQL_TINYINT
52
Text::CSV_XS::IV(), # SQL_BIGINT
53
Text::CSV_XS::PV(), # SQL_LONGVARBINARY
54
Text::CSV_XS::PV(), # SQL_VARBINARY
55
Text::CSV_XS::PV(), # SQL_BINARY
56
Text::CSV_XS::PV(), # SQL_LONGVARCHAR
57
Text::CSV_XS::PV(), # SQL_ALL_TYPES
58
Text::CSV_XS::PV(), # SQL_CHAR
59
Text::CSV_XS::NV(), # SQL_NUMERIC
60
Text::CSV_XS::NV(), # SQL_DECIMAL
61
Text::CSV_XS::IV(), # SQL_INTEGER
62
Text::CSV_XS::IV(), # SQL_SMALLINT
63
Text::CSV_XS::NV(), # SQL_FLOAT
64
Text::CSV_XS::NV(), # SQL_REAL
65
Text::CSV_XS::NV(), # SQL_DOUBLE
68
@DBD::CSV::dr::ISA = qw(DBD::File::dr);
70
$DBD::CSV::dr::imp_data_size = 0;
71
$DBD::CSV::dr::data_sources_attr = undef;
73
sub connect ($$;$$$) {
74
my($drh, $dbname, $user, $auth, $attr) = @_;
76
my $this = $drh->DBD::File::dr::connect($dbname, $user, $auth, $attr);
77
$this->{'csv_tables'} ||= {};
83
package DBD::CSV::db; # ====== DATABASE ======
85
$DBD::CSV::db::imp_data_size = 0;
87
@DBD::CSV::db::ISA = qw(DBD::File::db);
89
sub csv_cache_sql_parser_object {
93
RaiseError => $dbh->FETCH('RaiseError'),
94
PrintError => $dbh->FETCH('PrintError'),
96
my $sql_flags = $dbh->FETCH('csv_sql') || {};
97
%$parser = (%$parser,%$sql_flags);
98
$parser = SQL::Parser->new($parser->{dialect},$parser);
99
$dbh->{csv_sql_parser_object} = $parser;
105
package DBD::CSV::st; # ====== STATEMENT ======
107
$DBD::CSV::st::imp_data_size = 0;
109
@DBD::CSV::st::ISA = qw(DBD::File::st);
112
package DBD::CSV::Statement;
114
@DBD::CSV::Statement::ISA = qw(DBD::File::Statement);
116
sub open_table ($$$$$) {
117
my($self, $data, $table, $createMode, $lockMode) = @_;
118
my $dbh = $data->{Database};
119
my $tables = $dbh->{csv_tables};
120
if (!exists($tables->{$table})) {
121
$tables->{$table} = {};
123
my $meta = $tables->{$table} || {};
124
my $csv = $meta->{csv} || $dbh->{csv_csv};
126
my $class = $meta->{class} || $dbh->{'csv_class'} ||
128
my %opts = ( 'binary' => 1 );
129
$opts{'eol'} = $meta->{'eol'} || $dbh->{'csv_eol'} || "\015\012";
131
exists($meta->{'sep_char'}) ? $meta->{'sep_char'} :
132
exists($dbh->{'csv_sep_char'}) ? $dbh->{'csv_sep_char'} : ",";
133
$opts{'quote_char'} =
134
exists($meta->{'quote_char'}) ? $meta->{'quote_char'} :
135
exists($dbh->{'csv_quote_char'}) ? $dbh->{'csv_quote_char'} :
137
$opts{'escape_char'} =
138
exists($meta->{'escape_char'}) ? $meta->{'escape_char'} :
139
exists($dbh->{'csv_escape_char'}) ? $dbh->{'csv_escape_char'} :
141
$csv = $meta->{csv} = $class->new(\%opts);
143
my $file = $meta->{file} || $table;
144
my $tbl = $self->SUPER::open_table($data, $file, $createMode, $lockMode);
146
$tbl->{'csv_csv'} = $csv;
147
my $types = $meta->{types};
149
# The 'types' array contains DBI types, but we need types
150
# suitable for Text::CSV_XS.
152
foreach (@{$types}) {
154
$_ = $DBD::CSV::CSV_TYPES[$_+6] || Text::CSV_XS::PV();
156
$_ = Text::CSV_XS::PV();
163
my($array, $skipRows);
164
if (exists($meta->{skip_rows})) {
165
$skipRows = $meta->{skip_rows};
167
$skipRows = exists($meta->{col_names}) ? 0 : 1;
170
while ($skipRows--) {
171
die "Missing first row"
172
if !($array = $tbl->fetch_row($data));
174
$tbl->{col_names} = $array;
176
$tbl->{first_row_pos} = $tbl->{fh}->tell();
177
if (exists($meta->{col_names})) {
178
$array = $tbl->{col_names} = $meta->{col_names};
180
if (!$tbl->{col_names} || !@{$tbl->{col_names}}) {
181
# No column names given; fetch first row and create default
183
my $a = $tbl->{cached_row} = $tbl->fetch_row($data);
184
$array = $tbl->{'col_names'};
185
for (my $i = 0; $i < @$a; $i++) {
186
push(@$array, "col$i");
190
my $columns = $tbl->{col_nums};
191
foreach $col (@$array) {
192
$columns->{$col} = $i++;
200
package DBD::CSV::Table;
202
@DBD::CSV::Table::ISA = qw(DBD::File::Table);
205
my($self, $data) = @_;
207
if (exists($self->{cached_row})) {
208
$fields = delete($self->{cached_row});
211
my $csv = $self->{csv_csv};
212
local $/ = $csv->{'eol'};
213
$fields = $csv->getline($self->{'fh'});
215
die "Error while reading file " . $self->{'file'} . ": $!" if $!;
219
$self->{row} = (@$fields ? $fields : undef);
223
my($self, $data, $fields) = @_;
224
my($csv) = $self->{csv_csv};
225
my($fh) = $self->{'fh'};
227
# Remove undef from the right end of the fields, so that at least
228
# in these cases undef is returned from FetchRow
230
while (@$fields && !defined($fields->[$#$fields])) {
233
if (!$csv->print($fh, $fields)) {
234
die "Error while writing file " . $self->{'file'} . ": $!";
238
*push_names = \&push_row;
248
DBD::CSV - DBI driver for CSV files
253
$dbh = DBI->connect("DBI:CSV:f_dir=/home/joe/csvdb")
254
or die "Cannot connect: " . $DBI::errstr;
255
$sth = $dbh->prepare("CREATE TABLE a (id INTEGER, name CHAR(10))")
256
or die "Cannot prepare: " . $dbh->errstr();
257
$sth->execute() or die "Cannot execute: " . $sth->errstr();
262
# Read a CSV file with ";" as the separator, as exported by
263
# MS Excel. Note we need to escape the ";", otherwise it
264
# would be treated as an attribute separator.
265
$dbh = DBI->connect(qq{DBI:CSV:csv_sep_char=\\;});
266
$sth = $dbh->prepare("SELECT * FROM info");
268
# Same example, this time reading "info.csv" as a table:
269
$dbh = DBI->connect(qq{DBI:CSV:csv_sep_char=\\;});
270
$dbh->{'csv_tables'}->{'info'} = { 'file' => 'info.csv'};
271
$sth = $dbh->prepare("SELECT * FROM info");
276
THIS IS ALPHA SOFTWARE. It is *only* 'Alpha' because the interface (API)
277
is not finalized. The Alpha status does not reflect code quality or
283
The DBD::CSV module is yet another driver for the DBI (Database independent
284
interface for Perl). This one is based on the SQL "engine" SQL::Statement
285
and the abstract DBI driver DBD::File and implements access to
286
so-called CSV files (Comma separated values). Such files are mostly used for
287
exporting MS Access and MS Excel data.
289
See L<DBI(3)> for details on DBI, L<SQL::Statement(3)> for details on
290
SQL::Statement and L<DBD::File(3)> for details on the base class
296
The only system dependent feature that DBD::File uses, is the C<flock()>
297
function. Thus the module should run (in theory) on any system with
298
a working C<flock()>, in particular on all Unix machines and on Windows
299
NT. Under Windows 95 and MacOS the use of C<flock()> is disabled, thus
300
the module should still be usable,
302
Unlike other DBI drivers, you don't need an external SQL engine
303
or a running server. All you need are the following Perl modules,
304
available from any CPAN mirror, for example
306
ftp://ftp.funet.fi/pub/languages/perl/CPAN/modules/by-module
312
the DBI (Database independent interface for Perl), version 1.00 or
321
this module is used for writing rows to or reading rows from CSV files.
328
Installing this module (and the prerequisites from above) is quite simple.
329
You just fetch the archive, extract it with
331
gzip -cd DBD-CSV-0.1000.tar.gz | tar xf -
333
(this is for Unix users, Windows users would prefer WinZip or something
334
similar) and then enter the following:
341
If any tests fail, let me know. Otherwise go on with
345
Note that you almost definitely need root or administrator permissions.
346
If you don't have them, read the ExtUtils::MakeMaker man page for details
347
on installing in your own directories. L<ExtUtils::MakeMaker>.
351
The level of SQL support available depends on the version of
352
SQL::Statement installed. Any version will support *basic*
353
CREATE, INSERT, DELETE, UPDATE, and SELECT statements. Only
354
versions of SQL::Statement 1.0 and above support additional
355
features such as table joins, string functions, etc. See the
356
documentation of the latest version of SQL::Statement for details.
358
=head2 Creating a database handle
360
Creating a database handle usually implies connecting to a database server.
361
Thus this command reads
364
my $dbh = DBI->connect("DBI:CSV:f_dir=$dir");
366
The directory tells the driver where it should create or open tables
367
(a.k.a. files). It defaults to the current directory, thus the following
370
$dbh = DBI->connect("DBI:CSV:");
371
$dbh = DBI->connect("DBI:CSV:f_dir=.");
373
(I was told, that VMS requires
375
$dbh = DBI->connect("DBI:CSV:f_dir=");
377
for whatever reasons.)
379
You may set other attributes in the DSN string, separated by semicolons.
382
=head2 Creating and dropping tables
384
You can create and drop tables with commands like the following:
386
$dbh->do("CREATE TABLE $table (id INTEGER, name CHAR(64))");
387
$dbh->do("DROP TABLE $table");
389
Note that currently only the column names will be stored and no other data.
390
Thus all other information including column type (INTEGER or CHAR(x), for
391
example), column attributes (NOT NULL, PRIMARY KEY, ...) will silently be
392
discarded. This may change in a later release.
394
A drop just removes the file without any warning.
396
See L<DBI(3)> for more details.
398
Table names cannot be arbitrary, due to restrictions of the SQL syntax.
399
I recommend that table names are valid SQL identifiers: The first
400
character is alphabetic, followed by an arbitrary number of alphanumeric
401
characters. If you want to use other files, the file names must start
402
with '/', './' or '../' and they must not contain white space.
405
=head2 Inserting, fetching and modifying data
407
The following examples insert some data in a table and fetch it back:
408
First all data in the string:
410
$dbh->do("INSERT INTO $table VALUES (1, "
411
. $dbh->quote("foobar") . ")");
413
Note the use of the quote method for escaping the word 'foobar'. Any
414
string must be escaped, even if it doesn't contain binary data.
416
Next an example using parameters:
418
$dbh->do("INSERT INTO $table VALUES (?, ?)", undef,
419
2, "It's a string!");
421
Note that you don't need to use the quote method here, this is done
422
automatically for you. This version is particularly well designed for
423
loops. Whenever performance is an issue, I recommend using this method.
425
You might wonder about the C<undef>. Don't wonder, just take it as it
426
is. :-) It's an attribute argument that I have never ever used and
427
will be parsed to the prepare method as a second argument.
430
To retrieve data, you can use the following:
432
my($query) = "SELECT * FROM $table WHERE id > 1 ORDER BY id";
433
my($sth) = $dbh->prepare($query);
435
while (my $row = $sth->fetchrow_hashref) {
436
print("Found result row: id = ", $row->{'id'},
437
", name = ", $row->{'name'});
441
Again, column binding works: The same example again.
443
my($query) = "SELECT * FROM $table WHERE id > 1 ORDER BY id";
444
my($sth) = $dbh->prepare($query);
447
$sth->bind_columns(undef, \$id, \$name);
448
while ($sth->fetch) {
449
print("Found result row: id = $id, name = $name\n");
453
Of course you can even use input parameters. Here's the same example
456
my($query) = "SELECT * FROM $table WHERE id = ?";
457
my($sth) = $dbh->prepare($query);
458
$sth->bind_columns(undef, \$id, \$name);
459
for (my($i) = 1; $i <= 2; $i++) {
462
print("Found result row: id = $id, name = $name\n");
467
See L<DBI(3)> for details on these methods. See L<SQL::Statement(3)> for
468
details on the WHERE clause.
470
Data rows are modified with the UPDATE statement:
472
$dbh->do("UPDATE $table SET id = 3 WHERE id = 1");
474
Likewise you use the DELETE statement for removing rows:
476
$dbh->do("DELETE FROM $table WHERE id > 1");
479
=head2 Error handling
481
In the above examples we have never cared about return codes. Of course,
482
this cannot be recommended. Instead we should have written (for example):
484
my($query) = "SELECT * FROM $table WHERE id = ?";
485
my($sth) = $dbh->prepare($query)
486
or die "prepare: " . $dbh->errstr();
487
$sth->bind_columns(undef, \$id, \$name)
488
or die "bind_columns: " . $dbh->errstr();
489
for (my($i) = 1; $i <= 2; $i++) {
491
or die "execute: " . $dbh->errstr();
493
print("Found result row: id = $id, name = $name\n");
497
or die "finish: " . $dbh->errstr();
499
Obviously this is tedious. Fortunately we have DBI's I<RaiseError>
502
$dbh->{'RaiseError'} = 1;
505
my($query) = "SELECT * FROM $table WHERE id = ?";
506
my($sth) = $dbh->prepare($query);
507
$sth->bind_columns(undef, \$id, \$name);
508
for (my($i) = 1; $i <= 2; $i++) {
511
print("Found result row: id = $id, name = $name\n");
516
if ($@) { die "SQL database error: $@"; }
518
This is not only shorter, it even works when using DBI methods within
524
The following attributes are handled by DBI itself and not by DBD::File,
525
thus they all work as expected:
530
CompatMode (Not used)
537
The following DBI attributes are handled by DBD::File:
551
Valid after C<$sth-E<gt>execute>
555
Valid after C<$sth-E<gt>prepare>
559
Valid after C<$sth-E<gt>execute>; undef for Non-Select statements.
563
Not really working. Always returns an array ref of one's, as DBD::CSV
564
doesn't verify input data. Valid after C<$sth-E<gt>execute>; undef for
565
non-Select statements.
569
These attributes and methods are not supported:
576
In addition to the DBI attributes, you can use the following dbh
583
This attribute is used for setting the directory where CSV files are
584
opened. Usually you set it in the dbh, it defaults to the current
585
directory ("."). However, it is overwritable in the statement handles.
593
=item csv_escape_char
599
The attributes I<csv_eol>, I<csv_sep_char>, I<csv_quote_char> and
600
I<csv_escape_char> are corresponding to the respective attributes of the
601
Text::CSV_XS object. You want to set these attributes if you have unusual
602
CSV files like F</etc/passwd> or MS Excel generated CSV files with a semicolon
603
as separator. Defaults are "\015\012", ';', '"' and '"', respectively.
605
The attributes are used to create an instance of the class I<csv_class>,
606
by default Text::CSV_XS. Alternatively you may pass an instance as
607
I<csv_csv>, the latter takes precedence. Note that the I<binary>
608
attribute I<must> be set to a true value in that case.
610
Additionally you may overwrite these attributes on a per-table base in
611
the I<csv_tables> attribute.
615
This hash ref is used for storing table dependent metadata. For any
616
table it contains an element with the table name as key and another
617
hash ref with the following attributes:
623
The tables file name; defaults to
625
"$dbh->{f_dir}/$table"
639
These correspond to the attributes I<csv_eol>, I<csv_sep_char>,
640
I<csv_quote_char>, I<csv_escape_char>, I<csv_class> and I<csv_csv>.
641
The difference is that they work on a per-table base.
647
By default DBD::CSV assumes that column names are stored in the first
648
row of the CSV file. If this is not the case, you can supply an array
649
ref of table names with the I<col_names> attribute.
651
If you supply an empty array ref, the driver will read the first row
652
for you, count the number of columns and create column names like
653
C<col0>, C<col1>, ...
655
In addition, if there are several rows of junk prior to the data you
656
want, you can set skip_rows to the number of rows to ignore. By
657
default, DBD::CSV assumes that column names are stored in the last row
658
skipped, but you can override this with the col_names attribute.
664
Example: Suggest you want to use F</etc/passwd> as a CSV file. :-)
665
There simplest way is:
668
my $dbh = DBI->connect("DBI:CSV:f_dir=/etc;csv_eol=\n;"
669
. "csv_sep_char=:;csv_quote_char=;"
670
. "csv_escape_char=");
671
$dbh->{'csv_tables'}->{'passwd'} = {
672
'col_names' => ["login", "password", "uid", "gid", "realname",
673
"directory", "shell"]
675
$sth = $dbh->prepare("SELECT * FROM passwd");
677
Another possibility where you leave all the defaults as they are and
678
overwrite them on a per table base:
681
my $dbh = DBI->connect("DBI:CSV:");
682
$dbh->{'csv_tables'}->{'passwd'} = {
685
'quote_char' => undef,
686
'escape_char' => undef,
687
'file' => '/etc/passwd',
688
'col_names' => ["login", "password", "uid", "gid", "realname",
689
"directory", "shell"]
691
$sth = $dbh->prepare("SELECT * FROM passwd");
694
=head2 Driver private methods
696
These methods are inherited from DBD::File:
702
The C<data_sources> method returns a list of subdirectories of the current
703
directory in the form "DBI:CSV:directory=$dirname".
705
If you want to read the subdirectories of another directory, use
707
my($drh) = DBI->install_driver("CSV");
708
my(@list) = $drh->data_sources('f_dir' => '/usr/local/csv_data' );
712
This method returns a list of file names inside $dbh->{'directory'}.
715
my($dbh) = DBI->connect("DBI:CSV:directory=/usr/local/csv_data");
716
my(@list) = $dbh->func('list_tables');
718
Note that the list includes all files contained in the directory, even
719
those that have non-valid table names, from the view of SQL. See
720
L<Creating and dropping tables> above.
725
=head2 Data restrictions
727
When inserting and fetching data, you will sometimes be surprised: DBD::CSV
728
doesn't correctly handle data types, in particular NULLs. If you insert
729
integers, it might happen, that fetch returns a string. Of course, a string
730
containing the integer, so that's perhaps not a real problem. But the
731
following will never work:
733
$dbh->do("INSERT INTO $table (id, name) VALUES (?, ?)",
735
$sth = $dbh->prepare("SELECT * FROM $table WHERE id IS NULL");
738
$sth->bind_columns(undef, \$id, \$name);
739
while ($sth->fetch) {
740
printf("Found result row: id = %s, name = %s\n",
741
defined($id) ? $id : "NULL",
742
defined($name) ? $name : "NULL");
746
The row we have just inserted, will never be returned! The reason is
747
obvious, if you examine the CSV file: The corresponding row looks
752
In other words, not a NULL is stored, but an empty string. CSV files
753
don't have a concept of NULL values. Surprisingly the above example
754
works, if you insert a NULL value for the name! Again, you find
755
the explanation by examining the CSV file:
759
In other words, DBD::CSV has "emulated" a NULL value by writing a row
760
with less columns. Of course this works only if the rightmost column
761
is NULL, the two rightmost columns are NULL, ..., but the leftmost
762
column will never be NULL!
764
See L<Creating and dropping tables> above for table name restrictions.
769
Extensions of DBD::CSV:
773
=item CSV file scanner
775
Write a simple CSV file scanner that reads a CSV file and attempts
776
to guess sep_char, quote_char, escape_char and eol automatically.
780
These are merely restrictions of the DBD::File or SQL::Statement
785
=item Table name mapping
787
Currently it is not possible to use files with names like C<names.csv>.
788
Instead you have to use soft links or rename files. As an alternative
789
one might use, for example a dbh attribute 'table_map'. It might be a
790
hash ref, the keys being the table names and the values being the file
793
=item Column name mapping
795
Currently the module assumes that column names are stored in the first
796
row. While this is fine in most cases, there should be a possibility
797
of setting column names and column number from the programmer: For
798
example MS Access doesn't export column names by default.
809
The module is using flock() internally. However, this function is not
810
available on platforms. Using flock() is disabled on MacOS and Windows
811
95: There's no locking at all (perhaps not so important on these
812
operating systems, as they are for single users anyways).
817
=head1 AUTHOR AND COPYRIGHT
819
This module is currently maintained by
822
<jeff@vpservices.com>
824
The original author is Jochen Wiedmann.
826
Copyright (C) 1998 by Jochen Wiedmann
830
You may distribute this module under the terms of either the GNU
831
General Public License or the Artistic License, as specified in
832
the Perl README file.
836
L<DBI(3)>, L<Text::CSV_XS(3)>, L<SQL::Statement(3)>
838
For help on the use of DBD::CSV, see the DBI users mailing list:
840
http://www.isc.org/dbi-lists.html
842
For general information on DBI see
844
http://www.symbolstone.org/technology/perl/DBI
3
# DBD::CSV - A DBI driver for CSV and similar structured files
5
# This module is currently maintained by
8
# <jeff@vpservices.com>
10
# The original author is Jochen Wiedmann.
12
# Copyright (C) 1998 by Jochen Wiedmann
14
# All rights reserved.
16
# You may distribute this module under the terms of either the GNU
17
# General Public License or the Artistic License, as specified in
18
# the Perl README file.
32
use vars qw(@ISA $VERSION $drh $err $errstr $sqlstate);
36
$VERSION = '0.21'; # jumped from 0.2002 to shorten version number
38
$err = 0; # holds error code for DBI::err
39
$errstr = ""; # holds error string for DBI::errstr
40
$sqlstate = ""; # holds error state for DBI::state
41
$drh = undef; # holds driver handle once initialised
43
package DBD::CSV::dr; # ====== DRIVER ======
46
use vars qw(@ISA @CSV_TYPES);
49
Text::CSV_XS::IV(), # SQL_TINYINT
50
Text::CSV_XS::IV(), # SQL_BIGINT
51
Text::CSV_XS::PV(), # SQL_LONGVARBINARY
52
Text::CSV_XS::PV(), # SQL_VARBINARY
53
Text::CSV_XS::PV(), # SQL_BINARY
54
Text::CSV_XS::PV(), # SQL_LONGVARCHAR
55
Text::CSV_XS::PV(), # SQL_ALL_TYPES
56
Text::CSV_XS::PV(), # SQL_CHAR
57
Text::CSV_XS::NV(), # SQL_NUMERIC
58
Text::CSV_XS::NV(), # SQL_DECIMAL
59
Text::CSV_XS::IV(), # SQL_INTEGER
60
Text::CSV_XS::IV(), # SQL_SMALLINT
61
Text::CSV_XS::NV(), # SQL_FLOAT
62
Text::CSV_XS::NV(), # SQL_REAL
63
Text::CSV_XS::NV(), # SQL_DOUBLE
66
@DBD::CSV::dr::ISA = qw(DBD::File::dr);
68
$DBD::CSV::dr::imp_data_size = 0;
69
$DBD::CSV::dr::data_sources_attr = undef;
71
sub connect ($$;$$$) {
72
my($drh, $dbname, $user, $auth, $attr) = @_;
73
my $dbh = $drh->DBD::File::dr::connect($dbname, $user, $auth, $attr);
74
$dbh->{'csv_tables'} ||= {};
79
package DBD::CSV::db; # ====== DATABASE ======
81
$DBD::CSV::db::imp_data_size = 0;
83
@DBD::CSV::db::ISA = qw(DBD::File::db);
85
sub csv_cache_sql_parser_object {
89
RaiseError => $dbh->FETCH('RaiseError'),
90
PrintError => $dbh->FETCH('PrintError'),
92
my $sql_flags = $dbh->FETCH('csv_sql') || {};
93
%$parser = (%$parser,%$sql_flags);
94
$parser = SQL::Parser->new($parser->{dialect},$parser);
95
$dbh->{csv_sql_parser_object} = $parser;
101
$dbh->STORE('Active',0);
106
$dbh->STORE('Active',0);
110
package DBD::CSV::st; # ====== STATEMENT ======
112
$DBD::CSV::st::imp_data_size = 0;
114
@DBD::CSV::st::ISA = qw(DBD::File::st);
117
package DBD::CSV::Statement;
119
@DBD::CSV::Statement::ISA = qw(DBD::File::Statement);
121
sub open_table ($$$$$) {
122
my($self, $data, $table, $createMode, $lockMode) = @_;
123
my $dbh = $data->{Database};
124
my $tables = $dbh->{csv_tables};
125
if (!exists($tables->{$table})) {
126
$tables->{$table} = {};
128
my $meta = $tables->{$table} || {};
129
my $csv = $meta->{csv} || $dbh->{csv_csv};
131
my $class = $meta->{class} || $dbh->{'csv_class'} ||
133
my %opts = ( 'binary' => 1 );
134
$opts{'eol'} = $meta->{'eol'} || $dbh->{'csv_eol'} || "\015\012";
136
exists($meta->{'sep_char'}) ? $meta->{'sep_char'} :
137
exists($dbh->{'csv_sep_char'}) ? $dbh->{'csv_sep_char'} : ",";
138
$opts{'quote_char'} =
139
exists($meta->{'quote_char'}) ? $meta->{'quote_char'} :
140
exists($dbh->{'csv_quote_char'}) ? $dbh->{'csv_quote_char'} :
142
$opts{'escape_char'} =
143
exists($meta->{'escape_char'}) ? $meta->{'escape_char'} :
144
exists($dbh->{'csv_escape_char'}) ? $dbh->{'csv_escape_char'} :
146
$csv = $meta->{csv} = $class->new(\%opts);
148
my $file = $meta->{file} || $table;
149
my $tbl = $self->SUPER::open_table($data, $file, $createMode, $lockMode);
151
$tbl->{'csv_csv'} = $csv;
152
my $types = $meta->{types};
154
# The 'types' array contains DBI types, but we need types
155
# suitable for Text::CSV_XS.
157
foreach (@{$types}) {
159
$_ = $DBD::CSV::CSV_TYPES[$_+6] || Text::CSV_XS::PV();
161
$_ = Text::CSV_XS::PV();
167
if (!$createMode and !$self->{ignore_missing_table}) {
168
my($array, $skipRows);
169
if (exists($meta->{skip_rows})) {
170
$skipRows = $meta->{skip_rows};
172
$skipRows = exists($meta->{col_names}) ? 0 : 1;
175
if (!($array = $tbl->fetch_row($data))) {
176
die "Missing first row";
178
$tbl->{col_names} = $array;
179
while ($skipRows--) {
180
$tbl->fetch_row($data);
183
$tbl->{first_row_pos} = $tbl->{fh}->tell();
184
if (exists($meta->{col_names})) {
185
$array = $tbl->{col_names} = $meta->{col_names};
186
} elsif (!$tbl->{col_names} || !@{$tbl->{col_names}}) {
187
# No column names given; fetch first row and create default
189
my $a = $tbl->{cached_row} = $tbl->fetch_row($data);
190
$array = $tbl->{'col_names'};
191
for (my $i = 0; $i < @$a; $i++) {
192
push(@$array, "col$i");
196
my $columns = $tbl->{col_nums};
197
foreach $col (@$array) {
198
$columns->{$col} = $i++;
206
package DBD::CSV::Table;
208
@DBD::CSV::Table::ISA = qw(DBD::File::Table);
211
my($self, $data) = @_;
213
if (exists($self->{cached_row})) {
214
$fields = delete($self->{cached_row});
217
my $csv = $self->{csv_csv};
218
local $/ = $csv->{'eol'};
219
$fields = $csv->getline($self->{'fh'});
221
die "Error while reading file " . $self->{'file'} . ": $!" if $!;
225
$self->{row} = (@$fields ? $fields : undef);
229
my($self, $data, $fields) = @_;
230
my($csv) = $self->{csv_csv};
231
my($fh) = $self->{'fh'};
233
# Remove undef from the right end of the fields, so that at least
234
# in these cases undef is returned from FetchRow
236
while (@$fields && !defined($fields->[$#$fields])) {
239
if (!$csv->print($fh, $fields)) {
240
die "Error while writing file " . $self->{'file'} . ": $!";
244
*push_names = \&push_row;
254
DBD::CSV - DBI driver for CSV files
259
$dbh = DBI->connect("DBI:CSV:f_dir=/home/joe/csvdb")
260
or die "Cannot connect: " . $DBI::errstr;
261
$sth = $dbh->prepare("CREATE TABLE a (id INTEGER, name CHAR(10))")
262
or die "Cannot prepare: " . $dbh->errstr();
263
$sth->execute() or die "Cannot execute: " . $sth->errstr();
268
# Read a CSV file with ";" as the separator, as exported by
269
# MS Excel. Note we need to escape the ";", otherwise it
270
# would be treated as an attribute separator.
271
$dbh = DBI->connect(qq{DBI:CSV:csv_sep_char=\\;});
272
$sth = $dbh->prepare("SELECT * FROM info");
274
# Same example, this time reading "info.csv" as a table:
275
$dbh = DBI->connect(qq{DBI:CSV:csv_sep_char=\\;});
276
$dbh->{'csv_tables'}->{'info'} = { 'file' => 'info.csv'};
277
$sth = $dbh->prepare("SELECT * FROM info");
282
THIS IS ALPHA SOFTWARE. It is *only* 'Alpha' because the interface (API)
283
is not finalized. The Alpha status does not reflect code quality or
289
The DBD::CSV module is yet another driver for the DBI (Database independent
290
interface for Perl). This one is based on the SQL "engine" SQL::Statement
291
and the abstract DBI driver DBD::File and implements access to
292
so-called CSV files (Comma separated values). Such files are mostly used for
293
exporting MS Access and MS Excel data.
295
See L<DBI(3)> for details on DBI, L<SQL::Statement(3)> for details on
296
SQL::Statement and L<DBD::File(3)> for details on the base class
302
The only system dependent feature that DBD::File uses, is the C<flock()>
303
function. Thus the module should run (in theory) on any system with
304
a working C<flock()>, in particular on all Unix machines and on Windows
305
NT. Under Windows 95 and MacOS the use of C<flock()> is disabled, thus
306
the module should still be usable,
308
Unlike other DBI drivers, you don't need an external SQL engine
309
or a running server. All you need are the following Perl modules,
310
available from any CPAN mirror, for example
312
ftp://ftp.funet.fi/pub/languages/perl/CPAN/modules/by-module
318
the DBI (Database independent interface for Perl), version 1.00 or
327
this module is used for writing rows to or reading rows from CSV files.
334
Installing this module (and the prerequisites from above) is quite simple.
335
You just fetch the archive, extract it with
337
gzip -cd DBD-CSV-0.1000.tar.gz | tar xf -
339
(this is for Unix users, Windows users would prefer WinZip or something
340
similar) and then enter the following:
347
If any tests fail, let me know. Otherwise go on with
351
Note that you almost definitely need root or administrator permissions.
352
If you don't have them, read the ExtUtils::MakeMaker man page for details
353
on installing in your own directories. L<ExtUtils::MakeMaker>.
357
The level of SQL support available depends on the version of
358
SQL::Statement installed. Any version will support *basic*
359
CREATE, INSERT, DELETE, UPDATE, and SELECT statements. Only
360
versions of SQL::Statement 1.0 and above support additional
361
features such as table joins, string functions, etc. See the
362
documentation of the latest version of SQL::Statement for details.
364
=head2 Creating a database handle
366
Creating a database handle usually implies connecting to a database server.
367
Thus this command reads
370
my $dbh = DBI->connect("DBI:CSV:f_dir=$dir");
372
The directory tells the driver where it should create or open tables
373
(a.k.a. files). It defaults to the current directory, thus the following
376
$dbh = DBI->connect("DBI:CSV:");
377
$dbh = DBI->connect("DBI:CSV:f_dir=.");
379
(I was told, that VMS requires
381
$dbh = DBI->connect("DBI:CSV:f_dir=");
383
for whatever reasons.)
385
You may set other attributes in the DSN string, separated by semicolons.
388
=head2 Creating and dropping tables
390
You can create and drop tables with commands like the following:
392
$dbh->do("CREATE TABLE $table (id INTEGER, name CHAR(64))");
393
$dbh->do("DROP TABLE $table");
395
Note that currently only the column names will be stored and no other data.
396
Thus all other information including column type (INTEGER or CHAR(x), for
397
example), column attributes (NOT NULL, PRIMARY KEY, ...) will silently be
398
discarded. This may change in a later release.
400
A drop just removes the file without any warning.
402
See L<DBI(3)> for more details.
404
Table names cannot be arbitrary, due to restrictions of the SQL syntax.
405
I recommend that table names are valid SQL identifiers: The first
406
character is alphabetic, followed by an arbitrary number of alphanumeric
407
characters. If you want to use other files, the file names must start
408
with '/', './' or '../' and they must not contain white space.
411
=head2 Inserting, fetching and modifying data
413
The following examples insert some data in a table and fetch it back:
414
First all data in the string:
416
$dbh->do("INSERT INTO $table VALUES (1, "
417
. $dbh->quote("foobar") . ")");
419
Note the use of the quote method for escaping the word 'foobar'. Any
420
string must be escaped, even if it doesn't contain binary data.
422
Next an example using parameters:
424
$dbh->do("INSERT INTO $table VALUES (?, ?)", undef,
425
2, "It's a string!");
427
Note that you don't need to use the quote method here, this is done
428
automatically for you. This version is particularly well designed for
429
loops. Whenever performance is an issue, I recommend using this method.
431
You might wonder about the C<undef>. Don't wonder, just take it as it
432
is. :-) It's an attribute argument that I have never ever used and
433
will be parsed to the prepare method as a second argument.
436
To retrieve data, you can use the following:
438
my($query) = "SELECT * FROM $table WHERE id > 1 ORDER BY id";
439
my($sth) = $dbh->prepare($query);
441
while (my $row = $sth->fetchrow_hashref) {
442
print("Found result row: id = ", $row->{'id'},
443
", name = ", $row->{'name'});
447
Again, column binding works: The same example again.
449
my($query) = "SELECT * FROM $table WHERE id > 1 ORDER BY id";
450
my($sth) = $dbh->prepare($query);
453
$sth->bind_columns(undef, \$id, \$name);
454
while ($sth->fetch) {
455
print("Found result row: id = $id, name = $name\n");
459
Of course you can even use input parameters. Here's the same example
462
my($query) = "SELECT * FROM $table WHERE id = ?";
463
my($sth) = $dbh->prepare($query);
464
$sth->bind_columns(undef, \$id, \$name);
465
for (my($i) = 1; $i <= 2; $i++) {
468
print("Found result row: id = $id, name = $name\n");
473
See L<DBI(3)> for details on these methods. See L<SQL::Statement(3)> for
474
details on the WHERE clause.
476
Data rows are modified with the UPDATE statement:
478
$dbh->do("UPDATE $table SET id = 3 WHERE id = 1");
480
Likewise you use the DELETE statement for removing rows:
482
$dbh->do("DELETE FROM $table WHERE id > 1");
485
=head2 Error handling
487
In the above examples we have never cared about return codes. Of course,
488
this cannot be recommended. Instead we should have written (for example):
490
my($query) = "SELECT * FROM $table WHERE id = ?";
491
my($sth) = $dbh->prepare($query)
492
or die "prepare: " . $dbh->errstr();
493
$sth->bind_columns(undef, \$id, \$name)
494
or die "bind_columns: " . $dbh->errstr();
495
for (my($i) = 1; $i <= 2; $i++) {
497
or die "execute: " . $dbh->errstr();
499
print("Found result row: id = $id, name = $name\n");
503
or die "finish: " . $dbh->errstr();
505
Obviously this is tedious. Fortunately we have DBI's I<RaiseError>
508
$dbh->{'RaiseError'} = 1;
511
my($query) = "SELECT * FROM $table WHERE id = ?";
512
my($sth) = $dbh->prepare($query);
513
$sth->bind_columns(undef, \$id, \$name);
514
for (my($i) = 1; $i <= 2; $i++) {
517
print("Found result row: id = $id, name = $name\n");
522
if ($@) { die "SQL database error: $@"; }
524
This is not only shorter, it even works when using DBI methods within
530
The following attributes are handled by DBI itself and not by DBD::File,
531
thus they all work as expected:
536
CompatMode (Not used)
543
The following DBI attributes are handled by DBD::File:
557
Valid after C<$sth-E<gt>execute>
561
Valid after C<$sth-E<gt>prepare>
565
Valid after C<$sth-E<gt>execute>; undef for Non-Select statements.
569
Not really working. Always returns an array ref of one's, as DBD::CSV
570
doesn't verify input data. Valid after C<$sth-E<gt>execute>; undef for
571
non-Select statements.
575
These attributes and methods are not supported:
582
In addition to the DBI attributes, you can use the following dbh
589
This attribute is used for setting the directory where CSV files are
590
opened. Usually you set it in the dbh, it defaults to the current
591
directory ("."). However, it is overwritable in the statement handles.
599
=item csv_escape_char
605
The attributes I<csv_eol>, I<csv_sep_char>, I<csv_quote_char> and
606
I<csv_escape_char> are corresponding to the respective attributes of the
607
Text::CSV_XS object. You want to set these attributes if you have unusual
608
CSV files like F</etc/passwd> or MS Excel generated CSV files with a semicolon
609
as separator. Defaults are "\015\012", ';', '"' and '"', respectively.
611
The attributes are used to create an instance of the class I<csv_class>,
612
by default Text::CSV_XS. Alternatively you may pass an instance as
613
I<csv_csv>, the latter takes precedence. Note that the I<binary>
614
attribute I<must> be set to a true value in that case.
616
Additionally you may overwrite these attributes on a per-table base in
617
the I<csv_tables> attribute.
621
This hash ref is used for storing table dependent metadata. For any
622
table it contains an element with the table name as key and another
623
hash ref with the following attributes:
629
The tables file name; defaults to
631
"$dbh->{f_dir}/$table"
645
These correspond to the attributes I<csv_eol>, I<csv_sep_char>,
646
I<csv_quote_char>, I<csv_escape_char>, I<csv_class> and I<csv_csv>.
647
The difference is that they work on a per-table base.
653
By default DBD::CSV assumes that column names are stored in the first
654
row of the CSV file. If this is not the case, you can supply an array
655
ref of table names with the I<col_names> attribute. In that case the
656
attribute I<skip_first_row> will be set to FALSE.
658
If you supply an empty array ref, the driver will read the first row
659
for you, count the number of columns and create column names like
660
C<col0>, C<col1>, ...
666
Example: Suggest you want to use F</etc/passwd> as a CSV file. :-)
667
There simplest way is:
670
my $dbh = DBI->connect("DBI:CSV:f_dir=/etc;csv_eol=\n;"
671
. "csv_sep_char=:;csv_quote_char=;"
672
. "csv_escape_char=");
673
$dbh->{'csv_tables'}->{'passwd'} = {
674
'col_names' => ["login", "password", "uid", "gid", "realname",
675
"directory", "shell"]
677
$sth = $dbh->prepare("SELECT * FROM passwd");
679
Another possibility where you leave all the defaults as they are and
680
overwrite them on a per table base:
683
my $dbh = DBI->connect("DBI:CSV:");
684
$dbh->{'csv_tables'}->{'passwd'} = {
687
'quote_char' => undef,
688
'escape_char' => undef,
689
'file' => '/etc/passwd',
690
'col_names' => ["login", "password", "uid", "gid", "realname",
691
"directory", "shell"]
693
$sth = $dbh->prepare("SELECT * FROM passwd");
696
=head2 Driver private methods
698
These methods are inherited from DBD::File:
704
The C<data_sources> method returns a list of subdirectories of the current
705
directory in the form "DBI:CSV:directory=$dirname".
707
If you want to read the subdirectories of another directory, use
709
my($drh) = DBI->install_driver("CSV");
710
my(@list) = $drh->data_sources('f_dir' => '/usr/local/csv_data' );
714
This method returns a list of file names inside $dbh->{'directory'}.
717
my($dbh) = DBI->connect("DBI:CSV:directory=/usr/local/csv_data");
718
my(@list) = $dbh->func('list_tables');
720
Note that the list includes all files contained in the directory, even
721
those that have non-valid table names, from the view of SQL. See
722
L<Creating and dropping tables> above.
727
=head2 Data restrictions
729
When inserting and fetching data, you will sometimes be surprised: DBD::CSV
730
doesn't correctly handle data types, in particular NULLs. If you insert
731
integers, it might happen, that fetch returns a string. Of course, a string
732
containing the integer, so that's perhaps not a real problem. But the
733
following will never work:
735
$dbh->do("INSERT INTO $table (id, name) VALUES (?, ?)",
737
$sth = $dbh->prepare("SELECT * FROM $table WHERE id IS NULL");
740
$sth->bind_columns(undef, \$id, \$name);
741
while ($sth->fetch) {
742
printf("Found result row: id = %s, name = %s\n",
743
defined($id) ? $id : "NULL",
744
defined($name) ? $name : "NULL");
748
The row we have just inserted, will never be returned! The reason is
749
obvious, if you examine the CSV file: The corresponding row looks
754
In other words, not a NULL is stored, but an empty string. CSV files
755
don't have a concept of NULL values. Surprisingly the above example
756
works, if you insert a NULL value for the name! Again, you find
757
the explanation by examining the CSV file:
761
In other words, DBD::CSV has "emulated" a NULL value by writing a row
762
with less columns. Of course this works only if the rightmost column
763
is NULL, the two rightmost columns are NULL, ..., but the leftmost
764
column will never be NULL!
766
See L<Creating and dropping tables> above for table name restrictions.
771
Extensions of DBD::CSV:
775
=item CSV file scanner
777
Write a simple CSV file scanner that reads a CSV file and attempts
778
to guess sep_char, quote_char, escape_char and eol automatically.
782
These are merely restrictions of the DBD::File or SQL::Statement
787
=item Table name mapping
789
Currently it is not possible to use files with names like C<names.csv>.
790
Instead you have to use soft links or rename files. As an alternative
791
one might use, for example a dbh attribute 'table_map'. It might be a
792
hash ref, the keys being the table names and the values being the file
795
=item Column name mapping
797
Currently the module assumes that column names are stored in the first
798
row. While this is fine in most cases, there should be a possibility
799
of setting column names and column number from the programmer: For
800
example MS Access doesn't export column names by default.
811
The module is using flock() internally. However, this function is not
812
available on platforms. Using flock() is disabled on MacOS and Windows
813
95: There's no locking at all (perhaps not so important on these
814
operating systems, as they are for single users anyways).
819
=head1 AUTHOR AND COPYRIGHT
821
This module is currently maintained by
824
<jeff@vpservices.com>
826
The original author is Jochen Wiedmann.
828
Copyright (C) 1998 by Jochen Wiedmann
832
You may distribute this module under the terms of either the GNU
833
General Public License or the Artistic License, as specified in
834
the Perl README file.
838
L<DBI(3)>, L<Text::CSV_XS(3)>, L<SQL::Statement(3)>
840
For help on the use of DBD::CSV, see the DBI users mailing list:
842
http://www.isc.org/dbi-lists.html
844
For general information on DBI see
846
http://www.symbolstone.org/technology/perl/DBI