~ubuntu-branches/ubuntu/maverick/libdbd-csv-perl/maverick

« back to all changes in this revision

Viewing changes to lib/DBD/CSV.pm

Committer: Bazaar Package Importer
Author(s): Gunnar Wolf
Date: 2005-03-26 12:52:04 UTC
mfrom: (2.1.1 hoary)
Revision ID: james.westby@ubuntu.com-20050326125204-sflf8kdexpuv8n2b

Tags: 0.2100-2

Added debian/watch file

files added:
debian/watch

output

output/bench

output/testaa

output/testab

output/testac

output/testad

output/testae

output/testaf

output/testag

output/testah

output/testai

output/testaj

output/testak

output/testal

output/testam

output/testan

output/testao

output/testap

output/testaq

output/testar

output/testas

output/testat

files removed:
lib/DBD/File.pm

test.pl

files modified:
ChangeLog

MANIFEST

Makefile.PL

debian/changelog

debian/control

debian/rules

lib/Bundle/DBD/CSV.pm

lib/DBD/CSV.pm

t/00base.t *

t/10dsnlist.t *

t/20createdrop.t *

t/30insertfetch.t *

t/40bindparam.t *

t/40blobs.t *

t/40listfields.t *

t/40nulls.t *

t/40numrows.t *

t/50chopblanks.t *

t/50commit.t *

t/ak-dbd.t *

t/dbdadmin.t *

t/skeleton.test *

Show diffs side-by-side

added added

removed removed

lib/DBD/CSV.pm

# -*- perl -*-

# DBD::CSV - A DBI driver for CSV and similar structured files

# This module is currently maintained by

# Jeff Zucker

# <jeff@vpservices.com>

# The original author is Jochen Wiedmann.

# You may distribute this module under the terms of either the GNU

# General Public License or the Artistic License, as specified in

# the Perl README file.

require 5.004;

use strict;

require DynaLoader;

require DBD::File;

require IO::File;

package DBD::CSV;

use vars qw(@ISA $VERSION $drh $err $errstr $sqlstate);

@ISA = qw(DBD::File);

$VERSION = '0.2002';

$err = 0; # holds error code for DBI::err

$errstr = ""; # holds error string for DBI::errstr

$sqlstate = ""; # holds error state for DBI::state

$drh = undef; # holds driver handle once initialised

package DBD::CSV::dr; # ====== DRIVER ======

use Text::CSV_XS();

use vars qw(@ISA @CSV_TYPES);

# Lookup table translating DBI SQL type codes into Text::CSV_XS column
# types (IV = integer, NV = number, PV = string).  It is declared in
# package DBD::CSV::dr, so its full name is @DBD::CSV::dr::CSV_TYPES.
# The entries are ordered by DBI type code starting at SQL_TINYINT (-6),
# i.e. the element for a given DBI type is found at index "type + 6".
@CSV_TYPES = (

Text::CSV_XS::IV(), # SQL_TINYINT       (-6)

Text::CSV_XS::IV(), # SQL_BIGINT        (-5)

Text::CSV_XS::PV(), # SQL_LONGVARBINARY (-4)

Text::CSV_XS::PV(), # SQL_VARBINARY     (-3)

Text::CSV_XS::PV(), # SQL_BINARY        (-2)

Text::CSV_XS::PV(), # SQL_LONGVARCHAR   (-1)

Text::CSV_XS::PV(), # SQL_ALL_TYPES     (0)

Text::CSV_XS::PV(), # SQL_CHAR          (1)

Text::CSV_XS::NV(), # SQL_NUMERIC       (2)

Text::CSV_XS::NV(), # SQL_DECIMAL       (3)

Text::CSV_XS::IV(), # SQL_INTEGER       (4)

Text::CSV_XS::IV(), # SQL_SMALLINT      (5)

Text::CSV_XS::NV(), # SQL_FLOAT         (6)

Text::CSV_XS::NV(), # SQL_REAL          (7)

Text::CSV_XS::NV(), # SQL_DOUBLE        (8)

);

@DBD::CSV::dr::ISA = qw(DBD::File::dr);

$DBD::CSV::dr::imp_data_size = 0;

$DBD::CSV::dr::data_sources_attr = undef;

# Driver-level connect: delegates to DBD::File::dr::connect and then makes
# sure the per-table metadata hash 'csv_tables' exists on the new handle.
#
# Arguments: $drh (driver handle), $dbname, $user, $auth, $attr; they are
#            handed to DBD::File::dr::connect unchanged.
# Returns:   whatever DBD::File::dr::connect returned.  A false result is
#            passed through untouched.
sub connect ($$;$$$) {
    my($drh, $dbname, $user, $auth, $attr) = @_;
    my $this = $drh->DBD::File::dr::connect($dbname, $user, $auth, $attr);

    # Guard against autovivification: touching $this->{...} on an undefined
    # value would turn a failed connect into a (true) unblessed hash ref.
    return $this unless $this;

    $this->{'csv_tables'} ||= {};
    return $this;
}

package DBD::CSV::db; # ====== DATABASE ======

$DBD::CSV::db::imp_data_size = 0;

@DBD::CSV::db::ISA = qw(DBD::File::db);

# Builds an SQL::Parser for this database handle and caches it on the
# handle under 'csv_sql_parser_object'.
#
# The parser flags start with dialect 'CSV' plus the handle's RaiseError
# and PrintError settings; any entries in the handle's 'csv_sql' hash ref
# override or extend them (including the dialect).
#
# Returns the newly created parser object.
sub csv_cache_sql_parser_object {
    my $dbh = shift;
    my $parser = {
        dialect    => 'CSV',
        RaiseError => $dbh->FETCH('RaiseError'),
        PrintError => $dbh->FETCH('PrintError'),
    };
    my $sql_flags = $dbh->FETCH('csv_sql') || {};
    # Later keys win, so user supplied flags take precedence.
    %$parser = (%$parser, %$sql_flags);
    $parser = SQL::Parser->new($parser->{dialect}, $parser);
    $dbh->{csv_sql_parser_object} = $parser;
    return $parser;
}

102

103

104

105

package DBD::CSV::st; # ====== STATEMENT ======

106

107

$DBD::CSV::st::imp_data_size = 0;

108

109

@DBD::CSV::st::ISA = qw(DBD::File::st);

110

111

112

package DBD::CSV::Statement;

113

114

@DBD::CSV::Statement::ISA = qw(DBD::File::Statement);

115

116

# Opens (or creates) the file behind $table and prepares it for CSV access.
#
# Arguments:
#   $self       - the statement object
#   $data       - execution data; $data->{Database} is the database handle
#   $table      - table name
#   $createMode - true when the table is being created (no header is read)
#   $lockMode   - handed to the parent class unchanged
#
# Per-table settings come from $dbh->{csv_tables}{$table}; an empty entry
# is created on first use.  Returns whatever the parent open_table
# returned; a true result is extended with csv_csv, types, col_names,
# first_row_pos and the col_nums (name => index) map.
# Dies with "Missing first row" when a row that must be skipped cannot be
# read.
sub open_table ($$$$$) {
    my($self, $data, $table, $createMode, $lockMode) = @_;
    my $dbh    = $data->{Database};
    my $tables = $dbh->{csv_tables};
    if (!exists($tables->{$table})) {
        $tables->{$table} = {};
    }
    my $meta = $tables->{$table} || {};

    # Reuse a CSV object supplied per table or per handle; otherwise build
    # one from the options and remember it in the table metadata.
    my $csv = $meta->{csv} || $dbh->{csv_csv};
    if (!$csv) {
        my $class = $meta->{class} || $dbh->{'csv_class'} ||
            'Text::CSV_XS';
        my %opts = ( 'binary' => 1 );
        $opts{'eol'} = $meta->{'eol'} || $dbh->{'csv_eol'} || "\015\012";
        # exists() instead of || so that an explicitly stored undef
        # (meaning "no such character") is honoured.
        $opts{'sep_char'} =
            exists($meta->{'sep_char'}) ? $meta->{'sep_char'} :
            exists($dbh->{'csv_sep_char'}) ? $dbh->{'csv_sep_char'} : ",";
        $opts{'quote_char'} =
            exists($meta->{'quote_char'}) ? $meta->{'quote_char'} :
            exists($dbh->{'csv_quote_char'}) ? $dbh->{'csv_quote_char'} :
            '"';
        $opts{'escape_char'} =
            exists($meta->{'escape_char'}) ? $meta->{'escape_char'} :
            exists($dbh->{'csv_escape_char'}) ? $dbh->{'csv_escape_char'} :
            '"';
        $csv = $meta->{csv} = $class->new(\%opts);
    }

    my $file = $meta->{file} || $table;
    my $tbl = $self->SUPER::open_table($data, $file, $createMode, $lockMode);
    return $tbl unless $tbl;

    $tbl->{'csv_csv'} = $csv;

    if (my $types = $meta->{types}) {
        # The 'types' array contains DBI types, but we need types
        # suitable for Text::CSV_XS.  The lookup table lives in package
        # DBD::CSV::dr and is indexed by "DBI type + 6".  A fresh list is
        # built so that the caller's metadata keeps its DBI types and can
        # be translated again the next time the table is opened.
        my $known = \@DBD::CSV::dr::CSV_TYPES;
        my @csv_types;
        foreach my $dbi_type (@{$types}) {
            my $csv_type;
            if ($dbi_type) {
                my $idx = $dbi_type + 6;
                # A negative index would silently wrap to the table's end.
                $csv_type = $known->[$idx]
                    if $idx >= 0 && $idx <= $#$known;
            }
            push(@csv_types, $csv_type || Text::CSV_XS::PV());
        }
        $tbl->{types} = \@csv_types;
    }

    if (!$createMode) {
        my($array, $skipRows);
        if (exists($meta->{skip_rows})) {
            $skipRows = $meta->{skip_rows};
        } else {
            # Without explicit column names the first row is the header.
            $skipRows = exists($meta->{col_names}) ? 0 : 1;
        }
        if ($skipRows) {
            # The last skipped row supplies the column names.
            while ($skipRows--) {
                die "Missing first row"
                    if !($array = $tbl->fetch_row($data));
            }
            $tbl->{col_names} = $array;
        }
        $tbl->{first_row_pos} = $tbl->{fh}->tell();
        if (exists($meta->{col_names})) {
            $array = $tbl->{col_names} = $meta->{col_names};
        }
        if (!$tbl->{col_names} || !@{$tbl->{col_names}}) {
            # No column names given; fetch first row and create default
            # names.  The row is cached so it is still returned as data.
            my $first_row = $tbl->fetch_row($data);
            $array = $tbl->{col_names} ||= [];
            if ($first_row) {
                $tbl->{cached_row} = $first_row;
                push(@$array, map { "col$_" } 0 .. $#$first_row);
            }
        }
        # Map every column name to its position.
        my $columns = $tbl->{col_nums} ||= {};
        my $i = 0;
        foreach my $col (@{ $array || [] }) {
            $columns->{$col} = $i++;
        }
    }
    return $tbl;
}

198

199

200

package DBD::CSV::Table;

201

202

@DBD::CSV::Table::ISA = qw(DBD::File::Table);

203

204

# Returns the next row of the table as an array ref.
#
# A row stored under 'cached_row' is handed out first (and removed from
# the object); otherwise one line is read through the CSV object in
# 'csv_csv' from the file handle in 'fh'.
#
# Returns undef at end of input and for a row without any fields; on
# every other path the result is also stored in $self->{row}.
# Dies with "Error while reading file ..." when the read fails and $! is
# set.
sub fetch_row ($$) {
    my($self, $data) = @_;
    my $fields;
    if (exists($self->{cached_row})) {
        $fields = delete($self->{cached_row});
        # A cached undef stands for "nothing could be read"; it must not
        # be dereferenced below.
        return undef unless $fields;
    } else {
        $! = 0;
        my $csv = $self->{csv_csv};
        # Read line-wise using the CSV object's own end-of-line string.
        local $/ = $csv->{'eol'};
        $fields = $csv->getline($self->{'fh'});
        if (!$fields) {
            die "Error while reading file " . $self->{'file'} . ": $!" if $!;
            return undef;
        }
    }
    return $self->{row} = (@$fields ? $fields : undef);
}

221

222

# Writes one row to the table's file through the CSV object.
#
# Arguments: $self (table object; uses csv_csv, fh and file), $data
#            (not used here), $fields (array ref of column values).
# Trailing undefined values are removed from @$fields in place before
# writing, so the caller's array is shortened as a side effect.
# Returns 1 on success; dies with "Error while writing file ..." when the
# CSV object's print method reports failure.
sub push_row ($$$) {
    my($self, $data, $fields) = @_;
    my($csv) = $self->{csv_csv};
    my($fh) = $self->{'fh'};

    # Remove undef from the right end of the fields, so that at least
    # in these cases undef is returned from FetchRow
    pop @$fields while @$fields && !defined($fields->[-1]);

    if (!$csv->print($fh, $fields)) {
        die "Error while writing file " . $self->{'file'} . ": $!";
    }
    # Explicit true value: otherwise the sub would return the false value
    # of the "if" test above although the write succeeded.
    return 1;
}
# Column names are written exactly like a data row.
*push_names = \&push_row;

239

240

241

242

243

244

__END__

245

246

=head1 NAME

247

248

DBD::CSV - DBI driver for CSV files

249

250

=head1 SYNOPSIS

251

252

use DBI;

253

$dbh = DBI->connect("DBI:CSV:f_dir=/home/joe/csvdb")

254

or die "Cannot connect: " . $DBI::errstr;

255

$sth = $dbh->prepare("CREATE TABLE a (id INTEGER, name CHAR(10))")

256

or die "Cannot prepare: " . $dbh->errstr();

257

$sth->execute() or die "Cannot execute: " . $sth->errstr();

258

$sth->finish();

259

$dbh->disconnect();

260

261

262

# Read a CSV file with ";" as the separator, as exported by

263

# MS Excel. Note we need to escape the ";", otherwise it

264

# would be treated as an attribute separator.

265

$dbh = DBI->connect(qq{DBI:CSV:csv_sep_char=\\;});

266

$sth = $dbh->prepare("SELECT * FROM info");

267

268

# Same example, this time reading "info.csv" as a table:

269

$dbh = DBI->connect(qq{DBI:CSV:csv_sep_char=\\;});

270

$dbh->{'csv_tables'}->{'info'} = { 'file' => 'info.csv'};

271

$sth = $dbh->prepare("SELECT * FROM info");

272

273

274

=head1 WARNING

275

276

THIS IS ALPHA SOFTWARE. It is *only* 'Alpha' because the interface (API)

277

is not finalized. The Alpha status does not reflect code quality or

278

stability.

279

280

281

=head1 DESCRIPTION

282

283

The DBD::CSV module is yet another driver for the DBI (Database independent

284

interface for Perl). This one is based on the SQL "engine" SQL::Statement

285

and the abstract DBI driver DBD::File and implements access to

286

so-called CSV files (Comma separated values). Such files are mostly used for

287

exporting MS Access and MS Excel data.

288

289

See L<DBI(3)> for details on DBI, L<SQL::Statement(3)> for details on

290

SQL::Statement and L<DBD::File(3)> for details on the base class

291

DBD::File.

292

293

294

=head2 Prerequisites

295

296

The only system dependent feature that DBD::File uses, is the C<flock()>

297

function. Thus the module should run (in theory) on any system with

298

a working C<flock()>, in particular on all Unix machines and on Windows

299

NT. Under Windows 95 and MacOS the use of C<flock()> is disabled, thus

300

the module should still be usable.

301

302

Unlike other DBI drivers, you don't need an external SQL engine

303

or a running server. All you need are the following Perl modules,

304

available from any CPAN mirror, for example

305

306

ftp://ftp.funet.fi/pub/languages/perl/CPAN/modules/by-module

307

308

=over 4

309

310

=item DBI

311

312

the DBI (Database independent interface for Perl), version 1.00 or

313

a later release

314

315

=item SQL::Statement

316

317

a simple SQL engine

318

319

=item Text::CSV_XS

320

321

this module is used for writing rows to or reading rows from CSV files.

322

323

=back

324

325

326

=head2 Installation

327

328

Installing this module (and the prerequisites from above) is quite simple.

329

You just fetch the archive, extract it with

330

331

gzip -cd DBD-CSV-0.1000.tar.gz | tar xf -

332

333

(this is for Unix users, Windows users would prefer WinZip or something

334

similar) and then enter the following:

335

336

cd DBD-CSV-0.1000

337

perl Makefile.PL

338

make

339

make test

340

341

If any tests fail, let me know. Otherwise go on with

342

343

make install

344

345

Note that you almost definitely need root or administrator permissions.

346

If you don't have them, read the ExtUtils::MakeMaker man page for details

347

on installing in your own directories. L<ExtUtils::MakeMaker>.

348

349

=head2 Supported SQL syntax

350

351

The level of SQL support available depends on the version of

352

SQL::Statement installed. Any version will support *basic*

353

CREATE, INSERT, DELETE, UPDATE, and SELECT statements. Only

354

versions of SQL::Statement 1.0 and above support additional

355

features such as table joins, string functions, etc. See the

356

documentation of the latest version of SQL::Statement for details.

357

358

=head2 Creating a database handle

359

360

Creating a database handle usually implies connecting to a database server.

361

Thus this command reads

362

363

use DBI;

364

my $dbh = DBI->connect("DBI:CSV:f_dir=$dir");

365

366

The directory tells the driver where it should create or open tables

367

(a.k.a. files). It defaults to the current directory, thus the following

368

are equivalent:

369

370

$dbh = DBI->connect("DBI:CSV:");

371

$dbh = DBI->connect("DBI:CSV:f_dir=.");

372

373

(I was told, that VMS requires

374

375

$dbh = DBI->connect("DBI:CSV:f_dir=");

376

377

for whatever reasons.)

378

379

You may set other attributes in the DSN string, separated by semicolons.

380

381

382

=head2 Creating and dropping tables

383

384

You can create and drop tables with commands like the following:

385

386

$dbh->do("CREATE TABLE $table (id INTEGER, name CHAR(64))");

387

$dbh->do("DROP TABLE $table");

388

389

Note that currently only the column names will be stored and no other data.

390

Thus all other information including column type (INTEGER or CHAR(x), for

391

example), column attributes (NOT NULL, PRIMARY KEY, ...) will silently be

392

discarded. This may change in a later release.

393

394

A drop just removes the file without any warning.

395

396

See L<DBI(3)> for more details.

397

398

Table names cannot be arbitrary, due to restrictions of the SQL syntax.

399

I recommend that table names are valid SQL identifiers: The first

400

character is alphabetic, followed by an arbitrary number of alphanumeric

401

characters. If you want to use other files, the file names must start

402

with '/', './' or '../' and they must not contain white space.

403

404

405

=head2 Inserting, fetching and modifying data

406

407

The following examples insert some data in a table and fetch it back:

408

First all data in the string:

409

410

$dbh->do("INSERT INTO $table VALUES (1, "

411

. $dbh->quote("foobar") . ")");

412

413

Note the use of the quote method for escaping the word 'foobar'. Any

414

string must be escaped, even if it doesn't contain binary data.

415

416

Next an example using parameters:

417

418

$dbh->do("INSERT INTO $table VALUES (?, ?)", undef,

419

2, "It's a string!");

420

421

Note that you don't need to use the quote method here, this is done

422

automatically for you. This version is particularly well designed for

423

loops. Whenever performance is an issue, I recommend using this method.

424

425

You might wonder about the C<undef>. Don't wonder, just take it as it

426

is. :-) It's an attribute argument that I have never ever used and

427

will be passed to the prepare method as a second argument.

428

429

430

To retrieve data, you can use the following:

431

432

my($query) = "SELECT * FROM $table WHERE id > 1 ORDER BY id";

433

my($sth) = $dbh->prepare($query);

434

$sth->execute();

435

while (my $row = $sth->fetchrow_hashref) {

436

print("Found result row: id = ", $row->{'id'},

437

", name = ", $row->{'name'});

438

}

439

$sth->finish();

440

441

Again, column binding works: The same example again.

442

443

my($query) = "SELECT * FROM $table WHERE id > 1 ORDER BY id";

444

my($sth) = $dbh->prepare($query);

445

$sth->execute();

446

my($id, $name);

447

$sth->bind_columns(undef, \$id, \$name);

448

while ($sth->fetch) {

449

print("Found result row: id = $id, name = $name\n");

450

}

451

$sth->finish();

452

453

Of course you can even use input parameters. Here's the same example

454

for the third time:

455

456

my($query) = "SELECT * FROM $table WHERE id = ?";

457

my($sth) = $dbh->prepare($query);

458

$sth->bind_columns(undef, \$id, \$name);

459

for (my($i) = 1; $i <= 2; $i++) {

460

$sth->execute($id);

461

if ($sth->fetch) {

462

print("Found result row: id = $id, name = $name\n");

463

}

464

$sth->finish();

465

}

466

467

See L<DBI(3)> for details on these methods. See L<SQL::Statement(3)> for

468

details on the WHERE clause.

469

470

Data rows are modified with the UPDATE statement:

471

472

$dbh->do("UPDATE $table SET id = 3 WHERE id = 1");

473

474

Likewise you use the DELETE statement for removing rows:

475

476

$dbh->do("DELETE FROM $table WHERE id > 1");

477

478

479

=head2 Error handling

480

481

In the above examples we have never cared about return codes. Of course,

482

this cannot be recommended. Instead we should have written (for example):

483

484

my($query) = "SELECT * FROM $table WHERE id = ?";

485

my($sth) = $dbh->prepare($query)

486

or die "prepare: " . $dbh->errstr();

487

$sth->bind_columns(undef, \$id, \$name)

488

or die "bind_columns: " . $dbh->errstr();

489

for (my($i) = 1; $i <= 2; $i++) {

490

$sth->execute($id)

491

or die "execute: " . $dbh->errstr();

492

if ($sth->fetch) {

493

print("Found result row: id = $id, name = $name\n");

494

}

495

}

496

$sth->finish($id)

497

or die "finish: " . $dbh->errstr();

498

499

Obviously this is tedious. Fortunately we have DBI's I<RaiseError>

500

attribute:

501

502

$dbh->{'RaiseError'} = 1;

503

$@ = '';

504

eval {

505

my($query) = "SELECT * FROM $table WHERE id = ?";

506

my($sth) = $dbh->prepare($query);

507

$sth->bind_columns(undef, \$id, \$name);

508

for (my($i) = 1; $i <= 2; $i++) {

509

$sth->execute($id);

510

if ($sth->fetch) {

511

print("Found result row: id = $id, name = $name\n");

512

}

513

}

514

$sth->finish($id);

515

};

516

if ($@) { die "SQL database error: $@"; }

517

518

This is not only shorter, it even works when using DBI methods within

519

subroutines.

520

521

522

=head2 Metadata

523

524

The following attributes are handled by DBI itself and not by DBD::File,

525

thus they all work as expected:

526

527

Active

528

ActiveKids

529

CachedKids

530

CompatMode (Not used)

531

InactiveDestroy

532

Kids

533

PrintError

534

RaiseError

535

Warn (Not used)

536

537

The following DBI attributes are handled by DBD::File:

538

539

=over 4

540

541

=item AutoCommit

542

543

Always on

544

545

=item ChopBlanks

546

547

Works

548

549

=item NUM_OF_FIELDS

550

551

Valid after C<$sth-E<gt>execute>

552

553

=item NUM_OF_PARAMS

554

555

Valid after C<$sth-E<gt>prepare>

556

557

=item NAME

558

559

Valid after C<$sth-E<gt>execute>; undef for Non-Select statements.

560

561

=item NULLABLE

562

563

Not really working. Always returns an array ref of one's, as DBD::CSV

564

doesn't verify input data. Valid after C<$sth-E<gt>execute>; undef for

565

non-Select statements.

566

567

=back

568

569

These attributes and methods are not supported:

570

571

bind_param_inout

572

CursorName

573

LongReadLen

574

LongTruncOk

575

576

In addition to the DBI attributes, you can use the following dbh

577

attributes:

578

579

=over 8

580

581

=item f_dir

582

583

This attribute is used for setting the directory where CSV files are

584

opened. Usually you set it in the dbh, it defaults to the current

585

directory ("."). However, it is overwritable in the statement handles.

586

587

=item csv_eol

588

589

=item csv_sep_char

590

591

=item csv_quote_char

592

593

=item csv_escape_char

594

595

=item csv_class

596

597

=item csv_csv

598

599

The attributes I<csv_eol>, I<csv_sep_char>, I<csv_quote_char> and

600

I<csv_escape_char> are corresponding to the respective attributes of the

601

Text::CSV_XS object. You want to set these attributes if you have unusual

602

CSV files like F</etc/passwd> or MS Excel generated CSV files with a semicolon

603

as separator. Defaults are "\015\012", ',', '"' and '"', respectively.

604

605

The attributes are used to create an instance of the class I<csv_class>,

606

by default Text::CSV_XS. Alternatively you may pass an instance as

607

I<csv_csv>, the latter takes precedence. Note that the I<binary>

608

attribute I<must> be set to a true value in that case.

609

610

Additionally you may overwrite these attributes on a per-table base in

611

the I<csv_tables> attribute.

612

613

=item csv_tables

614

615

This hash ref is used for storing table dependent metadata. For any

616

table it contains an element with the table name as key and another

617

hash ref with the following attributes:

618

619

=over 12

620

621

=item file

622

623

The tables file name; defaults to

624

625

"$dbh->{f_dir}/$table"

626

627

=item eol

628

629

=item sep_char

630

631

=item quote_char

632

633

=item escape_char

634

635

=item class

636

637

=item csv

638

639

These correspond to the attributes I<csv_eol>, I<csv_sep_char>,

640

I<csv_quote_char>, I<csv_escape_char>, I<csv_class> and I<csv_csv>.

641

The difference is that they work on a per-table base.

642

643

=item col_names

644

645

=item skip_rows

646

647

By default DBD::CSV assumes that column names are stored in the first

648

row of the CSV file. If this is not the case, you can supply an array

649

ref of column names with the I<col_names> attribute.

650

651

If you supply an empty array ref, the driver will read the first row

652

for you, count the number of columns and create column names like

653

C<col0>, C<col1>, ...

654

655

In addition, if there are several rows of junk prior to the data you

656

want, you can set skip_rows to the number of rows to ignore. By

657

default, DBD::CSV assumes that column names are stored in the last row

658

skipped, but you can override this with the col_names attribute.

659

660

=back

661

662

=back

663

664

Example: Suppose you want to use F</etc/passwd> as a CSV file. :-)

665

The simplest way is:

666

667

require DBI;

668

my $dbh = DBI->connect("DBI:CSV:f_dir=/etc;csv_eol=\n;"

669

. "csv_sep_char=:;csv_quote_char=;"

670

. "csv_escape_char=");

671

$dbh->{'csv_tables'}->{'passwd'} = {

672

'col_names' => ["login", "password", "uid", "gid", "realname",

673

"directory", "shell"]

674

};

675

$sth = $dbh->prepare("SELECT * FROM passwd");

676

677

Another possibility where you leave all the defaults as they are and

678

overwrite them on a per table base:

679

680

require DBI;

681

my $dbh = DBI->connect("DBI:CSV:");

682

$dbh->{'csv_tables'}->{'passwd'} = {

683

'eol' => "\n",

684

'sep_char' => ":",

685

'quote_char' => undef,

686

'escape_char' => undef,

687

'file' => '/etc/passwd',

688

'col_names' => ["login", "password", "uid", "gid", "realname",

689

"directory", "shell"]

690

};

691

$sth = $dbh->prepare("SELECT * FROM passwd");

692

693

694

=head2 Driver private methods

695

696

These methods are inherited from DBD::File:

697

698

=over 4

699

700

=item data_sources

701

702

The C<data_sources> method returns a list of subdirectories of the current

703

directory in the form "DBI:CSV:directory=$dirname".

704

705

If you want to read the subdirectories of another directory, use

706

707

my($drh) = DBI->install_driver("CSV");

708

my(@list) = $drh->data_sources('f_dir' => '/usr/local/csv_data' );

709

710

=item list_tables

711

712

This method returns a list of file names inside $dbh->{'directory'}.

713

Example:

714

715

my($dbh) = DBI->connect("DBI:CSV:directory=/usr/local/csv_data");

716

my(@list) = $dbh->func('list_tables');

717

718

Note that the list includes all files contained in the directory, even

719

those that have non-valid table names, from the view of SQL. See

720

L<Creating and dropping tables> above.

721

722

=back

723

724

725

=head2 Data restrictions

726

727

When inserting and fetching data, you will sometimes be surprised: DBD::CSV

728

doesn't correctly handle data types, in particular NULLs. If you insert

729

integers, it might happen, that fetch returns a string. Of course, a string

730

containing the integer, so that's perhaps not a real problem. But the

731

following will never work:

732

733

$dbh->do("INSERT INTO $table (id, name) VALUES (?, ?)",

734

undef, "foo bar");

735

$sth = $dbh->prepare("SELECT * FROM $table WHERE id IS NULL");

736

$sth->execute();

737

my($id, $name);

738

$sth->bind_columns(undef, \$id, \$name);

739

while ($sth->fetch) {

740

printf("Found result row: id = %s, name = %s\n",

741

defined($id) ? $id : "NULL",

742

defined($name) ? $name : "NULL");

743

}

744

$sth->finish();

745

746

The row we have just inserted, will never be returned! The reason is

747

obvious, if you examine the CSV file: The corresponding row looks

748

749

750

"","foo bar"

751

752

In other words, not a NULL is stored, but an empty string. CSV files

753

don't have a concept of NULL values. Surprisingly the above example

754

works, if you insert a NULL value for the name! Again, you find

755

the explanation by examining the CSV file:

756

757

758

759

In other words, DBD::CSV has "emulated" a NULL value by writing a row

760

with fewer columns. Of course this works only if the rightmost column

761

is NULL, the two rightmost columns are NULL, ..., but the leftmost

762

column will never be NULL!

763

764

See L<Creating and dropping tables> above for table name restrictions.

765

766

767

=head1 TODO

768

769

Extensions of DBD::CSV:

770

771

=over 4

772

773

=item CSV file scanner

774

775

Write a simple CSV file scanner that reads a CSV file and attempts

776

to guess sep_char, quote_char, escape_char and eol automatically.

777

778

=back

779

780

These are merely restrictions of the DBD::File or SQL::Statement

781

modules:

782

783

=over 4

784

785

=item Table name mapping

786

787

Currently it is not possible to use files with names like C<names.csv>.

788

Instead you have to use soft links or rename files. As an alternative

789

one might use, for example a dbh attribute 'table_map'. It might be a

790

hash ref, the keys being the table names and the values being the file

791

names.

792

793

=item Column name mapping

794

795

Currently the module assumes that column names are stored in the first

796

row. While this is fine in most cases, there should be a possibility

797

of setting column names and column number from the programmer: For

798

example MS Access doesn't export column names by default.

799

800

=back

801

802

803

=head1 KNOWN BUGS

804

805

=over 8

806

807

=item *

808

809

The module is using flock() internally. However, this function is not

810

available on all platforms. Using flock() is disabled on MacOS and Windows

811

95: There's no locking at all (perhaps not so important on these

812

operating systems, as they are for single users anyways).

813

814

=back

815

816

817

=head1 AUTHOR AND COPYRIGHT

818

819

This module is currently maintained by

820

821

Jeff Zucker

822

<jeff@vpservices.com>

823

824

The original author is Jochen Wiedmann.

825

826

827

828

829

830

You may distribute this module under the terms of either the GNU

831

General Public License or the Artistic License, as specified in

832

the Perl README file.

833

834

=head1 SEE ALSO

835

836

L<DBI(3)>, L<Text::CSV_XS(3)>, L<SQL::Statement(3)>

837

838

For help on the use of DBD::CSV, see the DBI users mailing list:

839

840

http://www.isc.org/dbi-lists.html

841

842

For general information on DBI see

843

844

http://www.symbolstone.org/technology/perl/DBI

845

846

=cut

# -*- perl -*-

# DBD::CSV - A DBI driver for CSV and similar structured files

# This module is currently maintained by

# Jeff Zucker

# <jeff@vpservices.com>

# The original author is Jochen Wiedmann.

# You may distribute this module under the terms of either the GNU

# General Public License or the Artistic License, as specified in

# the Perl README file.

require 5.004;

use strict;

require DynaLoader;

require DBD::File;

require IO::File;

package DBD::CSV;

use vars qw(@ISA $VERSION $drh $err $errstr $sqlstate);

@ISA = qw(DBD::File);

$VERSION = '0.21'; # jumped from 0.2002 to shorten version number

$err = 0; # holds error code for DBI::err

$errstr = ""; # holds error string for DBI::errstr

$sqlstate = ""; # holds error state for DBI::state

$drh = undef; # holds driver handle once initialised

package DBD::CSV::dr; # ====== DRIVER ======

use Text::CSV_XS();

use vars qw(@ISA @CSV_TYPES);

# Lookup table translating DBI SQL type codes into Text::CSV_XS column
# types (IV = integer, NV = number, PV = string).  It is declared in
# package DBD::CSV::dr, so its full name is @DBD::CSV::dr::CSV_TYPES.
# The entries are ordered by DBI type code starting at SQL_TINYINT (-6),
# i.e. the element for a given DBI type is found at index "type + 6".
@CSV_TYPES = (

Text::CSV_XS::IV(), # SQL_TINYINT       (-6)

Text::CSV_XS::IV(), # SQL_BIGINT        (-5)

Text::CSV_XS::PV(), # SQL_LONGVARBINARY (-4)

Text::CSV_XS::PV(), # SQL_VARBINARY     (-3)

Text::CSV_XS::PV(), # SQL_BINARY        (-2)

Text::CSV_XS::PV(), # SQL_LONGVARCHAR   (-1)

Text::CSV_XS::PV(), # SQL_ALL_TYPES     (0)

Text::CSV_XS::PV(), # SQL_CHAR          (1)

Text::CSV_XS::NV(), # SQL_NUMERIC       (2)

Text::CSV_XS::NV(), # SQL_DECIMAL       (3)

Text::CSV_XS::IV(), # SQL_INTEGER       (4)

Text::CSV_XS::IV(), # SQL_SMALLINT      (5)

Text::CSV_XS::NV(), # SQL_FLOAT         (6)

Text::CSV_XS::NV(), # SQL_REAL          (7)

Text::CSV_XS::NV(), # SQL_DOUBLE        (8)

);

@DBD::CSV::dr::ISA = qw(DBD::File::dr);

$DBD::CSV::dr::imp_data_size = 0;

$DBD::CSV::dr::data_sources_attr = undef;

# Driver-level connect: delegates to DBD::File::dr::connect, makes sure
# the per-table metadata hash 'csv_tables' exists on the new handle and
# flags the handle as active.
#
# Arguments: $drh (driver handle), $dbname, $user, $auth, $attr; they are
#            handed to DBD::File::dr::connect unchanged.
# Returns:   whatever DBD::File::dr::connect returned.  A false result is
#            passed through untouched.
sub connect ($$;$$$) {
    my($drh, $dbname, $user, $auth, $attr) = @_;
    my $dbh = $drh->DBD::File::dr::connect($dbname, $user, $auth, $attr);

    # Guard against autovivification: touching $dbh->{...} on an undefined
    # value would turn a failed connect into a (true) unblessed hash ref.
    return $dbh unless $dbh;

    $dbh->{'csv_tables'} ||= {};
    $dbh->{Active} = 1;
    return $dbh;
}

package DBD::CSV::db; # ====== DATABASE ======

$DBD::CSV::db::imp_data_size = 0;

@DBD::CSV::db::ISA = qw(DBD::File::db);

# Creates the SQL::Parser used for this database handle and stores it on
# the handle as 'csv_sql_parser_object'.
#
# Parser settings: dialect 'CSV' and the handle's RaiseError / PrintError
# values, overlaid with whatever the handle's 'csv_sql' hash ref contains.
# Returns the new parser.
sub csv_cache_sql_parser_object {
    my ($dbh) = @_;

    my %settings = (
        dialect    => 'CSV',
        RaiseError => $dbh->FETCH('RaiseError'),
        PrintError => $dbh->FETCH('PrintError'),
    );

    # User supplied flags win over the defaults above.
    if (my $overrides = $dbh->FETCH('csv_sql')) {
        @settings{ keys %$overrides } = values %$overrides;
    }

    my $sql_parser = SQL::Parser->new($settings{dialect}, \%settings);
    $dbh->{csv_sql_parser_object} = $sql_parser;

    return $sql_parser;
}

# Destructor for database handles: stores a false 'Active' attribute on
# the handle and returns undef.
sub DESTROY {
    my ($dbh) = @_;
    $dbh->STORE('Active', 0);
    return undef;
}

104

# Disconnects the database handle by storing a false 'Active' attribute.
#
# Returns 1.  The value is returned explicitly so that the outcome does
# not depend on whatever STORE happens to return.
sub disconnect {
    my $dbh = shift;
    $dbh->STORE('Active', 0);
    return 1;
}

109

110

package DBD::CSV::st; # ====== STATEMENT ======

111

112

$DBD::CSV::st::imp_data_size = 0;

113

114

@DBD::CSV::st::ISA = qw(DBD::File::st);

115

116

117

package DBD::CSV::Statement;

118

119

@DBD::CSV::Statement::ISA = qw(DBD::File::Statement);

120

121

# Opens (or creates) the file behind $table and prepares it for CSV access.
#
# Arguments:
#   $self       - the statement object
#   $data       - execution data; $data->{Database} is the database handle
#   $table      - table name
#   $createMode - true when the table is being created (no header is read)
#   $lockMode   - handed to the parent class unchanged
#
# Per-table settings come from $dbh->{csv_tables}{$table}; an empty entry
# is created on first use.  Returns whatever the parent open_table
# returned; a true result is extended with csv_csv, types, col_names,
# first_row_pos and the col_nums (name => index) map.  The header is not
# read in create mode or when $self->{ignore_missing_table} is set.
# Dies with "Missing first row" when the header row cannot be read.
sub open_table ($$$$$) {
    my($self, $data, $table, $createMode, $lockMode) = @_;
    my $dbh    = $data->{Database};
    my $tables = $dbh->{csv_tables};
    if (!exists($tables->{$table})) {
        $tables->{$table} = {};
    }
    my $meta = $tables->{$table} || {};

    # Reuse a CSV object supplied per table or per handle; otherwise build
    # one from the options and remember it in the table metadata.
    my $csv = $meta->{csv} || $dbh->{csv_csv};
    if (!$csv) {
        my $class = $meta->{class} || $dbh->{'csv_class'} ||
            'Text::CSV_XS';
        my %opts = ( 'binary' => 1 );
        $opts{'eol'} = $meta->{'eol'} || $dbh->{'csv_eol'} || "\015\012";
        # exists() instead of || so that an explicitly stored undef
        # (meaning "no such character") is honoured.
        $opts{'sep_char'} =
            exists($meta->{'sep_char'}) ? $meta->{'sep_char'} :
            exists($dbh->{'csv_sep_char'}) ? $dbh->{'csv_sep_char'} : ",";
        $opts{'quote_char'} =
            exists($meta->{'quote_char'}) ? $meta->{'quote_char'} :
            exists($dbh->{'csv_quote_char'}) ? $dbh->{'csv_quote_char'} :
            '"';
        $opts{'escape_char'} =
            exists($meta->{'escape_char'}) ? $meta->{'escape_char'} :
            exists($dbh->{'csv_escape_char'}) ? $dbh->{'csv_escape_char'} :
            '"';
        $csv = $meta->{csv} = $class->new(\%opts);
    }

    my $file = $meta->{file} || $table;
    my $tbl = $self->SUPER::open_table($data, $file, $createMode, $lockMode);
    return $tbl unless $tbl;

    $tbl->{'csv_csv'} = $csv;

    if (my $types = $meta->{types}) {
        # The 'types' array contains DBI types, but we need types
        # suitable for Text::CSV_XS.  The lookup table lives in package
        # DBD::CSV::dr and is indexed by "DBI type + 6".  A fresh list is
        # built so that the caller's metadata keeps its DBI types and can
        # be translated again the next time the table is opened.
        my $known = \@DBD::CSV::dr::CSV_TYPES;
        my @csv_types;
        foreach my $dbi_type (@{$types}) {
            my $csv_type;
            if ($dbi_type) {
                my $idx = $dbi_type + 6;
                # A negative index would silently wrap to the table's end.
                $csv_type = $known->[$idx]
                    if $idx >= 0 && $idx <= $#$known;
            }
            push(@csv_types, $csv_type || Text::CSV_XS::PV());
        }
        $tbl->{types} = \@csv_types;
    }

    if (!$createMode and !$self->{ignore_missing_table}) {
        my($array, $skipRows);
        if (exists($meta->{skip_rows})) {
            $skipRows = $meta->{skip_rows};
        } else {
            # Without explicit column names the first row is the header.
            $skipRows = exists($meta->{col_names}) ? 0 : 1;
        }
        if ($skipRows--) {
            # The first row supplies the column names ...
            if (!($array = $tbl->fetch_row($data))) {
                die "Missing first row";
            }
            $tbl->{col_names} = $array;
            # ... and the remaining rows to skip are read and discarded.
            # "> 0" keeps a negative skip_rows from looping forever.
            while ($skipRows-- > 0) {
                $tbl->fetch_row($data);
            }
        }
        $tbl->{first_row_pos} = $tbl->{fh}->tell();
        if (exists($meta->{col_names})) {
            # NOTE(review): an empty col_names array ref in the metadata
            # ends up here, so no default names are generated for it --
            # confirm whether that is intended.
            $array = $tbl->{col_names} = $meta->{col_names};
        } elsif (!$tbl->{col_names} || !@{$tbl->{col_names}}) {
            # No column names given; fetch first row and create default
            # names.  The row is cached so it is still returned as data.
            my $first_row = $tbl->fetch_row($data);
            $array = $tbl->{col_names} ||= [];
            if ($first_row) {
                $tbl->{cached_row} = $first_row;
                push(@$array, map { "col$_" } 0 .. $#$first_row);
            }
        }
        # Map every column name to its position.
        my $columns = $tbl->{col_nums} ||= {};
        my $i = 0;
        foreach my $col (@{ $array || [] }) {
            $columns->{$col} = $i++;
        }
    }
    return $tbl;
}

204

205

206

package DBD::CSV::Table;

207

208

# Table objects of this driver inherit from the DBD::File table class.
@DBD::CSV::Table::ISA = qw(DBD::File::Table);

209

210

# Fetch the next row of the table.
#
# Arguments:
#   $self - table object; reads the keys cached_row, csv_csv, fh and file
#           and stores the fetched row in $self->{row}
#   $data - supplied by the caller; not used by this method
#
# Returns the array ref of fields.  Returns undef when getline() delivers
# no row, when the row has no fields, or when the cached row is undefined.
# Dies with "Error while reading file ..." if getline() delivers no row
# and $! is set.
sub fetch_row ($$) {
    my($self, $data) = @_;
    my $fields;
    if (exists($self->{cached_row})) {
        # A row that was read ahead (open_table does this to create default
        # column names) is handed out first, and only once.
        $fields = delete($self->{cached_row});
    } else {
        # Reset errno so that the check below only sees an error that was
        # raised after this point.
        $! = 0;
        my $csv = $self->{csv_csv};
        # Use the CSV object's line ending as input record separator for
        # the duration of this read.
        local $/ = $csv->{'eol'};
        $fields = $csv->getline($self->{'fh'});
        if (!$fields) {
            die "Error while reading file " . $self->{'file'} . ": $!" if $!;
            # Explicit undef is kept on purpose: callers receive exactly
            # one (undefined) value in every context.
            return undef;
        }
    }
    # The cached row may be undef; dereferencing it under "use strict"
    # would die, so it is treated like a row without fields.
    $self->{row} = (($fields && @$fields) ? $fields : undef);
}

227

228

# Write one row to the table's file.
#
# Arguments:
#   $self   - table object; reads the keys csv_csv, fh and file
#   $data   - supplied by the caller; not used by this method
#   $fields - array ref with the column values of the row
#
# Undefined values at the right end of the row are not written.  The
# caller's array is not modified.  Dies with "Error while writing file ..."
# if the CSV object's print() returns a false value.
sub push_row ($$$) {
    my($self, $data, $fields) = @_;
    my $csv = $self->{csv_csv};
    my $fh  = $self->{'fh'};

    # Remove undef from the right end of the fields, so that at least
    # in these cases undef is returned from FetchRow.  Only the copy that
    # gets written is shortened, never the array owned by the caller.
    my $last = $#$fields;
    $last-- while $last >= 0 && !defined($fields->[$last]);
    my @row = @{$fields}[0 .. $last];

    if (!$csv->print($fh, \@row)) {
        die "Error while writing file " . $self->{'file'} . ": $!";
    }
}

# Column names are written exactly like a data row.
*push_names = \&push_row;

245

246

247

248

249

250

__END__

251

252

=head1 NAME

253

254

DBD::CSV - DBI driver for CSV files

255

256

=head1 SYNOPSIS

257

258

use DBI;

259

$dbh = DBI->connect("DBI:CSV:f_dir=/home/joe/csvdb")

260

or die "Cannot connect: " . $DBI::errstr;

261

$sth = $dbh->prepare("CREATE TABLE a (id INTEGER, name CHAR(10))")

262

or die "Cannot prepare: " . $dbh->errstr();

263

$sth->execute() or die "Cannot execute: " . $sth->errstr();

264

$sth->finish();

265

$dbh->disconnect();

266

267

268

# Read a CSV file with ";" as the separator, as exported by

269

# MS Excel. Note we need to escape the ";", otherwise it

270

# would be treated as an attribute separator.

271

$dbh = DBI->connect(qq{DBI:CSV:csv_sep_char=\\;});

272

$sth = $dbh->prepare("SELECT * FROM info");

273

274

# Same example, this time reading "info.csv" as a table:

275

$dbh = DBI->connect(qq{DBI:CSV:csv_sep_char=\\;});

276

$dbh->{'csv_tables'}->{'info'} = { 'file' => 'info.csv'};

277

$sth = $dbh->prepare("SELECT * FROM info");

278

279

280

=head1 WARNING

281

282

THIS IS ALPHA SOFTWARE. It is *only* 'Alpha' because the interface (API)

283

is not finalized. The Alpha status does not reflect code quality or

284

stability.

285

286

287

=head1 DESCRIPTION

288

289

The DBD::CSV module is yet another driver for the DBI (Database independent

290

interface for Perl). This one is based on the SQL "engine" SQL::Statement

291

and the abstract DBI driver DBD::File and implements access to

292

so-called CSV files (Comma separated values). Such files are mostly used for

293

exporting MS Access and MS Excel data.

294

295

See L<DBI(3)> for details on DBI, L<SQL::Statement(3)> for details on

296

SQL::Statement and L<DBD::File(3)> for details on the base class

297

DBD::File.

298

299

300

=head2 Prerequisites

301

302

The only system dependent feature that DBD::File uses, is the C<flock()>

303

function. Thus the module should run (in theory) on any system with

304

a working C<flock()>, in particular on all Unix machines and on Windows

305

NT. Under Windows 95 and MacOS the use of C<flock()> is disabled, thus

306

the module should still be usable.

307

308

Unlike other DBI drivers, you don't need an external SQL engine

309

or a running server. All you need are the following Perl modules,

310

available from any CPAN mirror, for example

311

312

ftp://ftp.funet.fi/pub/languages/perl/CPAN/modules/by-module

313

314

=over 4

315

316

=item DBI

317

318

the DBI (Database independent interface for Perl), version 1.00 or

319

a later release

320

321

=item SQL::Statement

322

323

a simple SQL engine

324

325

=item Text::CSV_XS

326

327

this module is used for writing rows to or reading rows from CSV files.

328

329

=back

330

331

332

=head2 Installation

333

334

Installing this module (and the prerequisites from above) is quite simple.

335

You just fetch the archive, extract it with

336

337

gzip -cd DBD-CSV-0.1000.tar.gz | tar xf -

338

339

(this is for Unix users, Windows users would prefer WinZip or something

340

similar) and then enter the following:

341

342

cd DBD-CSV-0.1000

343

perl Makefile.PL

344

make

345

make test

346

347

If any tests fail, let me know. Otherwise go on with

348

349

make install

350

351

Note that you almost definitely need root or administrator permissions.

352

If you don't have them, read the ExtUtils::MakeMaker man page for details

353

on installing in your own directories. L<ExtUtils::MakeMaker>.

354

355

=head2 Supported SQL

356

357

The level of SQL support available depends on the version of

358

SQL::Statement installed. Any version will support *basic*

359

CREATE, INSERT, DELETE, UPDATE, and SELECT statements. Only

360

versions of SQL::Statement 1.0 and above support additional

361

features such as table joins, string functions, etc. See the

362

documentation of the latest version of SQL::Statement for details.

363

364

=head2 Creating a database handle

365

366

Creating a database handle usually implies connecting to a database server.

367

Thus this command reads

368

369

use DBI;

370

my $dbh = DBI->connect("DBI:CSV:f_dir=$dir");

371

372

The directory tells the driver where it should create or open tables

373

(a.k.a. files). It defaults to the current directory, thus the following

374

are equivalent:

375

376

$dbh = DBI->connect("DBI:CSV:");

377

$dbh = DBI->connect("DBI:CSV:f_dir=.");

378

379

(I was told, that VMS requires

380

381

$dbh = DBI->connect("DBI:CSV:f_dir=");

382

383

for whatever reasons.)

384

385

You may set other attributes in the DSN string, separated by semicolons.

386

387

388

=head2 Creating and dropping tables

389

390

You can create and drop tables with commands like the following:

391

392

$dbh->do("CREATE TABLE $table (id INTEGER, name CHAR(64))");

393

$dbh->do("DROP TABLE $table");

394

395

Note that currently only the column names will be stored and no other data.

396

Thus all other information including column type (INTEGER or CHAR(x), for

397

example), column attributes (NOT NULL, PRIMARY KEY, ...) will silently be

398

discarded. This may change in a later release.

399

400

A drop just removes the file without any warning.

401

402

See L<DBI(3)> for more details.

403

404

Table names cannot be arbitrary, due to restrictions of the SQL syntax.

405

I recommend that table names are valid SQL identifiers: The first

406

character is alphabetic, followed by an arbitrary number of alphanumeric

407

characters. If you want to use other files, the file names must start

408

with '/', './' or '../' and they must not contain white space.

409

410

411

=head2 Inserting, fetching and modifying data

412

413

The following examples insert some data in a table and fetch it back:

414

First all data in the string:

415

416

$dbh->do("INSERT INTO $table VALUES (1, "

417

. $dbh->quote("foobar") . ")");

418

419

Note the use of the quote method for escaping the word 'foobar'. Any

420

string must be escaped, even if it doesn't contain binary data.

421

422

Next an example using parameters:

423

424

$dbh->do("INSERT INTO $table VALUES (?, ?)", undef,

425

2, "It's a string!");

426

427

Note that you don't need to use the quote method here, this is done

428

automatically for you. This version is particularly well designed for

429

loops. Whenever performance is an issue, I recommend using this method.

430

431

You might wonder about the C<undef>. Don't wonder, just take it as it

432

is. :-) It's an attribute argument that I have never ever used and

433

will be passed to the prepare method as a second argument.

434

435

436

To retrieve data, you can use the following:

437

438

my($query) = "SELECT * FROM $table WHERE id > 1 ORDER BY id";

439

my($sth) = $dbh->prepare($query);

440

$sth->execute();

441

while (my $row = $sth->fetchrow_hashref) {

442

print("Found result row: id = ", $row->{'id'},

443

", name = ", $row->{'name'});

444

}

445

$sth->finish();

446

447

Again, column binding works: The same example again.

448

449

my($query) = "SELECT * FROM $table WHERE id > 1 ORDER BY id";

450

my($sth) = $dbh->prepare($query);

451

$sth->execute();

452

my($id, $name);

453

$sth->bind_columns(undef, \$id, \$name);

454

while ($sth->fetch) {

455

print("Found result row: id = $id, name = $name\n");

456

}

457

$sth->finish();

458

459

Of course you can even use input parameters. Here's the same example

460

for the third time:

461

462

my($query) = "SELECT * FROM $table WHERE id = ?";

463

my($sth) = $dbh->prepare($query);

464

$sth->bind_columns(undef, \$id, \$name);

465

for (my($i) = 1; $i <= 2; $i++) {

466

$sth->execute($id);

467

if ($sth->fetch) {

468

print("Found result row: id = $id, name = $name\n");

469

}

470

$sth->finish();

471

}

472

473

See L<DBI(3)> for details on these methods. See L<SQL::Statement(3)> for

474

details on the WHERE clause.

475

476

Data rows are modified with the UPDATE statement:

477

478

$dbh->do("UPDATE $table SET id = 3 WHERE id = 1");

479

480

Likewise you use the DELETE statement for removing rows:

481

482

$dbh->do("DELETE FROM $table WHERE id > 1");

483

484

485

=head2 Error handling

486

487

In the above examples we have never cared about return codes. Of course,

488

this cannot be recommended. Instead we should have written (for example):

489

490

my($query) = "SELECT * FROM $table WHERE id = ?";

491

my($sth) = $dbh->prepare($query)

492

or die "prepare: " . $dbh->errstr();

493

$sth->bind_columns(undef, \$id, \$name)

494

or die "bind_columns: " . $dbh->errstr();

495

for (my($i) = 1; $i <= 2; $i++) {

496

$sth->execute($id)

497

or die "execute: " . $dbh->errstr();

498

if ($sth->fetch) {

499

print("Found result row: id = $id, name = $name\n");

500

}

501

}

502

$sth->finish($id)

503

or die "finish: " . $dbh->errstr();

504

505

Obviously this is tedious. Fortunately we have DBI's I<RaiseError>

506

attribute:

507

508

$dbh->{'RaiseError'} = 1;

509

$@ = '';

510

eval {

511

my($query) = "SELECT * FROM $table WHERE id = ?";

512

my($sth) = $dbh->prepare($query);

513

$sth->bind_columns(undef, \$id, \$name);

514

for (my($i) = 1; $i <= 2; $i++) {

515

$sth->execute($id);

516

if ($sth->fetch) {

517

print("Found result row: id = $id, name = $name\n");

518

}

519

}

520

$sth->finish($id);

521

};

522

if ($@) { die "SQL database error: $@"; }

523

524

This is not only shorter, it even works when using DBI methods within

525

subroutines.

526

527

528

=head2 Metadata

529

530

The following attributes are handled by DBI itself and not by DBD::File,

531

thus they all work as expected:

532

533

Active

534

ActiveKids

535

CachedKids

536

CompatMode (Not used)

537

InactiveDestroy

538

Kids

539

PrintError

540

RaiseError

541

Warn (Not used)

542

543

The following DBI attributes are handled by DBD::File:

544

545

=over 4

546

547

=item AutoCommit

548

549

Always on

550

551

=item ChopBlanks

552

553

Works

554

555

=item NUM_OF_FIELDS

556

557

Valid after C<$sth-E<gt>execute>

558

559

=item NUM_OF_PARAMS

560

561

Valid after C<$sth-E<gt>prepare>

562

563

=item NAME

564

565

Valid after C<$sth-E<gt>execute>; undef for Non-Select statements.

566

567

=item NULLABLE

568

569

Not really working. Always returns an array ref of ones, as DBD::CSV

570

doesn't verify input data. Valid after C<$sth-E<gt>execute>; undef for

571

non-Select statements.

572

573

=back

574

575

These attributes and methods are not supported:

576

577

bind_param_inout

578

CursorName

579

LongReadLen

580

LongTruncOk

581

582

In addition to the DBI attributes, you can use the following dbh

583

attributes:

584

585

=over 8

586

587

=item f_dir

588

589

This attribute is used for setting the directory where CSV files are

590

opened. Usually you set it in the dbh, it defaults to the current

591

directory ("."). However, it is overwritable in the statement handles.

592

593

=item csv_eol

594

595

=item csv_sep_char

596

597

=item csv_quote_char

598

599

=item csv_escape_char

600

601

=item csv_class

602

603

=item csv_csv

604

605

The attributes I<csv_eol>, I<csv_sep_char>, I<csv_quote_char> and

606

I<csv_escape_char> are corresponding to the respective attributes of the

607

Text::CSV_XS object. You want to set these attributes if you have unusual

608

CSV files like F</etc/passwd> or MS Excel generated CSV files with a semicolon

609

as separator. Defaults are "\015\012", ';', '"' and '"', respectively.

610

611

The attributes are used to create an instance of the class I<csv_class>,

612

by default Text::CSV_XS. Alternatively you may pass an instance as

613

I<csv_csv>, the latter takes precedence. Note that the I<binary>

614

attribute I<must> be set to a true value in that case.

615

616

Additionally you may overwrite these attributes on a per-table base in

617

the I<csv_tables> attribute.

618

619

=item csv_tables

620

621

This hash ref is used for storing table dependent metadata. For any

622

table it contains an element with the table name as key and another

623

hash ref with the following attributes:

624

625

=over 12

626

627

=item file

628

629

The tables file name; defaults to

630

631

"$dbh->{f_dir}/$table"

632

633

=item eol

634

635

=item sep_char

636

637

=item quote_char

638

639

=item escape_char

640

641

=item class

642

643

=item csv

644

645

These correspond to the attributes I<csv_eol>, I<csv_sep_char>,

646

I<csv_quote_char>, I<csv_escape_char>, I<csv_class> and I<csv_csv>.

647

The difference is that they work on a per-table base.

648

649

=item col_names

650

651

=item skip_rows

By default DBD::CSV assumes that column names are stored in the first
row of the CSV file. If this is not the case, you can supply an array
ref of column names with the I<col_names> attribute. In that case the
attribute I<skip_rows> defaults to 0 (no row is skipped); otherwise it
defaults to 1.

657

658

If you supply an empty array ref, the driver will read the first row

659

for you, count the number of columns and create column names like

660

C<col0>, C<col1>, ...

661

662

=back

663

664

=back

665

666

Example: Suppose you want to use F</etc/passwd> as a CSV file. :-)
The simplest way is:

668

669

require DBI;

670

my $dbh = DBI->connect("DBI:CSV:f_dir=/etc;csv_eol=\n;"

671

. "csv_sep_char=:;csv_quote_char=;"

672

. "csv_escape_char=");

673

$dbh->{'csv_tables'}->{'passwd'} = {

674

'col_names' => ["login", "password", "uid", "gid", "realname",

675

"directory", "shell"]

676

};

677

$sth = $dbh->prepare("SELECT * FROM passwd");

678

679

Another possibility where you leave all the defaults as they are and

680

overwrite them on a per table base:

681

682

require DBI;

683

my $dbh = DBI->connect("DBI:CSV:");

684

$dbh->{'csv_tables'}->{'passwd'} = {

685

'eol' => "\n",

686

'sep_char' => ":",

687

'quote_char' => undef,

688

'escape_char' => undef,

689

'file' => '/etc/passwd',

690

'col_names' => ["login", "password", "uid", "gid", "realname",

691

"directory", "shell"]

692

};

693

$sth = $dbh->prepare("SELECT * FROM passwd");

694

695

696

=head2 Driver private methods

697

698

These methods are inherited from DBD::File:

699

700

=over 4

701

702

=item data_sources

703

704

The C<data_sources> method returns a list of subdirectories of the current

705

directory in the form "DBI:CSV:directory=$dirname".

706

707

If you want to read the subdirectories of another directory, use

708

709

my($drh) = DBI->install_driver("CSV");

710

my(@list) = $drh->data_sources('f_dir' => '/usr/local/csv_data' );

711

712

=item list_tables

713

714

This method returns a list of file names inside $dbh->{'directory'}.

715

Example:

716

717

my($dbh) = DBI->connect("DBI:CSV:directory=/usr/local/csv_data");

718

my(@list) = $dbh->func('list_tables');

719

720

Note that the list includes all files contained in the directory, even

721

those that have non-valid table names, from the view of SQL. See

722

L<Creating and dropping tables> above.

723

724

=back

725

726

727

=head2 Data restrictions

728

729

When inserting and fetching data, you will sometimes be surprised: DBD::CSV

730

doesn't correctly handle data types, in particular NULLs. If you insert

731

integers, it might happen, that fetch returns a string. Of course, a string

732

containing the integer, so that's perhaps not a real problem. But the

733

following will never work:

734

735

$dbh->do("INSERT INTO $table (id, name) VALUES (?, ?)",

736

undef, "foo bar");

737

$sth = $dbh->prepare("SELECT * FROM $table WHERE id IS NULL");

738

$sth->execute();

739

my($id, $name);

740

$sth->bind_columns(undef, \$id, \$name);

741

while ($sth->fetch) {

742

printf("Found result row: id = %s, name = %s\n",

743

defined($id) ? $id : "NULL",

744

defined($name) ? $name : "NULL");

745

}

746

$sth->finish();

747

748

The row we have just inserted, will never be returned! The reason is

749

obvious, if you examine the CSV file: The corresponding row looks

750

751

752

"","foo bar"

753

754

In other words, not a NULL is stored, but an empty string. CSV files

755

don't have a concept of NULL values. Surprisingly the above example

756

works, if you insert a NULL value for the name! Again, you find

757

the explanation by examining the CSV file:

758

759

760

761

In other words, DBD::CSV has "emulated" a NULL value by writing a row

762

with fewer columns. Of course this works only if the rightmost column

763

is NULL, the two rightmost columns are NULL, ..., but the leftmost

764

column will never be NULL!

765

766

See L<Creating and dropping tables> above for table name restrictions.

767

768

769

=head1 TODO

770

771

Extensions of DBD::CSV:

772

773

=over 4

774

775

=item CSV file scanner

776

777

Write a simple CSV file scanner that reads a CSV file and attempts

778

to guess sep_char, quote_char, escape_char and eol automatically.

779

780

=back

781

782

These are merely restrictions of the DBD::File or SQL::Statement

783

modules:

784

785

=over 4

786

787

=item Table name mapping

788

789

Currently it is not possible to use files with names like C<names.csv>.

790

Instead you have to use soft links or rename files. As an alternative

791

one might use, for example a dbh attribute 'table_map'. It might be a

792

hash ref, the keys being the table names and the values being the file

793

names.

794

795

=item Column name mapping

796

797

Currently the module assumes that column names are stored in the first

798

row. While this is fine in most cases, there should be a possibility

799

of setting column names and column number from the programmer: For

800

example MS Access doesn't export column names by default.

801

802

=back

803

804

805

=head1 KNOWN BUGS

806

807

=over 8

808

809

=item *

810

811

The module is using flock() internally. However, this function is not

812

available on all platforms. Using flock() is disabled on MacOS and Windows

813

95: There's no locking at all (perhaps not so important on these

814

operating systems, as they are for single users anyways).

815

816

=back

817

818

819

=head1 AUTHOR AND COPYRIGHT

820

821

This module is currently maintained by

822

823

Jeff Zucker

824

<jeff@vpservices.com>

825

826

The original author is Jochen Wiedmann.

827

828

829

830

831

832

You may distribute this module under the terms of either the GNU

833

General Public License or the Artistic License, as specified in

834

the Perl README file.

835

836

=head1 SEE ALSO

837

838

L<DBI(3)>, L<Text::CSV_XS(3)>, L<SQL::Statement(3)>

839

840

For help on the use of DBD::CSV, see the DBI users mailing list:

841

842

http://www.isc.org/dbi-lists.html

843

844

For general information on DBI see

845

846

http://www.symbolstone.org/technology/perl/DBI

847

848

=cut

Older »