1
# $Id: hapmap.pm,v 1.8.4.1 2006/10/02 23:10:23 sendu Exp $
3
# BioPerl module for Bio::PopGen::IO::hapmap
5
# Cared for by Rich Dobson <r.j.dobson-at-qmul.ac.uk>
7
# Copyright Rich Dobson
9
# You may distribute this module under the same terms as perl itself
11
# POD documentation - main docs before the code
15
Bio::PopGen::IO::hapmap - A parser for HapMap output data
19
# Do not use directly, use through the Bio::PopGen::IO driver
22
my $io = new Bio::PopGen::IO(-format => 'hapmap',
23
-file => 'data.hapmap');
25
# Some IO might support reading in a population at a time
28
while( my $ind = $io->next_individual ) {
29
push @population, $ind;
34
A driver module for Bio::PopGen::IO for parsing hapmap data.
40
User feedback is an integral part of the evolution of this and other
41
Bioperl modules. Send your comments and suggestions preferably to
42
the Bioperl mailing list. Your participation is much appreciated.
44
bioperl-l@bioperl.org - General discussion
45
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
49
Report bugs to the Bioperl bug tracking system to help us keep track
50
of the bugs and their resolution. Bug reports can be submitted via
53
http://bugzilla.open-bio.org/
55
=head1 AUTHOR - Rich Dobson
57
Email r.j.dobson-at-qmul.ac.uk
61
Jason Stajich, jason-at-bioperl.org
65
The rest of the documentation details each of the object methods.
66
Internal methods are usually preceded with a _
71
# Let the code begin...
73
package Bio::PopGen::IO::hapmap;
74
use vars qw($FieldDelim $AlleleDelim $NoHeader $StartingCol);
77
($FieldDelim,$AlleleDelim,$NoHeader,$StartingCol) =( '\s+','',0,11);
79
use Bio::PopGen::Individual;
80
use Bio::PopGen::Population;
81
use Bio::PopGen::Genotype;
83
use base qw(Bio::PopGen::IO);
89
Usage : my $obj = new Bio::PopGen::IO::hapmap();
90
Function: Builds a new Bio::PopGen::IO::hapmap object
91
Returns : an instance of Bio::PopGen::IO::hapmap
92
Args : [optional, these are the current defaults]
93
-field_delimiter => ','
94
-allele_delimiter=> '\s+'
96
-starting_column => 11
103
my($self, @args) = @_;
105
$Bio::PopGen::Genotype::BlankAlleles='';
107
my ($fieldsep,$all_sep,
108
$noheader, $start_col) = $self->_rearrange([qw(FIELD_DELIMITER
114
$self->flag('no_header', defined $noheader ? $noheader : $NoHeader);
115
$self->flag('field_delimiter',defined $fieldsep ? $fieldsep : $FieldDelim);
116
$self->flag('allele_delimiter',defined $all_sep ? $all_sep : $AlleleDelim);
117
$self->starting_column(defined $start_col ? $start_col : $StartingCol );
119
$self->{'_header'} = undef;
127
Usage : $obj->flag($flagname,$newval)
128
Function: Get/Set the flag value
129
Returns : value of a flag (a boolean)
130
Args : A flag name, currently we expect
131
'no_header', 'field_delimiter', or 'allele_delimiter'
132
on set, new value (a boolean or undef, optional)
139
my $fieldname = shift;
140
return unless defined $fieldname;
141
return $self->{'_flag'}->{$fieldname} = shift if @_;
142
return $self->{'_flag'}->{$fieldname};
149
my (@cols,@rows,@idheader);
150
while ($_ = $self->_readline){
152
next if( /^\s*\#/ || /^\s+$/ || ! length($_) );
153
if( /^rs\#\s+alleles\s+chrom\s+pos\s+strand/ ) {
154
@idheader = split $self->flag('field_delimiter');
156
push @cols, [split $self->flag('field_delimiter')];
159
my $startingcol = $self->starting_column;
161
$self->{'_header'} = [ map { $_->[0] } @cols];
162
for my $n ($startingcol.. $#{ $cols[ 0 ]}) {
163
my $column = [ $idheader[$n],
164
map{ $_->[ $n ] } @cols ];
165
push (@rows, $column);
167
$self->{'_pivot'} = [@rows];
172
=head2 next_individual
174
Title : next_individual
175
Usage : my $ind = $popgenio->next_individual;
176
Function: Retrieve the next individual from a dataset
177
Returns : A Bio::PopGen::IndividualI object
180
See L<Bio::PopGen::IndividualI>
184
sub next_individual {
186
unless($self->{'_pivot'}){
187
#if it's the first time then pivot the table and store.
188
#Lines will now be read from the stored pivot version of the input file
192
$_ = $self->{'_pivot'}->[$self->{'_i'}++];
194
return unless defined $_;
196
# Store all the marker related info. Now that the pivot has taken
197
# place this is in the first few lines of the file Maybe this
198
# should be put in a marker object. Doesn't seem to fit too well
201
my ($samp,@marker_results) = @$_;
203
# at some point use all this info
205
foreach my $m ( @marker_results ) {
209
if( defined $self->{'_header'} ) {
210
$markername = $self->{'_header'}->[$i-1];
212
$markername = "Marker$i";
215
my @alleles = split($self->flag('allele_delimiter'), $m);
216
if( @alleles != 2 ) {
217
$self->warn("$m for $samp\n");
219
$m = new Bio::PopGen::Genotype(-alleles => \@alleles,
220
-marker_name => $markername,
221
-individual_id=> $samp);
226
return new Bio::PopGen::Individual(-unique_id => $samp,
227
-genotypes => \@marker_results);
231
=head2 next_population
233
Title : next_population
234
Usage : my $ind = $popgenio->next_population;
235
Function: Retrieve the next population from a dataset
236
Returns : Bio::PopGen::PopulationI object
238
Note : Many implementation will not implement this
240
See L<Bio::PopGen::PopulationI>
244
sub next_population {
247
while( my $ind = $self->next_individual ) {
250
Bio::PopGen::Population->new(-individuals => \@inds);
253
=head2 write_individual
255
Title : write_individual
256
Usage : $popgenio->write_individual($ind);
257
Function: Write an individual out in the file format
258
NOT SUPPORTED BY hapmap format
260
Args : Bio::PopGen::PopulationI object(s)
262
See L<Bio::PopGen::PopulationI>
266
sub write_individual {
267
my ($self,@inds) = @_;
269
# data from hapmap is output, not input, so
270
# we don't need a method for writing and input file
272
$self->throw_not_implemented();
275
=head2 write_population
277
Title : write_population
278
Usage : $popgenio->write_population($pop);
279
Function: Write a population out in the file format
280
NOT SUPPORTED BY hapmap format
282
Args : Bio::PopGen::PopulationI object(s)
283
Note : Many implementation will not implement this
285
See L<Bio::PopGen::PopulationI>
289
sub write_population {
290
my ($self,@inds) = @_;
291
$self->throw_not_implemented();
295
=head2 starting_column
297
Title : starting_column
298
Usage : $obj->starting_column($newval)
299
Function: Column where data starts
301
Returns : value of starting_column (a scalar)
302
Args : on set, new value (a scalar or undef, optional)
309
return $self->{'starting_column'} = shift if @_;
310
return $self->{'starting_column'};