~ubuntu-branches/ubuntu/saucy/bioperl/saucy-proposed

« back to all changes in this revision

Viewing changes to Bio/Assembly/IO/tigr.pm

  • Committer: Bazaar Package Importer
  • Author(s): Charles Plessy
  • Date: 2009-03-10 07:19:11 UTC
  • mfrom: (1.2.3 upstream)
  • Revision ID: james.westby@ubuntu.com-20090310071911-fukqzw54pyb1f0bd
Tags: 1.6.0-2
* Removed patch system (not used):
  - removed instuctions in debian/rules;
  - removed quilt from Build-Depends in debian/control.
* Re-enabled tests:
  - uncommented test command in debian/rules;
  - uncommented previously missing build-dependencies in debian/control.
  - Re-enabled tests and uncommented build-dependencies accordingly.
* Removed libmodule-build-perl and libtest-harness-perl from
  Build-Depends-Indep (provided by perl-modules).
* Better cleaning of empty directories using find -type d -empty -delete
  instead of rmdir in debian/rules (LP: #324001).

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# $Id: tigr.pm 14941 2008-10-21 01:44:12Z fangly $
 
2
#
 
3
# BioPerl module for Bio::Assembly::IO::tigr
 
4
#
 
5
# Copyright by Florent Angly
 
6
#
 
7
# You may distribute this module under the same terms as Perl itself
 
8
#
 
9
# POD documentation - main docs before the code
 
10
 
 
11
=head1 NAME
 
12
 
 
13
Bio::Assembly::IO::tigr - Driver to read and write assembly files in the TIGR
 
14
Assembler v2 default format.
 
15
 
 
16
=head1 SYNOPSIS
 
17
 
 
18
    # Building an input stream
 
19
    use Bio::Assembly::IO;
 
20
 
 
21
    # Assembly loading methods
 
22
    my $asmio = Bio::Assembly::IO->new( -file   => 'SGC0-424.tasm',
 
23
                                        -format => 'tigr' );
 
24
    my $scaffold = $asmio->next_assembly;
 
25
 
 
26
    # Do some things on contigs...
 
27
 
 
28
    # Assembly writing methods
 
29
    my $outasm = Bio::Assembly::IO->new( -file   => ">SGC0-modified.tasm",
 
30
                                         -format => 'tigr' );
 
31
    $outasm->write_assembly( -scaffold => $assembly,
 
32
                             -singlets => 1 );
 
33
 
 
34
=head1 DESCRIPTION
 
35
 
 
36
This package loads and writes assembly information in/from files in the default
 
37
TIGR Assembler v2 format. The files are lassie-formatted and often have the
 
38
.tasm extension. This module was written to be used as a driver module for
 
39
Bio::Assembly::IO input/output.
 
40
 
 
41
=head2 Implementation
 
42
 
 
43
Assemblies are loaded into Bio::Assembly::Scaffold objects composed of
 
44
Bio::Assembly::Contig and Bio::Assembly::Singlet objects. Since aligned reads
 
45
and contig gapped consensus can be obtained in the tasm files, only
 
46
aligned/gapped sequences are added to the different BioPerl objects.
 
47
 
 
48
Additional assembly information is stored as features. Contig objects have
 
49
SeqFeature information associated with the primary_tag:
 
50
 
 
51
    _main_contig_feature:$contig_id -> misc contig information
 
52
    _quality_clipping:$read_id      -> quality clipping position
 
53
 
 
54
Read objects have sub_seqFeature information associated with the
 
55
primary_tag:
 
56
 
 
57
    _main_read_feature:$read_id     -> misc read information
 
58
 
 
59
Singlets are considered by TIGR Assembler as contigs of one sequence and are
 
60
represented here with features having these primary_tag: 
 
61
 
 
62
    _main_contig_feature:$contig_id
 
63
    _quality_clipping:$read_primary_id
 
64
    _main_read_feature:$read_primary_id
 
65
    _aligned_coord:$read_primary_id
 
66
 
 
67
=head1 THE TIGR TASM LASSIEFORMAT
 
68
 
 
69
=head2 Description
 
70
 
 
71
In the TIGR tasm lassie format, contigs are separated by a line containing a single
 
72
pipe character "|", whereas the reads in a contig are separated by a blank line.
 
73
Singlets can be present in the file and are represented as a contig
 
74
composed of a single sequence.
 
75
 
 
76
Other than the two above-mentioned separators, each line has an attribute name,
 
77
followed a tab and then an attribute value.
 
78
 
 
79
The tasm format is used by more TIGR applications than just TIGR Assembler.
 
80
Some of the attributes are not used by TIGR Assembler or have constant values.
 
81
They are indicated by an asterisk *
 
82
 
 
83
Contigs have the following attributes:
 
84
 
 
85
    asmbl_id   -> contig ID
 
86
    sequence   -> contig ungapped consensus sequence (ambiguities are lowercase)
 
87
    lsequence  -> gapped consensus sequence (lowercase ambiguities)
 
88
    quality    -> gapped consensus quality score (in hexadecimal)
 
89
    seq_id     -> *
 
90
    com_name   -> *
 
91
    type       -> *
 
92
    method     -> always 'asmg' *
 
93
    ed_status  -> *
 
94
    redundancy -> fold coverage of the contig consensus
 
95
    perc_N     -> percent of ambiguities in the contig consensus
 
96
    seq#       -> number of sequences in the contig
 
97
    full_cds   -> *
 
98
    cds_start  -> start of coding sequence *
 
99
    cds_end    -> end of coding sequence *
 
100
    ed_pn      -> name of editor (always 'GRA') *
 
101
    ed_date    -> date and time of edition
 
102
    comment    -> some comments *
 
103
    frameshift -> *
 
104
 
 
105
Each read has the following attributes:
 
106
 
 
107
    seq_name  -> read name
 
108
    asm_lend  -> position of first base on contig ungapped consensus sequence
 
109
    asm_rend  -> position of last base on contig ungapped consensus sequence
 
110
    seq_lend  -> start of quality-trimmed sequence (aligned read coordinates)
 
111
    seq_rend  -> end of quality-trimmed sequence (aligned read coordinates)
 
112
    best      -> always '0' *
 
113
    comment   -> some comments *
 
114
    db        -> database name associated with the sequence (e.g. >my_db|seq1234)
 
115
    offset    -> offset of the sequence (gapped consensus coordinates)
 
116
    lsequence -> aligned read sequence (ambiguities are uppercase)
 
117
 
 
118
When asm_rend E<lt> asm_lend, the sequence was on the complementary DNA strand but
 
119
its reverse complement is shown in the aligned sequence of the assembly file,
 
120
not the original read.
 
121
 
 
122
Ambiguities are reflected in the contig consensus sequence as
 
123
lowercase IUPAC characters: a c g t u m r w s y k x n . In the read
 
124
sequences, however, ambiguities are uppercase: M R W S Y K X N
 
125
 
 
126
=head2 Example
 
127
 
 
128
Example of a contig containing three sequences:
 
129
 
 
130
    sequence    CGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCGCAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT
 
131
    lsequence   CGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAsCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCyGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAaGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT
 
132
    quality     0x0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0505050505050505050E0505160505050505050505050505050505050505050505050505050505050303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303030303090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0404040404040404041604040404040404040404040404040404040404040404040404040404040404040404040404040404040E0404040404040404040B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090909090B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B0B
 
133
    asmbl_id    93
 
134
    seq_id      
 
135
    com_name    
 
136
    type        
 
137
    method      asmg
 
138
    ed_status   
 
139
    redundancy  1.11
 
140
    perc_N      0.20
 
141
    seq#        3
 
142
    full_cds    
 
143
    cds_start   
 
144
    cds_end     
 
145
    ed_pn       GRA
 
146
    ed_date     08/16/07 17:10:12
 
147
    comment     
 
148
    frameshift  
 
149
 
 
150
    seq_name    SDSU_RFPERU_010_C09.x01.phd.1
 
151
    asm_lend    1
 
152
    asm_rend    4423
 
153
    seq_lend    1
 
154
    seq_rend    442
 
155
    best        0
 
156
    comment     
 
157
    db  
 
158
    offset      0
 
159
    lsequence   CGATGCTGTACGGCTGTTGCGACAGATTGCGCTGGGTCGATACCGCGTTGGTGATCGGCTTGTTCAGCGGGCTCTGGTTCGGCGACAGCGCGGCGATCTTGGCGGCTGCGAAGGTTGCCGGCGCAATCATGCGCTGCTGACCGTTGACCTGGTCCTGCCAGTACACCCAGTCGCCCACCATGACCTTCAGCGCGTAGCTGTCACAGCCGGCTGTGGTCAGCGCAGTGGCGACGGTGGTGTAGGAGGCGCCAGCAACACCTTGGGTGATCATGTAGCAGCCTTCTGACAGGCCGTAGGTCAGCATGGTCGGCCACTGGGTACCAGTCAGTCGGGTCAACCGAGATTCG-CAGCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGG
 
160
 
 
161
    seq_name    SDSU_RFPERU_002_H12.x01.phd.1
 
162
    asm_lend    339
 
163
    asm_rend    940
 
164
    seq_lend    1
 
165
    seq_rend    602
 
166
    best        0
 
167
    comment     
 
168
    db  
 
169
    offset      338
 
170
    lsequence   CGAGATTCGCCACCTGAGCGCCACTGCCGCGCAGAGCGTACATGCCCTTGCGGGTCGCGCCGGTAACACCATCCACGCCGATCAGAACTGCGTCGGTGATGGTGGTGTTACCCGAGGTGCCAGTGGTGAAGGCGACGGTCTGGGTGCTGGCCACAGGCGCCAGAGTGGTCGCGCCAACGGTGGCGATGACCAGTTGCGATGGGCCACGGATACCTGACTGCCCGTTGTTCACGGCGCTGACGATGTTCTGCCACAGCGCCAGGCCAGAGCCGGTGATGTTGTCGAACACTTCGGGCGCAACGCCAGGGAGCGAGACGGTCAGCTTCCAGCTCGAAGCAGCGGAGCCAGTAGCCAGGGCGGCGCTGAGCGAGTTGCCGAGCGTGCCGGTGTAGAACGCGGTCAGCGTGGCGCCGGTGGCGGCGGCAGTGTCCTTCAGCGCACTGGTCGCGGCGGTGTCGGTGCCGTCAGTGACGCGCACGGCGCGGATGTTCGAGGCGCCGCCCTGGATTGATACCGCCAGCGCGGTGCACAGGTCGTACTTGCGCACGGTCCGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATA-GCGTGGCGC
 
171
 
 
172
    seq_name    SDSU_RFPERU_009_E07.x01.phd.1
 
173
    asm_lend    880
 
174
    asm_rend    1520
 
175
    seq_lend    641
 
176
    seq_rend    1
 
177
    best        0
 
178
    comment     
 
179
    db  
 
180
    offset      8803
 
181
    lsequence   CGCACGGTCTGAGTGCCGAACTTCTGCGATGCGTCACCTGGCGAGCCGATAAGCGTGGCGCTGTTCACCGGCCCCCAGTCAGCAATGCCGACGATGCCGAGAATGTCAGTCGGGACGCCATTGATGTAGCGGGTCTTGGGCGCCACTATTTGTATGTACAAATCTGGCGCAGATAAAGCCGCCGTATTCAAATAACCAGCAGGATAGATAGGCATCACGCCTCCAGAATGAAAAAGGCCACCGATTAGGTGGCCTTTGTTGTGTTCGGCTGGCTGTTAGAGCAGCAGCCCGTTTTCCCGCGCAAACGCGAATGGGTCCTTGTCATGCTTCCTGCAATTGCAGGTAGGACAAAGAATTTGCAGGTTGGATTTGTCGTTCGATCCGCCCTTTGCAAGCGGGAACACGTGGTCAACGTGATACCCATCCCTTATGGATATAGTGCACATGGCGCATTTCCAGCGCTGAGCAGCCAGCAAAAATTTTATGTCGTCGCCGGTGTGTGAGCCGACAGCATTTTTCTTGCGAGCCTTGTATGTCCGCGAGAGTGAACGAACTTGCTCCTTGTTGGCTGTCTTCCAGAGCTTTTGAGTAAGCGCACAGAGATCCTTGTTTCTTGATCTCCACTCTCTGGTTGCGGAAAT
 
182
    |
 
183
 
 
184
...
 
185
 
 
186
=head1 FEEDBACK
 
187
 
 
188
=head2 Mailing Lists
 
189
 
 
190
User feedback is an integral part of the evolution of this and other
 
191
BioPerl modules. Send your comments and suggestions preferably to the
 
192
BioPerl mailing lists. Your participation is much appreciated.
 
193
 
 
194
  bioperl-l@bioperl.org                 - General discussion
 
195
  http://bio.perl.org/MailList.html     - About the mailing lists
 
196
 
 
197
=head2 Reporting Bugs
 
198
 
 
199
Report bugs to the BioPerl bug tracking system to help us keep track
 
200
the bugs and their resolution. Bug reports can be submitted via email
 
201
or the web:
 
202
 
 
203
  bioperl-bugs@bio.perl.org
 
204
  http://bugzilla.bioperl.org/
 
205
 
 
206
=head1 AUTHOR - Florent E Angly
 
207
 
 
208
Email florent dot angly at gmail dot com
 
209
 
 
210
=head1 APPENDIX
 
211
 
 
212
The rest of the documentation details each of the object
 
213
methods. Internal methods are usually preceded with a "_".
 
214
 
 
215
=cut
 
216
 
 
217
package Bio::Assembly::IO::tigr;
 
218
 
 
219
use strict;
 
220
use Bio::Seq::Quality;
 
221
use Bio::LocatableSeq;
 
222
use Bio::Assembly::IO;
 
223
use Bio::Assembly::Scaffold;
 
224
use Bio::Assembly::Contig;
 
225
use Bio::Assembly::Singlet;
 
226
 
 
227
use base qw(Bio::Assembly::IO);
 
228
 
 
229
my $progname = 'TIGR Assembler';
 
230
 
 
231
=head2 next_assembly
 
232
 
 
233
 Title   : next_assembly
 
234
 Usage   : my $scaffold = $asmio->next_assembly()
 
235
 Function: return the next assembly in the tasm-formatted stream
 
236
 Returns : Bio::Assembly::Scaffold object
 
237
 Args    : none
 
238
 
 
239
=cut
 
240
 
 
241
sub next_assembly {
 
242
    my $self = shift; # object reference
 
243
    
 
244
    # Create a new scaffold to hold the contigs
 
245
    my $scaffoldobj = Bio::Assembly::Scaffold->new(-source => $progname);
 
246
    
 
247
    # Contig and read related
 
248
    my $contigobj;
 
249
    my $iscontig = 1;
 
250
    my %contiginfo;
 
251
    my $isread = 0;
 
252
    my %readinfo;
 
253
    
 
254
    # Loop over all assembly file lines
 
255
    while ($_ = $self->_readline) {
 
256
        chomp;
 
257
        if ( /^\|/ ) {  # a line with a single pipe |
 
258
            # The end of a read from a contig, the start of a new contig
 
259
            $iscontig = 1;
 
260
            $isread   = 0;
 
261
            # Store read info
 
262
            if ($contiginfo{'seqnum'} > 1) {
 
263
                # This is a read in a contig
 
264
                my $readobj = $self->_store_read(\%readinfo, $contigobj);
 
265
            } elsif ($contiginfo{'seqnum'} == 1) {
 
266
                # This is a singlet
 
267
                my $singletobj = $self->_store_singlet(\%readinfo, \%contiginfo,
 
268
                    $scaffoldobj);
 
269
            } else {
 
270
                # That should not happen
 
271
                $self->throw("Unhandled exception");
 
272
            }
 
273
            # Clear read info
 
274
            undef %readinfo;
 
275
            # Clear contig info
 
276
            undef $contigobj;
 
277
            undef %contiginfo;
 
278
        } elsif ( /^$/ ) {  # a blank line
 
279
            if ($iscontig) {
 
280
                # The end of a contig, the start of a read in that contig
 
281
                $iscontig = 0;
 
282
                $isread   = 1;
 
283
                # Store contig info
 
284
                $contigobj = $self->_store_contig( \%contiginfo, $contigobj,
 
285
                    $scaffoldobj ) if $contiginfo{'seqnum'} > 1;
 
286
            } elsif ($isread) {
 
287
                # The end of read in a contig, the start of a new one in
 
288
                # the same contig
 
289
                $iscontig = 0;
 
290
                $isread   = 1;
 
291
                # Store read info
 
292
                if ($contiginfo{'seqnum'} > 1) {
 
293
                    # This is a read in a contig
 
294
                    my $readobj = $self->_store_read(\%readinfo, $contigobj);
 
295
                } elsif ($contiginfo{'seqnum'} == 1) {
 
296
                    # This is a singlet
 
297
                    my $singletobj = $self->_store_singlet(\%readinfo,
 
298
                        \%contiginfo, $scaffoldobj);
 
299
                } else {
 
300
                  # That should not happen
 
301
                  $self->throw("Unhandled exception");
 
302
                }
 
303
                # Clear read info
 
304
                undef %readinfo;
 
305
            } else {
 
306
                # That should not happen
 
307
                $self->throw("Unhandled exception");
 
308
            }
 
309
        } else {
 
310
            if ($iscontig) {
 
311
                # Parse contig
 
312
                if    (/^sequence\t(.*)/)     {$contiginfo{'sequence'}   = $1; next}
 
313
                elsif (/^lsequence\t(.*)/)    {$contiginfo{'lsequence'}  = $1; next}
 
314
                elsif (/^quality\t(.*)/)      {$contiginfo{'quality'}    = $1; next}
 
315
                elsif (/^asmbl_id\t(.*)/)     {$contiginfo{'asmbl_id'}   = $1; next}
 
316
                elsif (/^seq_id\t(.*)/)       {$contiginfo{'seq_id'}     = $1; next}
 
317
                elsif (/^com_name\t(.*)/)     {$contiginfo{'com_name'}   = $1; next}
 
318
                elsif (/^type\t(.*)/)         {$contiginfo{'type'}       = $1; next}
 
319
                elsif (/^method\t(.*)/)       {$contiginfo{'method'}     = $1; next}
 
320
                elsif (/^ed_status\t(.*)/)    {$contiginfo{'ed_status'}  = $1; next}
 
321
                elsif (/^redundancy\t(.*)/)   {$contiginfo{'redundancy'} = $1; next}
 
322
                elsif (/^perc_N\t(.*)/)       {$contiginfo{'perc_N'}     = $1; next}
 
323
                elsif (/^seq\#\t(.*)/)        {$contiginfo{'seqnum'}     = $1; next}
 
324
                elsif (/^full_cds\t(.*)/)     {$contiginfo{'full_cds'}   = $1; next}
 
325
                elsif (/^cds_start\t(.*)/)    {$contiginfo{'cds_start'}  = $1; next}
 
326
                elsif (/^cds_end\t(.*)/)      {$contiginfo{'cds_end'}    = $1; next}
 
327
                elsif (/^ed_pn\t(.*)/)        {$contiginfo{'ed_pn'}      = $1; next}
 
328
                elsif (/^ed_date\t(.*\s.*)/)  {$contiginfo{'ed_date'}    = $1; next}
 
329
                elsif (/^comment\t(.*)/)      {$contiginfo{'comment'}    = $1; next}
 
330
                elsif (/^frameshift\t(.*)/)   {$contiginfo{'frameshift'} = $1; next}
 
331
                else {
 
332
                    $self->throw("Format unknown at line $.:\n$_\nIs your file".
 
333
                        " really a TIGR Assembler tasm-formatted file?");
 
334
                }
 
335
            } elsif ($isread) {
 
336
                # Parse read info
 
337
                if    (/^seq_name\t(.*)/)  {$readinfo{'seq_name'}  = $1; next}
 
338
                elsif (/^asm_lend\t(.*)/)  {$readinfo{'asm_lend'}  = $1; next}
 
339
                elsif (/^asm_rend\t(.*)/)  {$readinfo{'asm_rend'}  = $1; next}
 
340
                elsif (/^seq_lend\t(.*)/)  {$readinfo{'seq_lend'}  = $1; next}
 
341
                elsif (/^seq_rend\t(.*)/)  {$readinfo{'seq_rend'}  = $1; next}
 
342
                elsif (/^best\t(.*)/)      {$readinfo{'best'}      = $1; next}
 
343
                elsif (/^comment\t(.*)/)   {$readinfo{'comment'}   = $1; next}
 
344
                elsif (/^db\t(.*)/)        {$readinfo{'db'}        = $1; next}
 
345
                elsif (/^offset\t(.*)/)    {$readinfo{'offset'}    = $1; next}
 
346
                elsif (/^lsequence\t(.*)/) {$readinfo{'lsequence'} = $1; next}
 
347
                else {
 
348
                    $self->throw("Format unknown at line $.:\n$_\nIs your file".
 
349
                        " really a TIGR Assembler tasm-formatted file?");
 
350
                }
 
351
            } else {
 
352
                # That shouldn't happen
 
353
                $self->throw("Unhandled exception");                
 
354
            }
 
355
        }
 
356
    }
 
357
    # Store read info for last read
 
358
    if (defined $contiginfo{'seqnum'}) {
 
359
        if ($contiginfo{'seqnum'} > 1) {
 
360
            # This is a read in a contig
 
361
            my $readobj = $self->_store_read(\%readinfo, $contigobj);
 
362
        } elsif ($contiginfo{'seqnum'} == 1) {
 
363
            # This is a singlet
 
364
            my $singletobj = $self->_store_singlet(\%readinfo, \%contiginfo,
 
365
                $scaffoldobj);
 
366
        } else {
 
367
            # That should not happen
 
368
            $self->throw("Unhandled exception");
 
369
        }
 
370
    }
 
371
    # Clear read info for last read
 
372
    undef %readinfo;
 
373
    # Clear contig info for last contig
 
374
    undef $contigobj;
 
375
    undef %contiginfo;
 
376
    
 
377
    $scaffoldobj->update_seq_list();
 
378
    
 
379
    return $scaffoldobj;
 
380
}
 
381
 
 
382
=head2 _qual_hex2dec
 
383
 
 
384
    Title   : _qual_hex2dec
 
385
    Usage   : my dec_quality = $self->_qual_hex2dec($hex_quality);
 
386
    Function: convert an hexadecimal quality score into a decimal quality score 
 
387
    Returns : string
 
388
    Args    : string
 
389
 
 
390
=cut
 
391
 
 
392
sub _qual_hex2dec {
 
393
    my ($self, $qual) = @_;
 
394
    $qual =~ s/^0x(.*)$/$1/;
 
395
    $qual =~ s/(..)/hex($1).' '/eg;
 
396
    return $qual;
 
397
}
 
398
 
 
399
=head2 _qual_dec2hex
 
400
 
 
401
    Title   : _qual_dec2hex
 
402
    Usage   : my hex_quality = $self->_qual_dec2hex($dec_quality);
 
403
    Function: convert a decimal quality score into an hexadecimal quality score 
 
404
    Returns : string
 
405
    Args    : string
 
406
 
 
407
=cut
 
408
 
 
409
sub _qual_dec2hex {
 
410
    my ($self, $qual) = @_;
 
411
    $qual =~ s/(\d+)\s*/sprintf('%02X', $1)/eg;
 
412
    $qual = '0x'.$qual;
 
413
    return $qual;
 
414
}
 
415
 
 
416
=head2 _store_contig
 
417
 
 
418
    Title   : _store_contig
 
419
    Usage   : my $contigobj; $contigobj = $self->_store_contig(
 
420
              \%contiginfo, $contigobj, $scaffoldobj);
 
421
    Function: store information of a contig belonging to a scaffold in the
 
422
              appropriate object
 
423
    Returns : Bio::Assembly::Contig object
 
424
    Args    : hash, Bio::Assembly::Contig, Bio::Assembly::Scaffold
 
425
 
 
426
=cut
 
427
 
 
428
sub _store_contig {
 
429
    my ($self, $contiginfo, $contigobj, $scaffoldobj) = @_;
 
430
 
 
431
    # Create a contig and attach it to scaffold
 
432
    $contigobj = Bio::Assembly::Contig->new(
 
433
        -id     => $$contiginfo{'asmbl_id'},
 
434
        -source => $progname,
 
435
        -strand => 1
 
436
    );
 
437
    $scaffoldobj->add_contig($contigobj);
 
438
 
 
439
    # Create a gapped consensus sequence and attach it to contig
 
440
    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});
 
441
    my $consensus = Bio::LocatableSeq->new(
 
442
        -id    => $$contiginfo{'asmbl_id'},
 
443
        -seq   => $$contiginfo{'lsequence'},
 
444
        -start => 1,
 
445
    );
 
446
    $contigobj->set_consensus_sequence($consensus);
 
447
 
 
448
    # Create an gapped consensus quality score and attach it to contig
 
449
    $$contiginfo{'quality'} = $self->_qual_hex2dec($$contiginfo{'quality'});
 
450
    my $qual = Bio::Seq::Quality->new(
 
451
        -id   => $$contiginfo{'asmbl_id'},
 
452
        -qual => $$contiginfo{'quality'}
 
453
    );
 
454
    $contigobj->set_consensus_quality($qual);
 
455
 
 
456
    # Add other misc contig information as features of the contig
 
457
    my $contigtags = Bio::SeqFeature::Generic->new(
 
458
        -primary_tag => "_main_contig_feature:$$contiginfo{'asmbl_id'}",
 
459
        -start       => 1,
 
460
        -end         => $contigobj->get_consensus_length(),
 
461
        -strand      => 1,
 
462
        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},
 
463
                          'com_name'   => $$contiginfo{'com_name'},
 
464
                          'type'       => $$contiginfo{'type'},
 
465
                          'method'     => $$contiginfo{'method'},
 
466
                          'ed_status'  => $$contiginfo{'ed_status'},
 
467
                          'full_cds'   => $$contiginfo{'full_cds'},
 
468
                          'cds_start'  => $$contiginfo{'cds_start'},
 
469
                          'cds_end'    => $$contiginfo{'cds_end'},
 
470
                          'ed_pn'      => $$contiginfo{'ed_pn'},
 
471
                          'ed_date'    => $$contiginfo{'ed_date'},
 
472
                          'comment'    => $$contiginfo{'comment'},
 
473
                          'frameshift' => $$contiginfo{'frameshift'} }
 
474
    );
 
475
    $contigobj->add_features([ $contigtags ], 1);
 
476
 
 
477
    return $contigobj;
 
478
}
 
479
 
 
480
=head2 _store_read
 
481
 
 
482
    Title   : _store_read
 
483
    Usage   : my $readobj = $self->_store_read(\%readinfo, $contigobj);
 
484
    Function: store information of a read belonging to a contig in the appropriate object
 
485
    Returns : Bio::LocatableSeq
 
486
    Args    : hash, Bio::Assembly::Contig
 
487
 
 
488
=cut
 
489
 
 
490
sub _store_read {
 
491
   my ($self, $readinfo, $contigobj) = @_;
 
492
 
 
493
   # Create an aligned read object
 
494
   #$$readinfo{'llength'} = length($$readinfo{'lsequence'});
 
495
   $$readinfo{'strand'}  = ($$readinfo{'seq_rend'} > $$readinfo{'seq_lend'} ? 1 : -1);
 
496
   my $readobj = Bio::LocatableSeq->new(
 
497
       # the ids of sequence objects are supposed to include the db name in it, i.e. "big_db|seq1234"
 
498
       # that's how sequence ids coming from the fasta parser are at least
 
499
       -display_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),
 
500
       -primary_id => $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'}),
 
501
       -seq        => $$readinfo{'lsequence'},      
 
502
       -start      => 1,
 
503
       -strand     => $$readinfo{'strand'},
 
504
       -alphabet   => 'dna'
 
505
   );
 
506
 
 
507
   # Add read location and sequence to contig (in 'gapped consensus' coordinates)
 
508
   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates
 
509
   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq
 
510
   my $alncoord = Bio::SeqFeature::Generic->new(
 
511
       -primary_tag => $readobj->id,
 
512
       -start       => $$readinfo{'aln_start'},
 
513
       -end         => $$readinfo{'aln_end'},
 
514
       -strand      => $$readinfo{'strand'},
 
515
       -tag         => { 'contig' => $contigobj->id() }
 
516
   );
 
517
   $contigobj->set_seq_coord($alncoord, $readobj);
 
518
 
 
519
   # Add quality clipping read information in contig features
 
520
   # (from 'aligned read' to 'gapped consensus' coordinates)
 
521
   $$readinfo{'clip_start'} = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_lend'});
 
522
   $$readinfo{'clip_end'}   = $contigobj->change_coord('aligned '.$readobj->id, 'gapped consensus', $$readinfo{'seq_rend'});
 
523
   my $clipcoord = Bio::SeqFeature::Generic->new(
 
524
       -primary_tag => '_quality_clipping:'.$readobj->id,
 
525
       -start       => $$readinfo{'clip_start'},
 
526
       -end         => $$readinfo{'clip_end'},
 
527
       -strand      => $$readinfo{'strand'}
 
528
   );
 
529
   $clipcoord->attach_seq($readobj);
 
530
   $contigobj->add_features([ $clipcoord ], 0);
 
531
   
 
532
   # Add other misc read information as subsequence feature
 
533
   my $readtags = Bio::SeqFeature::Generic->new(
 
534
       -primary_tag => '_main_read_feature:'.$readobj->id,
 
535
       -start       => $$readinfo{'aln_start'},
 
536
       -end         => $$readinfo{'aln_end'},
 
537
       -strand      => $$readinfo{'strand'},
 
538
       -tag         => { 'best'    => $$readinfo{'best'},
 
539
                         'comment' => $$readinfo{'comment'} }
 
540
   );
 
541
   $alncoord->add_sub_SeqFeature($readtags);
 
542
 
 
543
   return $readobj;
 
544
}
 
545
 
 
546
=head2 _store_singlet
 
547
 
 
548
    Title   : _store_singlet
 
549
    Usage   : my $singletobj = $self->_store_read(\%readinfo, \%contiginfo,
 
550
                  $scaffoldobj);
 
551
    Function: store information of a singlet belonging to a scaffold in the appropriate object
 
552
    Returns : Bio::Assembly::Singlet
 
553
    Args    : hash, hash, Bio::Assembly::Scaffold
 
554
 
 
555
=cut
 
556
 
 
557
sub _store_singlet {
 
558
    my ($self, $readinfo, $contiginfo, $scaffoldobj) = @_;
 
559
    # Singlets in TIGR_Assembler are represented as a contig of one sequence
 
560
    # We try to simulate this duality by playing around with the Singlet object
 
561
    
 
562
    my $contigid = $$contiginfo{'asmbl_id'};
 
563
    my $readid   = $self->_merge_seq_name_and_db($$readinfo{'seq_name'}, $$readinfo{'db'});
 
564
    
 
565
    # Create a sequence object
 
566
    #$$contiginfo{'llength'} = length($$contiginfo{'lsequence'});
 
567
    my $seqobj = Bio::Seq::Quality->new(
 
568
       -primary_id => $contigid, # unique id in assembly (contig name)
 
569
       -display_id => $readid,
 
570
       -seq        => $$contiginfo{'lsequence'}, # do not use $$readinfo as ambiguities are uppercase
 
571
       -start      => 1,
 
572
       -strand     => $$readinfo{'strand'},
 
573
       -alphabet   => 'dna',
 
574
       -qual => $self->_qual_hex2dec($$contiginfo{'quality'})    
 
575
   );
 
576
 
 
577
   # Create singlet from sequence and add it to scaffold
 
578
   my $singletobj = Bio::Assembly::Singlet->new( -seqref => $seqobj );
 
579
   $scaffoldobj->add_singlet($singletobj);
 
580
 
 
581
   # Add other misc contig information as features of the singlet
 
582
   my $contigtags = Bio::SeqFeature::Generic->new(
 
583
        -primary_tag => "_main_contig_feature:$contigid",
 
584
        -start       => 1,
 
585
        -end         => $singletobj->get_consensus_length(),
 
586
        -strand      => 1,
 
587
        -tag         => { 'seq_id'     => $$contiginfo{'seq_id'},
 
588
                          'com_name'   => $$contiginfo{'com_name'},
 
589
                          'type'       => $$contiginfo{'type'},
 
590
                          'method'     => $$contiginfo{'method'},
 
591
                          'ed_status'  => $$contiginfo{'ed_status'},
 
592
                          'full_cds'   => $$contiginfo{'full_cds'},
 
593
                          'cds_start'  => $$contiginfo{'cds_start'},
 
594
                          'cds_end'    => $$contiginfo{'cds_end'},
 
595
                          'ed_pn'      => $$contiginfo{'ed_pn'},
 
596
                          'ed_date'    => $$contiginfo{'ed_date'},
 
597
                          'comment'    => $$contiginfo{'comment'},
 
598
                          'frameshift' => $$contiginfo{'frameshift'} }
 
599
   );
 
600
   $singletobj->add_features([ $contigtags ], 1);
 
601
 
 
602
   # Add read location and sequence to singlet features (in 'gapped consensus' coordinates)
 
603
   $$readinfo{'aln_start'} = $$readinfo{'offset'} + 1; # seq offset is in gapped coordinates
 
604
   $$readinfo{'aln_end'} = $$readinfo{'aln_start'} + length($$readinfo{'lsequence'}) - 1; # lsequence is aligned seq
 
605
 
 
606
   my $alncoord = Bio::SeqFeature::Generic->new(
 
607
       -primary_tag => "_aligned_coord:$readid",
 
608
       -start       => $$readinfo{'aln_start'},
 
609
       -end         => $$readinfo{'aln_end'},
 
610
       -strand      => $$readinfo{'strand'},
 
611
       -tag         => { 'contig' => $contigid }
 
612
   );
 
613
   $alncoord->attach_seq($singletobj->seqref);
 
614
   $singletobj->add_features([ $alncoord ], 0);
 
615
 
 
616
   # Add quality clipping read information in singlet features
 
617
   # (from 'aligned read' to 'gapped consensus' coordinates)
 
618
   $$readinfo{'clip_start'} = $$readinfo{'seq_lend'};
 
619
   $$readinfo{'clip_end'}   = $$readinfo{'seq_rend'};
 
620
   my $clipcoord = Bio::SeqFeature::Generic->new(
 
621
       -primary_tag => "_quality_clipping:$readid",
 
622
       -start       => $$readinfo{'clip_start'},
 
623
       -end         => $$readinfo{'clip_end'},
 
624
       -strand      => $$readinfo{'strand'},
 
625
       -tag         => { 'contig' => $contigid }
 
626
   );
 
627
   $clipcoord->attach_seq($singletobj->seqref);
 
628
   $singletobj->add_features([ $clipcoord ], 0);
 
629
   
 
630
   # Add other misc read information as subsequence feature
 
631
   my $readtags = Bio::SeqFeature::Generic->new(
 
632
       -primary_tag => "_main_read_feature:$readid",
 
633
       -start       => $$readinfo{'aln_start'},
 
634
       -end         => $$readinfo{'aln_end'},
 
635
       -strand      => $$readinfo{'strand'},
 
636
       -tag         => { 'best'    => $$readinfo{'best'},
 
637
                         'comment' => $$readinfo{'comment'} }
 
638
   );
 
639
   $alncoord->add_sub_SeqFeature($readtags);
 
640
      
 
641
   return $singletobj;
 
642
}
 
643
 
 
644
=head2 write_assembly
 
645
 
 
646
    Title   : write_assembly
 
647
    Usage   : $ass_io->write_assembly($assembly)
 
648
    Function: Write the assembly object in TIGR Assembler compatible tasm lassie  
 
649
              format
 
650
    Returns : 1 on success, 0 for error
 
651
    Args    : A Bio::Assembly::Scaffold object
 
652
 
 
653
=cut
 
654
 
 
655
sub write_assembly {
 
656
    my ($self,@args) = @_;    
 
657
    my ($scaffoldobj, $singlets) = $self->_rearrange([qw(SCAFFOLD SINGLETS)], @args);
 
658
    
 
659
    # Sanity check
 
660
    if ( !$scaffoldobj || !$scaffoldobj->isa('Bio::Assembly::Scaffold') ) {
 
661
        $self->warn("Must provide a Bio::Align::AlignI object when calling
 
662
            write_assembly");
 
663
        next;
 
664
    }
 
665
 
 
666
    # Get list of objects - contigs and singlets
 
667
    my @cont_ids = $scaffoldobj->get_contig_ids;
 
668
    my @sing_ids = $scaffoldobj->get_singlet_ids;
 
669
    my %did;
 
670
    my $decimal_format = '%.2f';
 
671
    for (my $i = 0; $i < scalar @sing_ids ; $i++) {
 
672
      # singlet display id (string)
 
673
      my $display_id = $sing_ids[$i];
 
674
      # singlet primary id (unique, numerical)
 
675
      my $primary_id = $scaffoldobj->get_singlet_by_id($display_id)->seqref->primary_id;
 
676
      $sing_ids[$i] = $primary_id;
 
677
      $did{$primary_id} = $display_id;
 
678
    }
 
679
    my @ids = (@cont_ids, @sing_ids);
 
680
    @ids = sort { $a <=> $b } @ids; # list with contig ids and singlet primary id
 
681
    my $numobj = scalar @ids;
 
682
 
 
683
    # Output all contigs and singlets (sorted by increasing id number)
 
684
    for (my $i = 0 ; $i < $numobj ; $i++) {
 
685
        
 
686
        my $objid = $ids[$i];
 
687
        
 
688
        if (defined $did{$objid}) { 
 
689
            # This is a singlet
 
690
            next unless ($singlets);
 
691
 
 
692
            my $contigid = $objid;
 
693
            my $readid   = $did{$objid};            
 
694
            my $singletobj = $scaffoldobj->get_singlet_by_id($readid);
 
695
            
 
696
            # Get contig information
 
697
            my $contanno = (grep
 
698
                { $_->primary_tag eq "_main_contig_feature:$contigid" }
 
699
                $singletobj->get_features_collection->get_all_features
 
700
            )[0];
 
701
            my %contiginfo;
 
702
            $contiginfo{'sequence'}   = $singletobj->seqref->seq;
 
703
            $contiginfo{'lsequence'}  = $contiginfo{'sequence'};
 
704
            $contiginfo{'quality'}    = $self->_qual_dec2hex(
 
705
                join ' ', @{$singletobj->seqref->qual});
 
706
            $contiginfo{'asmbl_id'}   = $contigid;
 
707
            $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];   
 
708
            $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];
 
709
            $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];
 
710
            $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];
 
711
            $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];
 
712
            $contiginfo{'redundancy'} = sprintf($decimal_format, 1);
 
713
            $contiginfo{'perc_N'}     = sprintf(
 
714
                $decimal_format, $self->_perc_N($contiginfo{'sequence'}));
 
715
            $contiginfo{'seqnum'}     = 1;
 
716
            $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];
 
717
            $contiginfo{'cds_start'}  = ($contanno->get_tag_values('cds_start'))[0];
 
718
            $contiginfo{'cds_end'}    = ($contanno->get_tag_values('cds_end'))[0];
 
719
            $contiginfo{'ed_pn'}      = ($contanno->get_tag_values('ed_pn'))[0];
 
720
            $contiginfo{'ed_date'}    = $self->_date_time;
 
721
            $contiginfo{'comment'}    = ($contanno->get_tag_values('comment'))[0];
 
722
            $contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];
 
723
 
 
724
            # Check that no tag value is undef
 
725
            $contiginfo{'seq_id'}     = '' unless defined $contiginfo{'seq_id'};
 
726
            $contiginfo{'com_name'}   = '' unless defined $contiginfo{'com_name'};
 
727
            $contiginfo{'type'}       = '' unless defined $contiginfo{'type'};
 
728
            $contiginfo{'method'}     = '' unless defined $contiginfo{'method'};
 
729
            $contiginfo{'ed_status'}  = '' unless defined $contiginfo{'ed_status'};
 
730
            $contiginfo{'full_cds'}   = '' unless defined $contiginfo{'full_cds'};
 
731
            $contiginfo{'cds_start'}  = '' unless defined $contiginfo{'cds_start'};
 
732
            $contiginfo{'cds_end'}    = '' unless defined $contiginfo{'cds_end'};
 
733
            $contiginfo{'ed_pn'}      = '' unless defined $contiginfo{'ed_pn'};
 
734
            $contiginfo{'comment'}    = '' unless defined $contiginfo{'comment'};
 
735
            $contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};
 
736
            
 
737
            # Print contig information
 
738
            $self->_print(
 
739
                "sequence\t$contiginfo{'sequence'}\n".
 
740
                "lsequence\t$contiginfo{'lsequence'}\n".
 
741
                "quality\t$contiginfo{'quality'}\n".
 
742
                "asmbl_id\t$contiginfo{'asmbl_id'}\n".
 
743
                "seq_id\t$contiginfo{'seq_id'}\n".
 
744
                "com_name\t$contiginfo{'com_name'}\n".
 
745
                "type\t$contiginfo{'type'}\n".
 
746
                "method\t$contiginfo{'method'}\n".
 
747
                "ed_status\t$contiginfo{'ed_status'}\n".
 
748
                "redundancy\t$contiginfo{'redundancy'}\n".
 
749
                "perc_N\t$contiginfo{'perc_N'}\n".
 
750
                "seq#\t$contiginfo{'seqnum'}\n".
 
751
                "full_cds\t$contiginfo{'full_cds'}\n".
 
752
                "cds_start\t$contiginfo{'cds_start'}\n".
 
753
                "cds_end\t$contiginfo{'cds_end'}\n".
 
754
                "ed_pn\t$contiginfo{'ed_pn'}\n".
 
755
                "ed_date\t$contiginfo{'ed_date'}\n".
 
756
                "comment\t$contiginfo{'comment'}\n".
 
757
                "frameshift\t$contiginfo{'frameshift'}\n".
 
758
                "\n"
 
759
            );
 
760
                        
 
761
            # Get read information
 
762
            my ($seq_name, $db) = $self->_split_seq_name_and_db($readid);
 
763
            my $clipcoord = (grep
 
764
                { $_->primary_tag eq "_quality_clipping:$readid"}
 
765
                $singletobj->get_features_collection->get_all_features
 
766
            )[0];
 
767
            my $alncoord  = (grep
 
768
                { $_->primary_tag eq "_aligned_coord:$readid"}
 
769
                $singletobj->get_features_collection->get_all_features
 
770
            )[0];
 
771
            my $readanno = (grep
 
772
                { $_->primary_tag eq "_main_read_feature:$readid" }
 
773
                $singletobj->get_seq_coord($singletobj->seqref)->get_SeqFeatures
 
774
            )[0];
 
775
            my %readinfo;
 
776
            $readinfo{'seq_name'}  = $seq_name;
 
777
            $readinfo{'asm_lend'}  = $alncoord->location->start;
 
778
            $readinfo{'asm_rend'}  = $alncoord->location->end;
 
779
            $readinfo{'seq_lend'}  = $clipcoord->location->start;
 
780
            $readinfo{'seq_rend'}  = $clipcoord->location->end;
 
781
            $readinfo{'best'}      = ($readanno->get_tag_values('best'))[0];
 
782
            $readinfo{'comment'}   = ($readanno->get_tag_values('comment'))[0];
 
783
            $readinfo{'db'}        = $db;         
 
784
            $readinfo{'offset'}    = 0;
 
785
            # ambiguities in read sequence are uppercase
 
786
            $readinfo{'lsequence'} = uc($contiginfo{'lsequence'});
 
787
            
 
788
            # Check that no tag value is undef
 
789
            $readinfo{'best'}    = '' unless defined $readinfo{'best'};
 
790
            $readinfo{'comment'} = '' unless defined $readinfo{'comment'};
 
791
 
 
792
            # Print read information
 
793
            $self->_print(
 
794
                "seq_name\t$readinfo{'seq_name'}\n".
 
795
                "asm_lend\t$readinfo{'asm_lend'}\n".
 
796
                "asm_rend\t$readinfo{'asm_rend'}\n".
 
797
                "seq_lend\t$readinfo{'seq_lend'}\n".
 
798
                "seq_rend\t$readinfo{'seq_rend'}\n".
 
799
                "best\t$readinfo{'best'}\n".
 
800
                "comment\t$readinfo{'comment'}\n".
 
801
                "db\t$readinfo{'db'}\n".
 
802
                "offset\t$readinfo{'offset'}\n".
 
803
                "lsequence\t$readinfo{'lsequence'}\n"
 
804
            );
 
805
            if ($i+1 < $numobj) {
 
806
                $self->_print("|\n");
 
807
            }
 
808
        } else {
 
809
            # This is a contig
 
810
            my $contigid = $objid;
 
811
            my $contigobj = $scaffoldobj->get_contig_by_id($contigid);
 
812
 
 
813
            # Skip contigs of 1 sequence (singlets) if needed
 
814
            next if ($contigobj->no_sequences == 1) && (!$singlets);
 
815
            
 
816
            # Get contig information
 
817
            my $contanno = (grep
 
818
                { $_->primary_tag eq "_main_contig_feature:$contigid" }
 
819
                $contigobj->get_features_collection->get_all_features
 
820
            )[0];
 
821
            my %contiginfo;
 
822
            $contiginfo{'sequence'}   = $self->_ungap(
 
823
                $contigobj->get_consensus_sequence->seq);
 
824
            $contiginfo{'lsequence'}  = $contigobj->get_consensus_sequence->seq;
 
825
            $contiginfo{'quality'}    = $self->_qual_dec2hex(
 
826
                join ' ', @{$contigobj->get_consensus_quality->qual});
 
827
            $contiginfo{'asmbl_id'}   = $contigid;
 
828
            $contiginfo{'seq_id'}     = ($contanno->get_tag_values('seq_id'))[0];
 
829
            $contiginfo{'com_name'}   = ($contanno->get_tag_values('com_name'))[0];
 
830
            $contiginfo{'type'}       = ($contanno->get_tag_values('type'))[0];
 
831
            $contiginfo{'method'}     = ($contanno->get_tag_values('method'))[0];
 
832
            $contiginfo{'ed_status'}  = ($contanno->get_tag_values('ed_status'))[0];
 
833
            $contiginfo{'redundancy'} = sprintf(
 
834
                $decimal_format, $self->_redundancy($contigobj));
 
835
            $contiginfo{'perc_N'}     = sprintf(
 
836
                $decimal_format, $self->_perc_N($contiginfo{'sequence'}));
 
837
            $contiginfo{'seqnum'}     = $contigobj->no_sequences;
 
838
            $contiginfo{'full_cds'}   = ($contanno->get_tag_values('full_cds'))[0];
 
839
            $contiginfo{'cds_start'}  = ($contanno->get_tag_values('cds_start'))[0];
 
840
            $contiginfo{'cds_end'}    = ($contanno->get_tag_values('cds_end'))[0];
 
841
            $contiginfo{'ed_pn'}      = ($contanno->get_tag_values('ed_pn'))[0];
 
842
            $contiginfo{'ed_date'}    = $self->_date_time;
 
843
            $contiginfo{'comment'}    = ($contanno->get_tag_values('comment'))[0];
 
844
            $contiginfo{'frameshift'} = ($contanno->get_tag_values('frameshift'))[0];
 
845
            
 
846
            # Check that no tag value is undef
 
847
            $contiginfo{'seq_id'}     = '' unless defined $contiginfo{'seq_id'};
 
848
            $contiginfo{'com_name'}   = '' unless defined $contiginfo{'com_name'};
 
849
            $contiginfo{'type'}       = '' unless defined $contiginfo{'type'};
 
850
            $contiginfo{'method'}     = '' unless defined $contiginfo{'method'};
 
851
            $contiginfo{'ed_status'}  = '' unless defined $contiginfo{'ed_status'};
 
852
            $contiginfo{'full_cds'}   = '' unless defined $contiginfo{'full_cds'};
 
853
            $contiginfo{'cds_start'}  = '' unless defined $contiginfo{'cds_start'};
 
854
            $contiginfo{'cds_end'}    = '' unless defined $contiginfo{'cds_end'};
 
855
            $contiginfo{'ed_pn'}      = '' unless defined $contiginfo{'ed_pn'};
 
856
            $contiginfo{'comment'}    = '' unless defined $contiginfo{'comment'};
 
857
            $contiginfo{'frameshift'} = '' unless defined $contiginfo{'frameshift'};
 
858
                       
 
859
            # Print contig information
 
860
            $self->_print(
 
861
                "sequence\t$contiginfo{'sequence'}\n".
 
862
                "lsequence\t$contiginfo{'lsequence'}\n".
 
863
                "quality\t$contiginfo{'quality'}\n".
 
864
                "asmbl_id\t$contiginfo{'asmbl_id'}\n".
 
865
                "seq_id\t$contiginfo{'seq_id'}\n".
 
866
                "com_name\t$contiginfo{'com_name'}\n".
 
867
                "type\t$contiginfo{'type'}\n".
 
868
                "method\t$contiginfo{'method'}\n".
 
869
                "ed_status\t$contiginfo{'ed_status'}\n".
 
870
                "redundancy\t$contiginfo{'redundancy'}\n".
 
871
                "perc_N\t$contiginfo{'perc_N'}\n".
 
872
                "seq#\t$contiginfo{'seqnum'}\n".
 
873
                "full_cds\t$contiginfo{'full_cds'}\n".
 
874
                "cds_start\t$contiginfo{'cds_start'}\n".
 
875
                "cds_end\t$contiginfo{'cds_end'}\n".
 
876
                "ed_pn\t$contiginfo{'ed_pn'}\n".
 
877
                "ed_date\t$contiginfo{'ed_date'}\n".
 
878
                "comment\t$contiginfo{'comment'}\n".
 
879
                "frameshift\t$contiginfo{'frameshift'}\n".
 
880
                "\n"
 
881
            );
 
882
            my $seqno = 0;
 
883
            for my $readobj ( $contigobj->each_seq() ) {
 
884
                $seqno++;
 
885
                
 
886
                # Get read information
 
887
                my ($seq_name, $db) = $self->_split_seq_name_and_db($readobj->id);
 
888
                my ($asm_lend, $asm_rend, $seq_lend, $seq_rend, $offset)
 
889
                    = $self->_coord($readobj, $contigobj);
 
890
                my $readanno = ( grep 
 
891
                    { $_->primary_tag eq '_main_read_feature:'.$readobj->primary_id }
 
892
                    $contigobj->get_seq_coord($readobj)->get_SeqFeatures
 
893
                )[0];
 
894
                my %readinfo;                
 
895
                $readinfo{'seq_name'}  = $seq_name;
 
896
                $readinfo{'asm_lend'}  = $asm_lend;
 
897
                $readinfo{'asm_rend'}  = $asm_rend;
 
898
                $readinfo{'seq_lend'}  = $seq_lend;
 
899
                $readinfo{'seq_rend'}  = $seq_rend;                
 
900
                $readinfo{'best'}      = ($readanno->get_tag_values('best'))[0];
 
901
                $readinfo{'comment'}   = ($readanno->get_tag_values('comment'))[0];
 
902
                $readinfo{'db'}        = $db;
 
903
                $readinfo{'offset'}    = $offset;   
 
904
                $readinfo{'lsequence'} = $readobj->seq(); 
 
905
                         
 
906
                # Check that no tag value is undef
 
907
                $readinfo{'best'}    = '' unless defined $readinfo{'best'};
 
908
                $readinfo{'comment'} = '' unless defined $readinfo{'comment'};
 
909
    
 
910
                # Print read information
 
911
                $self->_print(
 
912
                    "seq_name\t$readinfo{'seq_name'}\n".
 
913
                    "asm_lend\t$readinfo{'asm_lend'}\n".
 
914
                    "asm_rend\t$readinfo{'asm_rend'}\n".
 
915
                    "seq_lend\t$readinfo{'seq_lend'}\n".
 
916
                    "seq_rend\t$readinfo{'seq_rend'}\n".
 
917
                    "best\t$readinfo{'best'}\n".
 
918
                    "comment\t$readinfo{'comment'}\n".
 
919
                    "db\t$readinfo{'db'}\n".
 
920
                    "offset\t$readinfo{'offset'}\n".
 
921
                    "lsequence\t$readinfo{'lsequence'}\n"
 
922
                );
 
923
                if ($seqno < $contiginfo{'seqnum'}) {
 
924
                    $self->_print("\n");
 
925
                } elsif (($seqno == $contiginfo{'seqnum'}) && ($i+1 < $numobj)) {
 
926
                    $self->_print("|\n");
 
927
                }
 
928
            }
 
929
        }
 
930
    }
 
931
    return 1;
 
932
}
 
933
 
 
934
=head2 _perc_N
 
935
 
 
936
    Title   : _perc_N
 
937
    Usage   : my $perc_N = $ass_io->_perc_N($sequence_string)
 
938
    Function: Calculate the percent of ambiguities in a sequence.
 
939
              M R W S Y K X N are regarded as ambiguites in an aligned read
 
940
              sequence by TIGR Assembler. In the case of a gapped contig
 
941
              consensus sequence, all lowercase symbols are ambiguities, i.e.:
 
942
              a c g t u m r w s y k x n.
 
943
    Returns : decimal number
 
944
    Args    : string
 
945
 
 
946
=cut
 
947
 
 
948
sub _perc_N {
 
949
    my ($self, $seq_string) = @_;
 
950
    $self->throw("Cannot accept an empty sequence") if length($seq_string) == 0;
 
951
    my $perc_N = 0;
 
952
    for my $base ( split //, $seq_string ) {
 
953
        # individual base matches an ambiguity?
 
954
        if (( $base =~ m/[x|n|m|r|w|s|y|k]/i ) || ( $base =~ m/[a|c|g|t|u]/ ) ) {
 
955
            $perc_N++;
 
956
        }
 
957
    }
 
958
    $perc_N = $perc_N * 100 / length $seq_string;
 
959
    return $perc_N;
 
960
}
 
961
 
 
962
=head2 _redundancy
 
963
 
 
964
    Title   : _redundancy
 
965
    Usage   : my $ref = $ass_io->_redundancy($contigobj)
 
966
    Function: Calculate the fold coverage (redundancy) of a contig consensus
 
967
              (average number of read base pairs covering the consensus)
 
968
    Returns : decimal number
 
969
    Args    : Bio::Assembly::Contig
 
970
 
 
971
=cut
 
972
 
 
973
sub _redundancy {
 
974
    # redundancy = (sum of all aligned read lengths - ( number of gaps in gapped
 
975
    # consensus + number of gaps in aligned reads that are also in the consensus ) )
 
976
    # / length of ungapped consensus
 
977
    my ($self, $contigobj) = @_;
 
978
    my $redundancy = 0;
 
979
    
 
980
    # sum of all aligned read lengths
 
981
    my $read_tot = 0;
 
982
    for my $readobj ( $contigobj->each_seq ) {
 
983
        my $read_length = length($readobj->seq);
 
984
        $read_tot += $read_length;
 
985
    }
 
986
    $redundancy += $read_tot;
 
987
    
 
988
    # - respected gaps
 
989
    my $consensus_sequence = $contigobj->get_consensus_sequence->seq;
 
990
    my @consensus_gaps = ();
 
991
    $contigobj->_register_gaps($consensus_sequence, \@consensus_gaps);
 
992
    my $respected_gaps = scalar(@consensus_gaps);
 
993
    if ($respected_gaps > 0) {
 
994
        my @cons_arr = split //, $consensus_sequence;
 
995
        for my $gap_pos_cons ( @consensus_gaps ) {
 
996
            for my $readobj ( $contigobj->each_seq ) {
 
997
                my $readid = $readobj->id;
 
998
                my $read_start = $contigobj->change_coord(
 
999
                    "aligned $readid", 'gapped consensus', $readobj->start);
 
1000
                my $read_end   = $contigobj->change_coord(
 
1001
                    "aligned $readid", 'gapped consensus', $readobj->end  );
 
1002
                # skip this if consensus gap position not within in the read boundaries
 
1003
                next if ( ($gap_pos_cons < $read_start)
 
1004
                    || ($gap_pos_cons > $read_end) );
 
1005
                # does the read position have read have a gap?
 
1006
                my @read_arr = split //, $readobj->seq;                
 
1007
                my $gap_pos_read = $contigobj->change_coord(
 
1008
                    'gapped consensus', "aligned $readid", $gap_pos_cons);
 
1009
                if ($read_arr[$gap_pos_read-1] eq $cons_arr[$gap_pos_cons-1]) {
 
1010
                    $respected_gaps++;
 
1011
                }
 
1012
            }
 
1013
        }
 
1014
    }
 
1015
    $redundancy -= $respected_gaps;
 
1016
    
 
1017
    # / length of ungapped consensus
 
1018
    my $contig_length = length($self->_ungap($contigobj->get_consensus_sequence->seq));
 
1019
    $redundancy /= $contig_length;
 
1020
    
 
1021
    return $redundancy;
 
1022
}
 
1023
 
 
1024
=head2 _ungap
 
1025
 
 
1026
    Title   : _ungap
 
1027
    Usage   : my $ungapped = $ass_io->_ungap($gapped)
 
1028
    Function: Remove the gaps from a sequence. Gaps are - in TIGR Assembler
 
1029
    Returns : string
 
1030
    Args    : string
 
1031
 
 
1032
=cut
 
1033
 
 
1034
sub _ungap {
 
1035
    my ($self, $seq_string) = @_;
 
1036
    $seq_string =~ s/-//g;
 
1037
    return $seq_string;
 
1038
}
 
1039
 
 
1040
=head2 _date_time
 
1041
 
 
1042
    Title   : _date_time
 
1043
    Usage   : my $timepoint = $ass_io->date_time
 
1044
    Function: Get date and time (MM//DD/YY HH:MM:SS)
 
1045
    Returns : string
 
1046
    Args    : none
 
1047
 
 
1048
=cut
 
1049
 
 
1050
sub _date_time {
 
1051
    my ($self) = @_;
 
1052
    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
 
1053
    my $formatted_date_time = 
 
1054
        sprintf('%02d', $mon+1).'/'.
 
1055
        sprintf('%02d', $mday).'/'.
 
1056
        sprintf('%02d', $year % 100).
 
1057
        ' '.
 
1058
        sprintf('%02d', $hour).':'.
 
1059
        sprintf('%02d', $min).':'.
 
1060
        sprintf('%02d',$sec)
 
1061
    ;
 
1062
    return $formatted_date_time;
 
1063
}
 
1064
 
 
1065
=head2 _split_seq_name_and_db
 
1066
 
 
1067
    Title   : _split_seq_name_and_db
 
1068
    Usage   : my ($seqname, $db) = $ass_io->_split_seq_name_and_db($id)
 
1069
    Function: Extract seq_name and db from sequence id
 
1070
    Returns : seq_name, db
 
1071
    Args    : id
 
1072
 
 
1073
=cut
 
1074
 
 
1075
sub _split_seq_name_and_db {
 
1076
    my ($self, $id) = @_;
 
1077
    my $seq_name = '';
 
1078
    my $db       = '';
 
1079
    if ($id =~ m/(\S+)\|(\S+)/) {
 
1080
        $db       = $1;
 
1081
        $seq_name = $2;
 
1082
    } else {
 
1083
        $seq_name = $id;
 
1084
    }
 
1085
    return ($seq_name, $db);
 
1086
}
 
1087
 
 
1088
=head2 _merge_seq_name_and_db
 
1089
 
 
1090
    Title   : _merge_seq_name_and_db
 
1091
    Usage   : my $id = $ass_io->_merge_seq_name_and_db($seq_name, $db)
 
1092
    Function: Construct id from seq_name and db
 
1093
    Returns : id
 
1094
    Args    : seq_name, db
 
1095
 
 
1096
=cut
 
1097
 
 
1098
sub _merge_seq_name_and_db {
 
1099
    my ($self, $seq_name, $db) = @_;
 
1100
    my $id = '';
 
1101
    if ($db) {
 
1102
        $id = $db.'|'.$seq_name;
 
1103
    } else {
 
1104
        $id = $seq_name;
 
1105
    }
 
1106
    return $id;
 
1107
}
 
1108
 
 
1109
=head2 _coord
 
1110
 
 
1111
    Title   : _coord
 
1112
    Usage   : my $id = $ass_io->__coord($readobj, $contigobj)
 
1113
    Function: Get different coordinates for the read
 
1114
    Returns : number, number, number, number, number
 
1115
    Args    : Bio::Assembly::Seq, Bio::Assembly::Contig
 
1116
 
 
1117
=cut
 
1118
 
 
1119
sub _coord {
 
1120
    my ($self, $readobj, $contigobj) = @_;
 
1121
    my ($asm_lend, $asm_rend, $seq_lend, $seq_rend, $offset) = (0, 0, 0, 0, 0);
 
1122
 
 
1123
    # Get read gapped consensus coordinates from contig and calculate
 
1124
    # asm_lend and asm_rend in ungapped consensus
 
1125
    my $aln_lend = $contigobj->get_seq_coord($readobj)->location->start;
 
1126
    my $aln_rend = $contigobj->get_seq_coord($readobj)->location->end;
 
1127
    $asm_lend = $contigobj->change_coord(
 
1128
        'gapped consensus', 'ungapped consensus', $aln_lend);
 
1129
    $asm_rend = $contigobj->change_coord(
 
1130
        'gapped consensus', 'ungapped consensus', $aln_rend);
 
1131
  
 
1132
    # Get gapped consensus coordinates for quality-clipped reads from contig 
 
1133
    # annotation and determine seq_lend and seq_rend in unaligned sequence coord
 
1134
    my $readclip = (grep
 
1135
        { $_->primary_tag eq '_quality_clipping:'.$readobj->primary_id }
 
1136
        $contigobj->get_features_collection->get_all_features
 
1137
    )[0];
 
1138
    my $clip_lend = $readclip->location->start;
 
1139
    my $clip_rend = $readclip->location->end;    
 
1140
    $seq_lend = $contigobj->change_coord(
 
1141
        'gapped consensus', 'aligned '.$readobj->id, $clip_lend);
 
1142
    $seq_rend = $contigobj->change_coord(
 
1143
        'gapped consensus', 'aligned '.$readobj->id, $clip_rend);    
 
1144
    
 
1145
    # Offset
 
1146
    $offset = $aln_lend - 1;
 
1147
    
 
1148
    return ($asm_lend, $asm_rend, $seq_lend, $seq_rend, $offset);
 
1149
}
 
1150
 
 
1151
1;
 
1152
 
 
1153
__END__