1
package Circos::Karyotype;
7
Circos::Karyotype - karyotype routines for Circos
11
This module is not meant to be used directly.
15
Circos is an application for the generation of publication-quality,
16
circularly composited renditions of genomic data and related
19
Circos is particularly suited for visualizing alignments, conservation
20
and intra and inter-chromosomal relationships. However, Circos can be
21
used to plot any kind of 2D data in a circular layout - its use is not
22
limited to genomics. Circos' use of lines to relate position pairs
23
(ribbons add a thickness parameter to each end) is effective to
24
display relationships between objects or positions on one or more
27
All documentation is in the form of tutorials at L<http://www.circos.ca>.
31
# -------------------------------------------------------------------
41
use Carp qw( carp confess croak );
44
#use File::Spec::Functions;
46
use Math::VecStat qw(max);
48
use Params::Validate qw(:all);
49
#use Regexp::Common qw(number);
51
#use POSIX qw(floor ceil);
53
use lib "$FindBin::RealBin";
54
use lib "$FindBin::RealBin/../lib";
55
use lib "$FindBin::RealBin/lib";
57
use Circos::Configuration;
58
use Circos::Constants;
63
################################################################
64
# Read the karyotype file and parsed chromosome and band locations.
66
# Chromosomes have the format
68
# chr hs1 1 0 247249719 green
69
# chr hs2 2 0 242951149 green
73
# band hs1 p36.33 p36.33 0 2300000 gneg
74
# band hs1 p36.32 p36.32 2300000 5300000 gpos25
78
# field name label start end color options
80
# Note that name and label can be different. The label (e.g. 1) is what appears
81
# on the image, but the name (e.g. hs1) is what is used in the input data file.
83
# v0.57 - parent field of chr is no longer required
84
# - old 7-field karyotype still supported
85
# v0.52 - additional error checks and tidying
89
fetch_conf("debug_validate") && validate( @_, { file => 1 } );
91
my $files = Circos::Configuration::make_parameter_list_array($params{file});
93
for my $file (@$files) {
94
read_karyotype_file($file,$karyotype);
99
sub read_karyotype_file {
100
my ($file,$karyotype) = @_;
101
$file = locate_file( file => $file, name=>"karyotype" );
103
if (! keys %$karyotype) {
106
my @prev_index = map { $_->{chr}{display_order} } values %$karyotype;
107
$chr_index = 1 + max @prev_index;
110
my $delim_rx = fetch_conf("file_delim") || undef;
111
if ($delim_rx && fetch_conf("file_delim_collapse")) {
115
open(F,$file) or fatal_error("io","cannot_read",$file,"karyotype",$!);
120
next if is_blank($line);
121
next if is_comment($line);
123
my @tok = $delim_rx ? split($delim_rx,$line) : split;
125
my ($field,$parent,$name,$label,$start,$end,$color,$options) = @tok;
127
#fatal_error("data","bad_karyotype_format",$file,$line);
129
if ( ! is_number($start,"int") || ! is_number($end,"int") ) {
130
fatal_error("data","malformed_karyotype_coordinates",$start,$end);
132
if ( $end <= $start ) {
133
fatal_error("data","inconsistent_karyotype_coordinates",$start,$end);
136
my $set = make_set($start,$end,norev=>1);
138
# karyotype data structure is a hash with each chromosome being a value
139
# keyed by chromosome name. Bands form a list within the chromosome
140
# data structure, keyed by 'band'.
146
size => $set->cardinality,
151
options => parse_options($options)
154
if ( $field =~ /chr/ ) {
155
# chromosome entries have a few additional fields
156
# chr, scale, display_order
157
$data->{chr} = $name;
159
$data->{display_order} = $chr_index++;
160
if ( $karyotype->{$data->{chr}}{chr} ) {
161
fatal_error("data","repeated_chr_in_karyotype",$data->{chr});
163
# check if color override has been specified
164
my $color_conf_str = fetch_conf("chromosomes_color") || fetch_conf("chromosome_color");
165
if ($color_conf_str) {
166
my @color_list = @{Circos::Configuration::make_parameter_list_array($color_conf_str)};
167
for my $is_rx (1,0) {
168
for my $color_pair (@color_list) {
169
my ($key,$value) = split(/\s*=\s*/,$color_pair);
170
my $rx = parse_as_rx($key);
176
my $match = match_string($name,$rx||$key);
177
$data->{color} = $value if $match;
181
# chromosome is keyed by its name
182
$karyotype->{ $name }{chr} = $data;
183
} elsif ( $field =~ /band/ ) {
184
# band entries have the 'chr' key point to the name of parent chromosome
185
$data->{chr} = $parent;
186
push @{ $karyotype->{ $parent }{band} }, $data;
188
# for now, die hard here. There are no other line types supported.
189
fatal_error("data","unknown_karyotype_line",$field,$line);
195
################################################################
197
# Verify the contents of the karyotype data structure. Basic
198
# error checking also happens in read_karyotype (above). Here,
199
# we perform more detailed diagnostics.
201
# The following are checked
204
# FATAL a band has no associated chromosome
205
# FATAL band coordinates extend outside chromosome
206
# FATAL two bands overlap by more than max_band_overlap
207
# WARN a chromosome has a parent field definition
208
# WARN bands do not completely cover the chromosome
210
sub validate_karyotype {
211
fetch_conf("debug_validate") && validate( @_, { karyotype => 1 } );
213
my $karyotype = $params{karyotype};
214
for my $chr ( keys %$karyotype ) {
215
if ( !$karyotype->{$chr}{chr} ) {
216
fatal_error("data","band_on_missing_chr",$chr);
218
if ( $karyotype->{$chr}{chr}{parent} ne $DASH ) {
219
printwarning("Chromosome [$chr] has a parent field. Chromosome parents are not currently supported");
222
my $chrset = $karyotype->{$chr}{chr}{set};
223
my $bandcoverage = Set::IntSpan->new();
225
for my $band ( make_list( $karyotype->{$chr}{band} ) ) {
226
if ( $band->{set}->diff($chrset)->cardinality ) {
227
fatal_error("data","band_sticks_out",$band->{name},$chr);
228
} elsif ( $band->{set}->intersect($bandcoverage)->cardinality > 1 ) {
229
printwarning("data","Band [$band->{name}] for chromosome [$chr] overlaps with other bands");
231
$bandcoverage = $bandcoverage->union( $band->{set} );
233
if ($bandcoverage->cardinality && $bandcoverage->cardinality < $chrset->cardinality ) {
234
printwarning("Bands for chromosome [$chr] do not cover entire chromosome");
239
################################################################
240
# Assign sort_idx key to each chromosome derived by sorting
241
# the chromosomes by an internal digit in the chromosome name,
242
# if it is found. Any other chromosomes are sorted
246
fetch_conf("debug_validate") && validate( @_, { karyotype => 1 } );
248
my $k = $params{karyotype};
250
my @chrs = sort { $k->{$a}{chr}{display_order} <=> $k->{$b}{chr}{display_order} } keys %$k;
252
my @chrs_native_sort;
254
if (my @chrs_w_num = grep($_ =~ /\d/, @chrs)) {
255
# if there are any chromosomes with a number in them, place them first and
256
# sort them by the (preceeding) non-numerical string first, then the number
257
my $rxnd = qr/^(\D+)/;
260
for my $c (@chrs_w_num) {
261
my ($non_digit) = $c =~ /$rxnd/;
262
my ($digit) = $c =~ /$rxd/;
263
push @chrs_for_sort, {chr => $c,
264
non_digit => $non_digit || $EMPTY_STR,
265
digit => $digit || 0};
267
push @chrs_native_sort, map { $_->{chr} } sort { ( $a->{non_digit} cmp $b->{non_digit} ) ||
268
( $a->{digit} <=> $b->{digit} ) } @chrs_for_sort;
270
push @chrs_native_sort, sort { $a cmp $b } grep($_ !~ /\d/, @chrs);
271
for my $i (0..@chrs_native_sort-1) {
272
my $chr = $chrs_native_sort[$i];
273
$k->{$chr}{chr}{sort_idx} = $i;