1
# $Id: ProteinGraph.pm,v 1.35.4.1 2006/10/02 23:10:18 sendu Exp $
3
# BioPerl module for Bio::Graph::ProteinGraph
5
# You may distribute this module under the same terms as perl itself
6
# POD documentation - main docs before the code
10
Bio::Graph::ProteinGraph - a representation of a protein interaction graph.
15
my $graphio = Bio::Graph::IO->new(-file => 'myfile.dat',
17
my $graph = $graphio->next_network();
19
=head2 Using ProteinGraph
21
# Remove duplicate interactions from within a dataset
22
$graph->remove_dup_edges();
24
# Get a node (represented by a sequence object) from the graph.
25
my $seqobj = $gr->nodes_by_id('P12345');
27
# Get clustering coefficient of a given node.
28
my $cc = $gr->clustering_coefficient($graph->nodes_by_id('NP_023232'));
29
if ($cc != -1) { ## result is -1 if cannot be calculated
30
print "CC for NP_023232 is $cc";
34
my $density = $gr->density();
36
# Get connected subgraphs
37
my @graphs = $gr->components();
40
$gr->remove_nodes($gr->nodes_by_id('P12345'));
42
# How many interactions are there?
43
my $count = $gr->edge_count;
45
# How many nodes are there?
46
my $ncount = $gr->node_count();
48
# Let's get interactions above a threshold confidence score.
49
my $edges = $gr->edges;
50
for my $edge (keys %$edges) {
51
if (defined($edges->{$edge}->weight()) &&
52
$edges->{$edge}->weight() > 0.6) {
53
print $edges->{$edge}->object_id(), "\t",
54
$edges->{$edge}->weight(),"\n";
58
# Get interactors of your favourite protein
59
my $node = $graph->nodes_by_id('NP_023232');
60
my @neighbors = $graph->neighbors($node);
61
print " NP_023232 interacts with ";
62
print join " ,", map{$_->object_id()} @neighbors;
65
# Annotate your sequences with interaction info
66
my @seqs; ## array of sequence objects
68
if ($graph->has_node($seq->accession_number)) {
69
my $node = $graph->nodes_by_id( $seq->accession_number);
70
my @neighbors = $graph->neighbors($node);
71
for my $n (@neighbors) {
72
my $ft = Bio::SeqFeature::Generic->new(
73
-primary_tag => 'Interactor',
74
-tags => { id => $n->accession_number }
76
$seq->add_SeqFeature($ft);
81
# Get proteins with > 10 interactors
82
my @nodes = $graph->nodes();
84
for my $node (@nodes) {
85
if ($graph->neighbor_count($node) > 10) {
89
print "the following proteins have > 10 interactors:\n";
90
print join "\n", map{$_->object_id()} @hubs;
92
# Merge graphs 1 and 2 and flag duplicate edges
94
my @duplicates = $g1->dup_edges();
95
print "these interactions exist in $g1 and $g2:\n";
96
print join "\n", map{$_->object_id} @duplicates;
98
=head2 Creating networks from your own data
100
If you have interaction data in your own format, e.g.
102
edgeid node1 node2 score
104
my $io = Bio::Root::IO->new(-file => 'mydata');
105
my $gr = Bio::Graph::ProteinGraph->new();
106
my %seen = (); # to record seen nodes
107
while (my $l = $io->_readline() ) {
109
# Parse out your data...
110
my ($e_id, $n1, $n2, $sc) = split /\s+/, $l;
112
# ...then make nodes if they don't already exist in the graph...
114
for my $n ($n1, $n2 ) {
115
if (!exists($seen{$n})) {
116
push @nodes, Bio::Seq->new(-accession_number => $n);
117
$seen{$n} = $nodes[$#nodes];
119
push @nodes, $seen{$n};
124
# ...and add a new edge to the graph
125
my $edge = Bio::Graph::Edge->new(-nodes => \@nodes,
128
$gr->add_edge($edge);
132
A ProteinGraph is a representation of a protein interaction network.
133
It derives most of its functionality from the L<Bio::Graph::SimpleGraph>
134
module, but is adapted to be able to use protein identifiers to
137
This graph can use any objects that implement L<Bio::AnnotatableI> and
138
L<Bio::IdentifiableI> interfaces. L<Bio::Seq> (but not L<Bio::PrimarySeqI>)
139
objects can therefore be used for the nodes but any object that supports
140
annotation objects and the object_id() method should work fine.
142
At present it is fairly 'lightweight' in that it represents nodes and
143
edges but does not contain all the data about experiment ids etc. found
144
in the Protein Standards Initiative schema. Hopefully that will be
147
A dataset may contain duplicate or redundant interactions.
148
Duplicate interactions are interactions that occur twice in the dataset
149
but with a different interaction ID, perhaps from a different
150
experiment. The dup_edges method will retrieve these.
152
Redundant interaction are interactions that occur twice or more in a
153
dataset with the same interaction id. These are more likely to be
154
due to database errors. These methods are useful when merging 2
155
datasets using the union() method. Interactions present in both
156
datasets, with different IDs, will be duplicate edges.
158
=head2 For Developers
160
In this module, nodes are represented by L<Bio::Seq::RichSeq> objects
161
containing all possible database identifiers but no sequence, as
162
parsed from the interaction files. However, a node represented by a
163
L<Bio::PrimarySeq> object should work fine too.
165
Edges are represented by L<Bio::Graph::Edge> objects. In order to
166
work with SimpleGraph these objects must be array references, with the
167
first 2 elements being references to the 2 nodes. More data can be
168
added in $e[2]. etc. Edges should be L<Bio::Graph::Edge> objects, which
169
are L<Bio::IdentifiableI> implementing objects.
171
At present edges only have an identifier and a weight() method, to
172
hold confidence data, but subclasses of this could hold all the
173
interaction data held in an XML document.
175
So, a graph has the following data:
177
1. A hash of nodes ('_nodes'), where keys are the text representation of a
178
nodes memory address and values are the sequence object references.
180
2. A hash of neighbors ('_neighbors'), where keys are the text representation of a
181
nodes memory address and a value is a reference to a list of
182
neighboring node references.
184
3. A hash of edges ('_edges'), where a key is a text representation of the 2 nodes.
185
E.g., "address1,address2" as a string, and values are Bio::Graph::Edge
188
4. Look up hash ('_id_map') for finding a node by any of its ids.
190
5. Look up hash for edges ('_edge_id_map') for retrieving an edge
191
object from its identifier.
193
6. Hash ('_components').
195
7. An array of duplicate edges ('_dup_edges').
197
8. Hash ('_is_connected').
201
To use this code you will need the Clone.pm module availabe from CPAN.
202
You also need Class::AutoClass, available from CPAN as well. To read in
203
XML data you will need XML::Twig available from CPAN.
207
L<Bio::Graph::SimpleGraph>
210
L<Bio::Graph::IO::dip>
211
L<Bio::Graph::IO::psi_xml>
217
User feedback is an integral part of the evolution of this and other
218
Bioperl modules. Send your comments and suggestions preferably to one
219
of the Bioperl mailing lists. Your participation is much appreciated.
221
bioperl-l@bioperl.org - General discussion
222
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
224
=head2 Reporting Bugs
226
Report bugs to the Bioperl bug tracking system to help us keep track
227
the bugs and their resolution. Bug reports can be submitted via the
230
http://bugzilla.open-bio.org/
234
Richard Adams - this module, Graph::IO modules.
236
Email richard.adams@ed.ac.uk
240
Nat Goodman - SimpleGraph.pm, and all underlying graph algorithms.
245
package Bio::Graph::ProteinGraph;
246
use Bio::Graph::Edge;
248
use base qw(Bio::Graph::SimpleGraph);
253
purpose : Is a protein in the graph?
254
usage : if ($g->has_node('NP_23456')) {....}
255
returns : 1 if true, 0 if false
256
arguments : A sequence identifier.
262
my ($self, $arg) = @_;
264
$self->throw ("I need a sequence identifier!");
266
my @nodes = $self->nodes_by_id($arg);
267
if (defined($nodes[0])){return 1;}else{return 0};
274
Purpose : get node memory address from an id
275
Usage : my @neighbors= $self->neighbors($self->nodes_by_id('O232322'))
276
Returns : a SimpleGraph node representation ( a text representation
277
of a node needed for other graph methods e.g.,
279
Arguments : a protein identifier., e.g., its accession number.
286
my @nodes = $self->_ids(@_);
287
wantarray? @nodes: $nodes[0];
295
Purpose : To merge two graphs together, flagging interactions as
297
Usage : $g1->union($g2), where g1 and g2 are 2 graph objects.
298
Returns : void, $g1 is modified
299
Arguments : A Graph object of the same class as the calling object.
300
Description : This method merges 2 graphs. The calling graph is modified,
301
the parameter graph ($g2) in usage) is unchanged. To take
302
account of differing IDs identifying the same protein, all
303
ids are compared. The following rules are used to modify $g1.
305
First of all both graphs are scanned for nodes that share
308
1. If 2 nodes(proteins) share an interaction in both graphs,
309
the edge in graph 2 is copied to graph 1 and added as a
310
duplicate edge to graph 1,
312
2. If 2 nodes interact in $g2 but not $g1, but both nodes exist
313
in $g1, the attributes of the interaction in $g2 are
314
used to make a new edge in $g1.
316
3. If 2 nodes interact in g2 but not g1, and 1 of them is a new
317
protein, that protein is put in $g1 and a new edge made to
320
4. At present, if there is an interaction in $g2 composed of a
321
pair of interactors that are not present in $g1, they are
322
not copied to $g1. This is rather conservative but prevents
323
the problem of having redundant nodes in $g1 due to the same
324
protein being identified by different ids in the same graph.
334
Graph 2: X1 P1 P2 - will be added as duplicate to Graph1
335
X2 P1 X4 - X4 added to Graph 1 and new edge made
336
X3 P2 P3 - new edge links existing proteins in G1
337
X4 Z4 Z5 - not added to Graph1. Are these different
338
proteins or synonyms for proteins in G1?
344
my ($self, $other) = @_;
345
my $class = ref($self);
346
if (!$other->isa($class)) {
347
$self->throw("I need a ". $class . " object, not a [".
348
ref($other). "] object");
351
my %detected_common_nodes;
352
my %seen_ids; # holds ids of nodes already known to be common.
354
## for each node see if Ids are in common between the 2 graphs
355
## just get1 common id per sequence.
357
## Produces too many common nodes we only need 1 common id between nodes.
358
for my $id (sort keys %{$self->{'_id_map'}}) {
359
if (exists($other->{'_id_map'}{$id}) ) {
360
## check if this node has a commonlink kown lready:
361
my $node = $self->nodes_by_id($id);
362
my $acc = $node->object_id;
363
if (!exists($detected_common_nodes{$acc})) {
364
push @common_nodes, $id; ## we store the common id
365
$detected_common_nodes{$acc} = undef; ## this means we won't store >1 common identifier
370
## now cyle through common nodes..
371
$self->debug( "there are ". scalar @common_nodes. " common nodes\n");
373
for my $common (@common_nodes) {
374
if ($i++ % 10 ==0 ) {
377
## get neighbours of common node for self and other
378
my @self_ns = $self->neighbors($self->nodes_by_id($common));
379
my @other_ns = $other->neighbors($other->nodes_by_id($common));
381
## now get all ids of all neighbours
382
my %self_n_ids = $self->_get_ids(@self_ns); # get all ids of neighbors
384
## cycle through other neighbors
385
for my $other_n(@other_ns){
386
my %other_n_ids = $self->_get_ids($other_n); # get ids of single other neighbor
388
## case (1) in description
389
## do any ids in other graph exist in self ?
390
# if yes, @int_match is defined, interaction does not involve a new node
391
my @int_match = grep{exists($self->{'_id_map'}{$_}) } keys %other_n_ids;
396
## we cycle through until we have an edge defined, this deals with
397
## multiple id matches
398
while (!$edge && $i <= $#int_match){
400
## get edge from other graph
401
my $other_edge = $other->edge(
402
[$other->nodes_by_id($common),
403
$other->nodes_by_id($other_n->object_id)]
407
my $edge = Bio::Graph::Edge->new(
408
-weight=> $other_edge->weight(),
409
-id => $other_edge->object_id(),
410
-nodes =>[$self->nodes_by_id($common),
411
$self->nodes_by_id($int_match[$i])
413
## add it to self graph.
414
## add_edge() works out if the edge is a new,
415
## duplicate or a redundant edge.
416
$self->add_edge($edge);
421
## but if other neighbour is entirely new, clone it and
424
my $other_edge = $other->edge($other->nodes_by_id($other_n->object_id()),
425
$other->nodes_by_id($common));
426
my $new = clone($other_n);
427
$self->add_edge(Bio::Graph::Edge->new(
428
-weight => $other_edge->weight(),
429
-id => $other_edge->object_id(),
430
-nodes =>[$new, $self->nodes_by_id($common)],
434
## add new ids to self graph look up table
435
map {$self->{'_id_map'}{$_} = $new} keys %other_n_ids;
444
Purpose : returns number of unique interactions, excluding
445
redundancies/duplicates
448
Usage : my $count = $graph->edge_count;
455
return scalar keys %{$self->_edges};
462
Purpose : returns number of nodes.
465
Usage : my $count = $graph->node_count;
472
return scalar keys %{$self->_nodes};
476
=head2 neighbor_count
478
Name : neighbor_count
479
Purpose : returns number of neighbors of a given node
480
Usage : my $count = $gr->neighbor_count($node)
481
Arguments : a node object
488
my ($self, $node) = @_;
489
if (!$node->isa('Bio::NodeI')) {
490
$self->throw ("I need a Bio::NodeI implementing object here , not a " . ref($node) . ".");
492
my @nbors = $self->neighbors($node);
493
return scalar @nbors;
496
=head2 _get_ids_by_db
498
Name : _get_ids_by_db
499
Purpose : gets all ids for a node, assuming its Bio::Seq object
500
Arguments: A Bio::SeqI object
501
Returns : A hash: Keys are db ids, values are accessions
502
Usage : my %ids = $gr->_get_ids_by_db($seqobj);
508
my $dummy_self = shift;
509
while (my $n = shift @_ ){ #ref to node, assume is a Bio::Seq
510
if (!$n->isa('Bio::AnnotatableI') || ! $n->isa('Bio::IdentifiableI' )) {
511
$n->throw("I need a Bio::AnnotatableI and Bio::IdentifiableI implementing object, not a [" .ref($n) ."]");
514
##if BioSeq getdbxref ids as well.
515
my $ac = $n->annotation();
516
for my $an($ac->get_Annotations('dblink')) {
517
$ids{$an->database()} = $an->primary_id();
526
my $dummy_self = shift;
527
while (my $n = shift @_ ){ #ref to node, assume is a Bio::Seq
528
if (!$n->isa('Bio::AnnotatableI') || ! $n->isa('Bio::IdentifiableI' )) {
529
$n->throw("I need a Bio::AnnotatableI and Bio::IdentifiableI implementing object, not a [" .ref($n) ."]");
532
map {$ids{$_} = undef} ($n->object_id);
534
##if BioSeq getdbxref ids as well.
535
if ($n->can('annotation')) {
536
my $ac = $n->annotation();
537
for my $an($ac->get_Annotations('dblink')) {
538
$ids{$an->primary_id()} = undef;
549
Purpose : adds an interaction to a graph.
550
Usage : $gr->add_edge($edge)
551
Arguments : a Bio::Graph::Edge object, or a reference to a 2 element list.
553
Description : This is the method to use to add an interaction to a graph.
554
It contains the logic used to determine if a graph is a
555
new edge, a duplicate (an existing interaction with a
556
different edge id) or a redundant edge (same interaction,
564
my $edges = $self->_edges;
565
my $neighbors = $self->_neighbors;
566
my $dup_edges = $self->_dup_edges;
569
if ( ref($_[0]) eq 'ARRAY' || !ref($_[0])) {
570
$self->SUPER::add_edges(@_);
573
elsif ( $_[0]->isa('Bio::Graph::Edge') ) { # it's already an edge
577
$self->throw(" Invalid edge! - must be an array of nodes, or an edge object");
580
my ($m, $n) = $edge->nodes();
581
next if $m eq $n; # no self edges
582
last unless defined $m && defined $n;
583
($m,$n) = ($n,$m) if "$n" lt "$m";
585
if (!exists($edges->{$m,$n})) {
586
$self->add_node($m,$n);
587
($m,$n) = $self->nodes($m,$n);
588
$edges->{$m,$n} = $edge;
589
push(@{$neighbors->{$m}},$n);
590
push(@{$neighbors->{$n}},$m);
592
## create look up hash for edge ##
593
$self->{'_edge_id_map'}{$edge->object_id()} = $edge;
595
## is it a redundant edge, ie with same edge id?
596
my $curr_edge = $edges->{$m,$n};
597
if($curr_edge->object_id() eq $edge->object_id()) {
598
$self->redundant_edge($edge);
600
## else it is a duplicate i.e., same nodes but different edge id
602
$self->add_dup_edge($edge);
606
$self->_is_connected(undef); # clear cached value
613
Purpose : To construct a subgraph of nodes from the main network.This
614
method overrides that of Bio::Graph::SimpleGraph in its dealings with
616
Usage : my $sg = $gr->subgraph(@nodes).
617
Returns : A subgraph of the same class as the original graph. Edge objects are
618
cloned from the original graph but node objects are shared, so beware if you
619
start deleting nodes from the parent graph whilst operating on subgraph nodes.
620
Arguments : A list of node objects.
627
## make new graph of same type as parent
628
my $class = ref($self);
629
my $subgraph = new $class;
630
$subgraph->add_node(@_);
631
# add all edges amongst the nodes
632
my @nodes=$subgraph->nodes;
635
if ($i++ % 100 == 0) { print STDERR ".";}
637
my $edges = $self->_edges;
639
if ($self->has_edge([$m,$n])) {
640
my ($edge) = $self->edges([$m,$n]); ## returns list of edges
641
my $id = $edge->object_id;
642
$subgraph->add_edge(Bio::Graph::Edge->new(-nodes=>[$m,$n],
653
Purpose : to flag an interaction as a duplicate, take advantage of
654
edge ids. The idea is that interactions from 2 sources with
655
different interaction ids can be used to provide more
656
evidence for a interaction being true, while preventing
657
redundancy of the same interaction being present more than
658
once in the same dataset.
659
Returns : 1 on successful addition, 0 on there being an existing
661
Usage : $gr->add_dup_edge(edge->new (-nodes => [$n1, $n2],
664
Arguments : an EdgeI implementing object.
673
my ($self, $newedge) = @_;
675
my $newedge_id = $newedge->object_id();
677
## now we have node objects, an edge id.
679
my $dup_edges = $self->_dup_edges();
680
if(!grep{$_->object_id eq $newedge_id } @$dup_edges) {
681
push @$dup_edges, $newedge;
684
$self->redundant_edge($newedge);
691
Purpose : retrieve data about an edge from its id
692
Arguments : a text identifier
693
Returns : a Bio::Graph::Edge object or undef
694
Usage : my $edge = $gr->edge_by_id('1000E');
700
my ($self, $id) = @_;
702
$self->warn ("Need a text identifier");
706
$self->throw(" I need a text identifier, not a [" . ref($id) . "].");
708
if (defined($self->{'_edge_id_map'}{$id})) {
709
return $self->{'_edge_id_map'}{$id};
715
=head2 remove_dup_edges
717
Name : remove_dup_edges
718
Purpose : removes duplicate edges from graph
719
Arguments : none - removes all duplicate edges
720
edge id list - removes specified edges
722
Usage : $gr->remove_dup_edges()
723
or $gr->remove_dup_edges($edgeid1, $edgeid2);
727
sub remove_dup_edges{
728
my ($self, @args) = @_;
729
my $dups = $self->_dup_edges();
734
while (my $node = shift @args) {
736
for my $dup (@$dups) {
737
if (!grep{$node eq $_} $dup->nodes) {
738
push @new_dups, $dup;
748
=head2 redundant_edge
750
Name : redundant_edge
751
Purpose : adds/retrieves redundant edges to graph
752
Usage : $gr->redundant_edge($edge)
753
Arguments : none (getter) or a Biuo::Graph::Edge object (setter).
754
Description : redundant edges are edges in a graph that have the
755
same edge id, ie. are 2 identical interactions.
756
With edge arg adds it to list, else returns list as reference.
762
my ($self, $edge) =@_;
764
if (!$edge->isa('Bio::Graph::Edge')) {
765
$self->throw ("I need a Bio::Graph::Edge object , not a [". ref($edge). "] object.");
767
if (!exists($self->{'_redundant_edges'})) {
768
$self->{'_redundant_edges'} = [];
770
## add edge to list if not already listed
771
if (!grep{$_->object_id eq $edge->object_id} @{$self->{'_redundant_edges'}}){
772
push @{$self->{'_redundant_edges'}}, $edge;
776
if (exists ($self->{'_redundant_edges'})){
777
return @{$self->{'_redundant_edges'}};
785
=head2 redundant_edges
787
Name : redundant_edges
788
Purpose : alias for redundant_edge
792
sub redundant_edges {
794
return $self->redundant_edge(shift);
797
=head2 remove_redundant_edges
799
Name : remove_redundant_edges
800
Purpose : removes redundant_edges from graph, used by remove_node(),
801
may be better as an internal method??
802
Arguments : none - removes all redundant edges
803
edge id list - removes specified edges
805
Usage : $gr->remove_redundant_edges()
806
or $gr->remove_redundant_edges($edgeid1, $edgeid2);
810
sub remove_redundant_edges {
811
my ($self, @args) = @_;
812
my @dups = $self->redundant_edge();
813
## if no args remove all
815
$self->{'_redundant_edges'} = [];
818
while (my $node = shift @args) {
820
for my $dup (@dups) {
821
if (!grep{$node eq $_} $dup->nodes) {
822
push @new_dups, $dup;
825
$self->{'_redundant_edges'} = \@new_dups;
832
=head2 clustering_coefficient
834
Name : clustering_coefficient
835
Purpose : determines the clustering coefficient of a node, a number
836
in range 0-1 indicating the extent to which the neighbors of
837
a node are interconnnected.
838
Arguments : A sequence object (preferred) or a text identifier
839
Returns : The clustering coefficient. 0 is a valid result.
840
If the CC is not calculable ( if the node has <2 neighbors),
842
Usage : my $node = $gr->nodes_by_id('P12345');
843
my $cc = $gr->clustering_coefficient($node);
847
sub clustering_coefficient {
848
my ($self, $val) = @_;
849
my $n = $self->_check_args($val);
850
$self->throw("[$val] is an incorrect parameter, not presnt in the graph")
852
my @n = $self->neighbors($n);
853
my $n_count = scalar @n;
856
## calculate cc if we can
858
for (my $i = 0; $i <= $#n; $i++ ) {
859
for (my $j = $i+1; $j <= $#n; $j++) {
860
if ($self->has_edge($n[$i], $n[$j])){
865
$c = 2 * $c / ($n_count *($n_count - 1));
866
return $c; # can be 0 if unconnected.
868
return -1; # if value is not calculable
875
Purpose : to delete a node from a graph, e.g., to simulate effect
877
Usage : $gr->remove_nodes($seqobj);
878
Arguments : a single $seqobj or list of seq objects (nodes)
879
Returns : 1 on success
887
$self->warn("You have to specify a node");
890
my $edges = $self->_edges;
891
my $ns = $self->_neighbors;
892
my $dups = $self->_dup_edges;
893
my $nodes = $self->_nodes;
894
while (my $val = shift @_ ) {
897
my $node = $self->_check_args($val);
898
$self->throw("[$val] is an incorrect parameter, not present in the graph")
899
unless defined($node);
901
##1. remove dup edges and redundant edges containing the node ##
902
$self->remove_dup_edges($node);
903
$self->remove_redundant_edges($node);
905
##2. remove node from interactor's neighbours
906
my @ns = $self->neighbors($node);
908
my @otherns = $self->neighbors($n); #get neighbors of neighbors
910
##look for node in neighbor's neighbors
911
@new_others = grep{$node ne $_} @otherns;
912
$ns->{$n} = \@new_others;
915
##3. Delete node from neighbour hash
918
##4. Now remove edges involving node
919
for my $k (keys %$edges) {
920
##access via internal hash rather than by object.
921
if ($edges->{$k}->[0] eq $node ||
922
$edges->{$k}->[1] eq $node){
923
## delete edge from look up hash
924
my $edge_id = $edges->{$k}->object_id();
925
delete $self->{'_edge_id_map'}{$edge_id};
926
delete($edges->{$k});
930
##5. Now remove node itself;
931
delete $nodes->{$node}{'_node_id'};
932
delete $nodes->{$node};
934
##6. now remove aliases from look up hash so it can no longer be accessed.
935
## is this wise? or shall we keep the sequence object available??
940
=head2 unconnected_nodes
942
Name : unconnected_nodes
943
Purpose : return a list of nodes with no connections.
945
Returns : an array or array reference of unconnected nodes
946
Usage : my @ucnodes = $gr->unconnected_nodes();
950
sub unconnected_nodes {
952
my $neighbours = $self->_neighbors;
953
my $nodes = $self->_nodes;
955
for my $n (keys %$neighbours) {
956
if (@{$neighbours->{$n}} == 0){
957
push @$uc_nodes, $nodes->{$n};
960
wantarray?@$uc_nodes:$uc_nodes;
963
=head2 articulation_points
965
Name : articulation_points
966
Purpose : to find edges in a graph that if broken will fragment
967
the graph into islands.
968
Usage : my $edgeref = $gr->articulation_points();
969
for my $e (keys %$edgeref) {
970
print $e->[0]->accession_number. "-".
971
$e->[1]->accession_number ."\n";
974
Returns : a list references to nodes that will fragment the graph
976
Notes : This is a "slow but sure" method that works with graphs
977
up to a few hundred nodes reasonably fast.
981
sub articulation_points {
984
## see if results are cahced already
985
$self->{'_artic_points'} ||= '';
986
return $self->{'_artic_points'} if $self->{'_artic_points'};
989
$self->debug( "doing subgraphs\n");
990
my @subgraphs = $self->components();
994
for my $sg (@subgraphs) {
995
my $all_nodes = $sg->_nodes;
996
$self->debug( "in subgraph - size". scalar (keys %$all_nodes) . "\n");
997
##ignore isolated vertices
998
next if scalar keys %$all_nodes <= 2;
999
my $neighbors = $sg->_neighbors;
1001
## find most connected - will be artic point if has >2 neighbors.
1002
## use this to initiate DFS
1005
for my $n (keys %$neighbors) {
1006
my $c = scalar @{$neighbors->{$n}};#
1007
($max, $id) = ($c, $n) if $c > $max;#
1010
my $t = $sg->node_traversal($all_nodes->{$id},'d');
1011
my @nodes = $t->get_all();
1015
$n->{'_node_id'} = $id;
1019
## cycle through each node
1020
for (my $i = $#nodes; $i >= 0; $i--) {
1022
## initiate minimumn to node_id
1023
my $curr_min = $all_nodes->{$nodes[$i]}{'_node_id'};
1024
#print STDERR "currmin - $curr_min, i = $i\n";
1025
## cycle through neighbors, reset minumum if required
1026
my $nbors = $neighbors->{$nodes[$i]};
1027
for my $nbor (@$nbors) {
1028
my $nbor_id = $all_nodes->{$nbor}{'_node_id'};
1030
## if is back edge ##
1031
if ($nbor_id < $i) {
1032
$curr_min = $nbor_id if $nbor_id < $curr_min ;
1035
## else is tree edge
1036
elsif($nbor_id > $i) {
1037
my $wlow = $all_nodes->{$nbor}{'_wlow'};
1038
$curr_min = $wlow if $wlow < $curr_min;
1042
## now we know the minimum, save.
1043
$all_nodes->{$nodes[$i]}{'_wlow'} = $curr_min;
1045
## now get tree nodes and test condition
1046
my @treenodes = grep{$all_nodes->{$_}{'_node_id'} > $i}@$nbors;
1047
for my $tn (@treenodes) {
1048
if(($all_nodes->{$tn}{'_wlow'} >= $i && $i != 0) ||
1049
($i == 0 && scalar @{$neighbors->{$nodes[0]}} > 1) ){
1050
$rts{$nodes[$i]} = $nodes[$i] unless exists $rts{$nodes[$i]};
1056
## cache results and return
1057
$self->{'_artic_points'} = [values %rts]; ##
1058
return $self->{'_artic_points'};
1061
=head2 is_articulation_point
1063
Name : is_articulation_point
1064
Purpose : to determine if a given node is an articulation point or not.
1065
Usage : if ($gr->is_articulation_point($node)) {....
1066
Arguments : a text identifier for the protein or the node itself
1067
Returns : 1 if node is an articulation point, 0 if it is not
1071
sub is_articulation_point {
1072
my ($self, $val) = @_;
1073
my $node = $self->_check_args($val);
1075
## this uses a cached value so it does not have to recalculate each time..
1076
my $artic_pt_ref = $self->articulation_points();
1077
my $acc = $node->accession_number;
1078
if (grep{$_->accession_number eq $acc} @$artic_pt_ref ){
1089
while (my $id = shift@_) {
1090
push @refs, $self->{'_id_map'}{$id};
1096
## used to check a parameter is a valid node or a text identifier
1097
my ($self, $val) = @_;
1100
$self->throw( "I need a node that's a Bio::AnnotatableI and Bio::IdentifiableI");
1103
## if param is text try to get sequence object..
1105
$n = $self->nodes_by_id($val);
1107
$self->throw ("Cannnot find node given by the id [$val]");
1110
# if reference should be a NodeI implementing object.
1111
elsif (!$val->isa('Bio::AnnotatableI') || !$val->isa('Bio::IdentifiableI')) {
1112
$self->throw( "I need a node that's a Bio::AnnotatableI and Bio::IdentifiableI ,not a [". ref($val) . "].");
1117
return $n; #n is either a node or undef