~ubuntu-branches/ubuntu/lucid/igraph/lucid

« back to all changes in this revision

Viewing changes to src/community.c

Committer: Bazaar Package Importer
Author(s): Mathieu Malaterre
Date: 2009-11-16 18:12:42 UTC
Revision ID: james.westby@ubuntu.com-20091116181242-mzv9p5fz9uj57xd1

Tags: upstream-0.5.3

Import upstream version 0.5.3

files added:

AUTHORS

COPYING

ChangeLog

INSTALL

Makefile.am

Makefile.in

NEWS

README

TODO

aclocal.m4

config.guess

config.h.in

config.sub

configure

configure.in

depcomp

doc/Makefile.in

doc/book

doc/book/Makefile.in

doc/igraph-docs.info

doc/igraph.3

examples

examples/simple

examples/simple/LINKS.NET

examples/simple/adjlist.c

examples/simple/ak-4102.max

examples/simple/bellman_ford.c

examples/simple/bellman_ford.out

examples/simple/cattributes.c

examples/simple/cattributes.out

examples/simple/cattributes2.c

examples/simple/cattributes2.out

examples/simple/d_indheap.c

examples/simple/d_indheap.out

examples/simple/dijkstra.c

examples/simple/dijkstra.out

examples/simple/dot.c

examples/simple/dot.out

examples/simple/dqueue.c

examples/simple/eigenvector_centrality.c

examples/simple/eigenvector_centrality.out

examples/simple/flow.c

examples/simple/foreign.c

examples/simple/foreign.out

examples/simple/gml.c

examples/simple/gml.out

examples/simple/graphml-hsa05010.xml

examples/simple/graphml.c

examples/simple/graphml.out

examples/simple/heap.c

examples/simple/igraph_add_edges.c

examples/simple/igraph_add_edges.out

examples/simple/igraph_add_vertices.c

examples/simple/igraph_adjacency.c

examples/simple/igraph_array.c

examples/simple/igraph_array.out

examples/simple/igraph_atlas.c

examples/simple/igraph_atlas.out

examples/simple/igraph_average_path_length.c

examples/simple/igraph_barabasi_game.c

examples/simple/igraph_bfs.c

examples/simple/igraph_bfs.out

examples/simple/igraph_bipartite_create.c

examples/simple/igraph_bipartite_create.out

examples/simple/igraph_bipartite_projection.c

examples/simple/igraph_cliques.c

examples/simple/igraph_cliques.out

examples/simple/igraph_cocitation.c

examples/simple/igraph_cocitation.out

examples/simple/igraph_community_edge_betweenness.c

examples/simple/igraph_community_edge_betweenness.out

examples/simple/igraph_community_fastgreedy.c

examples/simple/igraph_community_fastgreedy.out

examples/simple/igraph_community_label_propagation.c

examples/simple/igraph_community_label_propagation.out

examples/simple/igraph_community_leading_eigenvector.c

examples/simple/igraph_community_leading_eigenvector.out

examples/simple/igraph_complementer.c

examples/simple/igraph_complementer.out

examples/simple/igraph_compose.c

examples/simple/igraph_compose.out

examples/simple/igraph_convergence_degree.c

examples/simple/igraph_convergence_degree.out

examples/simple/igraph_convex_hull.c

examples/simple/igraph_convex_hull.out

examples/simple/igraph_copy.c

examples/simple/igraph_create.c

examples/simple/igraph_decompose.c

examples/simple/igraph_decompose.out

examples/simple/igraph_degree.c

examples/simple/igraph_degree.out

examples/simple/igraph_degree_sequence_game.c

examples/simple/igraph_delete_edges.c

examples/simple/igraph_delete_vertices.c

examples/simple/igraph_diameter.c

examples/simple/igraph_diameter.out

examples/simple/igraph_difference.c

examples/simple/igraph_difference.out

examples/simple/igraph_disjoint_union.c

examples/simple/igraph_disjoint_union.out

examples/simple/igraph_edge_betweenness.c

examples/simple/igraph_edge_betweenness.out

examples/simple/igraph_empty.c

examples/simple/igraph_erdos_renyi_game.c

examples/simple/igraph_es_adj.c

examples/simple/igraph_es_adj.out

examples/simple/igraph_es_fromto.c

examples/simple/igraph_es_fromto.out

examples/simple/igraph_es_pairs.c

examples/simple/igraph_es_path.c

examples/simple/igraph_full.c

examples/simple/igraph_get_eid.c

examples/simple/igraph_get_eid.out

examples/simple/igraph_get_eids.c

examples/simple/igraph_get_eids.out

examples/simple/igraph_get_shortest_paths.c

examples/simple/igraph_get_shortest_paths.out

examples/simple/igraph_get_shortest_paths_dijkstra.c

examples/simple/igraph_get_shortest_paths_dijkstra.out

examples/simple/igraph_girth.c

examples/simple/igraph_grg_game.c

examples/simple/igraph_growing_random_game.c

examples/simple/igraph_hashtable.c

examples/simple/igraph_hashtable.out

examples/simple/igraph_i_cutheap.c

examples/simple/igraph_i_cutheap.out

examples/simple/igraph_i_layout_sphere.c

examples/simple/igraph_independent_sets.c

examples/simple/igraph_independent_sets.out

examples/simple/igraph_intersection.c

examples/simple/igraph_intersection.out

examples/simple/igraph_is_directed.c

examples/simple/igraph_is_loop.c

examples/simple/igraph_is_loop.out

examples/simple/igraph_is_multiple.c

examples/simple/igraph_is_multiple.out

examples/simple/igraph_lattice.c

examples/simple/igraph_layout_lgl.c

examples/simple/igraph_layout_merge.c

examples/simple/igraph_layout_reingold_tilford.c

examples/simple/igraph_layout_reingold_tilford.in

examples/simple/igraph_lcf.c

examples/simple/igraph_local_transitivity.c

examples/simple/igraph_mincut.c

examples/simple/igraph_mincut.out

examples/simple/igraph_minimum_spanning_tree.c

examples/simple/igraph_minimum_spanning_tree.out

examples/simple/igraph_motifs_randesu.c

examples/simple/igraph_motifs_randesu.out

examples/simple/igraph_neighbors.c

examples/simple/igraph_neighbors.out

examples/simple/igraph_pagerank.c

examples/simple/igraph_pagerank.out

examples/simple/igraph_preference_game.c

examples/simple/igraph_psumtree.c

examples/simple/igraph_read_graph_graphdb.c

examples/simple/igraph_read_graph_graphdb.out

examples/simple/igraph_read_graph_lgl-1.lgl

examples/simple/igraph_read_graph_lgl-2.lgl

examples/simple/igraph_read_graph_lgl-3.lgl

examples/simple/igraph_read_graph_lgl.c

examples/simple/igraph_read_graph_lgl.out

examples/simple/igraph_reciprocity.c

examples/simple/igraph_rewire.c

examples/simple/igraph_rewire.out

examples/simple/igraph_ring.c

examples/simple/igraph_set.c

examples/simple/igraph_set.out

examples/simple/igraph_similarity.c

examples/simple/igraph_similarity.out

examples/simple/igraph_simplify.c

examples/simple/igraph_simplify.out

examples/simple/igraph_small.c

examples/simple/igraph_small.out

examples/simple/igraph_star.c

examples/simple/igraph_strvector.c

examples/simple/igraph_strvector.out

examples/simple/igraph_to_undirected.c

examples/simple/igraph_to_undirected.out

examples/simple/igraph_topological_sorting.c

examples/simple/igraph_topological_sorting.out

examples/simple/igraph_transitivity.c

examples/simple/igraph_tree.c

examples/simple/igraph_trie.c

examples/simple/igraph_trie.out

examples/simple/igraph_union.c

examples/simple/igraph_union.out

examples/simple/igraph_vs_nonadj.c

examples/simple/igraph_vs_nonadj.out

examples/simple/igraph_vs_seq.c

examples/simple/igraph_vs_seq.out

examples/simple/igraph_vs_vector.c

examples/simple/igraph_vs_vector.out

examples/simple/igraph_weighted_adjacency.c

examples/simple/igraph_weighted_adjacency.out

examples/simple/igraph_write_graph_lgl.c

examples/simple/igraph_write_graph_pajek.c

examples/simple/igraph_write_graph_pajek.out

examples/simple/indheap.c

examples/simple/iso_b03_m1000.A00

examples/simple/karate.gml

examples/simple/lineendings.c

examples/simple/lineendings.out

examples/simple/matrix.c

examples/simple/matrix.out

examples/simple/matrix2.c

examples/simple/matrix2.out

examples/simple/pajek1.net

examples/simple/pajek2.net

examples/simple/pajek3.net

examples/simple/pajek4.net

examples/simple/random_seed.c

examples/simple/spinglass.c

examples/simple/spmatrix.c

examples/simple/spmatrix.out

examples/simple/stack.c

examples/simple/test.gxl

examples/simple/topology.c

examples/simple/topology.out

examples/simple/vector.c

examples/simple/vector.out

examples/simple/vector2.c

examples/simple/vector2.out

examples/simple/vector_ptr.c

examples/simple/walktrap.c

examples/simple/walktrap.out

igraph.pc

igraph.pc.in

igraph_Info.plist.in

include

include/arpack.h

include/array.h

include/attributes.h

include/dqueue.h

include/error.h

include/heap.h

include/igraph.h

include/igraph_pmt.h

include/igraph_pmt_off.h

include/interrupt.h

include/matrix.h

include/stack.h

include/types.h

include/vector.h

install-sh

interfaces

interfaces/Makefile.am

interfaces/Makefile.in

interfaces/R

interfaces/R/DESCRIPTION.in

interfaces/R/Makefile.in

interfaces/R/configure.in.in

interfaces/R/src

interfaces/R/src/Makevars.win.in

interfaces/shell

interfaces/shell/Makefile.am

interfaces/shell/Makefile.in

interfaces/shell/interface.c

ltmain.sh

missing

src/DensityGrid.cpp

src/DensityGrid.h

src/DensityGrid_3d.cpp

src/DensityGrid_3d.h

src/Makefile.am

src/Makefile.in

src/NetDataTypes.cpp

src/NetDataTypes.h

src/NetRoutines.cpp

src/NetRoutines.h

src/adjlist.c

src/arpack

src/arpack.c

src/arpack.h

src/arpack/dgetv0.c

src/arpack/dlaqrb.c

src/arpack/dmout.c

src/arpack/dnaitr.c

src/arpack/dnapps.c

src/arpack/dnaup2.c

src/arpack/dnaupd.c

src/arpack/dnconv.c

src/arpack/dneigh.c

src/arpack/dneupd.c

src/arpack/dngets.c

src/arpack/dsaitr.c

src/arpack/dsapps.c

src/arpack/dsaup2.c

src/arpack/dsaupd.c

src/arpack/dsconv.c

src/arpack/dseigt.c

src/arpack/dsesrt.c

src/arpack/dseupd.c

src/arpack/dsgets.c

src/arpack/dsortc.c

src/arpack/dsortr.c

src/arpack/dstatn.c

src/arpack/dstats.c

src/arpack/dstqrb.c

src/arpack/dvout.c

src/arpack/ivout.c

src/arpack/second.c

src/arpack_internal.h

src/array.c

src/array.h

src/array.pmt

src/atlas-edges.h

src/atlas.c

src/attributes.c

src/attributes.h

src/basic_query.c

src/bfgs.c

src/bipartite.c

src/blas

src/blas/dasum.c

src/blas/daxpy.c

src/blas/dcopy.c

src/blas/ddot.c

src/blas/dgemv.c

src/blas/dger.c

src/blas/dnrm2.c

src/blas/drot.c

src/blas/dscal.c

src/blas/dswap.c

src/blas/dtrmm.c

src/blas/idamax.c

src/blas/lsame.c

src/bliss.cc

src/bliss_bignum.hh

src/bliss_defs.hh

src/bliss_eqrefhash.cc

src/bliss_eqrefhash.hh

src/bliss_graph.cc

src/bliss_graph.hh

src/bliss_heap.cc

src/bliss_heap.hh

src/bliss_kqueue.hh

src/bliss_kstack.hh

src/bliss_orbit.cc

src/bliss_orbit.hh

src/bliss_partition.cc

src/bliss_partition.hh

src/bliss_timer.cc

src/bliss_timer.hh

src/bliss_utils.cc

src/bliss_utils.hh

src/cattributes.c

src/centrality.c

src/cliques.c

src/clustertool.cpp

src/cocitation.c

src/community.c

src/components.c

src/conversion.c

src/cores.c

src/dqueue.c

src/dqueue.h

src/dqueue.pmt

src/drl_Node.h

src/drl_Node_3d.h

src/drl_graph.cpp

src/drl_graph.h

src/drl_graph_3d.cpp

src/drl_graph_3d.h

src/drl_layout.cpp

src/drl_layout.h

src/drl_layout_3d.cpp

src/drl_layout_3d.h

src/drl_parse.cpp

src/drl_parse.h

src/error.c

src/error.h

src/evolver_cit.c

src/f2c

src/f2c.h

src/f2c/d_lg10.c

src/f2c/d_sign.c

src/f2c/etime_.c

src/f2c/i_dnnt.c

src/f2c/pow_dd.c

src/f2c/pow_di.c

src/f2c/s_cmp.c

src/f2c/s_copy.c

src/fast_community.c

src/flow.c

src/foreign-gml-lexer.c

src/foreign-gml-lexer.l

src/foreign-gml-parser.c

src/foreign-gml-parser.h

src/foreign-gml-parser.y

src/foreign-graphml.c

src/foreign-lgl-lexer.c

src/foreign-lgl-lexer.l

src/foreign-lgl-parser.c

src/foreign-lgl-parser.h

src/foreign-lgl-parser.y

src/foreign-ncol-lexer.c

src/foreign-ncol-lexer.l

src/foreign-ncol-parser.c

src/foreign-ncol-parser.h

src/foreign-ncol-parser.y

src/foreign-pajek-lexer.c

src/foreign-pajek-lexer.l

src/foreign-pajek-parser.c

src/foreign-pajek-parser.h

src/foreign-pajek-parser.y

src/foreign.c

src/forestfire.c

src/games.c

src/gengraph_box_list.cpp

src/gengraph_box_list.h

src/gengraph_definitions.h

src/gengraph_degree_sequence.cpp

src/gengraph_degree_sequence.h

src/gengraph_graph_molloy_hash.cpp

src/gengraph_graph_molloy_hash.h

src/gengraph_graph_molloy_optimized.cpp

src/gengraph_graph_molloy_optimized.h

src/gengraph_hash.h

src/gengraph_header.h

src/gengraph_mr-connected.cpp

src/gengraph_powerlaw.cpp

src/gengraph_powerlaw.h

src/gengraph_qsort.h

src/gengraph_random.cpp

src/gengraph_random.h

src/gengraph_vertex_cover.h

src/gml_tree.c

src/gml_tree.h

src/heap.c

src/heap.h

src/heap.pmt

src/igraph.h

src/igraph_buckets.c

src/igraph_grid.c

src/igraph_hashtable.c

src/igraph_heap.c

src/igraph_math.h

src/igraph_pmt.h

src/igraph_pmt_off.h

src/igraph_psumtree.c

src/igraph_set.c

src/igraph_stack.c

src/igraph_strvector.c

src/igraph_trie.c

src/interrupt.c

src/interrupt.h

src/iterators.c

src/lapack

src/lapack/dgeqr2.c

src/lapack/dlabad.c

src/lapack/dlacon.c

src/lapack/dlacpy.c

src/lapack/dladiv.c

src/lapack/dlae2.c

src/lapack/dlaev2.c

src/lapack/dlaexc.c

src/lapack/dlahqr.c

src/lapack/dlaln2.c

src/lapack/dlamch.c

src/lapack/dlange.c

src/lapack/dlanhs.c

src/lapack/dlanst.c

src/lapack/dlanv2.c

src/lapack/dlapy2.c

src/lapack/dlarf.c

src/lapack/dlarfg.c

src/lapack/dlarfx.c

src/lapack/dlarnv.c

src/lapack/dlartg.c

src/lapack/dlaruv.c

src/lapack/dlascl.c

src/lapack/dlaset.c

src/lapack/dlasr.c

src/lapack/dlasrt.c

src/lapack/dlassq.c

src/lapack/dlasy2.c

src/lapack/dorm2r.c

src/lapack/dsteqr.c

src/lapack/dtrevc.c

src/lapack/dtrexc.c

src/lapack/dtrsen.c

src/lapack/dtrsyl.c

src/lapack/xerbla.c

src/layout.c

src/math.c

src/matrix.c

src/matrix.h

src/matrix.pmt

src/measure_dynamics.c

src/memory.c

src/memory.h

src/motifs.c

src/operators.c

src/other.c

src/pottsmodel_2.cpp

src/pottsmodel_2.h

src/progress.c

src/random.c

src/random.h

src/revolver_cit.c

src/revolver_grow.c

src/revolver_ml_cit.c

src/spectral_properties.c

src/spmatrix.c

src/stack.h

src/stack.pmt

src/structural_properties.c

src/structure_generators.c

src/topology.c

src/type_indexededgelist.c

src/types.h

src/vector.c

src/vector.h

src/vector.pmt

src/vector_ptr.c

src/visitors.c

src/walktrap.cpp

src/walktrap_communities.cpp

src/walktrap_communities.h

src/walktrap_graph.cpp

src/walktrap_graph.h

src/walktrap_heap.cpp

src/walktrap_heap.h

src/zeroin.c

tests

tests/Makefile.am

tests/Makefile.in

tests/arpack.at

tests/atlocal.in

tests/attributes.at

tests/basic.at

tests/bipartite.at

tests/cliques.at

tests/community.at

tests/components.at

tests/conversion.at

tests/eigen.at

tests/flow.at

tests/foreign.at

tests/iterators.at

tests/layout.at

tests/motifs.at

tests/operators.at

tests/other.at

tests/package.m4

tests/structural_properties.at

tests/structure_generators.at

tests/testsuite

tests/testsuite.at

tests/topology.at

tests/types.at

tests/visitors.at

ylwrap

Show diffs side-by-side

added added

removed removed

src/community.c

/* -*- mode: C -*- */

/* vim:set ts=2 sw=2 sts=2 et: */

IGraph library.

MTA RMKI, Konkoly-Thege Miklos st. 29-33, Budapest 1121, Hungary

This program is free software; you can redistribute it and/or modify

it under the terms of the GNU General Public License as published by

the Free Software Foundation; either version 2 of the License, or

(at your option) any later version.

This program is distributed in the hope that it will be useful,

but WITHOUT ANY WARRANTY; without even the implied warranty of

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

GNU General Public License for more details.

You should have received a copy of the GNU General Public License

along with this program; if not, write to the Free Software

Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA

02110-1301 USA

#include "igraph.h"

#include "memory.h"

#include "random.h"

#include "arpack.h"

#include "arpack_internal.h"

#include "config.h"

#include <string.h>

#include <math.h>

/**

* \function igraph_community_eb_get_merges

* \brief Calculating the merges, ie. the dendrogram for an edge betweenness community structure

* </para><para>

* This function is handy if you have a sequence of edge which are

* gradually removed from the network and you would like to know how

* the network falls apart into separate components. The edge sequence

* may come from the \ref igraph_community_edge_betweenness()

* function, but this is not neccessary. Note that \ref

* igraph_community_edge_betweenness can also calculate the

* dendrogram, via its \p merges argument.

* \param graph The input graph.

* \param edges Vector containing the edges to be removed from the

* network, all edges are expected to appear exactly once in the

* vector.

* \param res Pointer to an initialized matrix, if not NULL then the

* dendrogram will be stored here, in the same form as for the \ref

* igraph_community_walktrap() function: the matrix has two columns

* and each line is a merge given by the ids of the merged

* components. The component ids are number from zero and

* component ids smaller than the number of vertices in the graph

* belong to individual vertices. The non-trivial components

* containing at least two vertices are numbered from \c n, \c n is

* the number of vertices in the graph. So if the first line

* contains \c a and \c b that means that components \c a and \c b

* are merged into component \c n, the second line creates

* component \c n+1, etc. The matrix will be resized as needed.

* \param bridges Pointer to an initialized vector or NULL. If not

* null then the index of the edge removals which split the network

* will be stored here. The vector will be resized as needed.

* \return Error code.

* \sa \ref igraph_community_edge_betweenness().

* Time complexity: O(|E|+|V|log|V|), |V| is the number of vertices,

* |E| is the number of edges.

int igraph_community_eb_get_merges(const igraph_t *graph,

const igraph_vector_t *edges,

igraph_matrix_t *res,

igraph_vector_t *bridges) {

long int no_of_nodes=igraph_vcount(graph);

igraph_vector_t ptr;

long int i, midx=0;

igraph_integer_t no_comps;

IGRAPH_CHECK(igraph_clusters(graph, 0, 0, &no_comps, IGRAPH_WEAK));

IGRAPH_VECTOR_INIT_FINALLY(&ptr, no_of_nodes*2-1);

if (res) {

IGRAPH_CHECK(igraph_matrix_resize(res, no_of_nodes-no_comps, 2));

}

if (bridges) {

IGRAPH_CHECK(igraph_vector_resize(bridges, no_of_nodes-no_comps));

}

for (i=igraph_vector_size(edges)-1; i>=0; i--) {

long int edge=VECTOR(*edges)[i];

igraph_integer_t from, to;

long int c1, c2, idx;

igraph_edge(graph, edge, &from, &to);

100

idx=from+1;

101

while (VECTOR(ptr)[idx-1] != 0) {

102

idx=VECTOR(ptr)[idx-1];

103

}

104

c1=idx-1;

105

idx=to+1;

106

while (VECTOR(ptr)[idx-1] != 0) {

107

idx=VECTOR(ptr)[idx-1];

108

}

109

c2=idx-1;

110

if (c1 != c2) { /* this is a merge */

111

if (res) {

112

MATRIX(*res, midx, 0)=c1;

113

MATRIX(*res, midx, 1)=c2;

114

}

115

if (bridges) {

116

VECTOR(*bridges)[midx]=i+1;

117

}

118

119

VECTOR(ptr)[c1]=no_of_nodes+midx+1;

120

VECTOR(ptr)[c2]=no_of_nodes+midx+1;

121

VECTOR(ptr)[(long int)from]=no_of_nodes+midx+1;

122

VECTOR(ptr)[(long int)to]=no_of_nodes+midx+1;

123

124

midx++;

125

}

126

}

127

128

igraph_vector_destroy(&ptr);

129

IGRAPH_FINALLY_CLEAN(1);

130

131

return 0;

132

}

133

134

/* Find the smallest active element in the vector */

135

long int igraph_i_vector_which_max_not_null(const igraph_vector_t *v,

136

const char *passive) {

137

long int which, i=0, size=igraph_vector_size(v);

138

igraph_real_t max;

139

while (passive[i]) {

140

i++;

141

}

142

which=i;

143

max=VECTOR(*v)[which];

144

for (i++; i<size; i++) {

145

igraph_real_t elem=VECTOR(*v)[i];

146

if (!passive[i] && elem > max) {

147

max=elem;

148

which=i;

149

}

150

}

151

152

return which;

153

}

154

155

/**

156

* \function igraph_community_edge_betweenness

157

* \brief Community findinf based on edge betweenness

158

159

* Community structure detection based on the betweenness of the edges

160

* in the network. The algorithm was invented by M. Girvan and

161

* M. Newman, see: M. Girvan and M. E. J. Newman: Community structure in

162

* social and biological networks, Proc. Nat. Acad. Sci. USA 99, 7821-7826

163

* (2002).

164

165

* </para><para>

166

* The idea is that the betweenness of the edges connecting two

167

* communities is typically high, as many of the shortest paths

168

* between nodes in separate communities go through them. So we

169

* gradually remove the edge with highest betweenness from the

170

* network, and recalculate edge betweenness after every removal.

171

* This way sooner or later the network falls off to two components,

172

* then after a while one of these components falls off to two smaller

173

* components, etc. until all edges are removed. This is a divisive

174

* hieararchical approach, the result is a dendrogram.

175

* \param graph The input graph.

176

* \param result Pointer to an initialized vector, the result will be

177

* stored here, the ids of the removed edges in the order of their

178

* removal. It will be resized as needed.

179

* \param edge_betweenness Pointer to an initialized vector or

180

* NULL. In the former case the edge betweenness of the removed

181

* edge is stored here. The vector will be resized as needed.

182

* \param merges Pointer to an initialized matrix or NULL. If not NULL

183

* then merges performed by the algorithm are stored here. Even if

184

* this is a divisive algorithm, we can replay it backwards and

185

* note which two clusters were merged. Clusters are numbered from

186

* zero, see the \p merges argument of \ref

187

* igraph_community_walktrap() for details. The matrix will be

188

* resized as needed.

189

* \param bridges Pointer to an initialized vector of NULL. If not

190

* NULL then all edge removals which separated the network into

191

* more components are marked here.

192

* \param directed Logical constant, whether to calculate directed

193

* betweenness (ie. directed paths) for directed graphs. It is

194

* ignored for undirected graphs.

195

* \return Error code.

196

197

* \sa \ref igraph_community_eb_get_merges(), \ref

198

* igraph_community_spinglass(), \ref igraph_community_walktrap().

199

200

* Time complexity: O(|V|^3), as the betweenness calculation requires

201

* O(|V|^2) and we do it |V|-1 times.

202

203

204

int igraph_community_edge_betweenness(const igraph_t *graph,

205

igraph_vector_t *result,

206

igraph_vector_t *edge_betweenness,

207

igraph_matrix_t *merges,

208

igraph_vector_t *bridges,

209

igraph_bool_t directed) {

210

211

long int no_of_nodes=igraph_vcount(graph);

212

long int no_of_edges=igraph_ecount(graph);

213

igraph_dqueue_t q=IGRAPH_DQUEUE_NULL;

214

long int *distance, *nrgeo;

215

double *tmpscore;

216

igraph_stack_t stack=IGRAPH_STACK_NULL;

217

long int source, i, e;

218

219

igraph_adjedgelist_t elist_out, elist_in;

220

igraph_adjedgelist_t *elist_out_p, *elist_in_p;

221

igraph_vector_t *neip;

222

long int neino;

223

igraph_integer_t modein, modeout;

224

igraph_vector_t eb;

225

long int maxedge, pos;

226

igraph_integer_t from, to;

227

228

char *passive;

229

230

directed=directed && igraph_is_directed(graph);

231

if (directed) {

232

modeout=IGRAPH_OUT;

233

modeout=IGRAPH_IN;

234

IGRAPH_CHECK(igraph_adjedgelist_init(graph, &elist_out, IGRAPH_OUT));

235

IGRAPH_FINALLY(igraph_adjedgelist_destroy, &elist_out);

236

IGRAPH_CHECK(igraph_adjedgelist_init(graph, &elist_in, IGRAPH_IN));

237

IGRAPH_FINALLY(igraph_adjedgelist_destroy, &elist_in);

238

elist_out_p=&elist_out;

239

elist_in_p=&elist_in;

240

} else {

241

modeout=modein=IGRAPH_ALL;

242

IGRAPH_CHECK(igraph_adjedgelist_init(graph, &elist_out, IGRAPH_ALL));

243

IGRAPH_FINALLY(igraph_adjedgelist_destroy, &elist_out);

244

elist_out_p=elist_in_p=&elist_out;

245

}

246

247

distance=igraph_Calloc(no_of_nodes, long int);

248

if (distance==0) {

249

IGRAPH_ERROR("edge betweenness community structure failed", IGRAPH_ENOMEM);

250

}

251

IGRAPH_FINALLY(igraph_free, distance);

252

nrgeo=igraph_Calloc(no_of_nodes, long int);

253

if (nrgeo==0) {

254

IGRAPH_ERROR("edge betweenness community structure failed", IGRAPH_ENOMEM);

255

}

256

IGRAPH_FINALLY(igraph_free, nrgeo);

257

tmpscore=igraph_Calloc(no_of_nodes, double);

258

if (tmpscore==0) {

259

IGRAPH_ERROR("edge betweenness community structure failed", IGRAPH_ENOMEM);

260

}

261

IGRAPH_FINALLY(igraph_free, tmpscore);

262

263

IGRAPH_DQUEUE_INIT_FINALLY(&q, 100);

264

IGRAPH_CHECK(igraph_stack_init(&stack, no_of_nodes));

265

IGRAPH_FINALLY(igraph_stack_destroy, &stack);

266

267

IGRAPH_CHECK(igraph_vector_resize(result, no_of_edges));

268

if (edge_betweenness) {

269

IGRAPH_CHECK(igraph_vector_resize(edge_betweenness, no_of_edges));

270

VECTOR(*edge_betweenness)[no_of_edges-1]=0;

271

}

272

273

IGRAPH_VECTOR_INIT_FINALLY(&eb, no_of_edges);

274

275

passive=igraph_Calloc(no_of_edges, char);

276

if (!passive) {

277

IGRAPH_ERROR("edge betweenness community structure failed", IGRAPH_ENOMEM);

278

}

279

IGRAPH_FINALLY(igraph_free, passive);

280

281

for (e=0; e<no_of_edges; e++) {

282

283

igraph_vector_null(&eb);

284

285

for (source=0; source<no_of_nodes; source++) {

286

287

/* This will contain the edge betweenness in the current step */

288

IGRAPH_ALLOW_INTERRUPTION();

289

290

memset(distance, 0, no_of_nodes*sizeof(long int));

291

memset(nrgeo, 0, no_of_nodes*sizeof(long int));

292

memset(tmpscore, 0, no_of_nodes*sizeof(double));

293

igraph_stack_clear(&stack); /* it should be empty anyway... */

294

295

IGRAPH_CHECK(igraph_dqueue_push(&q, source));

296

297

nrgeo[source]=1;

298

distance[source]=0;

299

300

while (!igraph_dqueue_empty(&q)) {

301

long int actnode=igraph_dqueue_pop(&q);

302

303

neip=igraph_adjedgelist_get(elist_out_p, actnode);

304

neino=igraph_vector_size(neip);

305

for (i=0; i<neino; i++) {

306

igraph_integer_t edge=VECTOR(*neip)[i], from, to;

307

long int neighbor;

308

igraph_edge(graph, edge, &from, &to);

309

neighbor = actnode!=from ? from : to;

310

if (nrgeo[neighbor] != 0) {

311

/* we've already seen this node, another shortest path? */

312

if (distance[neighbor]==distance[actnode]+1) {

313

nrgeo[neighbor]+=nrgeo[actnode];

314

}

315

} else {

316

/* we haven't seen this node yet */

317

nrgeo[neighbor]+=nrgeo[actnode];

318

distance[neighbor]=distance[actnode]+1;

319

IGRAPH_CHECK(igraph_dqueue_push(&q, neighbor));

320

IGRAPH_CHECK(igraph_stack_push(&stack, neighbor));

321

}

322

}

323

} /* while !igraph_dqueue_empty */

324

325

/* Ok, we've the distance of each node and also the number of

326

shortest paths to them. Now we do an inverse search, starting

327

with the farthest nodes. */

328

while (!igraph_stack_empty(&stack)) {

329

long int actnode=igraph_stack_pop(&stack);

330

if (distance[actnode]<1) { continue; } /* skip source node */

331

332

/* set the temporary score of the friends */

333

neip=igraph_adjedgelist_get(elist_in_p, actnode);

334

neino=igraph_vector_size(neip);

335

for (i=0; i<neino; i++) {

336

igraph_integer_t from, to;

337

long int neighbor;

338

long int edgeno=VECTOR(*neip)[i];

339

igraph_edge(graph, edgeno, &from, &to);

340

neighbor= actnode != from ? from : to;

341

if (distance[neighbor]==distance[actnode]-1 &&

342

nrgeo[neighbor] != 0) {

343

tmpscore[neighbor] +=

344

(tmpscore[actnode]+1)*nrgeo[neighbor]/nrgeo[actnode];

345

VECTOR(eb)[edgeno] +=

346

(tmpscore[actnode]+1)*nrgeo[neighbor]/nrgeo[actnode];

347

}

348

}

349

}

350

/* Ok, we've the scores for this source */

351

} /* for source <= no_of_nodes */

352

353

/* Now look for the smallest edge betweenness */

354

/* and eliminate that edge from the network */

355

maxedge=igraph_i_vector_which_max_not_null(&eb, passive);

356

VECTOR(*result)[e]=maxedge;

357

if (edge_betweenness) {

358

VECTOR(*edge_betweenness)[e]=VECTOR(eb)[maxedge];

359

if (!directed) {

360

VECTOR(*edge_betweenness)[e] /= 2.0;

361

}

362

}

363

passive[maxedge]=1;

364

igraph_edge(graph, maxedge, &from, &to);

365

366

neip=igraph_adjedgelist_get(elist_in_p, to);

367

neino=igraph_vector_size(neip);

368

igraph_vector_search(neip, 0, maxedge, &pos);

369

VECTOR(*neip)[pos]=VECTOR(*neip)[neino-1];

370

igraph_vector_pop_back(neip);

371

372

neip=igraph_adjedgelist_get(elist_out_p, from);

373

neino=igraph_vector_size(neip);

374

igraph_vector_search(neip, 0, maxedge, &pos);

375

VECTOR(*neip)[pos]=VECTOR(*neip)[neino-1];

376

igraph_vector_pop_back(neip);

377

}

378

379

igraph_free(passive);

380

igraph_vector_destroy(&eb);

381

igraph_stack_destroy(&stack);

382

igraph_dqueue_destroy(&q);

383

igraph_free(tmpscore);

384

igraph_free(nrgeo);

385

igraph_free(distance);

386

IGRAPH_FINALLY_CLEAN(7);

387

388

if (directed) {

389

igraph_adjedgelist_destroy(&elist_out);

390

igraph_adjedgelist_destroy(&elist_in);

391

IGRAPH_FINALLY_CLEAN(2);

392

} else {

393

igraph_adjedgelist_destroy(&elist_out);

394

IGRAPH_FINALLY_CLEAN(1);

395

}

396

397

if (merges || bridges) {

398

IGRAPH_CHECK(igraph_community_eb_get_merges(graph, result, merges, bridges));

399

}

400

401

return 0;

402

}

403

404

405

/**

406

* \function igraph_community_to_membership

407

* \brief Create membership vector from community structure dendrogram

408

409

* This function creates a membership vector from a community

410

* structure dendrogram. A membership vector contains for each vertex

411

* the id of its graph component, the graph components are numbered

412

* from zero, see the same argument of \ref igraph_clusters() for an

413

* example of a membership vector.

414

415

* </para><para>

416

* Many community detection algorithms return with a \em merges

417

* matrix, \ref igraph_community_walktrap() an \ref

418

* igraph_community_edge_betweenness() are two examples. The matrix

419

* contains the merge operations performed while mapping the

420

* hierarchical structure of a network. If the matrix has \c n-1 rows,

421

* where \c n is the number of vertices in the graph, then it contains

422

* the hierarchical structure of the whole network and it is called a

423

* dendrogram.

424

425

* </para><para>

426

* This function performs \p steps merge operations as prescribed by

427

* the \p merges matrix and returns the current state of the network.

428

429

* </para><para>

430

* If if \p merges is not a complete dendrogram, it is possible to

431

* take \p steps steps if \p steps is not bigger than the number

432

* lines in \p merges.

433

* \param merges The two-column matrix containing the merge

434

* operations. See \ref igraph_community_walktrap() for the

435

* detailed syntax.

436

* \param nodes The number of leaf nodes in the dendrogram

437

* \param steps Integer constant, the number of steps to take.

438

* \param membership Pointer to an initialied vector, the membership

439

* results will be stored here, if not NULL. The vector will be

440

* resized as needed.

441

* \param csize Pointer to an initialized vector, or NULL. If not NULL

442

* then the sizes of the components will be stored here, the vector

443

* will be resized as needed.

444

445

* \sa \ref igraph_community_walktrap(), \ref

446

* igraph_community_edge_betweenness(), \ref

447

* igraph_community_fastgreedy() for community structure detection

448

* algorithms.

449

450

* Time complexity: O(|V|), the number of vertices in the graph.

451

452

453

int igraph_community_to_membership(const igraph_matrix_t *merges,

454

igraph_integer_t nodes,

455

igraph_integer_t steps,

456

igraph_vector_t *membership,

457

igraph_vector_t *csize) {

458

459

long int no_of_nodes=nodes;

460

long int components=no_of_nodes-steps;

461

long int i, found=0;

462

igraph_vector_t tmp;

463

464

if (steps > igraph_matrix_nrow(merges)) {

465

IGRAPH_ERROR("`steps' to big or `merges' matrix too short", IGRAPH_EINVAL);

466

}

467

468

if (membership) {

469

IGRAPH_CHECK(igraph_vector_resize(membership, no_of_nodes));

470

igraph_vector_null(membership);

471

}

472

if (csize) {

473

IGRAPH_CHECK(igraph_vector_resize(csize, components));

474

igraph_vector_null(csize);

475

}

476

477

IGRAPH_VECTOR_INIT_FINALLY(&tmp, steps);

478

479

for (i=steps-1; i>=0; i--) {

480

long int c1=MATRIX(*merges, i, 0);

481

long int c2=MATRIX(*merges, i, 1);

482

483

/* new component? */

484

if (VECTOR(tmp)[i]==0) {

485

found++;

486

VECTOR(tmp)[i]=found;

487

}

488

489

if (c1<no_of_nodes) {

490

long int cid=VECTOR(tmp)[i]-1;

491

if (membership) { VECTOR(*membership)[c1]=cid+1; }

492

if (csize) { VECTOR(*csize)[cid] += 1; }

493

} else {

494

VECTOR(tmp)[c1-no_of_nodes]=VECTOR(tmp)[i];

495

}

496

497

if (c2<no_of_nodes) {

498

long int cid=VECTOR(tmp)[i]-1;

499

if (membership) { VECTOR(*membership)[c2]=cid+1; }

500

if (csize) { VECTOR(*csize)[cid] += 1; }

501

} else {

502

VECTOR(tmp)[c2-no_of_nodes]=VECTOR(tmp)[i];

503

}

504

505

}

506

507

if (membership || csize) {

508

for (i=0; i<no_of_nodes; i++) {

509

long int tmp=VECTOR(*membership)[i];

510

if (tmp!=0) {

511

if (membership) {

512

VECTOR(*membership)[i]=tmp-1;

513

}

514

} else {

515

if (csize) {

516

VECTOR(*csize)[found]+=1;

517

}

518

if (membership) {

519

VECTOR(*membership)[i]=found;

520

}

521

found++;

522

}

523

}

524

}

525

526

igraph_vector_destroy(&tmp);

527

IGRAPH_FINALLY_CLEAN(1);

528

529

return 0;

530

}

531

532

/**

533

* \function igraph_modularity

534

* \brief Calculate the modularity of a graph with respect to some vertex types

535

536

* The modularity of a graph with respect to some division (or vertex

537

* types) measures how good the division is, or how separated are the

538

* different vertex types from each other. It defined as

539

* Q=1/(2m) * sum(Aij-ki*kj/(2m)delta(ci,cj),i,j), here `m' is the

540

* number of edges, `Aij' is the element of the `A' adjacency matrix

541

* in row `i' and column `j', `ki' is the degree of `i', `kj' is the

542

* degree of `j', `ci' is the type (or component) of `i', `cj' that of

543

* `j', the sum goes over all `i' and `j' pairs of vertices, and

544

* `delta(x,y)' is one if x=y and zero otherwise.

545

546

* </para><para>

547

* Modularity on weighted graphs is also meaningful. When taking edge

548

* weights into account, `Aij' becomes the weight of the corresponding

549

* edge (or 0 if there is no edge), `ki' is the total weight of edges

550

* adjacent to vertex `i', `kj' is the total weight of edges adjacent

551

* to vertex `j' and `m' is the total weight of all edges.

552

553

* </para><para>

554

* See also MEJ Newman and M Girvan: Finding and evaluating community

555

* structure in networks. Physical Review E 69 026113, 2004.

556

* \param graph The input graph.

557

* \param membership Numeric vector which gives the type of each

558

* vertex, ie. the component to which it belongs.

559

* \param modularity Pointer to a real number, the result will be

560

* stored here.

561

* \param weights Weight vector or NULL if no weights are specified.

562

* \return Error code.

563

564

* Time complexity: O(|V|+|E|), the number of vertices plus the number

565

* of edges.

566

567

568

int igraph_modularity(const igraph_t *graph,

569

const igraph_vector_t *membership,

570

igraph_real_t *modularity,

571

const igraph_vector_t *weights) {

572

573

igraph_vector_t e, a;

574

long int types=igraph_vector_max(membership)+1;

575

long int no_of_edges=igraph_ecount(graph);

576

long int i;

577

igraph_integer_t from, to, m;

578

long int c1, c2;

579

580

IGRAPH_VECTOR_INIT_FINALLY(&e, types);

581

IGRAPH_VECTOR_INIT_FINALLY(&a, types);

582

583

if (weights) {

584

if (igraph_vector_size(weights) < no_of_edges)

585

IGRAPH_ERROR("cannot calculate modularity, weight vector too short",

586

IGRAPH_EINVAL);

587

m=igraph_vector_sum(weights);

588

for (i=0; i<no_of_edges; i++) {

589

igraph_real_t w=VECTOR(*weights)[i];

590

igraph_edge(graph, i, &from, &to);

591

c1=VECTOR(*membership)[(long int)from];

592

c2=VECTOR(*membership)[(long int)to];

593

if (c1==c2) VECTOR(e)[c1] += 2*w;

594

VECTOR(a)[c1] += w;

595

VECTOR(a)[c2] += w;

596

}

597

} else {

598

m=no_of_edges;

599

for (i=0; i<no_of_edges; i++) {

600

igraph_edge(graph, i, &from, &to);

601

c1=VECTOR(*membership)[(long int)from];

602

c2=VECTOR(*membership)[(long int)to];

603

if (c1==c2) VECTOR(e)[c1] += 2;

604

VECTOR(a)[c1] += 1;

605

VECTOR(a)[c2] += 1;

606

}

607

}

608

609

*modularity=0.0;

610

for (i=0; i<types; i++) {

611

igraph_real_t tmp=VECTOR(a)[i]/2/m;

612

*modularity += VECTOR(e)[i]/2/m;

613

*modularity -= tmp*tmp;

614

}

615

616

igraph_vector_destroy(&e);

617

igraph_vector_destroy(&a);

618

IGRAPH_FINALLY_CLEAN(2);

619

620

return 0;

621

}

622

623

/**

624

* \section about_leading_eigenvector_methods

625

626

* <para>

627

* The functions documented in these section implement the

628

* <quote>leading eigenvector</quote> method developed by Mark Newman and

629

* published in MEJ Newman: Finding community structure using the

630

* eigenvectors of matrices, arXiv:physics/0605087. TODO: proper

631

* citation.</para>

632

633

* <para>

634

* The heart of the method is the definition of the modularity matrix,

635

* B, which is B=A-P, A being the adjacency matrix of the (undirected)

636

* network, and P contains the probability that certain edges are

637

* present according to the <quote>configuration model</quote> In

638

* other words, a Pij element of P is the probability that there is an

639

* edge between vertices i and j in a random network in which the

640

* degrees of all vertices are the same as in the input graph.</para>

641

642

* <para>

643

* The leading eigenvector method works by calculating the eigenvector

644

* of the modularity matrix for the largest positive eigenvalue and

645

* then separating vertices into two community based on the sign of

646

* the corresponding element in the eigenvector. If all elements in

647

* the eigenvector are of the same sign that means that the network

648

* has no underlying comuunity structure.

649

* Check Newman's paper to understand why this is a good method for

650

* detecting community structure. </para>

651

652

* <para>

653

* Three function are implemented, they all work accoding to the same

654

* principles. The simplest is perhaps \ref

655

* igraph_community_leading_eigenvector_naive(). This function splits

656

* the network as described above and then recursively splits the

657

* two components after the split as individual networks, if possible.

658

* This however is not a good way for maximizing moduilarity, again

659

* see the paper for explanation and the proper definition of

660

* modularity.</para>

661

662

* <para>

663

* The correct recursive community structure detection method is

664

* implemented in \ref igraph_community_leading_eigenvector().

665

* Here, after the initial split, the following splits are done in a

666

* way to optimize modularity regarding the original network.

667

* I can't say it enough, see the paper, particularly section VI.

668

* </para>

669

670

* <para>

671

* The third function is \ref

672

* igraph_community_leading_eigenvector_step(), this starts from a

673

* division of the network and tries to split a given community into

674

* two subcommunities via the same (correct) method as \ref

675

* igraph_community_leading_eigenvector().

676

* </para>

677

678

679

typedef struct igraph_i_community_leading_eigenvector_naive_data_t {

680

igraph_vector_t *idx;

681

igraph_adjlist_t *adjlist;

682

} igraph_i_community_leading_eigenvector_naive_data_t;

683

684

int igraph_i_community_leading_eigenvector_naive(igraph_real_t *to,

685

const igraph_real_t *from,

686

long int n, void *extra) {

687

688

igraph_i_community_leading_eigenvector_naive_data_t *data=extra;

689

long int j, k, nlen, size=n;

690

igraph_vector_t *idx=data->idx;

691

igraph_adjlist_t *adjlist=data->adjlist;

692

igraph_real_t ktx, ktx2;

693

694

/* Calculate Ax first */

695

for (j=0; j<size; j++) {

696

long int oldid=VECTOR(*idx)[j];

697

igraph_vector_t *neis=igraph_adjlist_get(adjlist, oldid);

698

nlen=igraph_vector_size(neis);

699

to[j]=0.0;

700

for (k=0; k<nlen; k++) {

701

long int nei=VECTOR(*neis)[k];

702

to[j] += from[nei];

703

}

704

}

705

706

/* Now calculate k^Tx/2m */

707

ktx=0.0; ktx2=0;

708

for (j=0; j<size; j++) {

709

long int oldid=VECTOR(*idx)[j];

710

igraph_vector_t *neis=igraph_adjlist_get(adjlist, oldid);

711

long int degree=igraph_vector_size(neis);

712

ktx2 += degree;

713

ktx += from[j] * degree;

714

}

715

ktx = ktx/ ktx2;

716

717

/* Now calculate Bx */

718

for (j=0; j<size; j++) {

719

long int oldid=VECTOR(*idx)[j];

720

igraph_vector_t *neis=igraph_adjlist_get(adjlist, oldid);

721

long int degree=igraph_vector_size(neis);

722

to[j] = to[j] - ktx*degree + degree*degree*from[j]/ktx2;

723

}

724

725

return 0;

726

}

727

728

/**

729

* \ingroup communities

730

* \function igraph_community_leading_eigenvector_naive

731

* \brief Leading eigenvector community finding (naive version).

732

733

* A naive implementation of Newman's eigenvector community structure

734

* detection. This function splits the network into two components

735

* according to the leading eigenvector of the modularity matrix and

736

* then recursively takes \p steps steps by splitting the components

737

* as individual network. This is not the correct way however, see

738

* MEJ Newman: Finding community structure in networks using the

739

* eigenvectors of matrices, arXiv:physics/0605087. Consider using the

740

* correct \ref igraph_community_leading_eigenvector() function instead.

741

* \param graph The input graph, should be undirected to make sense.

742

* \param merges The merge matrix. The splits done by the algorithm

743

* are stored here, its structure is the same ad for \ref

744

* igraph_community_leading_eigenvector(). This argument is ignored

745

* if it is \c NULL.

746

* \param membership The membership vector, for each vertex it gives

747

* the id of its community after all the splits are performed.

748

* This argument is ignored if it is \c NULL.

749

* \param steps The number of splits to do, if possible. Supply the

750

* number of vertices in the network here to perform as many steps

751

* as possible.

752

* \param options The options for ARPACK. \c n is always

753

* overwritten. \c ncv is set to at least 3.

754

* \return Error code.

755

756

* \sa \ref igraph_community_leading_eigenvector() for the proper way,

757

* \ref igraph_community_leading_eigenvector_step() to do just one split.

758

759

* Time complexity: O(E|+|V|^2*steps), |V| is the number of vertices,

760

* |E| is the number of edges.

761

762

763

int igraph_community_leading_eigenvector_naive(const igraph_t *graph,

764

igraph_matrix_t *merges,

765

igraph_vector_t *membership,

766

igraph_integer_t steps,

767

igraph_arpack_options_t *options) {

768

769

long int no_of_nodes=igraph_vcount(graph);

770

igraph_dqueue_t tosplit;

771

igraph_vector_t mymerges;

772

igraph_vector_t idx;

773

long int staken=0;

774

igraph_adjlist_t adjlist;

775

long int i, j, k, l, m;

776

long int communities=1;

777

igraph_vector_t vmembership, *mymembership=membership;

778

igraph_i_community_leading_eigenvector_naive_data_t extra;

779

igraph_arpack_storage_t storage;

780

781

if (igraph_is_directed(graph)) {

782

IGRAPH_WARNING("This method was developed for undirected graphs");

783

}

784

785

if (steps < 0 || steps > no_of_nodes-1) {

786

steps=no_of_nodes-1;

787

}

788

789

if (!membership) {

790

mymembership=&vmembership;

791

IGRAPH_VECTOR_INIT_FINALLY(mymembership, 0);

792

}

793

794

IGRAPH_VECTOR_INIT_FINALLY(&mymerges, 0);

795

IGRAPH_CHECK(igraph_vector_reserve(&mymerges, steps*2));

796

IGRAPH_VECTOR_INIT_FINALLY(&idx, no_of_nodes);

797

IGRAPH_DQUEUE_INIT_FINALLY(&tosplit, 100);

798

IGRAPH_CHECK(igraph_adjlist_init(graph, &adjlist, IGRAPH_ALL));

799

IGRAPH_FINALLY(igraph_adjlist_destroy, &adjlist);

800

801

IGRAPH_CHECK(igraph_vector_resize(mymembership, no_of_nodes));

802

igraph_vector_null(mymembership);

803

804

igraph_dqueue_push(&tosplit, 0);

805

806

if (options->ncv<3) { options->ncv=3; }

807

808

/* Memory for ARPACK */

809

IGRAPH_CHECK(igraph_arpack_storage_init(&storage, no_of_nodes, options->ncv,

810

no_of_nodes, 1));

811

IGRAPH_FINALLY(igraph_arpack_storage_destroy, &storage);

812

extra.idx=&idx;

813

extra.adjlist=&adjlist;

814

815

while (!igraph_dqueue_empty(&tosplit) && staken < steps) {

816

long int comm=igraph_dqueue_pop_back(&tosplit); /* depth first search */

817

long int size=0;

818

819

IGRAPH_ALLOW_INTERRUPTION();

820

821

for (i=0; i<no_of_nodes; i++) {

822

if (VECTOR(*mymembership)[i]==comm) {

823

VECTOR(idx)[size++]=i;

824

}

825

}

826

827

/* now 'size' is the size of the current community and

828

idx[0:(size-1)] contains the original ids of the vertices in

829

the current community. We need this to index the neighbor list. */

830

831

staken++;

832

if (size==1) {

833

continue; /* nothing to do */

834

}

835

836

options->start=0;

837

options->n=size;

838

options->which[0]='L'; options->which[1]='A';

839

if (options->ncv<3) { options->ncv=3; }

840

if (options->ncv > options->n) { options->ncv=options->n; }

841

842

/* Call ARPACK solver */

843

IGRAPH_CHECK(igraph_arpack_rssolve(igraph_i_community_leading_eigenvector_naive,

844

&extra, options, &storage, 0, 0));

845

846

if (options->noiter > options->mxiter) {

847

IGRAPH_WARNING("Maximum number of ARPACK iterations reached");

848

}

849

850

/* just to have the always the same result, we multiply by -1

851

if the first (nonzero) element is not positive */

852

for (i=0; i<size; i++) {

853

if (storage.v[i] != 0) { break; }

854

}

855

if (storage.v[i]<0) {

856

for (; i<size; i++) {

857

storage.v[i] = - storage.v[i];

858

}

859

}

860

861

/* Ok, we have the eigenvector */

862

863

/* Non-positive eigenvalue */

864

/* printf("%f\n", storage.d[0]); */

865

if (storage.d[0] <= 0) { continue; }

866

867

/* We create an index vector in workd to renumber the vertices */

868

l=0; m=0;

869

for (j=0; j<size; j++) {

870

if (storage.v[j] <= 0) {

871

storage.workd[j]=l++;

872

} else {

873

storage.workd[j]=m++;

874

}

875

}

876

/* if l==0 or m==0 then there was no split */

877

if (l==0 || m==0) {

878

continue;

879

}

880

communities++;

881

882

/* Rewrite the adjacency lists */

883

for (j=0; j<size; j++) {

884

long int oldid=VECTOR(idx)[j];

885

igraph_vector_t *neis=igraph_adjlist_get(&adjlist, oldid);

886

long int n=igraph_vector_size(neis);

887

for (k=0; k<n; ) {

888

long int nei=VECTOR(*neis)[k];

889

if ( (storage.v[j] <= 0 && storage.v[nei] <= 0) ||

890

(storage.v[j] > 0 && storage.v[nei] > 0)) {

891

/* they remain in the same community */

892

VECTOR(*neis)[k] = storage.workd[nei];

893

k++;

894

} else {

895

/* nei in the other community, remove from neighbor list */

896

VECTOR(*neis)[k] = VECTOR(*neis)[n-1];

897

igraph_vector_pop_back(neis);

898

n--;

899

}

900

}

901

}

902

903

/* Also rewrite the mymembership vector */

904

for (j=0; j<size; j++) {

905

if (storage.v[j] <= 0) {

906

long int oldid=VECTOR(idx)[j];

907

VECTOR(*mymembership)[oldid]=communities-1;

908

}

909

}

910

911

/* Record merge */

912

IGRAPH_CHECK(igraph_vector_push_back(&mymerges, comm));

913

IGRAPH_CHECK(igraph_vector_push_back(&mymerges, communities-1));

914

915

/* Store the resulting communities in the queue if needed */

916

if (l > 1) {

917

IGRAPH_CHECK(igraph_dqueue_push(&tosplit, communities-1));

918

}

919

if (m > 1) {

920

IGRAPH_CHECK(igraph_dqueue_push(&tosplit, comm));

921

}

922

923

}

924

925

igraph_arpack_storage_destroy(&storage);

926

igraph_adjlist_destroy(&adjlist);

927

igraph_dqueue_destroy(&tosplit);

928

IGRAPH_FINALLY_CLEAN(3);

929

930

/* reform the mymerges vector into merges matrix */

931

if (merges) {

932

igraph_vector_null(&idx);

933

l=igraph_vector_size(&mymerges);

934

k=communities;

935

j=0;

936

IGRAPH_CHECK(igraph_matrix_resize(merges, l/2, 2));

937

for (i=l; i>0; i-=2) {

938

long int from=VECTOR(mymerges)[i-1];

939

long int to=VECTOR(mymerges)[i-2];

940

MATRIX(*merges, j, 0)=VECTOR(mymerges)[i-2];

941

MATRIX(*merges, j, 1)=VECTOR(mymerges)[i-1];

942

if (VECTOR(idx)[from]!=0) {

943

MATRIX(*merges, j, 1)=VECTOR(idx)[from]-1;

944

}

945

if (VECTOR(idx)[to]!=0) {

946

MATRIX(*merges, j, 0)=VECTOR(idx)[to]-1;

947

}

948

VECTOR(idx)[to]=++k;

949

j++;

950

}

951

}

952

953

igraph_vector_destroy(&idx);

954

igraph_vector_destroy(&mymerges);

955

IGRAPH_FINALLY_CLEAN(2);

956

957

if (!membership) {

958

igraph_vector_destroy(mymembership);

959

IGRAPH_FINALLY_CLEAN(1);

960

}

961

962

return 0;

963

}

964

965

typedef struct igraph_i_community_leading_eigenvector_data_t {

966

igraph_vector_t *idx;

967

igraph_vector_t *idx2;

968

igraph_adjlist_t *adjlist;

969

igraph_vector_t *tmp;

970

long int no_of_edges;

971

igraph_vector_t *mymembership;

972

long int comm;

973

} igraph_i_community_leading_eigenvector_data_t;

974

975

int igraph_i_community_leading_eigenvector(igraph_real_t *to,

976

const igraph_real_t *from,

977

long int n, void *extra) {

978

979

igraph_i_community_leading_eigenvector_data_t *data=extra;

980

long int j, k, nlen, size=n;

981

igraph_vector_t *idx=data->idx;

982

igraph_vector_t *idx2=data->idx2;

983

igraph_vector_t *tmp=data->tmp;

984

igraph_adjlist_t *adjlist=data->adjlist;

985

igraph_real_t ktx, ktx2;

986

long int no_of_edges=data->no_of_edges;

987

igraph_vector_t *mymembership=data->mymembership;

988

long int comm=data->comm;

989

990

for (j=0; j<size; j++) {

991

long int oldid=VECTOR(*idx)[j];

992

igraph_vector_t *neis=igraph_adjlist_get(adjlist, oldid);

993

nlen=igraph_vector_size(neis);

994

to[j]=0.0;

995

VECTOR(*tmp)[j]=0.0;

996

for (k=0; k<nlen; k++) {

997

long int nei=VECTOR(*neis)[k];

998

if (VECTOR(*mymembership)[nei]==comm) {

999

to[j] += from[ (long int) VECTOR(*idx2)[nei] ];

1000

VECTOR(*tmp)[j] += 1;

1001

}

1002

}

1003

}

1004

1005

/* Now calculate k^Tx/2m */

1006

ktx=0.0; ktx2=0.0;

1007

for (j=0; j<size; j++) {

1008

long int oldid=VECTOR(*idx)[j];

1009

igraph_vector_t *neis=igraph_adjlist_get(adjlist, oldid);

1010

long int degree=igraph_vector_size(neis);

1011

ktx += from[j] * degree;

1012

ktx2 += degree;

1013

}

1014

ktx = ktx / no_of_edges/2.0;

1015

ktx2 = ktx2 / no_of_edges/2.0;

1016

1017

/* Now calculate Bx */

1018

for (j=0; j<size; j++) {

1019

long int oldid=VECTOR(*idx)[j];

1020

igraph_vector_t *neis=igraph_adjlist_get(adjlist, oldid);

1021

igraph_real_t degree=igraph_vector_size(neis);

1022

to[j] = to[j] - ktx*degree + degree*degree*from[j]/no_of_edges/2.0;

1023

VECTOR(*tmp)[j] = VECTOR(*tmp)[j] - ktx2*degree +

1024

degree*degree*1.0/no_of_edges/2.0;

1025

}

1026

1027

/* -d_ij summa l in G B_il */

1028

for (j=0; j<size; j++) {

1029

to[j] -= VECTOR(*tmp)[j] * from[j];

1030

}

1031

1032

return 0;

1033

}

1034

1035

/**

1036

* \ingroup communities

1037

* \function igraph_community_leading_eigenvector

1038

* \brief Leading eigenvector community finding (proper version).

1039

1040

* Newman's leading eigenvector method for detecting community

1041

* structure. This is the proper implementation of the recursive,

1042

* divisive algorithm: each split is done by maximizing the modularity

1043

* regarding the original network, see MEJ Newman: Finding community

1044

* structure in networks using the eigenvectors of matrices,

1045

* arXiv:physics/0605087.

1046

1047

* \param graph The undirected input graph.

1048

* \param merges The result of the algorithm, a matrix containing the

1049

* information about the splits performed. The matrix is built in

1050

* the opposite way however, it is like the result of an

1051

* agglomerative algorithm. If at the end of the algorithm (after

1052

* \p steps steps was done) there are <quote>p</quote> communities,

1053

* then these are numbered from zero to <quote>p-1</quote>. The

1054

* first line of the matrix contains the first <quote>merge</quote>

1055

* (which is in reality the last split) of two communities into

1056

* community <quote>p</quote>, the merge in the second line forms

1057

* community <quote>p+1</quote>, etc. The matrix should be

1058

* initialized before calling and will be resized as needed.

1059

* This argument is ignored of it is \c NULL.

1060

* \param membership The membership of the vertices after all the

1061

* splits were performed will be stored here. The vector must be

1062

* initialized before calling and will be resized as needed.

1063

* This argument is ignored if it is \c NULL.

1064

* \param steps The maximum number of steps to perform. It might

1065

* happen that some component (or the whole network) has no

1066

* underlying community structure and no further steps can be

1067

* done. If you wany as many steps as possible then supply the

1068

* number of vertices in the network here.

1069

* \param options The options for ARPACK. \c n is always

1070

* overwritten. \c ncv is set to at least 3.

1071

* \return Error code.

1072

1073

* \sa \ref igraph_community_walktrap() and \ref

1074

* igraph_community_spinglass() for other community structure

1075

* detection methods.

1076

1077

* Time complexity: O(|E|+|V|^2*steps), |V| is the number of vertices,

1078

* |E| the number of edges, <quote>steps</quote> the number of splits

1079

* performed.

1080

1081

1082

int igraph_community_leading_eigenvector(const igraph_t *graph,

1083

igraph_matrix_t *merges,

1084

igraph_vector_t *membership,

1085

igraph_integer_t steps,

1086

igraph_arpack_options_t *options) {

1087

1088

long int no_of_nodes=igraph_vcount(graph);

1089

long int no_of_edges=igraph_ecount(graph);

1090

igraph_dqueue_t tosplit;

1091

igraph_vector_t idx, idx2, mymerges;

1092

igraph_vector_t tmp;

1093

long int staken=0;

1094

igraph_adjlist_t adjlist;

1095

long int i, j, k, l;

1096

long int communities=1;

1097

igraph_vector_t vmembership, *mymembership=membership;

1098

igraph_i_community_leading_eigenvector_data_t extra;

1099

igraph_arpack_storage_t storage;

1100

1101

if (igraph_is_directed(graph)) {

1102

IGRAPH_WARNING("This method was developed for undirected graphs");

1103

}

1104

1105

if (steps < 0 || steps > no_of_nodes-1) {

1106

steps=no_of_nodes-1;

1107

}

1108

1109

if (steps > no_of_nodes-1) {

1110

steps=no_of_nodes-1;

1111

}

1112

1113

if (!membership) {

1114

mymembership=&vmembership;

1115

IGRAPH_VECTOR_INIT_FINALLY(mymembership, 0);

1116

}

1117

1118

IGRAPH_VECTOR_INIT_FINALLY(&mymerges, 0);

1119

IGRAPH_CHECK(igraph_vector_reserve(&mymerges, steps*2));

1120

1121

IGRAPH_VECTOR_INIT_FINALLY(&idx, no_of_nodes);

1122

IGRAPH_VECTOR_INIT_FINALLY(&idx2, no_of_nodes);

1123

IGRAPH_VECTOR_INIT_FINALLY(&tmp, no_of_nodes);

1124

IGRAPH_DQUEUE_INIT_FINALLY(&tosplit, 100);

1125

IGRAPH_CHECK(igraph_adjlist_init(graph, &adjlist, IGRAPH_ALL));

1126

IGRAPH_FINALLY(igraph_adjlist_destroy, &adjlist);

1127

1128

IGRAPH_CHECK(igraph_vector_resize(mymembership, no_of_nodes));

1129

igraph_vector_null(mymembership);

1130

1131

igraph_dqueue_push(&tosplit, 0);

1132

1133

if (options->ncv<3) { options->ncv=3; }

1134

1135

/* Memory for ARPACK */

1136

IGRAPH_CHECK(igraph_arpack_storage_init(&storage, no_of_nodes, options->ncv,

1137

no_of_nodes, 1));

1138

IGRAPH_FINALLY(igraph_arpack_storage_destroy, &storage);

1139

extra.idx=&idx;

1140

extra.idx2=&idx2;

1141

extra.tmp=&tmp;

1142

extra.adjlist=&adjlist;

1143

extra.no_of_edges=no_of_edges;

1144

extra.mymembership=mymembership;

1145

1146

while (!igraph_dqueue_empty(&tosplit) && staken < steps) {

1147

long int comm=igraph_dqueue_pop_back(&tosplit); /* depth first search */

1148

long int size=0;

1149

1150

IGRAPH_ALLOW_INTERRUPTION();

1151

1152

for (i=0; i<no_of_nodes; i++) {

1153

if (VECTOR(*mymembership)[i]==comm) {

1154

VECTOR(idx)[size]=i;

1155

VECTOR(idx2)[i]=size++;

1156

}

1157

}

1158

1159

staken++;

1160

if (size==1) {

1161

continue;

1162

}

1163

1164

options->start=0;

1165

options->n=size;

1166

options->which[0]='L'; options->which[1]='A';

1167

if (options->ncv < 3) { options->ncv=3; }

1168

if (options->ncv > options->n) { options->ncv=options->n; }

1169

extra.comm=comm;

1170

1171

/* Call ARPACK solver */

1172

IGRAPH_CHECK(igraph_arpack_rssolve(igraph_i_community_leading_eigenvector,

1173

&extra, options, &storage, 0, 0));

1174

1175

if (options->noiter > options->mxiter) {

1176

IGRAPH_WARNING("Maximum number of ARPACK iterations reached");

1177

}

1178

1179

/* just to have the always the same result, we multiply by -1

1180

if the first (nonzero) element is not positive */

1181

for (i=0; i<size; i++) {

1182

if (storage.v[i] != 0) { break; }

1183

}

1184

if (storage.v[i]<0) {

1185

for (; i<size; i++) {

1186

storage.v[i] = - storage.v[i];

1187

}

1188

}

1189

1190

/* Ok, we have the eigenvector */

1191

1192

/* Non-positive eigenvalue */

1193

/* printf("%f\n", storage.d[0]); */

1194

/* for (j=0; j<size; j++) { printf("%g ", storage.v[j]); } */

1195

/* printf("\n"); */

1196

if (storage.d[0] <= 0) { continue; }

1197

1198

/* Count the number of vertices in each community after the split */

1199

l=0;

1200

for (j=0; j<size; j++) {

1201

if (storage.v[j] <= 0) {

1202

l++;

1203

}

1204

}

1205

if (l==0 || l==size) {

1206

continue;

1207

}

1208

communities++;

1209

1210

/* Rewrite the mymembership vector */

1211

for (j=0; j<size; j++) {

1212

if (storage.v[j] <= 0) {

1213

long int oldid=VECTOR(idx)[j];

1214

VECTOR(*mymembership)[oldid]=communities-1;

1215

}

1216

}

1217

1218

/* Record merge */

1219

IGRAPH_CHECK(igraph_vector_push_back(&mymerges, comm));

1220

IGRAPH_CHECK(igraph_vector_push_back(&mymerges, communities-1));

1221

1222

/* Store the resulting communities in the queue if needed */

1223

if (l > 1) {

1224

IGRAPH_CHECK(igraph_dqueue_push(&tosplit, communities-1));

1225

}

1226

if (size-l > 1) {

1227

IGRAPH_CHECK(igraph_dqueue_push(&tosplit, comm));

1228

}

1229

1230

}

1231

1232

igraph_arpack_storage_destroy(&storage);

1233

igraph_adjlist_destroy(&adjlist);

1234

igraph_dqueue_destroy(&tosplit);

1235

igraph_vector_destroy(&tmp);

1236

igraph_vector_destroy(&idx2);

1237

IGRAPH_FINALLY_CLEAN(5);

1238

1239

/* reform the mymerges vector */

1240

if (merges) {

1241

igraph_vector_null(&idx);

1242

l=igraph_vector_size(&mymerges);

1243

k=communities;

1244

j=0;

1245

IGRAPH_CHECK(igraph_matrix_resize(merges, l/2, 2));

1246

for (i=l; i>0; i-=2) {

1247

long int from=VECTOR(mymerges)[i-1];

1248

long int to=VECTOR(mymerges)[i-2];

1249

MATRIX(*merges, j, 0)=VECTOR(mymerges)[i-2];

1250

MATRIX(*merges, j, 1)=VECTOR(mymerges)[i-1];

1251

if (VECTOR(idx)[from]!=0) {

1252

MATRIX(*merges, j, 1)=VECTOR(idx)[from]-1;

1253

}

1254

if (VECTOR(idx)[to]!=0) {

1255

MATRIX(*merges, j, 0)=VECTOR(idx)[to]-1;

1256

}

1257

VECTOR(idx)[to]=++k;

1258

j++;

1259

}

1260

}

1261

1262

igraph_vector_destroy(&idx);

1263

igraph_vector_destroy(&mymerges);

1264

IGRAPH_FINALLY_CLEAN(2);

1265

1266

if (!membership) {

1267

igraph_vector_destroy(mymembership);

1268

IGRAPH_FINALLY_CLEAN(1);

1269

}

1270

1271

return 0;

1272

}

1273

1274

typedef struct igraph_i_community_leading_eigenvector_step_data_t {

1275

igraph_vector_t *idx;

1276

igraph_vector_t *idx2;

1277

igraph_lazy_adjlist_t *adjlist;

1278

igraph_vector_t *tmp;

1279

long int no_of_edges;

1280

igraph_vector_t *mymembership;

1281

long int comm;

1282

} igraph_i_community_leading_eigenvector_step_data_t;

1283

1284

int igraph_i_community_leading_eigenvector_step(igraph_real_t *to,

1285

const igraph_real_t *from,

1286

long int n, void *extra) {

1287

1288

igraph_i_community_leading_eigenvector_step_data_t *data=extra;

1289

long int j, k, nlen, size=n;

1290

igraph_vector_t *idx=data->idx;

1291

igraph_vector_t *idx2=data->idx2;

1292

igraph_vector_t *tmp=data->tmp;

1293

igraph_lazy_adjlist_t *adjlist=data->adjlist;

1294

igraph_real_t ktx, ktx2;

1295

long int no_of_edges=data->no_of_edges;

1296

igraph_vector_t *mymembership=data->mymembership;

1297

long int comm=data->comm;

1298

1299

for (j=0; j<size; j++) {

1300

long int oldid=VECTOR(*idx)[j];

1301

igraph_vector_t *neis=igraph_lazy_adjlist_get(adjlist, oldid);

1302

nlen=igraph_vector_size(neis);

1303

to[j]=0.0;

1304

VECTOR(*tmp)[j]=0.0;

1305

for (k=0; k<nlen; k++) {

1306

long int nei=VECTOR(*neis)[k];

1307

if (VECTOR(*mymembership)[nei]==comm) {

1308

to[j] += from[ (long int) VECTOR(*idx2)[nei] ];

1309

VECTOR(*tmp)[j] += 1;

1310

}

1311

}

1312

}

1313

1314

/* Now calculate k^Tx/2m */

1315

ktx=0.0; ktx2=0.0;

1316

for (j=0; j<size; j++) {

1317

long int oldid=VECTOR(*idx)[j];

1318

igraph_vector_t *neis=igraph_lazy_adjlist_get(adjlist, oldid);

1319

long int degree=igraph_vector_size(neis);

1320

ktx += from[j] * degree;

1321

ktx2 += degree;

1322

}

1323

ktx = ktx / no_of_edges/2.0;

1324

ktx2 = ktx2 / no_of_edges/2.0;

1325

1326

/* Now calculate Bx */

1327

for (j=0; j<size; j++) {

1328

long int oldid=VECTOR(*idx)[j];

1329

igraph_vector_t *neis=igraph_lazy_adjlist_get(adjlist, oldid);

1330

igraph_real_t degree=igraph_vector_size(neis);

1331

to[j] = to[j] - ktx*degree + degree*degree*from[j]/no_of_edges/2.0;

1332

VECTOR(*tmp)[j] = VECTOR(*tmp)[j] - ktx2*degree +

1333

degree*degree*1.0/no_of_edges/2.0;

1334

}

1335

1336

/* -d_ij summa l in G B_il */

1337

for (j=0; j<size; j++) {

1338

to[j] -= VECTOR(*tmp)[j] * from[j];

1339

}

1340

1341

return 0;

1342

}

1343

1344

/**

1345

* \ingroup communities

1346

* \function igraph_community_leading_eigenvector_step

1347

* \brief Leading eigenvector community finding (make one step).

1348

1349

* Do one split according to Mark Newman's leading eigenvector

1350

* community detection method. See MEJ Newman: Finding community

1351

* structure in networks using the eigenvectors of matrices,

1352

* arXiv:phyisics/0605087 for the details.

1353

1354

* </para><para>Use this function instead of \ref

1355

* igraph_community_leading_eigenvector() if you want to have full

1356

* control over and information about each split performed along

1357

* community structure detection. \ref

1358

* igraph_community_leading_eigenvector() can be simulated by

1359

* repeatedly calling this function.

1360

1361

* \param graph The undirected input graph.

1362

* \param membership Numeric vector giving a division of \p graph.

1363

* The result will be also stored here. The vector contains the

1364

* community ids for each vertex, these are numbered from 0.

1365

* \param community The id of the community to split.

1366

* \param split Pointer to a logical variable, if it was possible to

1367

* split community \p community then 1, otherwise 0 will be stored

1368

* here. This argument is ignored if it is \c NULL.

1369

* \param eigenvector Pointer to an initialized vector, the

1370

* eigenvector on which the split was done will be stored here.

1371

* It will be resised to have the same length as the number of

1372

* vertices in community \p community. This argument is ignored

1373

* if it is \c NULL.

1374

* \param eigenvalue Pointer to a real variable, the eigenvalue

1375

* associated with \p eigenvector will be stored here.

1376

* This argument is ignored if it is \c NULL.

1377

* \return Error code.

1378

1379

* \sa \ref igraph_community_leading_eigenvector().

1380

1381

* Time complexity: O(|E|+|V|^2), |E| is the number of edges, |V| is

1382

* the number of vertices.

1383

1384

1385

int igraph_community_leading_eigenvector_step(const igraph_t *graph,

1386

igraph_vector_t *membership,

1387

igraph_integer_t community,

1388

igraph_bool_t *split,

1389

igraph_vector_t *eigenvector,

1390

igraph_real_t *eigenvalue,

1391

igraph_arpack_options_t *options,

1392

igraph_arpack_storage_t *storage) {

1393

1394

long int no_of_nodes=igraph_vcount(graph);

1395

long int no_of_edges=igraph_ecount(graph);

1396

igraph_vector_t tmp;

1397

igraph_vector_t idx, idx2;

1398

long int i, j, k;

1399

long int communities=1;

1400

igraph_lazy_adjlist_t adjlist;

1401

long int size=0;

1402

igraph_i_community_leading_eigenvector_step_data_t extra;

1403

igraph_arpack_storage_t real_storage, *mystorage=

1404

storage ? storage : &real_storage;

1405

long int comm=community;

1406

1407

if (igraph_vector_size(membership) != no_of_nodes) {

1408

IGRAPH_ERROR("Invalid membership vector length", IGRAPH_EINVAL);

1409

}

1410

1411

if (igraph_is_directed(graph)) {

1412

IGRAPH_WARNING("This method was developed for undirected graphs");

1413

}

1414

1415

IGRAPH_VECTOR_INIT_FINALLY(&idx, no_of_nodes);

1416

IGRAPH_VECTOR_INIT_FINALLY(&idx2, no_of_nodes);

1417

1418

for (i=0; i<no_of_nodes; i++) {

1419

if (VECTOR(*membership)[i]==comm) {

1420

VECTOR(idx)[size]=i;

1421

VECTOR(idx2)[i]=size;

1422

size++;

1423

}

1424

if (VECTOR(*membership)[i] > communities-1) {

1425

communities=VECTOR(*membership)[i]+1;

1426

}

1427

}

1428

1429

if (split) { *split=0; }

1430

if (size != 1) {

1431

IGRAPH_CHECK(igraph_lazy_adjlist_init(graph, &adjlist, IGRAPH_ALL,

1432

IGRAPH_DONT_SIMPLIFY));

1433

IGRAPH_FINALLY(igraph_lazy_adjlist_destroy, &adjlist);

1434

if (!storage) {

1435

IGRAPH_CHECK(igraph_arpack_storage_init(mystorage, no_of_nodes, 3, no_of_nodes, 1));

1436

IGRAPH_FINALLY(igraph_arpack_storage_destroy, mystorage);

1437

}

1438

IGRAPH_VECTOR_INIT_FINALLY(&tmp, size);

1439

1440

extra.idx=&idx;

1441

extra.idx2=&idx2;

1442

extra.tmp=&tmp;

1443

extra.adjlist=&adjlist;

1444

extra.no_of_edges=no_of_edges;

1445

extra.mymembership=membership;

1446

extra.comm=comm;

1447

1448

options->start=0;

1449

options->n=size;

1450

if (options->ncv < 3) { options->ncv=3; }

1451

options->which[0]='L'; options->which[1]='A';

1452

if (options->ncv > options->n) { options->ncv=options->n; }

1453

1454

IGRAPH_CHECK(igraph_arpack_rssolve(igraph_i_community_leading_eigenvector_step,

1455

&extra, options, mystorage, 0, 0));

1456

1457

if (options->noiter > options->mxiter) {

1458

IGRAPH_WARNING("Maximum number of ARPACK iterations reached");

1459

}

1460

1461

/* just to have the always the same result, we multiply by -1

1462

if the first (nonzero) element is not positive */

1463

for (i=0; i<size; i++) {

1464

if (mystorage->v[i] != 0) { break; }

1465

}

1466

if (mystorage->v[i]<0) {

1467

for (; i<size; i++) {

1468

mystorage->v[i] = - mystorage->v[i];

1469

}

1470

}

1471

1472

/* Ok, we have the eigenvector */

1473

1474

/* Save eigenvalue/vector if requested */

1475

if (eigenvalue) {

1476

*eigenvalue=mystorage->d[0];

1477

}

1478

if (eigenvector) {

1479

IGRAPH_CHECK(igraph_vector_resize(eigenvector, size));

1480

for (i=0; i<size; i++) {

1481

VECTOR(*eigenvector)[i] = mystorage->v[i];

1482

}

1483

}

1484

1485

/* Positive eigenvalue? */

1486

if (mystorage->d[0] > 0) {

1487

1488

/* Rewrite the membership vector, check if there was a split */

1489

for (j=0, k=0; j<size; j++) {

1490

if (VECTOR(*eigenvector)[j] <= 0) {

1491

long int oldid=VECTOR(idx)[j];

1492

VECTOR(*membership)[oldid]=communities;

1493

k++;

1494

}

1495

}

1496

1497

if (split && k>0) {

1498

*split=1;

1499

}

1500

}

1501

1502

igraph_vector_destroy(&tmp);

1503

IGRAPH_FINALLY_CLEAN(1);

1504

if (!storage) {

1505

igraph_arpack_storage_destroy(mystorage);

1506

IGRAPH_FINALLY_CLEAN(1);

1507

}

1508

igraph_lazy_adjlist_destroy(&adjlist);

1509

IGRAPH_FINALLY_CLEAN(1);

1510

1511

} /* size != 1 */

1512

1513

igraph_vector_destroy(&idx2);

1514

igraph_vector_destroy(&idx);

1515

IGRAPH_FINALLY_CLEAN(2);

1516

1517

return 0;

1518

}

1519

1520

/**

1521

* \function igraph_le_community_to_membership

1522

* Vertex membership from the leading eigenvector community structure

1523

1524

* This function creates a membership vector from the

1525

* result of \ref igraph_community_leading_eigenvector() or

1526

* \ref igraph_community_leading_eigenvector_naive(). It takes \c membership

1527

* and permformes \c steps merges, according to the supplied

1528

* \c merges matrix.

1529

* \param merges The matrix defining the merges to make.

1530

* This is usually from the output of the leading eigenvector community

1531

* structure detection routines.

1532

* \param steps The number of steps to make according to \c merges.

1533

* \param membership Initially the starting membership vector,

1534

* on output the resulting membership vector, after performing \c steps merges.

1535

* \param csize Optionally the sizes of the commmunities is stored here,

1536

* if this is not a null pointer, but an initialized vector.

1537

* \return Error code.

1538

1539

* Time complexity: O(|V|), the number of vertices.

1540

1541

1542

int igraph_le_community_to_membership(const igraph_matrix_t *merges,

1543

igraph_integer_t steps,

1544

igraph_vector_t *membership,

1545

igraph_vector_t *csize) {

1546

1547

long int no_of_nodes=igraph_vector_size(membership);

1548

igraph_vector_t fake_memb;

1549

long int components, i;

1550

1551

if (igraph_matrix_nrow(merges) < steps) {

1552

IGRAPH_ERROR("`steps' to big or `merges' matrix too short", IGRAPH_EINVAL);

1553

}

1554

1555

components=igraph_vector_max(membership)+1;

1556

if (components > no_of_nodes) {

1557

IGRAPH_ERROR("Invalid membership vector, too many components", IGRAPH_EINVAL);

1558

}

1559

if (steps >= components) {

1560

IGRAPH_ERROR("Cannot make `steps' steps from supplied membership vector",

1561

IGRAPH_EINVAL);

1562

}

1563

1564

IGRAPH_VECTOR_INIT_FINALLY(&fake_memb, components);

1565

1566

/* Check membership vector */

1567

for (i=0; i<no_of_nodes; i++) {

1568

if (VECTOR(*membership)[i] < 0) {

1569

IGRAPH_ERROR("Invalid membership vector, negative id", IGRAPH_EINVAL);

1570

}

1571

VECTOR(fake_memb)[ (long int) VECTOR(*membership)[i] ] += 1;

1572

}

1573

for (i=0; i<components; i++) {

1574

if (VECTOR(fake_memb)[i] == 0) {

1575

IGRAPH_ERROR("Invalid membership vector, empty cluster", IGRAPH_EINVAL);

1576

}

1577

}

1578

1579

IGRAPH_CHECK(igraph_community_to_membership(merges, components, steps,

1580

&fake_memb, 0));

1581

1582

/* Ok, now we have the membership of the initial components,

1583

rewrite the original membership vector. */

1584

1585

if (csize) {

1586

IGRAPH_CHECK(igraph_vector_resize(csize, components-steps));

1587

igraph_vector_null(csize);

1588

}

1589

1590

for (i=0; i<no_of_nodes; i++) {

1591

VECTOR(*membership)[i] = VECTOR(fake_memb)[ (long int) VECTOR(*membership)[i] ];

1592

if (csize) {

1593

VECTOR(*csize)[ (long int) VECTOR(*membership)[i] ] += 1;

1594

}

1595

}

1596

1597

igraph_vector_destroy(&fake_memb);

1598

IGRAPH_FINALLY_CLEAN(1);

1599

return 0;

1600

}

1601

1602

/**

1603

* \function igraph_community_label_propagation

1604

* \ingroup communities

1605

* \brief Community detection based on label propagation

1606

1607

* This function implements the community detection method described in:

1608

* Raghavan, U.N. and Albert, R. and Kumara, S.: Near linear time algorithm

1609

* to detect community structures in large-scale networks. Phys Rev E

1610

* 76, 036106. (2007). This version extends the original method by

1611

* the ability to take edge weights into consideration and also

1612

* by allowing some labels to be fixed.

1613

1614

* \param graph The input graph, should be undirected to make sense.

1615

* \param membership The membership vector, the result is returned here.

1616

* For each vertex it gives the ID of its community (label).

1617

* \param weights The weight vector, it should contain a positive

1618

* weight for all the edges.

1619

* \param initial The initial state. If NULL, every vertex will have

1620

* a different label at the beginning. Otherwise it must be a vector

1621

* with an entry for each vertex. Non-negative values denote different

1622

* labels, negative entries denote vertices without labels.

1623

* \param fixed Boolean vector denoting which labels are fixed. Of course

1624

* this makes sense only if you provided an initial state, otherwise

1625

* this element will be ignored. Also note that vertices without labels

1626

* cannot be fixed.

1627

* \return Error code.

1628

1629

* Time complexity: O(m+n)

1630

1631

int igraph_community_label_propagation(const igraph_t *graph,

1632

igraph_vector_t *membership,

1633

const igraph_vector_t *weights,

1634

const igraph_vector_t *initial,

1635

igraph_vector_bool_t *fixed) {

1636

long int no_of_nodes=igraph_vcount(graph);

1637

long int no_of_edges=igraph_ecount(graph);

1638

long int no_of_not_fixed_nodes=no_of_nodes;

1639

long int i, j, k;

1640

igraph_adjlist_t al;

1641

igraph_adjedgelist_t ael;

1642

igraph_bool_t running = 1;

1643

1644

igraph_vector_t label_counters, dominant_labels, node_order;

1645

1646

/* The implementation uses a trick to avoid negative array indexing:

1647

* elements of the membership vector are increased by 1 at the start

1648

* of the algorithm; this to allow us to denote unlabeled vertices

1649

* (if any) by zeroes. The membership vector is shifted back in the end

1650

1651

1652

/* Do some initial checks */

1653

if (fixed && igraph_vector_bool_size(fixed) != no_of_nodes) {

1654

IGRAPH_ERROR("Invalid fixed labeling vector length", IGRAPH_EINVAL);

1655

}

1656

if (weights) {

1657

if (igraph_vector_size(weights) != no_of_edges) {

1658

IGRAPH_ERROR("Invalid weight vector length", IGRAPH_EINVAL);

1659

} else if (igraph_vector_min(weights) < 0) {

1660

IGRAPH_ERROR("Weights must be non-negative", IGRAPH_EINVAL);

1661

}

1662

}

1663

if (fixed && !initial) {

1664

IGRAPH_WARNING("Ignoring fixed vertices as no initial labeling given");

1665

}

1666

1667

IGRAPH_CHECK(igraph_vector_resize(membership, no_of_nodes));

1668

1669

if (initial) {

1670

if (igraph_vector_size(initial) != no_of_nodes) {

1671

IGRAPH_ERROR("Invalid initial labeling vector length", IGRAPH_EINVAL);

1672

}

1673

/* Check if the labels used are valid, initialize membership vector */

1674

for (i=0; i<no_of_nodes; i++) {

1675

if (VECTOR(*initial)[i] < -1) {

1676

VECTOR(*membership)[i] = 0;

1677

} else {

1678

VECTOR(*membership)[i] = VECTOR(*initial)[i] + 1;

1679

}

1680

if (VECTOR(*fixed)[i]) {

1681

if (VECTOR(*membership)[i] == 0) {

1682

IGRAPH_WARNING("Fixed nodes cannot be unlabeled, ignoring them");

1683

VECTOR(*fixed)[i] = 0;

1684

} else {

1685

no_of_not_fixed_nodes--;

1686

}

1687

}

1688

}

1689

1690

i = igraph_vector_max(membership);

1691

if (i > no_of_nodes) {

1692

IGRAPH_ERROR("elements of the initial labeling vector must be between 0 and |V|-1", IGRAPH_EINVAL);

1693

}

1694

if (i <= 0) {

1695

IGRAPH_ERROR("at least one vertex must be labeled in the initial labeling", IGRAPH_EINVAL);

1696

}

1697

} else {

1698

for (i=0; i<no_of_nodes; i++) {

1699

VECTOR(*membership)[i] = i;

1700

}

1701

}

1702

1703

/* Create an adjacency (edge) list representation for efficiency.

1704

* For the unweighted case, the adjacency list is enough. For the

1705

* weighted case, we need the adjacency edge list */

1706

if (weights) {

1707

IGRAPH_CHECK(igraph_adjedgelist_init(graph, &ael, IGRAPH_IN));

1708

IGRAPH_FINALLY(igraph_adjedgelist_destroy, &ael);

1709

} else {

1710

IGRAPH_CHECK(igraph_adjlist_init(graph, &al, IGRAPH_IN));

1711

IGRAPH_FINALLY(igraph_adjlist_destroy, &al);

1712

}

1713

1714

/* Create storage space for counting distinct labels and dominant ones */

1715

IGRAPH_VECTOR_INIT_FINALLY(&label_counters, no_of_nodes+1);

1716

IGRAPH_VECTOR_INIT_FINALLY(&dominant_labels, 0);

1717

IGRAPH_CHECK(igraph_vector_reserve(&dominant_labels, 2));

1718

1719

RNG_BEGIN();

1720

1721

/* Initialize node ordering vector with only the not fixed nodes */

1722

if (fixed) {

1723

IGRAPH_VECTOR_INIT_FINALLY(&node_order, no_of_not_fixed_nodes);

1724

for (i=0, j=0; i<no_of_nodes; i++) {

1725

if (!VECTOR(*fixed)[i]) {

1726

VECTOR(node_order)[j] = i;

1727

j++;

1728

}

1729

}

1730

} else {

1731

IGRAPH_CHECK(igraph_vector_init_seq(&node_order, 0, no_of_nodes-1));

1732

IGRAPH_FINALLY(igraph_vector_destroy, &node_order);

1733

}

1734

1735

running = 1;

1736

while (running) {

1737

long int v1, num_neis;

1738

igraph_real_t max_count;

1739

igraph_vector_t *neis;

1740

1741

running = 0;

1742

1743

/* Shuffle the node ordering vector */

1744

IGRAPH_CHECK(igraph_vector_shuffle(&node_order));

1745

/* In the prescribed order, loop over the vertices and reassign labels */

1746

for (i=0; i<no_of_not_fixed_nodes; i++) {

1747

v1 = VECTOR(node_order)[i];

1748

1749

/* Count the weights corresponding to different labels */

1750

igraph_vector_null(&label_counters);

1751

max_count = 0.0;

1752

if (weights) {

1753

neis = igraph_adjedgelist_get(&ael, v1);

1754

num_neis = igraph_vector_size(neis);

1755

for (j=0; j<num_neis; j++) {

1756

k = VECTOR(*membership)[(long)IGRAPH_OTHER(graph, VECTOR(*neis)[j], v1)];

1757

if (k == 0) continue; /* skip if it has no label yet */

1758

VECTOR(label_counters)[k] += VECTOR(*weights)[(long)VECTOR(*neis)[j]];

1759

if (max_count < VECTOR(label_counters)[k]) {

1760

max_count = VECTOR(label_counters)[k];

1761

igraph_vector_resize(&dominant_labels, 1);

1762

VECTOR(dominant_labels)[0] = k;

1763

} else if (max_count == VECTOR(label_counters)[k]) {

1764

igraph_vector_push_back(&dominant_labels, k);

1765

}

1766

}

1767

} else {

1768

neis = igraph_adjlist_get(&al, v1);

1769

num_neis = igraph_vector_size(neis);

1770

for (j=0; j<num_neis; j++) {

1771

k = VECTOR(*membership)[(long)VECTOR(*neis)[j]];

1772

if (k == 0) continue; /* skip if it has no label yet */

1773

VECTOR(label_counters)[k]++;

1774

if (max_count < VECTOR(label_counters)[k]) {

1775

max_count = VECTOR(label_counters)[k];

1776

igraph_vector_resize(&dominant_labels, 1);

1777

VECTOR(dominant_labels)[0] = k;

1778

} else if (max_count == VECTOR(label_counters)[k]) {

1779

igraph_vector_push_back(&dominant_labels, k);

1780

}

1781

}

1782

}

1783

1784

if (igraph_vector_size(&dominant_labels) > 0) {

1785

/* Select randomly from the dominant labels */

1786

k = RNG_INTEGER(0, igraph_vector_size(&dominant_labels)-1);

1787

k = VECTOR(dominant_labels)[k];

1788

/* Check if the _current_ label of the node is also dominant */

1789

if (VECTOR(label_counters)[(long)VECTOR(*membership)[v1]]!=max_count) {

1790

/* Nope, we need at least one more iteration */

1791

running = 1;

1792

}

1793

VECTOR(*membership)[v1] = k;

1794

}

1795

}

1796

}

1797

1798

RNG_END();

1799

1800

/* Shift back the membership vector, permute labels in increasing order */

1801

/* We recycle label_counters here :) */

1802

igraph_vector_fill(&label_counters, -1);

1803

j = 0;

1804

for (i=0; i<no_of_nodes; i++) {

1805

k = (long)VECTOR(*membership)[i]-1;

1806

if (k >= 0) {

1807

if (VECTOR(label_counters)[k] == -1) {

1808

/* We have seen this label for the first time */

1809

VECTOR(label_counters)[k] = j;

1810

k = j;

1811

j++;

1812

} else {

1813

k = VECTOR(label_counters)[k];

1814

}

1815

} else {

1816

/* This is an unlabeled vertex */

1817

}

1818

VECTOR(*membership)[i] = k;

1819

}

1820

1821

if (weights)

1822

igraph_adjedgelist_destroy(&ael);

1823

else

1824

igraph_adjlist_destroy(&al);

1825

1826

igraph_vector_destroy(&node_order);

1827

igraph_vector_destroy(&label_counters);

1828

igraph_vector_destroy(&dominant_labels);

1829

IGRAPH_FINALLY_CLEAN(4);

1830

1831

return 0;

1832

}

1833

Older »