2
/* vim:set ts=2 sw=2 sts=2 et: */
5
Copyright (C) 2007 Gabor Csardi <csardi@rmki.kfki.hu>
6
MTA RMKI, Konkoly-Thege Miklos st. 29-33, Budapest 1121, Hungary
8
This program is free software; you can redistribute it and/or modify
9
it under the terms of the GNU General Public License as published by
10
the Free Software Foundation; either version 2 of the License, or
11
(at your option) any later version.
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
GNU General Public License for more details.
18
You should have received a copy of the GNU General Public License
19
along with this program; if not, write to the Free Software
20
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
29
#include "arpack_internal.h"
36
* \function igraph_community_eb_get_merges
37
* \brief Calculating the merges, ie. the dendrogram for an edge betweenness community structure
40
* This function is handy if you have a sequence of edge which are
41
* gradually removed from the network and you would like to know how
42
* the network falls apart into separate components. The edge sequence
43
* may come from the \ref igraph_community_edge_betweenness()
44
* function, but this is not neccessary. Note that \ref
45
* igraph_community_edge_betweenness can also calculate the
46
* dendrogram, via its \p merges argument.
48
* \param graph The input graph.
49
* \param edges Vector containing the edges to be removed from the
50
* network, all edges are expected to appear exactly once in the
52
* \param res Pointer to an initialized matrix, if not NULL then the
53
* dendrogram will be stored here, in the same form as for the \ref
54
* igraph_community_walktrap() function: the matrix has two columns
55
* and each line is a merge given by the ids of the merged
56
* components. The component ids are number from zero and
57
* component ids smaller than the number of vertices in the graph
58
* belong to individual vertices. The non-trivial components
59
* containing at least two vertices are numbered from \c n, \c n is
60
* the number of vertices in the graph. So if the first line
61
* contains \c a and \c b that means that components \c a and \c b
62
* are merged into component \c n, the second line creates
63
* component \c n+1, etc. The matrix will be resized as needed.
64
* \param bridges Pointer to an initialized vector or NULL. If not
65
* null then the index of the edge removals which split the network
66
* will be stored here. The vector will be resized as needed.
69
* \sa \ref igraph_community_edge_betweenness().
71
* Time complexity: O(|E|+|V|log|V|), |V| is the number of vertices,
72
* |E| is the number of edges.
75
int igraph_community_eb_get_merges(const igraph_t *graph,
76
const igraph_vector_t *edges,
78
igraph_vector_t *bridges) {
80
long int no_of_nodes=igraph_vcount(graph);
83
igraph_integer_t no_comps;
85
IGRAPH_CHECK(igraph_clusters(graph, 0, 0, &no_comps, IGRAPH_WEAK));
87
IGRAPH_VECTOR_INIT_FINALLY(&ptr, no_of_nodes*2-1);
89
IGRAPH_CHECK(igraph_matrix_resize(res, no_of_nodes-no_comps, 2));
92
IGRAPH_CHECK(igraph_vector_resize(bridges, no_of_nodes-no_comps));
95
for (i=igraph_vector_size(edges)-1; i>=0; i--) {
96
long int edge=VECTOR(*edges)[i];
97
igraph_integer_t from, to;
99
igraph_edge(graph, edge, &from, &to);
101
while (VECTOR(ptr)[idx-1] != 0) {
102
idx=VECTOR(ptr)[idx-1];
106
while (VECTOR(ptr)[idx-1] != 0) {
107
idx=VECTOR(ptr)[idx-1];
110
if (c1 != c2) { /* this is a merge */
112
MATRIX(*res, midx, 0)=c1;
113
MATRIX(*res, midx, 1)=c2;
116
VECTOR(*bridges)[midx]=i+1;
119
VECTOR(ptr)[c1]=no_of_nodes+midx+1;
120
VECTOR(ptr)[c2]=no_of_nodes+midx+1;
121
VECTOR(ptr)[(long int)from]=no_of_nodes+midx+1;
122
VECTOR(ptr)[(long int)to]=no_of_nodes+midx+1;
128
igraph_vector_destroy(&ptr);
129
IGRAPH_FINALLY_CLEAN(1);
134
/* Find the smallest active element in the vector */
135
long int igraph_i_vector_which_max_not_null(const igraph_vector_t *v,
136
const char *passive) {
137
long int which, i=0, size=igraph_vector_size(v);
143
max=VECTOR(*v)[which];
144
for (i++; i<size; i++) {
145
igraph_real_t elem=VECTOR(*v)[i];
146
if (!passive[i] && elem > max) {
156
* \function igraph_community_edge_betweenness
157
* \brief Community findinf based on edge betweenness
159
* Community structure detection based on the betweenness of the edges
160
* in the network. The algorithm was invented by M. Girvan and
161
* M. Newman, see: M. Girvan and M. E. J. Newman: Community structure in
162
* social and biological networks, Proc. Nat. Acad. Sci. USA 99, 7821-7826
166
* The idea is that the betweenness of the edges connecting two
167
* communities is typically high, as many of the shortest paths
168
* between nodes in separate communities go through them. So we
169
* gradually remove the edge with highest betweenness from the
170
* network, and recalculate edge betweenness after every removal.
171
* This way sooner or later the network falls off to two components,
172
* then after a while one of these components falls off to two smaller
173
* components, etc. until all edges are removed. This is a divisive
174
* hieararchical approach, the result is a dendrogram.
175
* \param graph The input graph.
176
* \param result Pointer to an initialized vector, the result will be
177
* stored here, the ids of the removed edges in the order of their
178
* removal. It will be resized as needed.
179
* \param edge_betweenness Pointer to an initialized vector or
180
* NULL. In the former case the edge betweenness of the removed
181
* edge is stored here. The vector will be resized as needed.
182
* \param merges Pointer to an initialized matrix or NULL. If not NULL
183
* then merges performed by the algorithm are stored here. Even if
184
* this is a divisive algorithm, we can replay it backwards and
185
* note which two clusters were merged. Clusters are numbered from
186
* zero, see the \p merges argument of \ref
187
* igraph_community_walktrap() for details. The matrix will be
189
* \param bridges Pointer to an initialized vector of NULL. If not
190
* NULL then all edge removals which separated the network into
191
* more components are marked here.
192
* \param directed Logical constant, whether to calculate directed
193
* betweenness (ie. directed paths) for directed graphs. It is
194
* ignored for undirected graphs.
195
* \return Error code.
197
* \sa \ref igraph_community_eb_get_merges(), \ref
198
* igraph_community_spinglass(), \ref igraph_community_walktrap().
200
* Time complexity: O(|V|^3), as the betweenness calculation requires
201
* O(|V|^2) and we do it |V|-1 times.
204
int igraph_community_edge_betweenness(const igraph_t *graph,
205
igraph_vector_t *result,
206
igraph_vector_t *edge_betweenness,
207
igraph_matrix_t *merges,
208
igraph_vector_t *bridges,
209
igraph_bool_t directed) {
211
long int no_of_nodes=igraph_vcount(graph);
212
long int no_of_edges=igraph_ecount(graph);
213
igraph_dqueue_t q=IGRAPH_DQUEUE_NULL;
214
long int *distance, *nrgeo;
216
igraph_stack_t stack=IGRAPH_STACK_NULL;
217
long int source, i, e;
219
igraph_adjedgelist_t elist_out, elist_in;
220
igraph_adjedgelist_t *elist_out_p, *elist_in_p;
221
igraph_vector_t *neip;
223
igraph_integer_t modein, modeout;
225
long int maxedge, pos;
226
igraph_integer_t from, to;
230
directed=directed && igraph_is_directed(graph);
234
IGRAPH_CHECK(igraph_adjedgelist_init(graph, &elist_out, IGRAPH_OUT));
235
IGRAPH_FINALLY(igraph_adjedgelist_destroy, &elist_out);
236
IGRAPH_CHECK(igraph_adjedgelist_init(graph, &elist_in, IGRAPH_IN));
237
IGRAPH_FINALLY(igraph_adjedgelist_destroy, &elist_in);
238
elist_out_p=&elist_out;
239
elist_in_p=&elist_in;
241
modeout=modein=IGRAPH_ALL;
242
IGRAPH_CHECK(igraph_adjedgelist_init(graph, &elist_out, IGRAPH_ALL));
243
IGRAPH_FINALLY(igraph_adjedgelist_destroy, &elist_out);
244
elist_out_p=elist_in_p=&elist_out;
247
distance=igraph_Calloc(no_of_nodes, long int);
249
IGRAPH_ERROR("edge betweenness community structure failed", IGRAPH_ENOMEM);
251
IGRAPH_FINALLY(igraph_free, distance);
252
nrgeo=igraph_Calloc(no_of_nodes, long int);
254
IGRAPH_ERROR("edge betweenness community structure failed", IGRAPH_ENOMEM);
256
IGRAPH_FINALLY(igraph_free, nrgeo);
257
tmpscore=igraph_Calloc(no_of_nodes, double);
259
IGRAPH_ERROR("edge betweenness community structure failed", IGRAPH_ENOMEM);
261
IGRAPH_FINALLY(igraph_free, tmpscore);
263
IGRAPH_DQUEUE_INIT_FINALLY(&q, 100);
264
IGRAPH_CHECK(igraph_stack_init(&stack, no_of_nodes));
265
IGRAPH_FINALLY(igraph_stack_destroy, &stack);
267
IGRAPH_CHECK(igraph_vector_resize(result, no_of_edges));
268
if (edge_betweenness) {
269
IGRAPH_CHECK(igraph_vector_resize(edge_betweenness, no_of_edges));
270
VECTOR(*edge_betweenness)[no_of_edges-1]=0;
273
IGRAPH_VECTOR_INIT_FINALLY(&eb, no_of_edges);
275
passive=igraph_Calloc(no_of_edges, char);
277
IGRAPH_ERROR("edge betweenness community structure failed", IGRAPH_ENOMEM);
279
IGRAPH_FINALLY(igraph_free, passive);
281
for (e=0; e<no_of_edges; e++) {
283
igraph_vector_null(&eb);
285
for (source=0; source<no_of_nodes; source++) {
287
/* This will contain the edge betweenness in the current step */
288
IGRAPH_ALLOW_INTERRUPTION();
290
memset(distance, 0, no_of_nodes*sizeof(long int));
291
memset(nrgeo, 0, no_of_nodes*sizeof(long int));
292
memset(tmpscore, 0, no_of_nodes*sizeof(double));
293
igraph_stack_clear(&stack); /* it should be empty anyway... */
295
IGRAPH_CHECK(igraph_dqueue_push(&q, source));
300
while (!igraph_dqueue_empty(&q)) {
301
long int actnode=igraph_dqueue_pop(&q);
303
neip=igraph_adjedgelist_get(elist_out_p, actnode);
304
neino=igraph_vector_size(neip);
305
for (i=0; i<neino; i++) {
306
igraph_integer_t edge=VECTOR(*neip)[i], from, to;
308
igraph_edge(graph, edge, &from, &to);
309
neighbor = actnode!=from ? from : to;
310
if (nrgeo[neighbor] != 0) {
311
/* we've already seen this node, another shortest path? */
312
if (distance[neighbor]==distance[actnode]+1) {
313
nrgeo[neighbor]+=nrgeo[actnode];
316
/* we haven't seen this node yet */
317
nrgeo[neighbor]+=nrgeo[actnode];
318
distance[neighbor]=distance[actnode]+1;
319
IGRAPH_CHECK(igraph_dqueue_push(&q, neighbor));
320
IGRAPH_CHECK(igraph_stack_push(&stack, neighbor));
323
} /* while !igraph_dqueue_empty */
325
/* Ok, we've the distance of each node and also the number of
326
shortest paths to them. Now we do an inverse search, starting
327
with the farthest nodes. */
328
while (!igraph_stack_empty(&stack)) {
329
long int actnode=igraph_stack_pop(&stack);
330
if (distance[actnode]<1) { continue; } /* skip source node */
332
/* set the temporary score of the friends */
333
neip=igraph_adjedgelist_get(elist_in_p, actnode);
334
neino=igraph_vector_size(neip);
335
for (i=0; i<neino; i++) {
336
igraph_integer_t from, to;
338
long int edgeno=VECTOR(*neip)[i];
339
igraph_edge(graph, edgeno, &from, &to);
340
neighbor= actnode != from ? from : to;
341
if (distance[neighbor]==distance[actnode]-1 &&
342
nrgeo[neighbor] != 0) {
343
tmpscore[neighbor] +=
344
(tmpscore[actnode]+1)*nrgeo[neighbor]/nrgeo[actnode];
345
VECTOR(eb)[edgeno] +=
346
(tmpscore[actnode]+1)*nrgeo[neighbor]/nrgeo[actnode];
350
/* Ok, we've the scores for this source */
351
} /* for source <= no_of_nodes */
353
/* Now look for the smallest edge betweenness */
354
/* and eliminate that edge from the network */
355
maxedge=igraph_i_vector_which_max_not_null(&eb, passive);
356
VECTOR(*result)[e]=maxedge;
357
if (edge_betweenness) {
358
VECTOR(*edge_betweenness)[e]=VECTOR(eb)[maxedge];
360
VECTOR(*edge_betweenness)[e] /= 2.0;
364
igraph_edge(graph, maxedge, &from, &to);
366
neip=igraph_adjedgelist_get(elist_in_p, to);
367
neino=igraph_vector_size(neip);
368
igraph_vector_search(neip, 0, maxedge, &pos);
369
VECTOR(*neip)[pos]=VECTOR(*neip)[neino-1];
370
igraph_vector_pop_back(neip);
372
neip=igraph_adjedgelist_get(elist_out_p, from);
373
neino=igraph_vector_size(neip);
374
igraph_vector_search(neip, 0, maxedge, &pos);
375
VECTOR(*neip)[pos]=VECTOR(*neip)[neino-1];
376
igraph_vector_pop_back(neip);
379
igraph_free(passive);
380
igraph_vector_destroy(&eb);
381
igraph_stack_destroy(&stack);
382
igraph_dqueue_destroy(&q);
383
igraph_free(tmpscore);
385
igraph_free(distance);
386
IGRAPH_FINALLY_CLEAN(7);
389
igraph_adjedgelist_destroy(&elist_out);
390
igraph_adjedgelist_destroy(&elist_in);
391
IGRAPH_FINALLY_CLEAN(2);
393
igraph_adjedgelist_destroy(&elist_out);
394
IGRAPH_FINALLY_CLEAN(1);
397
if (merges || bridges) {
398
IGRAPH_CHECK(igraph_community_eb_get_merges(graph, result, merges, bridges));
406
* \function igraph_community_to_membership
407
* \brief Create membership vector from community structure dendrogram
409
* This function creates a membership vector from a community
410
* structure dendrogram. A membership vector contains for each vertex
411
* the id of its graph component, the graph components are numbered
412
* from zero, see the same argument of \ref igraph_clusters() for an
413
* example of a membership vector.
416
* Many community detection algorithms return with a \em merges
417
* matrix, \ref igraph_community_walktrap() an \ref
418
* igraph_community_edge_betweenness() are two examples. The matrix
419
* contains the merge operations performed while mapping the
420
* hierarchical structure of a network. If the matrix has \c n-1 rows,
421
* where \c n is the number of vertices in the graph, then it contains
422
* the hierarchical structure of the whole network and it is called a
426
* This function performs \p steps merge operations as prescribed by
427
* the \p merges matrix and returns the current state of the network.
430
* If if \p merges is not a complete dendrogram, it is possible to
431
* take \p steps steps if \p steps is not bigger than the number
432
* lines in \p merges.
433
* \param merges The two-column matrix containing the merge
434
* operations. See \ref igraph_community_walktrap() for the
436
* \param nodes The number of leaf nodes in the dendrogram
437
* \param steps Integer constant, the number of steps to take.
438
* \param membership Pointer to an initialied vector, the membership
439
* results will be stored here, if not NULL. The vector will be
441
* \param csize Pointer to an initialized vector, or NULL. If not NULL
442
* then the sizes of the components will be stored here, the vector
443
* will be resized as needed.
445
* \sa \ref igraph_community_walktrap(), \ref
446
* igraph_community_edge_betweenness(), \ref
447
* igraph_community_fastgreedy() for community structure detection
450
* Time complexity: O(|V|), the number of vertices in the graph.
453
int igraph_community_to_membership(const igraph_matrix_t *merges,
454
igraph_integer_t nodes,
455
igraph_integer_t steps,
456
igraph_vector_t *membership,
457
igraph_vector_t *csize) {
459
long int no_of_nodes=nodes;
460
long int components=no_of_nodes-steps;
464
if (steps > igraph_matrix_nrow(merges)) {
465
IGRAPH_ERROR("`steps' to big or `merges' matrix too short", IGRAPH_EINVAL);
469
IGRAPH_CHECK(igraph_vector_resize(membership, no_of_nodes));
470
igraph_vector_null(membership);
473
IGRAPH_CHECK(igraph_vector_resize(csize, components));
474
igraph_vector_null(csize);
477
IGRAPH_VECTOR_INIT_FINALLY(&tmp, steps);
479
for (i=steps-1; i>=0; i--) {
480
long int c1=MATRIX(*merges, i, 0);
481
long int c2=MATRIX(*merges, i, 1);
484
if (VECTOR(tmp)[i]==0) {
486
VECTOR(tmp)[i]=found;
489
if (c1<no_of_nodes) {
490
long int cid=VECTOR(tmp)[i]-1;
491
if (membership) { VECTOR(*membership)[c1]=cid+1; }
492
if (csize) { VECTOR(*csize)[cid] += 1; }
494
VECTOR(tmp)[c1-no_of_nodes]=VECTOR(tmp)[i];
497
if (c2<no_of_nodes) {
498
long int cid=VECTOR(tmp)[i]-1;
499
if (membership) { VECTOR(*membership)[c2]=cid+1; }
500
if (csize) { VECTOR(*csize)[cid] += 1; }
502
VECTOR(tmp)[c2-no_of_nodes]=VECTOR(tmp)[i];
507
if (membership || csize) {
508
for (i=0; i<no_of_nodes; i++) {
509
long int tmp=VECTOR(*membership)[i];
512
VECTOR(*membership)[i]=tmp-1;
516
VECTOR(*csize)[found]+=1;
519
VECTOR(*membership)[i]=found;
526
igraph_vector_destroy(&tmp);
527
IGRAPH_FINALLY_CLEAN(1);
533
* \function igraph_modularity
534
* \brief Calculate the modularity of a graph with respect to some vertex types
536
* The modularity of a graph with respect to some division (or vertex
537
* types) measures how good the division is, or how separated are the
538
* different vertex types from each other. It defined as
539
* Q=1/(2m) * sum(Aij-ki*kj/(2m)delta(ci,cj),i,j), here `m' is the
540
* number of edges, `Aij' is the element of the `A' adjacency matrix
541
* in row `i' and column `j', `ki' is the degree of `i', `kj' is the
542
* degree of `j', `ci' is the type (or component) of `i', `cj' that of
543
* `j', the sum goes over all `i' and `j' pairs of vertices, and
544
* `delta(x,y)' is one if x=y and zero otherwise.
547
* Modularity on weighted graphs is also meaningful. When taking edge
548
* weights into account, `Aij' becomes the weight of the corresponding
549
* edge (or 0 if there is no edge), `ki' is the total weight of edges
550
* adjacent to vertex `i', `kj' is the total weight of edges adjacent
551
* to vertex `j' and `m' is the total weight of all edges.
554
* See also MEJ Newman and M Girvan: Finding and evaluating community
555
* structure in networks. Physical Review E 69 026113, 2004.
556
* \param graph The input graph.
557
* \param membership Numeric vector which gives the type of each
558
* vertex, ie. the component to which it belongs.
559
* \param modularity Pointer to a real number, the result will be
561
* \param weights Weight vector or NULL if no weights are specified.
562
* \return Error code.
564
* Time complexity: O(|V|+|E|), the number of vertices plus the number
568
int igraph_modularity(const igraph_t *graph,
569
const igraph_vector_t *membership,
570
igraph_real_t *modularity,
571
const igraph_vector_t *weights) {
573
igraph_vector_t e, a;
574
long int types=igraph_vector_max(membership)+1;
575
long int no_of_edges=igraph_ecount(graph);
577
igraph_integer_t from, to, m;
580
IGRAPH_VECTOR_INIT_FINALLY(&e, types);
581
IGRAPH_VECTOR_INIT_FINALLY(&a, types);
584
if (igraph_vector_size(weights) < no_of_edges)
585
IGRAPH_ERROR("cannot calculate modularity, weight vector too short",
587
m=igraph_vector_sum(weights);
588
for (i=0; i<no_of_edges; i++) {
589
igraph_real_t w=VECTOR(*weights)[i];
590
igraph_edge(graph, i, &from, &to);
591
c1=VECTOR(*membership)[(long int)from];
592
c2=VECTOR(*membership)[(long int)to];
593
if (c1==c2) VECTOR(e)[c1] += 2*w;
599
for (i=0; i<no_of_edges; i++) {
600
igraph_edge(graph, i, &from, &to);
601
c1=VECTOR(*membership)[(long int)from];
602
c2=VECTOR(*membership)[(long int)to];
603
if (c1==c2) VECTOR(e)[c1] += 2;
610
for (i=0; i<types; i++) {
611
igraph_real_t tmp=VECTOR(a)[i]/2/m;
612
*modularity += VECTOR(e)[i]/2/m;
613
*modularity -= tmp*tmp;
616
igraph_vector_destroy(&e);
617
igraph_vector_destroy(&a);
618
IGRAPH_FINALLY_CLEAN(2);
624
* \section about_leading_eigenvector_methods
627
* The functions documented in these section implement the
628
* <quote>leading eigenvector</quote> method developed by Mark Newman and
629
* published in MEJ Newman: Finding community structure using the
630
* eigenvectors of matrices, arXiv:physics/0605087. TODO: proper
634
* The heart of the method is the definition of the modularity matrix,
635
* B, which is B=A-P, A being the adjacency matrix of the (undirected)
636
* network, and P contains the probability that certain edges are
637
* present according to the <quote>configuration model</quote> In
638
* other words, a Pij element of P is the probability that there is an
639
* edge between vertices i and j in a random network in which the
640
* degrees of all vertices are the same as in the input graph.</para>
643
* The leading eigenvector method works by calculating the eigenvector
644
* of the modularity matrix for the largest positive eigenvalue and
645
* then separating vertices into two community based on the sign of
646
* the corresponding element in the eigenvector. If all elements in
647
* the eigenvector are of the same sign that means that the network
648
* has no underlying comuunity structure.
649
* Check Newman's paper to understand why this is a good method for
650
* detecting community structure. </para>
653
* Three function are implemented, they all work accoding to the same
654
* principles. The simplest is perhaps \ref
655
* igraph_community_leading_eigenvector_naive(). This function splits
656
* the network as described above and then recursively splits the
657
* two components after the split as individual networks, if possible.
658
* This however is not a good way for maximizing moduilarity, again
659
* see the paper for explanation and the proper definition of
663
* The correct recursive community structure detection method is
664
* implemented in \ref igraph_community_leading_eigenvector().
665
* Here, after the initial split, the following splits are done in a
666
* way to optimize modularity regarding the original network.
667
* I can't say it enough, see the paper, particularly section VI.
671
* The third function is \ref
672
* igraph_community_leading_eigenvector_step(), this starts from a
673
* division of the network and tries to split a given community into
674
* two subcommunities via the same (correct) method as \ref
675
* igraph_community_leading_eigenvector().
679
typedef struct igraph_i_community_leading_eigenvector_naive_data_t {
680
igraph_vector_t *idx;
681
igraph_adjlist_t *adjlist;
682
} igraph_i_community_leading_eigenvector_naive_data_t;
684
int igraph_i_community_leading_eigenvector_naive(igraph_real_t *to,
685
const igraph_real_t *from,
686
long int n, void *extra) {
688
igraph_i_community_leading_eigenvector_naive_data_t *data=extra;
689
long int j, k, nlen, size=n;
690
igraph_vector_t *idx=data->idx;
691
igraph_adjlist_t *adjlist=data->adjlist;
692
igraph_real_t ktx, ktx2;
694
/* Calculate Ax first */
695
for (j=0; j<size; j++) {
696
long int oldid=VECTOR(*idx)[j];
697
igraph_vector_t *neis=igraph_adjlist_get(adjlist, oldid);
698
nlen=igraph_vector_size(neis);
700
for (k=0; k<nlen; k++) {
701
long int nei=VECTOR(*neis)[k];
706
/* Now calculate k^Tx/2m */
708
for (j=0; j<size; j++) {
709
long int oldid=VECTOR(*idx)[j];
710
igraph_vector_t *neis=igraph_adjlist_get(adjlist, oldid);
711
long int degree=igraph_vector_size(neis);
713
ktx += from[j] * degree;
717
/* Now calculate Bx */
718
for (j=0; j<size; j++) {
719
long int oldid=VECTOR(*idx)[j];
720
igraph_vector_t *neis=igraph_adjlist_get(adjlist, oldid);
721
long int degree=igraph_vector_size(neis);
722
to[j] = to[j] - ktx*degree + degree*degree*from[j]/ktx2;
729
* \ingroup communities
730
* \function igraph_community_leading_eigenvector_naive
731
* \brief Leading eigenvector community finding (naive version).
733
* A naive implementation of Newman's eigenvector community structure
734
* detection. This function splits the network into two components
735
* according to the leading eigenvector of the modularity matrix and
736
* then recursively takes \p steps steps by splitting the components
737
* as individual network. This is not the correct way however, see
738
* MEJ Newman: Finding community structure in networks using the
739
* eigenvectors of matrices, arXiv:physics/0605087. Consider using the
740
* correct \ref igraph_community_leading_eigenvector() function instead.
741
* \param graph The input graph, should be undirected to make sense.
742
* \param merges The merge matrix. The splits done by the algorithm
743
* are stored here, its structure is the same ad for \ref
744
* igraph_community_leading_eigenvector(). This argument is ignored
746
* \param membership The membership vector, for each vertex it gives
747
* the id of its community after all the splits are performed.
748
* This argument is ignored if it is \c NULL.
749
* \param steps The number of splits to do, if possible. Supply the
750
* number of vertices in the network here to perform as many steps
752
* \param options The options for ARPACK. \c n is always
753
* overwritten. \c ncv is set to at least 3.
754
* \return Error code.
756
* \sa \ref igraph_community_leading_eigenvector() for the proper way,
757
* \ref igraph_community_leading_eigenvector_step() to do just one split.
759
* Time complexity: O(E|+|V|^2*steps), |V| is the number of vertices,
760
* |E| is the number of edges.
763
int igraph_community_leading_eigenvector_naive(const igraph_t *graph,
764
igraph_matrix_t *merges,
765
igraph_vector_t *membership,
766
igraph_integer_t steps,
767
igraph_arpack_options_t *options) {
769
long int no_of_nodes=igraph_vcount(graph);
770
igraph_dqueue_t tosplit;
771
igraph_vector_t mymerges;
774
igraph_adjlist_t adjlist;
775
long int i, j, k, l, m;
776
long int communities=1;
777
igraph_vector_t vmembership, *mymembership=membership;
778
igraph_i_community_leading_eigenvector_naive_data_t extra;
779
igraph_arpack_storage_t storage;
781
if (igraph_is_directed(graph)) {
782
IGRAPH_WARNING("This method was developed for undirected graphs");
785
if (steps < 0 || steps > no_of_nodes-1) {
790
mymembership=&vmembership;
791
IGRAPH_VECTOR_INIT_FINALLY(mymembership, 0);
794
IGRAPH_VECTOR_INIT_FINALLY(&mymerges, 0);
795
IGRAPH_CHECK(igraph_vector_reserve(&mymerges, steps*2));
796
IGRAPH_VECTOR_INIT_FINALLY(&idx, no_of_nodes);
797
IGRAPH_DQUEUE_INIT_FINALLY(&tosplit, 100);
798
IGRAPH_CHECK(igraph_adjlist_init(graph, &adjlist, IGRAPH_ALL));
799
IGRAPH_FINALLY(igraph_adjlist_destroy, &adjlist);
801
IGRAPH_CHECK(igraph_vector_resize(mymembership, no_of_nodes));
802
igraph_vector_null(mymembership);
804
igraph_dqueue_push(&tosplit, 0);
806
if (options->ncv<3) { options->ncv=3; }
808
/* Memory for ARPACK */
809
IGRAPH_CHECK(igraph_arpack_storage_init(&storage, no_of_nodes, options->ncv,
811
IGRAPH_FINALLY(igraph_arpack_storage_destroy, &storage);
813
extra.adjlist=&adjlist;
815
while (!igraph_dqueue_empty(&tosplit) && staken < steps) {
816
long int comm=igraph_dqueue_pop_back(&tosplit); /* depth first search */
819
IGRAPH_ALLOW_INTERRUPTION();
821
for (i=0; i<no_of_nodes; i++) {
822
if (VECTOR(*mymembership)[i]==comm) {
823
VECTOR(idx)[size++]=i;
827
/* now 'size' is the size of the current community and
828
idx[0:(size-1)] contains the original ids of the vertices in
829
the current community. We need this to index the neighbor list. */
833
continue; /* nothing to do */
838
options->which[0]='L'; options->which[1]='A';
839
if (options->ncv<3) { options->ncv=3; }
840
if (options->ncv > options->n) { options->ncv=options->n; }
842
/* Call ARPACK solver */
843
IGRAPH_CHECK(igraph_arpack_rssolve(igraph_i_community_leading_eigenvector_naive,
844
&extra, options, &storage, 0, 0));
846
if (options->noiter > options->mxiter) {
847
IGRAPH_WARNING("Maximum number of ARPACK iterations reached");
850
/* just to have the always the same result, we multiply by -1
851
if the first (nonzero) element is not positive */
852
for (i=0; i<size; i++) {
853
if (storage.v[i] != 0) { break; }
855
if (storage.v[i]<0) {
856
for (; i<size; i++) {
857
storage.v[i] = - storage.v[i];
861
/* Ok, we have the eigenvector */
863
/* Non-positive eigenvalue */
864
/* printf("%f\n", storage.d[0]); */
865
if (storage.d[0] <= 0) { continue; }
867
/* We create an index vector in workd to renumber the vertices */
869
for (j=0; j<size; j++) {
870
if (storage.v[j] <= 0) {
871
storage.workd[j]=l++;
873
storage.workd[j]=m++;
876
/* if l==0 or m==0 then there was no split */
882
/* Rewrite the adjacency lists */
883
for (j=0; j<size; j++) {
884
long int oldid=VECTOR(idx)[j];
885
igraph_vector_t *neis=igraph_adjlist_get(&adjlist, oldid);
886
long int n=igraph_vector_size(neis);
888
long int nei=VECTOR(*neis)[k];
889
if ( (storage.v[j] <= 0 && storage.v[nei] <= 0) ||
890
(storage.v[j] > 0 && storage.v[nei] > 0)) {
891
/* they remain in the same community */
892
VECTOR(*neis)[k] = storage.workd[nei];
895
/* nei in the other community, remove from neighbor list */
896
VECTOR(*neis)[k] = VECTOR(*neis)[n-1];
897
igraph_vector_pop_back(neis);
903
/* Also rewrite the mymembership vector */
904
for (j=0; j<size; j++) {
905
if (storage.v[j] <= 0) {
906
long int oldid=VECTOR(idx)[j];
907
VECTOR(*mymembership)[oldid]=communities-1;
912
IGRAPH_CHECK(igraph_vector_push_back(&mymerges, comm));
913
IGRAPH_CHECK(igraph_vector_push_back(&mymerges, communities-1));
915
/* Store the resulting communities in the queue if needed */
917
IGRAPH_CHECK(igraph_dqueue_push(&tosplit, communities-1));
920
IGRAPH_CHECK(igraph_dqueue_push(&tosplit, comm));
925
igraph_arpack_storage_destroy(&storage);
926
igraph_adjlist_destroy(&adjlist);
927
igraph_dqueue_destroy(&tosplit);
928
IGRAPH_FINALLY_CLEAN(3);
930
/* reform the mymerges vector into merges matrix */
932
igraph_vector_null(&idx);
933
l=igraph_vector_size(&mymerges);
936
IGRAPH_CHECK(igraph_matrix_resize(merges, l/2, 2));
937
for (i=l; i>0; i-=2) {
938
long int from=VECTOR(mymerges)[i-1];
939
long int to=VECTOR(mymerges)[i-2];
940
MATRIX(*merges, j, 0)=VECTOR(mymerges)[i-2];
941
MATRIX(*merges, j, 1)=VECTOR(mymerges)[i-1];
942
if (VECTOR(idx)[from]!=0) {
943
MATRIX(*merges, j, 1)=VECTOR(idx)[from]-1;
945
if (VECTOR(idx)[to]!=0) {
946
MATRIX(*merges, j, 0)=VECTOR(idx)[to]-1;
953
igraph_vector_destroy(&idx);
954
igraph_vector_destroy(&mymerges);
955
IGRAPH_FINALLY_CLEAN(2);
958
igraph_vector_destroy(mymembership);
959
IGRAPH_FINALLY_CLEAN(1);
965
typedef struct igraph_i_community_leading_eigenvector_data_t {
966
igraph_vector_t *idx;
967
igraph_vector_t *idx2;
968
igraph_adjlist_t *adjlist;
969
igraph_vector_t *tmp;
970
long int no_of_edges;
971
igraph_vector_t *mymembership;
973
} igraph_i_community_leading_eigenvector_data_t;
975
int igraph_i_community_leading_eigenvector(igraph_real_t *to,
976
const igraph_real_t *from,
977
long int n, void *extra) {
979
igraph_i_community_leading_eigenvector_data_t *data=extra;
980
long int j, k, nlen, size=n;
981
igraph_vector_t *idx=data->idx;
982
igraph_vector_t *idx2=data->idx2;
983
igraph_vector_t *tmp=data->tmp;
984
igraph_adjlist_t *adjlist=data->adjlist;
985
igraph_real_t ktx, ktx2;
986
long int no_of_edges=data->no_of_edges;
987
igraph_vector_t *mymembership=data->mymembership;
988
long int comm=data->comm;
990
for (j=0; j<size; j++) {
991
long int oldid=VECTOR(*idx)[j];
992
igraph_vector_t *neis=igraph_adjlist_get(adjlist, oldid);
993
nlen=igraph_vector_size(neis);
996
for (k=0; k<nlen; k++) {
997
long int nei=VECTOR(*neis)[k];
998
if (VECTOR(*mymembership)[nei]==comm) {
999
to[j] += from[ (long int) VECTOR(*idx2)[nei] ];
1000
VECTOR(*tmp)[j] += 1;
1005
/* Now calculate k^Tx/2m */
1007
for (j=0; j<size; j++) {
1008
long int oldid=VECTOR(*idx)[j];
1009
igraph_vector_t *neis=igraph_adjlist_get(adjlist, oldid);
1010
long int degree=igraph_vector_size(neis);
1011
ktx += from[j] * degree;
1014
ktx = ktx / no_of_edges/2.0;
1015
ktx2 = ktx2 / no_of_edges/2.0;
1017
/* Now calculate Bx */
1018
for (j=0; j<size; j++) {
1019
long int oldid=VECTOR(*idx)[j];
1020
igraph_vector_t *neis=igraph_adjlist_get(adjlist, oldid);
1021
igraph_real_t degree=igraph_vector_size(neis);
1022
to[j] = to[j] - ktx*degree + degree*degree*from[j]/no_of_edges/2.0;
1023
VECTOR(*tmp)[j] = VECTOR(*tmp)[j] - ktx2*degree +
1024
degree*degree*1.0/no_of_edges/2.0;
1027
/* -d_ij summa l in G B_il */
1028
for (j=0; j<size; j++) {
1029
to[j] -= VECTOR(*tmp)[j] * from[j];
1036
* \ingroup communities
1037
* \function igraph_community_leading_eigenvector
1038
* \brief Leading eigenvector community finding (proper version).
1040
* Newman's leading eigenvector method for detecting community
1041
* structure. This is the proper implementation of the recursive,
1042
* divisive algorithm: each split is done by maximizing the modularity
1043
* regarding the original network, see MEJ Newman: Finding community
1044
* structure in networks using the eigenvectors of matrices,
1045
* arXiv:physics/0605087.
1047
* \param graph The undirected input graph.
1048
* \param merges The result of the algorithm, a matrix containing the
1049
* information about the splits performed. The matrix is built in
1050
* the opposite way however, it is like the result of an
1051
* agglomerative algorithm. If at the end of the algorithm (after
1052
* \p steps steps was done) there are <quote>p</quote> communities,
1053
* then these are numbered from zero to <quote>p-1</quote>. The
1054
* first line of the matrix contains the first <quote>merge</quote>
1055
* (which is in reality the last split) of two communities into
1056
* community <quote>p</quote>, the merge in the second line forms
1057
* community <quote>p+1</quote>, etc. The matrix should be
1058
* initialized before calling and will be resized as needed.
1059
* This argument is ignored of it is \c NULL.
1060
* \param membership The membership of the vertices after all the
1061
* splits were performed will be stored here. The vector must be
1062
* initialized before calling and will be resized as needed.
1063
* This argument is ignored if it is \c NULL.
1064
* \param steps The maximum number of steps to perform. It might
1065
* happen that some component (or the whole network) has no
1066
* underlying community structure and no further steps can be
1067
* done. If you wany as many steps as possible then supply the
1068
* number of vertices in the network here.
1069
* \param options The options for ARPACK. \c n is always
1070
* overwritten. \c ncv is set to at least 3.
1071
* \return Error code.
1073
* \sa \ref igraph_community_walktrap() and \ref
1074
* igraph_community_spinglass() for other community structure
1075
* detection methods.
1077
* Time complexity: O(|E|+|V|^2*steps), |V| is the number of vertices,
1078
* |E| the number of edges, <quote>steps</quote> the number of splits
1082
int igraph_community_leading_eigenvector(const igraph_t *graph,
1083
igraph_matrix_t *merges,
1084
igraph_vector_t *membership,
1085
igraph_integer_t steps,
1086
igraph_arpack_options_t *options) {
1088
long int no_of_nodes=igraph_vcount(graph);
1089
long int no_of_edges=igraph_ecount(graph);
1090
igraph_dqueue_t tosplit;
1091
igraph_vector_t idx, idx2, mymerges;
1092
igraph_vector_t tmp;
1094
igraph_adjlist_t adjlist;
1095
long int i, j, k, l;
1096
long int communities=1;
1097
igraph_vector_t vmembership, *mymembership=membership;
1098
igraph_i_community_leading_eigenvector_data_t extra;
1099
igraph_arpack_storage_t storage;
1101
if (igraph_is_directed(graph)) {
1102
IGRAPH_WARNING("This method was developed for undirected graphs");
1105
if (steps < 0 || steps > no_of_nodes-1) {
1106
steps=no_of_nodes-1;
1109
if (steps > no_of_nodes-1) {
1110
steps=no_of_nodes-1;
1114
mymembership=&vmembership;
1115
IGRAPH_VECTOR_INIT_FINALLY(mymembership, 0);
1118
IGRAPH_VECTOR_INIT_FINALLY(&mymerges, 0);
1119
IGRAPH_CHECK(igraph_vector_reserve(&mymerges, steps*2));
1121
IGRAPH_VECTOR_INIT_FINALLY(&idx, no_of_nodes);
1122
IGRAPH_VECTOR_INIT_FINALLY(&idx2, no_of_nodes);
1123
IGRAPH_VECTOR_INIT_FINALLY(&tmp, no_of_nodes);
1124
IGRAPH_DQUEUE_INIT_FINALLY(&tosplit, 100);
1125
IGRAPH_CHECK(igraph_adjlist_init(graph, &adjlist, IGRAPH_ALL));
1126
IGRAPH_FINALLY(igraph_adjlist_destroy, &adjlist);
1128
IGRAPH_CHECK(igraph_vector_resize(mymembership, no_of_nodes));
1129
igraph_vector_null(mymembership);
1131
igraph_dqueue_push(&tosplit, 0);
1133
if (options->ncv<3) { options->ncv=3; }
1135
/* Memory for ARPACK */
1136
IGRAPH_CHECK(igraph_arpack_storage_init(&storage, no_of_nodes, options->ncv,
1138
IGRAPH_FINALLY(igraph_arpack_storage_destroy, &storage);
1142
extra.adjlist=&adjlist;
1143
extra.no_of_edges=no_of_edges;
1144
extra.mymembership=mymembership;
1146
while (!igraph_dqueue_empty(&tosplit) && staken < steps) {
1147
long int comm=igraph_dqueue_pop_back(&tosplit); /* depth first search */
1150
IGRAPH_ALLOW_INTERRUPTION();
1152
for (i=0; i<no_of_nodes; i++) {
1153
if (VECTOR(*mymembership)[i]==comm) {
1154
VECTOR(idx)[size]=i;
1155
VECTOR(idx2)[i]=size++;
1166
options->which[0]='L'; options->which[1]='A';
1167
if (options->ncv < 3) { options->ncv=3; }
1168
if (options->ncv > options->n) { options->ncv=options->n; }
1171
/* Call ARPACK solver */
1172
IGRAPH_CHECK(igraph_arpack_rssolve(igraph_i_community_leading_eigenvector,
1173
&extra, options, &storage, 0, 0));
1175
if (options->noiter > options->mxiter) {
1176
IGRAPH_WARNING("Maximum number of ARPACK iterations reached");
1179
/* just to have the always the same result, we multiply by -1
1180
if the first (nonzero) element is not positive */
1181
for (i=0; i<size; i++) {
1182
if (storage.v[i] != 0) { break; }
1184
if (storage.v[i]<0) {
1185
for (; i<size; i++) {
1186
storage.v[i] = - storage.v[i];
1190
/* Ok, we have the eigenvector */
1192
/* Non-positive eigenvalue */
1193
/* printf("%f\n", storage.d[0]); */
1194
/* for (j=0; j<size; j++) { printf("%g ", storage.v[j]); } */
1196
if (storage.d[0] <= 0) { continue; }
1198
/* Count the number of vertices in each community after the split */
1200
for (j=0; j<size; j++) {
1201
if (storage.v[j] <= 0) {
1205
if (l==0 || l==size) {
1210
/* Rewrite the mymembership vector */
1211
for (j=0; j<size; j++) {
1212
if (storage.v[j] <= 0) {
1213
long int oldid=VECTOR(idx)[j];
1214
VECTOR(*mymembership)[oldid]=communities-1;
1219
IGRAPH_CHECK(igraph_vector_push_back(&mymerges, comm));
1220
IGRAPH_CHECK(igraph_vector_push_back(&mymerges, communities-1));
1222
/* Store the resulting communities in the queue if needed */
1224
IGRAPH_CHECK(igraph_dqueue_push(&tosplit, communities-1));
1227
IGRAPH_CHECK(igraph_dqueue_push(&tosplit, comm));
1232
igraph_arpack_storage_destroy(&storage);
1233
igraph_adjlist_destroy(&adjlist);
1234
igraph_dqueue_destroy(&tosplit);
1235
igraph_vector_destroy(&tmp);
1236
igraph_vector_destroy(&idx2);
1237
IGRAPH_FINALLY_CLEAN(5);
1239
/* reform the mymerges vector */
1241
igraph_vector_null(&idx);
1242
l=igraph_vector_size(&mymerges);
1245
IGRAPH_CHECK(igraph_matrix_resize(merges, l/2, 2));
1246
for (i=l; i>0; i-=2) {
1247
long int from=VECTOR(mymerges)[i-1];
1248
long int to=VECTOR(mymerges)[i-2];
1249
MATRIX(*merges, j, 0)=VECTOR(mymerges)[i-2];
1250
MATRIX(*merges, j, 1)=VECTOR(mymerges)[i-1];
1251
if (VECTOR(idx)[from]!=0) {
1252
MATRIX(*merges, j, 1)=VECTOR(idx)[from]-1;
1254
if (VECTOR(idx)[to]!=0) {
1255
MATRIX(*merges, j, 0)=VECTOR(idx)[to]-1;
1257
VECTOR(idx)[to]=++k;
1262
igraph_vector_destroy(&idx);
1263
igraph_vector_destroy(&mymerges);
1264
IGRAPH_FINALLY_CLEAN(2);
1267
igraph_vector_destroy(mymembership);
1268
IGRAPH_FINALLY_CLEAN(1);
1274
typedef struct igraph_i_community_leading_eigenvector_step_data_t {
1275
igraph_vector_t *idx;
1276
igraph_vector_t *idx2;
1277
igraph_lazy_adjlist_t *adjlist;
1278
igraph_vector_t *tmp;
1279
long int no_of_edges;
1280
igraph_vector_t *mymembership;
1282
} igraph_i_community_leading_eigenvector_step_data_t;
1284
int igraph_i_community_leading_eigenvector_step(igraph_real_t *to,
1285
const igraph_real_t *from,
1286
long int n, void *extra) {
1288
igraph_i_community_leading_eigenvector_step_data_t *data=extra;
1289
long int j, k, nlen, size=n;
1290
igraph_vector_t *idx=data->idx;
1291
igraph_vector_t *idx2=data->idx2;
1292
igraph_vector_t *tmp=data->tmp;
1293
igraph_lazy_adjlist_t *adjlist=data->adjlist;
1294
igraph_real_t ktx, ktx2;
1295
long int no_of_edges=data->no_of_edges;
1296
igraph_vector_t *mymembership=data->mymembership;
1297
long int comm=data->comm;
1299
for (j=0; j<size; j++) {
1300
long int oldid=VECTOR(*idx)[j];
1301
igraph_vector_t *neis=igraph_lazy_adjlist_get(adjlist, oldid);
1302
nlen=igraph_vector_size(neis);
1304
VECTOR(*tmp)[j]=0.0;
1305
for (k=0; k<nlen; k++) {
1306
long int nei=VECTOR(*neis)[k];
1307
if (VECTOR(*mymembership)[nei]==comm) {
1308
to[j] += from[ (long int) VECTOR(*idx2)[nei] ];
1309
VECTOR(*tmp)[j] += 1;
1314
/* Now calculate k^Tx/2m */
1316
for (j=0; j<size; j++) {
1317
long int oldid=VECTOR(*idx)[j];
1318
igraph_vector_t *neis=igraph_lazy_adjlist_get(adjlist, oldid);
1319
long int degree=igraph_vector_size(neis);
1320
ktx += from[j] * degree;
1323
ktx = ktx / no_of_edges/2.0;
1324
ktx2 = ktx2 / no_of_edges/2.0;
1326
/* Now calculate Bx */
1327
for (j=0; j<size; j++) {
1328
long int oldid=VECTOR(*idx)[j];
1329
igraph_vector_t *neis=igraph_lazy_adjlist_get(adjlist, oldid);
1330
igraph_real_t degree=igraph_vector_size(neis);
1331
to[j] = to[j] - ktx*degree + degree*degree*from[j]/no_of_edges/2.0;
1332
VECTOR(*tmp)[j] = VECTOR(*tmp)[j] - ktx2*degree +
1333
degree*degree*1.0/no_of_edges/2.0;
1336
/* -d_ij summa l in G B_il */
1337
for (j=0; j<size; j++) {
1338
to[j] -= VECTOR(*tmp)[j] * from[j];
1345
* \ingroup communities
1346
* \function igraph_community_leading_eigenvector_step
1347
* \brief Leading eigenvector community finding (make one step).
1349
* Do one split according to Mark Newman's leading eigenvector
1350
* community detection method. See MEJ Newman: Finding community
1351
* structure in networks using the eigenvectors of matrices,
1352
* arXiv:phyisics/0605087 for the details.
1354
* </para><para>Use this function instead of \ref
1355
* igraph_community_leading_eigenvector() if you want to have full
1356
* control over and information about each split performed along
1357
* community structure detection. \ref
1358
* igraph_community_leading_eigenvector() can be simulated by
1359
* repeatedly calling this function.
1361
* \param graph The undirected input graph.
1362
* \param membership Numeric vector giving a division of \p graph.
1363
* The result will be also stored here. The vector contains the
1364
* community ids for each vertex, these are numbered from 0.
1365
* \param community The id of the community to split.
1366
* \param split Pointer to a logical variable, if it was possible to
1367
* split community \p community then 1, otherwise 0 will be stored
1368
* here. This argument is ignored if it is \c NULL.
1369
* \param eigenvector Pointer to an initialized vector, the
1370
* eigenvector on which the split was done will be stored here.
1371
* It will be resised to have the same length as the number of
1372
* vertices in community \p community. This argument is ignored
1374
* \param eigenvalue Pointer to a real variable, the eigenvalue
1375
* associated with \p eigenvector will be stored here.
1376
* This argument is ignored if it is \c NULL.
1377
* \return Error code.
1379
* \sa \ref igraph_community_leading_eigenvector().
1381
* Time complexity: O(|E|+|V|^2), |E| is the number of edges, |V| is
1382
* the number of vertices.
1385
int igraph_community_leading_eigenvector_step(const igraph_t *graph,
1386
igraph_vector_t *membership,
1387
igraph_integer_t community,
1388
igraph_bool_t *split,
1389
igraph_vector_t *eigenvector,
1390
igraph_real_t *eigenvalue,
1391
igraph_arpack_options_t *options,
1392
igraph_arpack_storage_t *storage) {
1394
long int no_of_nodes=igraph_vcount(graph);
1395
long int no_of_edges=igraph_ecount(graph);
1396
igraph_vector_t tmp;
1397
igraph_vector_t idx, idx2;
1399
long int communities=1;
1400
igraph_lazy_adjlist_t adjlist;
1402
igraph_i_community_leading_eigenvector_step_data_t extra;
1403
igraph_arpack_storage_t real_storage, *mystorage=
1404
storage ? storage : &real_storage;
1405
long int comm=community;
1407
if (igraph_vector_size(membership) != no_of_nodes) {
1408
IGRAPH_ERROR("Invalid membership vector length", IGRAPH_EINVAL);
1411
if (igraph_is_directed(graph)) {
1412
IGRAPH_WARNING("This method was developed for undirected graphs");
1415
IGRAPH_VECTOR_INIT_FINALLY(&idx, no_of_nodes);
1416
IGRAPH_VECTOR_INIT_FINALLY(&idx2, no_of_nodes);
1418
for (i=0; i<no_of_nodes; i++) {
1419
if (VECTOR(*membership)[i]==comm) {
1420
VECTOR(idx)[size]=i;
1421
VECTOR(idx2)[i]=size;
1424
if (VECTOR(*membership)[i] > communities-1) {
1425
communities=VECTOR(*membership)[i]+1;
1429
if (split) { *split=0; }
1431
IGRAPH_CHECK(igraph_lazy_adjlist_init(graph, &adjlist, IGRAPH_ALL,
1432
IGRAPH_DONT_SIMPLIFY));
1433
IGRAPH_FINALLY(igraph_lazy_adjlist_destroy, &adjlist);
1435
IGRAPH_CHECK(igraph_arpack_storage_init(mystorage, no_of_nodes, 3, no_of_nodes, 1));
1436
IGRAPH_FINALLY(igraph_arpack_storage_destroy, mystorage);
1438
IGRAPH_VECTOR_INIT_FINALLY(&tmp, size);
1443
extra.adjlist=&adjlist;
1444
extra.no_of_edges=no_of_edges;
1445
extra.mymembership=membership;
1450
if (options->ncv < 3) { options->ncv=3; }
1451
options->which[0]='L'; options->which[1]='A';
1452
if (options->ncv > options->n) { options->ncv=options->n; }
1454
IGRAPH_CHECK(igraph_arpack_rssolve(igraph_i_community_leading_eigenvector_step,
1455
&extra, options, mystorage, 0, 0));
1457
if (options->noiter > options->mxiter) {
1458
IGRAPH_WARNING("Maximum number of ARPACK iterations reached");
1461
/* just to have the always the same result, we multiply by -1
1462
if the first (nonzero) element is not positive */
1463
for (i=0; i<size; i++) {
1464
if (mystorage->v[i] != 0) { break; }
1466
if (mystorage->v[i]<0) {
1467
for (; i<size; i++) {
1468
mystorage->v[i] = - mystorage->v[i];
1472
/* Ok, we have the eigenvector */
1474
/* Save eigenvalue/vector if requested */
1476
*eigenvalue=mystorage->d[0];
1479
IGRAPH_CHECK(igraph_vector_resize(eigenvector, size));
1480
for (i=0; i<size; i++) {
1481
VECTOR(*eigenvector)[i] = mystorage->v[i];
1485
/* Positive eigenvalue? */
1486
if (mystorage->d[0] > 0) {
1488
/* Rewrite the membership vector, check if there was a split */
1489
for (j=0, k=0; j<size; j++) {
1490
if (VECTOR(*eigenvector)[j] <= 0) {
1491
long int oldid=VECTOR(idx)[j];
1492
VECTOR(*membership)[oldid]=communities;
1502
igraph_vector_destroy(&tmp);
1503
IGRAPH_FINALLY_CLEAN(1);
1505
igraph_arpack_storage_destroy(mystorage);
1506
IGRAPH_FINALLY_CLEAN(1);
1508
igraph_lazy_adjlist_destroy(&adjlist);
1509
IGRAPH_FINALLY_CLEAN(1);
1513
igraph_vector_destroy(&idx2);
1514
igraph_vector_destroy(&idx);
1515
IGRAPH_FINALLY_CLEAN(2);
1521
* \function igraph_le_community_to_membership
1522
* Vertex membership from the leading eigenvector community structure
1524
* This function creates a membership vector from the
1525
* result of \ref igraph_community_leading_eigenvector() or
1526
* \ref igraph_community_leading_eigenvector_naive(). It takes \c membership
1527
* and permformes \c steps merges, according to the supplied
1529
* \param merges The matrix defining the merges to make.
1530
* This is usually from the output of the leading eigenvector community
1531
* structure detection routines.
1532
* \param steps The number of steps to make according to \c merges.
1533
* \param membership Initially the starting membership vector,
1534
* on output the resulting membership vector, after performing \c steps merges.
1535
* \param csize Optionally the sizes of the commmunities is stored here,
1536
* if this is not a null pointer, but an initialized vector.
1537
* \return Error code.
1539
* Time complexity: O(|V|), the number of vertices.
1542
int igraph_le_community_to_membership(const igraph_matrix_t *merges,
1543
igraph_integer_t steps,
1544
igraph_vector_t *membership,
1545
igraph_vector_t *csize) {
1547
long int no_of_nodes=igraph_vector_size(membership);
1548
igraph_vector_t fake_memb;
1549
long int components, i;
1551
if (igraph_matrix_nrow(merges) < steps) {
1552
IGRAPH_ERROR("`steps' to big or `merges' matrix too short", IGRAPH_EINVAL);
1555
components=igraph_vector_max(membership)+1;
1556
if (components > no_of_nodes) {
1557
IGRAPH_ERROR("Invalid membership vector, too many components", IGRAPH_EINVAL);
1559
if (steps >= components) {
1560
IGRAPH_ERROR("Cannot make `steps' steps from supplied membership vector",
1564
IGRAPH_VECTOR_INIT_FINALLY(&fake_memb, components);
1566
/* Check membership vector */
1567
for (i=0; i<no_of_nodes; i++) {
1568
if (VECTOR(*membership)[i] < 0) {
1569
IGRAPH_ERROR("Invalid membership vector, negative id", IGRAPH_EINVAL);
1571
VECTOR(fake_memb)[ (long int) VECTOR(*membership)[i] ] += 1;
1573
for (i=0; i<components; i++) {
1574
if (VECTOR(fake_memb)[i] == 0) {
1575
IGRAPH_ERROR("Invalid membership vector, empty cluster", IGRAPH_EINVAL);
1579
IGRAPH_CHECK(igraph_community_to_membership(merges, components, steps,
1582
/* Ok, now we have the membership of the initial components,
1583
rewrite the original membership vector. */
1586
IGRAPH_CHECK(igraph_vector_resize(csize, components-steps));
1587
igraph_vector_null(csize);
1590
for (i=0; i<no_of_nodes; i++) {
1591
VECTOR(*membership)[i] = VECTOR(fake_memb)[ (long int) VECTOR(*membership)[i] ];
1593
VECTOR(*csize)[ (long int) VECTOR(*membership)[i] ] += 1;
1597
igraph_vector_destroy(&fake_memb);
1598
IGRAPH_FINALLY_CLEAN(1);
1603
* \function igraph_community_label_propagation
1604
* \ingroup communities
1605
* \brief Community detection based on label propagation
1607
* This function implements the community detection method described in:
1608
* Raghavan, U.N. and Albert, R. and Kumara, S.: Near linear time algorithm
1609
* to detect community structures in large-scale networks. Phys Rev E
1610
* 76, 036106. (2007). This version extends the original method by
1611
* the ability to take edge weights into consideration and also
1612
* by allowing some labels to be fixed.
1614
* \param graph The input graph, should be undirected to make sense.
1615
* \param membership The membership vector, the result is returned here.
1616
* For each vertex it gives the ID of its community (label).
1617
* \param weights The weight vector, it should contain a positive
1618
* weight for all the edges.
1619
* \param initial The initial state. If NULL, every vertex will have
1620
* a different label at the beginning. Otherwise it must be a vector
1621
* with an entry for each vertex. Non-negative values denote different
1622
* labels, negative entries denote vertices without labels.
1623
* \param fixed Boolean vector denoting which labels are fixed. Of course
1624
* this makes sense only if you provided an initial state, otherwise
1625
* this element will be ignored. Also note that vertices without labels
1627
* \return Error code.
1629
* Time complexity: O(m+n)
1631
int igraph_community_label_propagation(const igraph_t *graph,
1632
igraph_vector_t *membership,
1633
const igraph_vector_t *weights,
1634
const igraph_vector_t *initial,
1635
igraph_vector_bool_t *fixed) {
1636
long int no_of_nodes=igraph_vcount(graph);
1637
long int no_of_edges=igraph_ecount(graph);
1638
long int no_of_not_fixed_nodes=no_of_nodes;
1640
igraph_adjlist_t al;
1641
igraph_adjedgelist_t ael;
1642
igraph_bool_t running = 1;
1644
igraph_vector_t label_counters, dominant_labels, node_order;
1646
/* The implementation uses a trick to avoid negative array indexing:
1647
* elements of the membership vector are increased by 1 at the start
1648
* of the algorithm; this to allow us to denote unlabeled vertices
1649
* (if any) by zeroes. The membership vector is shifted back in the end
1652
/* Do some initial checks */
1653
if (fixed && igraph_vector_bool_size(fixed) != no_of_nodes) {
1654
IGRAPH_ERROR("Invalid fixed labeling vector length", IGRAPH_EINVAL);
1657
if (igraph_vector_size(weights) != no_of_edges) {
1658
IGRAPH_ERROR("Invalid weight vector length", IGRAPH_EINVAL);
1659
} else if (igraph_vector_min(weights) < 0) {
1660
IGRAPH_ERROR("Weights must be non-negative", IGRAPH_EINVAL);
1663
if (fixed && !initial) {
1664
IGRAPH_WARNING("Ignoring fixed vertices as no initial labeling given");
1667
IGRAPH_CHECK(igraph_vector_resize(membership, no_of_nodes));
1670
if (igraph_vector_size(initial) != no_of_nodes) {
1671
IGRAPH_ERROR("Invalid initial labeling vector length", IGRAPH_EINVAL);
1673
/* Check if the labels used are valid, initialize membership vector */
1674
for (i=0; i<no_of_nodes; i++) {
1675
if (VECTOR(*initial)[i] < -1) {
1676
VECTOR(*membership)[i] = 0;
1678
VECTOR(*membership)[i] = VECTOR(*initial)[i] + 1;
1680
if (VECTOR(*fixed)[i]) {
1681
if (VECTOR(*membership)[i] == 0) {
1682
IGRAPH_WARNING("Fixed nodes cannot be unlabeled, ignoring them");
1683
VECTOR(*fixed)[i] = 0;
1685
no_of_not_fixed_nodes--;
1690
i = igraph_vector_max(membership);
1691
if (i > no_of_nodes) {
1692
IGRAPH_ERROR("elements of the initial labeling vector must be between 0 and |V|-1", IGRAPH_EINVAL);
1695
IGRAPH_ERROR("at least one vertex must be labeled in the initial labeling", IGRAPH_EINVAL);
1698
for (i=0; i<no_of_nodes; i++) {
1699
VECTOR(*membership)[i] = i;
1703
/* Create an adjacency (edge) list representation for efficiency.
1704
* For the unweighted case, the adjacency list is enough. For the
1705
* weighted case, we need the adjacency edge list */
1707
IGRAPH_CHECK(igraph_adjedgelist_init(graph, &ael, IGRAPH_IN));
1708
IGRAPH_FINALLY(igraph_adjedgelist_destroy, &ael);
1710
IGRAPH_CHECK(igraph_adjlist_init(graph, &al, IGRAPH_IN));
1711
IGRAPH_FINALLY(igraph_adjlist_destroy, &al);
1714
/* Create storage space for counting distinct labels and dominant ones */
1715
IGRAPH_VECTOR_INIT_FINALLY(&label_counters, no_of_nodes+1);
1716
IGRAPH_VECTOR_INIT_FINALLY(&dominant_labels, 0);
1717
IGRAPH_CHECK(igraph_vector_reserve(&dominant_labels, 2));
1721
/* Initialize node ordering vector with only the not fixed nodes */
1723
IGRAPH_VECTOR_INIT_FINALLY(&node_order, no_of_not_fixed_nodes);
1724
for (i=0, j=0; i<no_of_nodes; i++) {
1725
if (!VECTOR(*fixed)[i]) {
1726
VECTOR(node_order)[j] = i;
1731
IGRAPH_CHECK(igraph_vector_init_seq(&node_order, 0, no_of_nodes-1));
1732
IGRAPH_FINALLY(igraph_vector_destroy, &node_order);
1737
long int v1, num_neis;
1738
igraph_real_t max_count;
1739
igraph_vector_t *neis;
1743
/* Shuffle the node ordering vector */
1744
IGRAPH_CHECK(igraph_vector_shuffle(&node_order));
1745
/* In the prescribed order, loop over the vertices and reassign labels */
1746
for (i=0; i<no_of_not_fixed_nodes; i++) {
1747
v1 = VECTOR(node_order)[i];
1749
/* Count the weights corresponding to different labels */
1750
igraph_vector_null(&label_counters);
1753
neis = igraph_adjedgelist_get(&ael, v1);
1754
num_neis = igraph_vector_size(neis);
1755
for (j=0; j<num_neis; j++) {
1756
k = VECTOR(*membership)[(long)IGRAPH_OTHER(graph, VECTOR(*neis)[j], v1)];
1757
if (k == 0) continue; /* skip if it has no label yet */
1758
VECTOR(label_counters)[k] += VECTOR(*weights)[(long)VECTOR(*neis)[j]];
1759
if (max_count < VECTOR(label_counters)[k]) {
1760
max_count = VECTOR(label_counters)[k];
1761
igraph_vector_resize(&dominant_labels, 1);
1762
VECTOR(dominant_labels)[0] = k;
1763
} else if (max_count == VECTOR(label_counters)[k]) {
1764
igraph_vector_push_back(&dominant_labels, k);
1768
neis = igraph_adjlist_get(&al, v1);
1769
num_neis = igraph_vector_size(neis);
1770
for (j=0; j<num_neis; j++) {
1771
k = VECTOR(*membership)[(long)VECTOR(*neis)[j]];
1772
if (k == 0) continue; /* skip if it has no label yet */
1773
VECTOR(label_counters)[k]++;
1774
if (max_count < VECTOR(label_counters)[k]) {
1775
max_count = VECTOR(label_counters)[k];
1776
igraph_vector_resize(&dominant_labels, 1);
1777
VECTOR(dominant_labels)[0] = k;
1778
} else if (max_count == VECTOR(label_counters)[k]) {
1779
igraph_vector_push_back(&dominant_labels, k);
1784
if (igraph_vector_size(&dominant_labels) > 0) {
1785
/* Select randomly from the dominant labels */
1786
k = RNG_INTEGER(0, igraph_vector_size(&dominant_labels)-1);
1787
k = VECTOR(dominant_labels)[k];
1788
/* Check if the _current_ label of the node is also dominant */
1789
if (VECTOR(label_counters)[(long)VECTOR(*membership)[v1]]!=max_count) {
1790
/* Nope, we need at least one more iteration */
1793
VECTOR(*membership)[v1] = k;
1800
/* Shift back the membership vector, permute labels in increasing order */
1801
/* We recycle label_counters here :) */
1802
igraph_vector_fill(&label_counters, -1);
1804
for (i=0; i<no_of_nodes; i++) {
1805
k = (long)VECTOR(*membership)[i]-1;
1807
if (VECTOR(label_counters)[k] == -1) {
1808
/* We have seen this label for the first time */
1809
VECTOR(label_counters)[k] = j;
1813
k = VECTOR(label_counters)[k];
1816
/* This is an unlabeled vertex */
1818
VECTOR(*membership)[i] = k;
1822
igraph_adjedgelist_destroy(&ael);
1824
igraph_adjlist_destroy(&al);
1826
igraph_vector_destroy(&node_order);
1827
igraph_vector_destroy(&label_counters);
1828
igraph_vector_destroy(&dominant_labels);
1829
IGRAPH_FINALLY_CLEAN(4);