~ubuntu-branches/ubuntu/wily/python-networkx/wily

« back to all changes in this revision

Viewing changes to networkx/centrality.py

  • Committer: Bazaar Package Importer
  • Author(s): Cyril Brulebois
  • Date: 2008-03-02 01:06:32 UTC
  • mfrom: (1.2.1 upstream) (3.1.3 hardy)
  • Revision ID: james.westby@ubuntu.com-20080302010632-1lp6qe1orf59jl8b
* debian/control:
   + Replace python-setuptools with python-pkg-resources in the
     “Recommends:” since pkg_resources is now available in this
     separate package, thanks Matthias Klose (Closes: #468721).
* debian/copyright:
   + Use “© $years $name” instead of invalid “$name, $years” and
     “(C) $years, $name”, thanks to lintian.

Show diffs side-by-side

added added

removed removed

Lines of Context:
2
2
Centrality measures.
3
3
 
4
4
"""
5
 
#    Copyright (C) 2004,2005 by 
 
5
#    Copyright (C) 2004-2007 by 
6
6
#    Aric Hagberg <hagberg@lanl.gov>
7
7
#    Dan Schult <dschult@colgate.edu>
8
8
#    Pieter Swart <swart@lanl.gov>
9
9
#    Distributed under the terms of the GNU Lesser General Public License
10
10
#    http://www.gnu.org/copyleft/lesser.html
11
 
__author__ = """Aric Hagberg (hagberg@lanl.gov)\nPieter Swart (swart@lanl.gov)"""
12
 
__date__ = "$Date: 2005-07-06 08:02:28 -0600 (Wed, 06 Jul 2005) $"
13
 
__credits__ = """"""
14
 
__revision__ = "$Revision: 1064 $"
15
 
 
16
 
def betweenness_centrality(G,v=False,cutoff=False,normalized=True):
17
 
    """Betweenness centrality for nodes.
 
11
__author__ = """Aric Hagberg (hagberg@lanl.gov)\nPieter Swart (swart@lanl.gov)\nSasha Gutfraind (ag362@cornell.edu)"""
 
12
 
 
13
from networkx.path import dijkstra_predecessor_and_distance, predecessor
 
14
 
 
15
 
 
16
def brandes_betweenness_centrality(G,normalized=True,weighted_edges=False):
    """Compute the betweenness centrality for nodes in G:
    the fraction of the number of shortest paths that pass through each node.

    The keyword normalized (default=True) specifies whether the
    betweenness values are normalized by b=b/(n-1)(n-2) where
    n is the number of nodes in G.

    The keyword weighted_edges (default=False) specifies whether
    to use edge weights (otherwise weights are all assumed equal).

    Returns a dictionary of betweenness values keyed by node.

    The algorithm is from
    Ulrik Brandes,
    A Faster Algorithm for Betweenness Centrality.
    Journal of Mathematical Sociology 25(2):163-177, 2001.
    http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
    """
    import heapq
    from collections import deque
    betweenness=dict.fromkeys(G,0.0) # b[v]=0 for v in G
    for s in G:
        S=[]            # nodes in order of non-decreasing distance from s
        P={}            # P[w]: predecessors of w on shortest s-w paths
        for v in G:
            P[v]=[]
        sigma=dict.fromkeys(G,0)    # sigma[w]: number of shortest s-w paths
        D={}                        # D[w]: distance from s to w
        sigma[s]=1
        if not weighted_edges:  # use BFS
            D[s]=0
            # deque gives O(1) popleft; list.pop(0) is O(n) per pop
            Q=deque([s])
            while Q:   # use BFS to find shortest paths
                v=Q.popleft()
                S.append(v)
                for w in G.neighbors(v):
                    if w not in D:
                        Q.append(w)
                        D[w]=D[v]+1
                    if D[w]==D[v]+1:   # this is a shortest path, count paths
                        sigma[w]=sigma[w]+sigma[v]
                        P[w].append(v) # predecessors
        else:  # use Dijkstra's algorithm for shortest paths,
               # modified from Eppstein
            push=heapq.heappush
            pop=heapq.heappop
            seen = {s:0}
            Q=[]   # use Q as heap with (distance,pred,node id) tuples
            push(Q,(0,s,s))
            while Q:
                (dist,pred,v)=pop(Q)
                if v in D:
                    continue # already searched this node.
                sigma[v]=sigma[v]+sigma[pred] # count paths
                S.append(v)
                D[v] = seen[v]
                for w in G.neighbors(v):
                    vw_dist = D[v] + G.get_edge(v,w)
                    if w not in D and (w not in seen or vw_dist < seen[w]):
                        seen[w] = vw_dist
                        push(Q,(vw_dist,v,w))
                        P[w]=[v]
                    elif vw_dist==seen[w]:  # handle equal paths
                        sigma[w]=sigma[w]+sigma[v]
                        P[w].append(v)

        # accumulate dependencies in reverse order of distance (Brandes)
        delta=dict.fromkeys(G,0)
        while S:
            w=S.pop()
            for v in P[w]:
                delta[v]=delta[v]+\
                          (float(sigma[v])/float(sigma[w]))*(1.0+delta[w])
            if w != s:
                betweenness[w]=betweenness[w]+delta[w]

    # normalize
    if normalized:
        order=len(betweenness)
        if order <=2:
            return betweenness # no normalization b=0 for all nodes
        scale=1.0/((order-1)*(order-2))
        for v in betweenness:
            betweenness[v] *= scale

    return betweenness
 
103
 
 
104
 
 
105
 
 
106
def newman_betweenness_centrality(G,v=None,cutoff=None,
                           normalized=True,
                           weighted_edges=False):
    """
    "Load" centrality for nodes.

    This actually computes 'load' and not betweenness.
    See https://networkx.lanl.gov/ticket/103

    The fraction of the number of shortest paths that go
    through each node counted according to the algorithm in

    Scientific collaboration networks: II.
    Shortest paths, weighted networks, and centrality,
    M. E. J. Newman, Phys. Rev. E 64, 016132 (2001).

    Returns a dictionary of betweenness values keyed by node,
    normalized to lie in [0,1]; with v given, returns only that
    node's value.

    If normalized=False the resulting betweenness is not normalized.

    If weighted_edges is True then use Dijkstra for finding shortest paths.
    """
    if v is not None:
        # Single-node query: sum this node's share over every source.
        total = 0.0
        for src in G:
            total += _node_betweenness(G, src,
                                       cutoff=cutoff,
                                       normalized=normalized,
                                       weighted_edges=weighted_edges)[v]
        return total

    # All-nodes query: accumulate unnormalized per-source loads, then scale.
    load = dict.fromkeys(G, 0.0)
    for src in load:
        partial = _node_betweenness(G, src,
                                    cutoff=cutoff,
                                    normalized=False,
                                    weighted_edges=weighted_edges)
        for node in partial:
            load[node] += partial[node]
    if normalized:
        n = len(load)
        if n > 2:  # for n<=2 every value is 0; nothing to scale
            scale = 1.0 / ((n - 1) * (n - 2))
            for node in load:
                load[node] *= scale
    return load
52
157
 
53
 
def _node_betweenness(G,source,cutoff=False,normalized=True):
54
 
    """See betweenness_centrality for what you probably want.
55
 
 
56
 
    This is betweenness for a single node.
57
 
    The fraction of number of shortests paths from source that go
58
 
    through each node.
59
 
 
60
 
    To get the betweenness for a node you need to do all-pairs
61
 
    shortest paths.  
 
158
def _node_betweenness(G,source,cutoff=False,normalized=True,weighted_edges=False):
 
159
    """Node betweenness helper:
 
160
    see betweenness_centrality for what you probably want.
 
161
 
 
162
    This actually computes "load" and not betweenness.
 
163
    See https://networkx.lanl.gov/ticket/103
 
164
 
 
165
    This calculates the load of each node for paths from a single source.
 
166
    (The fraction of number of shortests paths from source that go
 
167
    through each node.)
 
168
 
 
169
    To get the load for a node you need to do all-pairs shortest paths.
 
170
 
 
171
    If weighted_edges is True then use Dijkstra for finding shortest paths.
 
172
    In this case a cutoff is not implemented and so is ignored.
62
173
 
63
174
    """
 
175
 
64
176
    # get the predecessor and path length data
65
 
    (pred,length)=_fast_predecessor(G,source,cutoff) 
 
177
    if weighted_edges:
 
178
        (pred,length)=dijkstra_predecessor_and_distance(G,source) 
 
179
    else:
 
180
        (pred,length)=predecessor(G,source,cutoff=cutoff,return_seen=True) 
66
181
 
67
182
    # order the nodes by path length
68
183
    onodes = [ (l,vert) for (vert,l) in length.items() ]
69
184
    onodes.sort()
70
 
    onodes[:] = [vert for (l,vert) in onodes]
 
185
    onodes[:] = [vert for (l,vert) in onodes if l>0]
71
186
    
72
187
    # intialize betweenness
73
188
    between={}.fromkeys(length,1.0)
74
 
    # work through all paths
75
 
    # remove source
 
189
 
76
190
    while onodes:           
77
191
        v=onodes.pop()
78
 
        if (pred.has_key(v)):
 
192
        if v in pred:
79
193
            num_paths=len(pred[v])   # Discount betweenness if more than 
80
194
            for x in pred[v]:        # one shortest path.
81
 
                if x==source:        # stop if hit source because all remaining v  
82
 
                    break            #  also have pred[v]==[source]
83
 
                between[x]+=between[v]/num_paths
 
195
                if x==source:   # stop if hit source because all remaining v  
 
196
                    break       #  also have pred[v]==[source]
 
197
                between[x]+=between[v]/float(num_paths)
 
198
    #  remove source
84
199
    for v in between:
85
200
        between[v]-=1
86
201
    # rescale to be between 0 and 1                
92
207
                between[v] *= scale
93
208
    return between
94
209
 
95
 
def edge_betweenness(G,nodes=False,cutoff=False):
 
210
# Public names: Brandes' algorithm is the preferred betweenness
# implementation; the Newman variant actually computes "load".
betweenness_centrality = brandes_betweenness_centrality

load_centrality = newman_betweenness_centrality
 
213
 
 
214
def betweenness_centrality_source(G,normalized=True,
                                  weighted_edges=False,
                                  sources=None):
    """
    Enhanced version of the method in centrality module that allows
    specifying a list of sources (subgraph).

    weighted_edges:: consider edge weights by running Dijkstra's
    algorithm (no effect on unweighted graphs).

    sources:: list of nodes to consider as subgraph

    See Sec. 4 in
    Ulrik Brandes,
    A Faster Algorithm for Betweenness Centrality.
    Journal of Mathematical Sociology 25(2):163-177, 2001.
    http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf

    This algorithm does not count the endpoints, i.e.
    a path from s to t does not contribute to the betweenness of s and t.
    """
    if sources is None:   # 'is None', not '== None': avoids __eq__ surprises
        sources = G.nodes()

    betweenness=dict.fromkeys(G,0.0)
    for s in sources:
        S,P,D,sigma = _brandes_betweenness_helper(G,s,weighted_edges)

        delta=dict.fromkeys(G,0.0) # unnormalized betweenness
        while S:
            # accumulate dependencies in reverse order of distance from s
            w=S.pop()
            for v in P[w]:
                delta[v] = delta[v] + \
                           (float(sigma[v])/float(sigma[w]))*(1.0+delta[w])
            if w == s:
                continue  # endpoints are not counted
            betweenness[w] = betweenness[w] + delta[w]

    # normalize to size of entire graph; also guard order<=2, which would
    # otherwise divide by zero in the scale computation
    if normalized and G.number_of_edges() > 1:
        order=len(betweenness)
        if order > 2:
            scale=1.0/((order-1)*(order-2))
            for v in betweenness:
                betweenness[v] *= scale

    return betweenness
 
262
 
 
263
 
 
264
def edge_betweenness(G,normalized=True,weighted_edges=False,sources=None):
    """
    Edge betweenness centrality.

    weighted_edges:: consider edge weights by running Dijkstra's
    algorithm (no effect on unweighted graphs).

    sources:: list of nodes to consider as subgraph

    Returns a dictionary of betweenness values keyed by edge (2-tuple).
    """
    if sources is None:   # 'is None', not '== None'
        sources = G.nodes()

    if not weighted_edges:
        betweenness=dict.fromkeys(G.edges(),0.0)
    else:
        # weighted edges are (u,v,w) triples; key the dict on (u,v) only
        betweenness=dict.fromkeys([e[0:2] for e in G.edges()],0.0)

    if G.is_directed():
        for s in sources:
            S, P, D, sigma =_brandes_betweenness_helper(G,s,weighted_edges)
            delta=dict.fromkeys(G,0.0)
            while S:
                w=S.pop()
                for v in P[w]:
                    edgeFlow = (float(sigma[v])/float(sigma[w]))*(1.0+delta[w])
                    edge = (v,w)
                    delta[v]          += edgeFlow
                    betweenness[edge] += edgeFlow
    else:
        for s in sources:
            S, P, D, sigma =_brandes_betweenness_helper(G,s,weighted_edges)
            delta=dict.fromkeys(G,0.0)
            while S:
                w=S.pop()
                for v in P[w]:
                    edgeFlow = (float(sigma[v])/float(sigma[w]))*(1.0+delta[w])
                    # undirected: match the orientation the edge was stored with
                    # ('in' replaces dict.has_key, removed in Python 3)
                    if (v,w) in betweenness:
                        edge = (v,w)
                    else:
                        edge = (w,v)
                    delta[v]          += edgeFlow
                    betweenness[edge] += edgeFlow

    if normalized and G.number_of_edges() > 1:
        # normalize to size of entire graph (beware of disconnected components)
        order=len(betweenness)
        if order > 2:  # guard division by zero for tiny edge sets
            scale=1.0/((order-1)*(order-2))
            for edge in betweenness:
                betweenness[edge] *= scale

    return betweenness
 
318
 
 
319
 
 
320
def _brandes_betweenness_helper(G,root,weighted_edges):
 
321
    """
 
322
    Helper for betweenness centrality and edge betweenness centrality.
 
323
 
 
324
    Runs single-source shortest path from root node.
 
325
 
 
326
    weighted_edges:: consider edge weights 
 
327
 
 
328
    Finds::
 
329
 
 
330
    S=[] list of nodes reached during traversal
 
331
    P={} predecessors, keyed by child node
 
332
    D={} distances
 
333
    sigma={} indexed by node, is the number of paths to root
 
334
    going through the node
 
335
    """
 
336
    import heapq
 
337
    S=[]
 
338
    P={}
 
339
    for v in G:
 
340
        P[v]=[]
 
341
    sigma=dict.fromkeys(G,0.0)
 
342
    D={}
 
343
    sigma[root]=1
 
344
 
 
345
    if not weighted_edges:  # use BFS
 
346
        D[root]=0
 
347
        Q=[root]
 
348
        while Q:   # use BFS to find shortest paths
 
349
            v=Q.pop(0)
 
350
            S.append(v)
 
351
            for w in G.neighbors(v): #  for w in G.adj[v]: # speed hack, exposes internals
 
352
                if w not in D:
 
353
                    Q.append(w)
 
354
                    D[w]=D[v]+1
 
355
                if D[w]==D[v]+1:   # this is a shortest path, count paths
 
356
                    sigma[w]=sigma[w]+sigma[v]
 
357
                    P[w].append(v) # predecessors
 
358
    else:  # use Dijkstra's algorithm for shortest paths,
 
359
           # modified from Eppstein
 
360
        push=heapq.heappush
 
361
        pop=heapq.heappop
 
362
        seen = {root:0}
 
363
        Q=[]   # use Q as heap with (distance,node id) tuples
 
364
        push(Q,(0,root,root))
 
365
        while Q:   
 
366
            (dist,pred,v)=pop(Q)
 
367
            if v in D:
 
368
                continue # already searched this node.
 
369
            sigma[v]=sigma[v]+sigma[pred] # count paths
 
370
            S.append(v)
 
371
            D[v] = seen[v]
 
372
#                for w in G.adj[v]: # speed hack, exposes internals
 
373
            for w in G.neighbors(v): #  for w in G.adj[v]: # speed hack, exposes internals
 
374
                vw_dist = D[v] + G.get_edge(v,w)
 
375
                if w not in D and (w not in seen or vw_dist < seen[w]):
 
376
                    seen[w] = vw_dist
 
377
                    push(Q,(vw_dist,v,w))
 
378
                    P[w]=[v]
 
379
                elif vw_dist==seen[w]:  # handle equal paths
 
380
                    sigma[w]=sigma[w]+sigma[v]
 
381
                    P[w].append(v)
 
382
    return S, P, D, sigma
 
383
 
 
384
 
 
385
 
 
386
 
 
387
def edge_load(G,nodes=False,cutoff=False):
96
388
    """
97
389
    Edge Betweenness 
98
390
 
117
409
    """
118
410
    between={}
119
411
    # get the predecessor data
120
 
    (pred,length)=_fast_predecessor(G,source,cutoff) 
 
412
    #(pred,length)=_fast_predecessor(G,source,cutoff=cutoff) 
 
413
    from networkx.path import predecessor
 
414
    (pred,length)=predecessor(G,source,cutoff=cutoff,return_seen=True) 
121
415
    # order the nodes by path length
122
416
    onodes = map(lambda k: (length[k], k), length.keys())
123
417
    onodes.sort()
124
418
    onodes[:] = [val for (key, val) in onodes]
125
 
    # intialize betweenness
126
 
    [between.setdefault((u,v),1.0) for (u,v) in G.edges(nodes)] 
127
 
    [between.setdefault((v,u),1.0) for (u,v) in G.edges(nodes)] 
128
 
#    print between
 
419
    # intialize betweenness, doesn't account for any edge weights
 
420
    for e in G.edges(nodes):
 
421
        u,v=e[0],e[1]
 
422
        between[(u,v)]=1.0
 
423
        between[(v,u)]=1.0
 
424
 
129
425
    while onodes:           # work through all paths
130
426
        v=onodes.pop()
131
427
        if (pred.has_key(v)):
138
434
                        between[(x,w)]+=between[(w,v)]/num_paths
139
435
    return between
140
436
 
141
 
 
142
 
 
143
 
def _fast_predecessor(G,source,target=False,cutoff=False):
144
 
    """
145
 
    Helper for betweenness.
146
 
 
147
 
    Returns dict of predecessors and shortest path lengths.
148
 
    Cutoff is a limit on the number of hops traversed.
149
 
    """
150
 
    level=0                  # the current level
151
 
    nextlevel=[source]       # list of nodes to check at next level
152
 
    seen={source:level}      # level (number of hops) when seen in BFS
153
 
    pred={source:[]}         # predecessor hash
154
 
    while nextlevel:
155
 
        level=level+1
156
 
        thislevel=nextlevel
157
 
        nextlevel=[]
158
 
        for v in thislevel:
159
 
            for w in G.neighbors(v):
160
 
                if (not seen.has_key(w)): 
161
 
                    pred[w]=[v]
162
 
                    seen[w]=level
163
 
                    nextlevel.append(w)
164
 
                elif (seen[w]==level):# add v to predecessor list if it 
165
 
                    pred[w].append(v) # is at the correct level
166
 
        if (cutoff and cutoff <= level):
167
 
            break
168
 
 
169
 
    if target:
170
 
        if not pred.has_key(target): return ([],-1)  # No predecessor
171
 
        return (pred[target],seen[target])
172
 
    else:
173
 
        return (pred,seen) 
174
 
 
175
 
def degree_centrality(G,v=False):
 
437
def degree_centrality(G,v=None):
176
438
    """
177
439
    Degree centrality for nodes (fraction of nodes connected to).
178
440
 
179
441
    Returns a dictionary of degree centrality values keyed by node.
180
442
 
181
 
    The degree centrality is normalized to be between 0 and 1.
 
443
    The degree centrality is normalized to the maximum possible degree
 
444
    in the graph G.
182
445
 
183
446
    """
184
447
    degree_centrality={}
187
450
    for vk in deg:
188
451
        degree_centrality[vk]=deg[vk]*s
189
452
 
190
 
    if not v:
 
453
    if v is None:
191
454
        return degree_centrality
192
455
    else:
193
456
        return degree_centrality[v]
194
457
 
195
 
def closeness_centrality(G,v=False):
 
458
def closeness_centrality(G,v=None,weighted_edges=False):
196
459
    """
197
460
    Closeness centrality for nodes (1/average distance to all nodes).
198
461
 
200
463
    The closeness centrality is normalized to be between 0 and 1.
201
464
 
202
465
    """
203
 
    from networkx.paths import single_source_shortest_path_length
204
 
 
 
466
    from networkx.path import single_source_shortest_path_length,\
 
467
         single_source_dijkstra_path_length
 
468
    if weighted_edges:
 
469
        path_length=single_source_dijkstra_path_length
 
470
    else:
 
471
        path_length=single_source_shortest_path_length
205
472
    closeness_centrality={}
206
473
 
207
474
    for n in G.nodes():
208
 
        sp=single_source_shortest_path_length(G,n)
 
475
        sp=path_length(G,n)
209
476
        if sum(sp.values()) > 0.0:                                            
210
477
            s=(len(sp)-1.0)  # normalize to number of nodes-1 in connected part
211
478
            closeness_centrality[n]=s/sum(sp.values())                     
212
479
        else:                                                                
213
480
            closeness_centrality[n]=0.0           
214
 
    if not v:
 
481
    if v is None:
215
482
        return closeness_centrality
216
483
    else:
217
484
        return closeness_centrality[v]
223
490
    return suite
224
491
 
225
492
 
 
493
 
 
494
 
226
495
if __name__ == "__main__":
227
496
    import os
228
497
    import sys