4
<FONT color="green">001</FONT> // Copyright 2012, 2013 Brad Block, Pawjaw, LLC. (an Ohio Limited Liability Company)<a name="line.1"></a>
5
<FONT color="green">002</FONT> // <a name="line.2"></a>
6
<FONT color="green">003</FONT> // This file is part of JFPPR.<a name="line.3"></a>
7
<FONT color="green">004</FONT> // <a name="line.4"></a>
8
<FONT color="green">005</FONT> // JFPPR is free software: you can redistribute it and/or modify<a name="line.5"></a>
9
<FONT color="green">006</FONT> // it under the terms of the GNU General Public License as published by<a name="line.6"></a>
10
<FONT color="green">007</FONT> // the Free Software Foundation, either version 3 of the License, or<a name="line.7"></a>
11
<FONT color="green">008</FONT> // (at your option) any later version.<a name="line.8"></a>
12
<FONT color="green">009</FONT> // <a name="line.9"></a>
13
<FONT color="green">010</FONT> // JFPPR is distributed in the hope that it will be useful,<a name="line.10"></a>
14
<FONT color="green">011</FONT> // but WITHOUT ANY WARRANTY; without even the implied warranty of<a name="line.11"></a>
15
<FONT color="green">012</FONT> // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the<a name="line.12"></a>
16
<FONT color="green">013</FONT> // GNU General Public License for more details.<a name="line.13"></a>
17
<FONT color="green">014</FONT> // <a name="line.14"></a>
18
<FONT color="green">015</FONT> // You should have received a copy of the GNU General Public License<a name="line.15"></a>
19
<FONT color="green">016</FONT> // along with JFPPR. If not, see <http://www.gnu.org/licenses/>.<a name="line.16"></a>
20
<FONT color="green">017</FONT> <a name="line.17"></a>
21
<FONT color="green">018</FONT> package com.pawjaw.graph.fppr;<a name="line.18"></a>
22
<FONT color="green">019</FONT> <a name="line.19"></a>
23
<FONT color="green">020</FONT> import java.util.Arrays;<a name="line.20"></a>
24
<FONT color="green">021</FONT> import java.util.List;<a name="line.21"></a>
25
<FONT color="green">022</FONT> import java.util.Random;<a name="line.22"></a>
26
<FONT color="green">023</FONT> <a name="line.23"></a>
27
<FONT color="green">024</FONT> /**<a name="line.24"></a>
28
<FONT color="green">025</FONT> * This is a structure for representing a graph capable of performing Fully<a name="line.25"></a>
29
<FONT color="green">026</FONT> * Personalized PageRank efficiently through random walk sampling.<a name="line.26"></a>
30
<FONT color="green">027</FONT> * <p>This structure can be used as follows:<a name="line.27"></a>
31
<FONT color="green">028</FONT> * <ol><li>A Graph instance is instantiated with the number of {@link Vertex}<a name="line.28"></a>
32
<FONT color="green">029</FONT> * elements in the graph.</li><li>Following this, all outgoing edges are<a name="line.29"></a>
33
<FONT color="green">030</FONT> * specified with {@link Vertex#addOutgoingEdge(Vertex, float)}.</li><li>After<a name="line.30"></a>
34
<FONT color="green">031</FONT> * the edges are set, random walks are simulated on the graph using the<a name="line.31"></a>
35
<FONT color="green">032</FONT> * {@link #walk(int)} method.</li><li>Finally, for Vertex elements for which<a name="line.32"></a>
36
<FONT color="green">033</FONT> * a Personalized PageRank is desired, the {@link #rank(int,int,List)}<a name="line.33"></a>
37
<FONT color="green">034</FONT> * is used.</li><a name="line.34"></a>
38
<FONT color="green">035</FONT> * </ol><a name="line.35"></a>
39
<FONT color="green">036</FONT> */<a name="line.36"></a>
40
<FONT color="green">037</FONT> public final class Graph {<a name="line.37"></a>
41
<FONT color="green">038</FONT> private Random r = new Random();<a name="line.38"></a>
42
<FONT color="green">039</FONT> private Vertex[] vertices, sorted_vertices;<a name="line.39"></a>
43
<FONT color="green">040</FONT> <a name="line.40"></a>
44
<FONT color="green">041</FONT> /**<a name="line.41"></a>
45
<FONT color="green">042</FONT> * The probability of ending a random walk segment and either resetting<a name="line.42"></a>
46
<FONT color="green">043</FONT> * to a source vertex or jumping to another random vertex.<a name="line.43"></a>
47
<FONT color="green">044</FONT> */<a name="line.44"></a>
48
<FONT color="green">045</FONT> public static final float RESET_PROBABILITY = 0.2f;<a name="line.45"></a>
49
<FONT color="green">046</FONT> <a name="line.46"></a>
50
<FONT color="green">047</FONT> /**<a name="line.47"></a>
51
<FONT color="green">048</FONT> * @param vertex_count number of {@link Vertex} elements in the graph<a name="line.48"></a>
52
<FONT color="green">049</FONT> */<a name="line.49"></a>
53
<FONT color="green">050</FONT> public Graph(int vertex_count) {<a name="line.50"></a>
54
<FONT color="green">051</FONT> int i;<a name="line.51"></a>
55
<FONT color="green">052</FONT> vertices = new Vertex[vertex_count];<a name="line.52"></a>
56
<FONT color="green">053</FONT> sorted_vertices = new Vertex[vertex_count];<a name="line.53"></a>
57
<FONT color="green">054</FONT> for(i = 0;i < vertex_count;i++)<a name="line.54"></a>
58
<FONT color="green">055</FONT> sorted_vertices[i] = vertices[i] = new Vertex(i);<a name="line.55"></a>
59
<FONT color="green">056</FONT> }<a name="line.56"></a>
60
<FONT color="green">057</FONT> <a name="line.57"></a>
61
<FONT color="green">058</FONT> /**<a name="line.58"></a>
62
<FONT color="green">059</FONT> * @param vertex_id the id of the vertex to retrieve. Starting at zero<a name="line.59"></a>
63
<FONT color="green">060</FONT> * and going to the number of {@link Vertex} elements specified in the<a name="line.60"></a>
64
<FONT color="green">061</FONT> * constructor (exclusive).<a name="line.61"></a>
65
<FONT color="green">062</FONT> * @return the {@link Vertex} element corresponding to the specified vertex<a name="line.62"></a>
66
<FONT color="green">063</FONT> * id.<a name="line.63"></a>
67
<FONT color="green">064</FONT> */<a name="line.64"></a>
68
<FONT color="green">065</FONT> public Vertex vertex(int vertex_id) {<a name="line.65"></a>
69
<FONT color="green">066</FONT> return vertices[vertex_id];<a name="line.66"></a>
70
<FONT color="green">067</FONT> }<a name="line.67"></a>
71
<FONT color="green">068</FONT> <a name="line.68"></a>
72
<FONT color="green">069</FONT> /**<a name="line.69"></a>
73
<FONT color="green">070</FONT> * @return the number of {@link Vertex} elements specified in the<a name="line.70"></a>
74
<FONT color="green">071</FONT> * constructor.<a name="line.71"></a>
75
<FONT color="green">072</FONT> */<a name="line.72"></a>
76
<FONT color="green">073</FONT> public int vertices() {<a name="line.73"></a>
77
<FONT color="green">074</FONT> return vertices.length;<a name="line.74"></a>
78
<FONT color="green">075</FONT> }<a name="line.75"></a>
79
<FONT color="green">076</FONT> <a name="line.76"></a>
80
<FONT color="green">077</FONT> /**<a name="line.77"></a>
81
<FONT color="green">078</FONT> * Simulate random walks.<a name="line.78"></a>
82
<FONT color="green">079</FONT> *<a name="line.79"></a>
83
<FONT color="green">080</FONT> * @param walks_per_vertex the number of random walks per Vertex to take.<a name="line.80"></a>
84
<FONT color="green">081</FONT> * This number should be proportional to the logarithm of the number of<a name="line.81"></a>
85
<FONT color="green">082</FONT> * vertices. For graphs that will fit in main memory, this number will<a name="line.82"></a>
86
<FONT color="green">083</FONT> * probably not need exceed 10 or 15. A walk is a successive number of<a name="line.83"></a>
87
<FONT color="green">084</FONT> * weighted random transitions following outgoing edges until a reset<a name="line.84"></a>
88
<FONT color="green">085</FONT> * is encountered.<a name="line.85"></a>
89
<FONT color="green">086</FONT> *<a name="line.86"></a>
90
<FONT color="green">087</FONT> * @see #RESET_PROBABILITY<a name="line.87"></a>
91
<FONT color="green">088</FONT> */<a name="line.88"></a>
92
<FONT color="green">089</FONT> public void walk(int walks_per_vertex) {<a name="line.89"></a>
93
<FONT color="green">090</FONT> int i, I = vertices.length, w;<a name="line.90"></a>
94
<FONT color="green">091</FONT> for(w = 0;w < walks_per_vertex;w++)<a name="line.91"></a>
95
<FONT color="green">092</FONT> for(i = 0;i < I;i++)<a name="line.92"></a>
96
<FONT color="green">093</FONT> walk(vertices[i]);<a name="line.93"></a>
97
<FONT color="green">094</FONT> }<a name="line.94"></a>
98
<FONT color="green">095</FONT> <a name="line.95"></a>
99
<FONT color="green">096</FONT> private void walk(Vertex start) {<a name="line.96"></a>
100
<FONT color="green">097</FONT> Step last_step = new Step(start);<a name="line.97"></a>
101
<FONT color="green">098</FONT> while(r.nextFloat() > RESET_PROBABILITY)<a name="line.98"></a>
102
<FONT color="green">099</FONT> last_step = last_step.next_step = new Step(last_step.vertex.sampleNeighbor());<a name="line.99"></a>
103
<FONT color="green">100</FONT> }<a name="line.100"></a>
104
<FONT color="green">101</FONT> <a name="line.101"></a>
105
<FONT color="green">102</FONT> /**<a name="line.102"></a>
106
<FONT color="green">103</FONT> * Obtain a descending ranking of nearest neighbors according to a<a name="line.103"></a>
107
<FONT color="green">104</FONT> * Personalized PageRank of this Graph.<a name="line.104"></a>
108
<FONT color="green">105</FONT> *<a name="line.105"></a>
109
<FONT color="green">106</FONT> * @param start_vertex_id the id of the source {@link Vertex} for which to<a name="line.106"></a>
110
<FONT color="green">107</FONT> * perform the Personalized PageRank and obtain neighbors. This number goes<a name="line.107"></a>
111
<FONT color="green">108</FONT> * from zero to the number of vertices specified in the constructor<a name="line.108"></a>
112
<FONT color="green">109</FONT> * (exclusive)<a name="line.109"></a>
113
<FONT color="green">110</FONT> * @param walk_length the total number of walk steps taken. Resets to the<a name="line.110"></a>
114
<FONT color="green">111</FONT> * {@link Vertex} specified by the start_vertex_id are also counted as a<a name="line.111"></a>
115
<FONT color="green">112</FONT> * walk step. A reasonable number for this is on the order of 10,000.<a name="line.112"></a>
116
<FONT color="green">113</FONT> * @param result a user-specified destination for storing the ranked<a name="line.113"></a>
117
<FONT color="green">114</FONT> * {@link Vertex} ids such that the nearest neighbor will be stored first.<a name="line.114"></a>
118
<FONT color="green">115</FONT> *<a name="line.115"></a>
119
<FONT color="green">116</FONT> * @return the user-specified destination for storing the ranked<a name="line.116"></a>
120
<FONT color="green">117</FONT> * {@link Vertex} ids (returned for convenience).<a name="line.117"></a>
121
<FONT color="green">118</FONT> */<a name="line.118"></a>
122
<FONT color="green">119</FONT> public List<Integer> rank(int start_vertex_id, int walk_length, List<Integer> result) {<a name="line.119"></a>
123
<FONT color="green">120</FONT> int i, I = vertices.length, steps = 1;<a name="line.120"></a>
124
<FONT color="green">121</FONT> Step step;<a name="line.121"></a>
125
<FONT color="green">122</FONT> Vertex last_vertex = vertices[start_vertex_id];<a name="line.122"></a>
126
<FONT color="green">123</FONT> result.clear();<a name="line.123"></a>
127
<FONT color="green">124</FONT> for(i = 0;i < I;i++)<a name="line.124"></a>
128
<FONT color="green">125</FONT> vertices[i].reset();<a name="line.125"></a>
129
<FONT color="green">126</FONT> last_vertex.visit();<a name="line.126"></a>
130
<FONT color="green">127</FONT> while(steps++ < walk_length)<a name="line.127"></a>
131
<FONT color="green">128</FONT> if(r.nextFloat() <= RESET_PROBABILITY)<a name="line.128"></a>
132
<FONT color="green">129</FONT> (last_vertex = vertices[start_vertex_id]).visit();<a name="line.129"></a>
133
<FONT color="green">130</FONT> else {<a name="line.130"></a>
134
<FONT color="green">131</FONT> if((step = last_vertex.nextSegmentStart()) != null) {<a name="line.131"></a>
135
<FONT color="green">132</FONT> while((step = step.next_step) != null)<a name="line.132"></a>
136
<FONT color="green">133</FONT> step.vertex.visit();<a name="line.133"></a>
137
<FONT color="green">134</FONT> (last_vertex = vertices[start_vertex_id]).visit();<a name="line.134"></a>
138
<FONT color="green">135</FONT> } else<a name="line.135"></a>
139
<FONT color="green">136</FONT> (last_vertex = last_vertex.sampleNeighbor()).visit();<a name="line.136"></a>
140
<FONT color="green">137</FONT> }<a name="line.137"></a>
141
<FONT color="green">138</FONT> Arrays.sort(sorted_vertices);<a name="line.138"></a>
142
<FONT color="green">139</FONT> for(Vertex v : sorted_vertices)<a name="line.139"></a>
143
<FONT color="green">140</FONT> if(v.visits() == 0)<a name="line.140"></a>
144
<FONT color="green">141</FONT> break;<a name="line.141"></a>
145
<FONT color="green">142</FONT> else if(v.id != start_vertex_id)<a name="line.142"></a>
146
<FONT color="green">143</FONT> result.add(v.id);<a name="line.143"></a>
147
<FONT color="green">144</FONT> return result;<a name="line.144"></a>
148
<FONT color="green">145</FONT> }<a name="line.145"></a>
149
<FONT color="green">146</FONT> <a name="line.146"></a>
150
<FONT color="green">147</FONT> protected static class Step {<a name="line.147"></a>
151
<FONT color="green">148</FONT> Vertex vertex;<a name="line.148"></a>
152
<FONT color="green">149</FONT> Step next_step = null;<a name="line.149"></a>
153
<FONT color="green">150</FONT> <a name="line.150"></a>
154
<FONT color="green">151</FONT> Step(Vertex v) {<a name="line.151"></a>
155
<FONT color="green">152</FONT> vertex = v;<a name="line.152"></a>
156
<FONT color="green">153</FONT> v.addSegment(this);<a name="line.153"></a>
157
<FONT color="green">154</FONT> }<a name="line.154"></a>
158
<FONT color="green">155</FONT> }<a name="line.155"></a>
159
<FONT color="green">156</FONT> }<a name="line.156"></a>