1
% This file is part of the Stanford GraphBase (c) Stanford University 1993
2
@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!
5
\def\title{GB\_\,MILES}
7
\prerequisites{GB\_\,GRAPH}{GB\_\,IO}
8
@* Introduction. This GraphBase module contains the |miles| subroutine,
9
which creates a family of undirected graphs based on highway mileage data
10
between North American cities. Examples of the use of this procedure can be
11
found in the demo programs {\sc MILES\_\,SPAN} and {\sc GB\_\,PLANE}.
14
extern Graph *miles();
16
@ The subroutine call {\advance\thinmuskip 0mu plus 4mu
17
|miles(n,north_weight,west_weight,pop_weight,max_distance,max_degree,seed)|}
18
constructs a graph based on the information in \.{miles.dat}.
19
Each vertex of the graph corresponds to one of the 128 cities whose
20
name is alphabetically greater than or equal to `Ravenna, Ohio' in
21
the 1949 edition of Rand McNally {\char`\&} Company's {\sl Standard Highway
22
Mileage Guide}. Edges between vertices are assigned lengths representing
23
distances between cities, in miles. In most cases these mileages come
24
from the Rand McNally Guide, but several dozen entries needed to be changed
25
drastically because they were obviously too large or too small; in such cases
26
an educated guess was made. Furthermore, about 5\% of the entries were
27
adjusted slightly in order to
28
ensure that all distances satisfy the ``triangle inequality'': The
29
graph generated by |miles| has the property that the
30
distance from |u| to~|v| plus the distance from |v| to~|w| always exceeds
31
or equals the distance from |u| to~|w|.
33
The constructed graph will have $\min(n,128)$ vertices; the default value
34
|n=128| is substituted if |n=0|. If |n| is less
35
than 128, the |n| cities will be selected by assigning a weight to
36
each city and choosing the |n| with largest weight, using random
37
numbers to break ties in case of equal weights. Weights are computed
39
$$ |north_weight|\cdot|lat|+|west_weight|\cdot|lon|+|pop_weight|\cdot|pop|, $$
40
where |lat| is latitude north of the equator, |lon| is longitude
41
west of Greenwich, and |pop| is the population in 1980. Both |lat| and |lon|
42
are given in ``centidegrees'' (hundredths of degrees). For example,
43
San Francisco has |lat=3778|, |lon=12242|, and |pop=678974|;
44
this means that, before the recent earthquake, it was located at
45
$37.78^\circ$ north latitude and $122.42^\circ$ west longitude, and that it had
46
678,974 residents in the 1980 census. The weight parameters must satisfy
47
$$ \vert|north_weight|\vert\le100{,}000,\quad
48
\vert|west_weight|\vert\le100{,}000,\quad
49
\vert|pop_weight|\vert\le100.$$
51
The constructed graph will be ``complete''---that is, it will have
52
edges between every pair of vertices---unless special values are given to
54
|max_distance| or |max_degree|. If |max_distance!=0|, edges with more
55
than |max_distance| miles will not appear; if |max_degree!=0|, each
56
vertex will be limited to at most |max_degree| of its shortest edges.
58
Vertices of the graph will appear in order of decreasing weight.
59
The |seed| parameter defines the pseudo-random numbers used wherever
60
a ``random'' choice between equal-weight vertices or equal-length edges
66
#define MAX_N 128 /* maximum and default number of cities */
68
@ Examples: The call |miles(100,0,0,1,0,0,0)| will construct a
69
complete graph on 100 vertices, representing the 100 most populous
70
cities in the database. It turns out that San Diego, with a
71
population of 875,538, is the winning city by this criterion, followed
72
by San Antonio (population 786,023), San Francisco (678,974), and
73
Washington D.C. (638,432).
75
To get |n| cities in the western United States and Canada, you can say
76
$|miles|(n,0,1,0,\ldots\,)$; to get |n| cities in the Northeast, use a
77
call like $|miles|(n,1,-1,0,\ldots\,)$. A parameter setting like
78
$(50,-500,0,1,\ldots\,)$ produces mostly Southern cities, except for a
79
few large metropolises in the north.
81
If you ask for |miles(n,a,b,c,0,1,0)|, you get an edge between cities if
82
and only if each city is the nearest to the other, among the |n| cities
83
selected. (The graph is always undirected: There is an arc from |u| to~|v|
84
if and only if there's an arc of the same length from |v| to~|u|.)
86
A random selection of cities can be obtained by calling
87
|miles(n,0,0,0,m,d,s)|. Different choices of the seed number |s| will
88
produce different selections, in a system-independent manner;
89
identical results will be obtained on all computers when identical
90
parameters have been specified. Equivalent experiments on algorithms
91
for graph manipulation can therefore be performed by researchers in
92
different parts of the world. Any value of |s| between 0 and
93
$2^{31}-1$ is permissible.
95
@ If the |miles| routine encounters a problem, it returns |NULL|
96
(\.{NULL}), after putting a code number into the external variable
97
|panic_code|. This code number identifies the type of failure.
98
Otherwise |miles| returns a pointer to the newly created graph, which
99
will be represented with the data structures explained in {\sc GB\_\,GRAPH}.
100
(The external variable |panic_code| is itself defined in {\sc GB\_\,GRAPH}.)
102
@d panic(c) @+{@+panic_code=c;@+gb_trouble_code=0;@+return NULL;@+}
104
@ The \CEE/ file \.{gb\_miles.c} has the following overall shape:
107
#include "gb_io.h" /* we will use the {\sc GB\_\,IO} routines for input */
109
/* we will use the {\sc GB\_\,FLIP} routines for random numbers */
110
#include "gb_graph.h" /* we will use the {\sc GB\_\,GRAPH} data structures */
111
#include "gb_sort.h" /* and the linksort routine */
113
@<Type declarations@>@;
114
@<Private variables@>@;
116
Graph *miles(n,north_weight,west_weight,pop_weight,
117
max_distance,max_degree,seed)
118
unsigned long n; /* number of vertices desired */
119
long north_weight; /* coefficient of latitude in the weight function */
120
long west_weight; /* coefficient of longitude in the weight function */
121
long pop_weight; /* coefficient of population in the weight function */
122
unsigned long max_distance; /* maximum distance in an edge, if nonzero */
123
unsigned long max_degree;
124
/* maximum number of edges per vertex, if nonzero */
125
long seed; /* random number seed */
126
{@+@<Local variables@>@;@#
128
@<Check that the parameters are valid@>;
129
@<Set up a graph with |n| vertices@>;
130
@<Read the data file \.{miles.dat} and compute city weights@>;
131
@<Determine the |n| cities to use in the graph@>;
132
@<Put the appropriate edges into the graph@>;
133
if (gb_trouble_code) {
134
gb_recycle(new_graph);
135
panic(alloc_fault); /* oops, we ran out of memory somewhere back there */
141
Graph *new_graph; /* the graph constructed by |miles| */
142
register long j,k; /* all-purpose indices */
144
@ @<Check that the parameters are valid@>=
145
if (n==0 || n>MAX_N) n=MAX_N;
146
if (max_degree==0 || max_degree>=n) max_degree=n-1;
147
if (north_weight>100000 || west_weight>100000 || pop_weight>100 @|
148
|| north_weight<-100000 || west_weight<-100000 || pop_weight<-100)
149
panic(bad_specs); /* the magnitude of at least one weight is too big */
151
@ @<Set up a graph with |n| vertices@>=
152
new_graph=gb_new_graph(n);
154
panic(no_room); /* out of memory before we're even started */
155
sprintf(new_graph->id,"miles(%lu,%ld,%ld,%ld,%lu,%lu,%ld)",
156
n,north_weight,west_weight,pop_weight,max_distance,max_degree,seed);
157
strcpy(new_graph->util_types,"ZZIIIIZZZZZZZZ");
159
@* Vertices. As we read in the data, we construct a list of nodes,
160
each of which contains a city's name, latitude, longitude, population,
161
and weight. These nodes conform to the specifications stipulated in
162
the {\sc GB\_\,SORT} module. After the list has been sorted by weight, the
163
top |n| entries will be the vertices of the new graph.
166
typedef struct node_struct { /* records to be sorted by |gb_linksort| */
167
long key; /* the nonnegative sort key (weight plus $2^{30}$) */
168
struct node_struct *link; /* pointer to next record */
169
long kk; /* index of city in the original database */
170
long lat,lon,pop; /* latitude, longitude, population */
171
char name[30]; /* |"City Name, ST"| */
174
@ The constants defined here are taken from the specific data in \.{miles.dat},
175
because this routine is not intended to be perfectly general.
178
static long min_lat=2672, max_lat=5042, min_lon=7180, max_lon=12312,
179
min_pop=2521, max_pop=875538; /* tight bounds on data entries */
180
static node *node_block; /* array of nodes holding city info */
181
static long *distance; /* array of distances */
183
@ The data in \.{miles.dat} appears in 128 groups of lines, one for each
184
city, in reverse alphabetical order. These groups have the general form
185
$$\vcenter{\halign{\tt#\hfil\cr
186
City Name, ST[lat,lon]pop\cr
187
d1 d2 d3 d4 d5 d6 ... (possibly several lines' worth)\cr
189
where \.{City Name} is the name of the city (possibly including spaces);
190
\.{ST} is the two-letter state code; \.{lat} and \.{lon} are latitude
191
and longitude in hundredths of degrees; \.{pop} is the population; and
192
the remaining numbers \.{d1}, \.{d2}, \dots\ are distances to the
193
previously named cities in reverse order. Each distance is separated
194
from the previous item by either a blank space or a newline character.
195
For example, the line
196
$$\hbox{\tt San Francisco, CA[3778,12242]678974}$$
197
specifies the data about San Francisco that was mentioned earlier.
198
From the first few groups
199
$$\vcenter{\halign{\tt#\hfil\cr
200
Youngstown, OH[4110,8065]115436\cr
201
Yankton, SD[4288,9739]12011\cr
203
Yakima, WA[4660,12051]49826\cr
205
Worcester, MA[4227,7180]161799\cr
208
we learn that the distance from Worcester, Massachusetts, to Yakima,
209
Washington, is 2964 miles; from Worcester to Youngstown it is 604 miles.
211
The following two-letter ``state codes'' are used for Canadian provinces:
212
$\.{BC}=\null$British Columbia,
213
$\.{MB}=\null$Manitoba,
214
$\.{ON}=\null$Ontario,
215
$\.{SK}=\null$Saskatchewan.
217
@<Read the data file \.{miles.dat} and compute city weights@>=
218
node_block=gb_typed_alloc(MAX_N,node,new_graph->aux_data);
219
distance=gb_typed_alloc(MAX_N*MAX_N,long,new_graph->aux_data);
220
if (gb_trouble_code) {
221
gb_free(new_graph->aux_data);
222
panic(no_room+1); /* no room to copy the data */
224
if (gb_open("miles.dat")!=0)
225
panic(early_data_fault);
226
/* couldn't open |"miles.dat"| using GraphBase conventions;
227
|io_errors| tells why */
228
for (k=MAX_N-1; k>=0; k--) @<Read and store data for city |k|@>;
230
panic(late_data_fault);
231
/* something's wrong with |"miles.dat"|; see |io_errors| */
233
@ The bounds we've imposed on |north_weight|, |west_weight|, and |pop_weight|
234
guarantee that the key value computed here will be between 0 and~$2^{31}$.
236
@<Read and store...@>=
241
gb_string(p->name,'[');
242
if (gb_char()!='[') panic(syntax_error); /* out of sync in \.{miles.dat} */
243
p->lat=gb_number(10);
244
if (p->lat<min_lat || p->lat>max_lat || gb_char()!=',')
245
panic(syntax_error+1); /* latitude data was clobbered */
246
p->lon=gb_number(10);
247
if (p->lon<min_lon || p->lon>max_lon || gb_char()!=']')
248
panic(syntax_error+2); /* longitude data was clobbered */
249
p->pop=gb_number(10);
250
if (p->pop<min_pop || p->pop>max_pop)
251
panic(syntax_error+3); /* population data was clobbered */
252
p->key=north_weight*(p->lat-min_lat)
253
+west_weight*(p->lon-min_lon)
254
+pop_weight*(p->pop-min_pop)+0x40000000;
255
@<Read the mileage data for city |k|@>;
259
@ @d d(j,k) *(distance+(MAX_N*j+k))
261
@<Read the mileage...@>=
263
for (j=k+1; j<MAX_N; j++) {
266
d(j,k)=d(k,j)=gb_number(10);
270
@ Once all the nodes have been set up, we can use the |gb_linksort| routine
271
to sort them into the desired order. This routine, which is part of
272
the \\{gb\_graph} module, builds 128 lists from which the desired nodes
273
are readily accessed in decreasing order of weight, using random numbers
276
We set the population to zero in every city that isn't chosen. Then
277
that city will be excluded when edges are examined later.
279
@<Determine the |n| cities to use in the graph@>=
280
{@+register node *p; /* the current node being considered */
281
register Vertex *v=new_graph->vertices; /* the first unfilled vertex */
282
gb_linksort(node_block+MAX_N-1);
283
for (j=127; j>=0; j--)
284
for (p=(node*)gb_sorted[j]; p; p=p->link) {
285
if (v<new_graph->vertices+n) @<Add city |p->kk| to the graph@>@;
286
else p->pop=0; /* this city is not being used */
290
@ Utility fields |x| and |y| for each vertex are set to coordinates that
291
can be used in geometric computations; these coordinates are obtained by
292
simple linear transformations of latitude and longitude (not by any
293
kind of sophisticated polyconic projection). We will have
294
$$0\le x\le5132, \qquad 0\le y\le 3555.$$
295
Utility field~|z| is set to the city's index number (0 to 127) in the
296
original database. Utility field~|w| is set to the city's population.
298
The coordinates computed here are compatible with those in the \TEX/ file
299
\.{cities.texmap}. Users might want to incorporate edited copies of that file
300
into documents that display results obtained with |miles| graphs.
308
@<Add city |p->kk| to the graph@>=
310
v->x_coord=max_lon-p->lon; /* |x| coordinate is complement of longitude */
311
v->y_coord=p->lat-min_lat;
312
v->y_coord+=(v->y_coord)>>1; /* |y| coordinate is 1.5 times latitude */
315
v->name=gb_save_string(p->name);
320
#define x_coord @t\quad@> x.I
321
/* utility field definitions for the header file */
322
#define y_coord @t\quad@> y.I
323
#define index_no @t\quad@> z.I
324
#define people @t\quad@> w.I
326
@* Arcs. We make the distance negative in the matrix entry for an arc
327
that is not to be included. Nothing needs to be done in this regard
328
unless the user has specified a maximum degree or a maximum edge length.
330
@<Put the approp...@>=
331
if (max_distance>0 || max_degree>0)
332
@<Prune unwanted edges by negating their distances@>;
333
{@+register Vertex *u,*v;
334
for (u=new_graph->vertices;u<new_graph->vertices+n;u++) {
336
for (v=u+1;v<new_graph->vertices+n;v++) {
338
if (d(j,k)>0 && d(k,j)>0)
339
gb_new_edge(u,v,d(j,k));
346
if (max_degree==0) max_degree=MAX_N;
347
if (max_distance==0) max_distance=30000;
348
for (p=node_block; p<node_block+MAX_N; p++)
349
if (p->pop) { /* this city not deleted */
351
@<Blank out all undesired edges from city |k|@>;
355
@ Here we reuse the key fields of the nodes, storing complementary distances
356
there instead of weights. We also let the sorting routine change the
357
link fields. The other fields, however---especially |pop|---remain
358
unchanged. Yes, the author knows this is a wee bit tricky, but why~not?
362
register node*s=NULL; /* list of nodes containing edges from city |k| */
363
for (q=node_block; q<node_block+MAX_N; q++)
364
if (q->pop && q!=p) { /* another city not deleted */
365
j=d(k,q->kk); /* distance from |p| to |q| */
369
q->key=max_distance-j;
375
/* now all the surviving edges from |p| are in the list |gb_sorted[0]| */
376
j=0; /* |j| counts how many edges have been accepted */
377
for (q=(node*)gb_sorted[0]; q; q=q->link)
379
d(k,q->kk)=-d(k,q->kk);
382
@ Random access to the distance matrix is provided to users via
383
the external function |miles_distance|. Caution: This function can be
384
used only with the graph most recently made by |miles|, and only when
385
the graph's |aux_data| has not been recycled, and only when the
386
|z| utility fields have not been used for another purpose.
388
The result might be negative when an edge has been suppressed. Moreover,
389
we can in fact have |miles_distance(u,v)<0| when |miles_distance(v,u)>0|,
390
if the distance in question was suppressed by the |max_degree| constraint
391
on~|u| but not on~|v|.
393
@p long miles_distance(u,v)
396
return d(u->index_no,v->index_no);
400
extern long miles_distance();
402
@* Index. As usual, we close with an index that
403
shows where the identifiers of \\{gb\_miles} are defined and used.