3
* Copyright (C) 1993-1994. James Darrell McCauley.
5
* Author: James Darrell McCauley darrell@mccauley-usa.com
6
* http://mccauley-usa.com/
8
* This program is free software; you can redistribute it and/or
9
* modify it under the terms of the GNU General Public License
10
* as published by the Free Software Foundation; either version 2
11
* of the License, or (at your option) any later version.
13
* This program is distributed in the hope that it will be useful,
14
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
* GNU General Public License for more details.
18
* You should have received a copy of the GNU General Public License
19
* along with this program; if not, write to the Free Software
20
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22
* Modification History:
23
* 4.2B <27 Jan 1994> fixed RAND_MAX for Solaris 2.3
24
* <13 Sep 2000> released under GPL
25
* 10/2004 Upgrade to 5.7 Radim Blazek
2
/****************************************************************
6
* AUTHOR(S): James Darrell McCauley darrell@mccauley-usa.com (1993-1994)
7
* OGR support by Martin Landa <landa.martin gmail.com> (2009)
8
* Speed-up by Jan Vandrol and Jan Ruzicka (2013)
10
* PURPOSE: Randomly partition points into test/train sets.
12
* COPYRIGHT: (C) 2001-2014 by James Darrell McCauley, and the GRASS Development Team
14
* This program is free software under the GNU General
15
* Public License (>=v2). Read the file COPYING that
16
* comes with GRASS for details.
18
****************************************************************/
27
21
#include <stdlib.h>
30
23
#include <grass/gis.h>
31
24
#include <grass/dbmi.h>
32
#include <grass/Vect.h>
25
#include <grass/vector.h>
33
26
#include <grass/glocale.h>
37
#define RAND_MAX (pow(2.0,31.0)-1)
39
#if defined(__CYGWIN__) || defined(__APPLE__) || defined(__MINGW32__)
42
return (rand() / 32767.0);
45
#define srand48(sv) (srand((unsigned)(sv)))
51
28
struct Cell_head window;
53
int main(int argc, char *argv[])
30
int main(int argc, char *argv[])
55
int line, nlines, nlinks;
56
double east, north, (*rng) (), max, myrand();
57
int i, j, n, nsites, verbose, np, *p, dcmp();
59
struct Map_info In, Out;
60
static struct line_pnts *Points;
32
int i, line, nlines, nlinks;
33
int idx, *line_idx, lines_left;
34
double (*rng)(void) = G_drand48;
35
int nsites, p, np, min_count, spill;
41
struct line_pnts *Points;
61
42
struct line_cats *Cats;
63
43
struct GModule *module;
64
struct Option *in_opt, *out_opt, *col_opt, *npart_opt;
65
struct Flag *drand48_flag, *q_flag;
44
struct Option *map_opt, *col_opt, *npart_opt, *field_opt;
70
47
struct field_info *Fi;
75
54
module = G_define_module();
76
module->keywords = _("vector, statistics");
55
G_add_keyword(_("vector"));
56
G_add_keyword(_("statistics"));
57
G_add_keyword(_("points"));
58
G_add_keyword(_("point pattern"));
77
59
module->description =
78
60
_("Randomly partition points into test/train sets.");
80
in_opt = G_define_standard_option(G_OPT_V_INPUT);
81
out_opt = G_define_standard_option(G_OPT_V_OUTPUT);
62
map_opt = G_define_standard_option(G_OPT_V_MAP);
64
field_opt = G_define_standard_option(G_OPT_V_FIELD);
83
66
npart_opt = G_define_option();
67
npart_opt->key = "npartitions";
85
68
npart_opt->type = TYPE_INTEGER;
86
69
npart_opt->required = YES;
87
npart_opt->description = _("Number of partitions");
88
npart_opt->options = "1-32767";
70
npart_opt->label = _("Number of partitions");
71
npart_opt->description = _("Must be > 1");
90
col_opt = G_define_option();
91
col_opt->key = "column";
92
col_opt->type = TYPE_STRING;
93
col_opt->required = YES;
94
col_opt->multiple = NO;
73
col_opt = G_define_standard_option(G_OPT_DB_COLUMN);
95
74
col_opt->answer = "part";
96
75
col_opt->description =
97
76
_("Name for new column to which partition number is written");
99
drand48_flag = G_define_flag();
100
drand48_flag->key = 'd';
101
drand48_flag->description = _("Use drand48()");
103
/* please remove in GRASS 7 */
104
q_flag = G_define_flag();
106
q_flag->description = _("Quiet");
108
78
G_gisinit(argv[0]);
110
80
if (G_parser(argc, argv))
111
81
exit(EXIT_FAILURE);
113
verbose = (!q_flag->answer);
114
83
np = atoi(npart_opt->answer);
85
G_fatal_error(_("'%s' must be > 1"), npart_opt->key);
116
if (drand48_flag->answer) {
119
srand48((long)getpid());
87
/* FIXME - allow seed to be specified for repeatability */
127
90
Points = Vect_new_line_struct();
128
91
Cats = Vect_new_cats_struct();
130
93
/* open input vector */
131
if ((mapset = G_find_vector2(in_opt->answer, "")) == NULL) {
132
G_fatal_error(_("Vector map <%s> not found"), in_opt->answer);
135
94
Vect_set_open_level(2);
136
if (Vect_open_old(&In, in_opt->answer, mapset) < 2) {
95
if (Vect_open_old2(&Map, map_opt->answer, G_mapset(), field_opt->answer) < 2) {
137
96
G_fatal_error(_("Unable to open vector map <%s> at topological level %d"),
141
Vect_get_map_box(&In, &box);
100
layer = Vect_get_field_number(&Map, field_opt->answer);
102
G_fatal_error(_("Layer number must be positive"));
143
nsites = Vect_get_num_primitives(&In, GV_POINT);
104
nsites = Vect_get_num_primitives(&Map, GV_POINT);
145
106
if (nsites < np) {
146
G_fatal_error("Sites found: %i\nMore partitions than sites.", nsites);
107
G_fatal_error(_("More partitions than points (%d)"), nsites);
149
Vect_set_fatal_error(GV_FATAL_PRINT);
150
Vect_open_new(&Out, out_opt->answer, Vect_is_3d(&In));
152
Vect_copy_head_data(&In, &Out);
153
Vect_hist_copy(&In, &Out);
154
Vect_hist_command(&Out);
156
/* Copy vector lines */
157
Vect_copy_map_lines(&In, &Out);
160
if (Vect_copy_tables(&In, &Out, 0))
161
G_warning(_("Failed to copy attribute table to output map"));
164
111
db_init_string(&sql);
166
113
/* Check if there is a database for output */
167
nlinks = Vect_get_num_dblinks(&Out);
114
nlinks = Vect_get_num_dblinks(&Map);
169
Fi = Vect_default_field_info(&Out, 1, NULL, GV_1TABLE);
116
Fi = Vect_default_field_info(&Map, layer, NULL, GV_1TABLE);
171
Fi = Vect_get_field(&Out, 1);
118
Fi = Vect_get_field(&Map, layer);
172
119
if (Fi == NULL) {
173
Vect_delete(Out.name);
174
120
G_fatal_error(_("Unable to get layer info for vector map <%s>"),
178
124
Driver = db_start_driver_open_database(Fi->driver, Fi->database);
179
125
if (Driver == NULL) {
180
Vect_delete(Out.name);
181
126
G_fatal_error(_("Unable to open database <%s> by driver <%s>"),
182
127
Fi->database, Fi->driver);
129
db_set_error_handler_driver(Driver);
186
133
sprintf(buf, "create table %s (%s integer, %s integer)", Fi->table,
187
134
Fi->key, col_opt->answer);
189
sprintf(buf, "alter table %s add column %s integer", Fi->table,
192
db_set_string(&sql, buf);
194
G_debug(3, "SQL: %s", db_get_string(&sql));
196
if (db_execute_immediate(Driver, &sql) != DB_OK) {
197
Vect_delete(Out.name);
198
G_fatal_error(_("Cannot alter table: %s"), db_get_string(&sql));
201
Vect_map_add_dblink(&Out, Fi->number, Fi->name, Fi->table, Fi->key,
136
dbColumn *column = NULL;
138
/* check if column exists */
139
db_get_column(Driver, Fi->table, col_opt->answer, &column);
141
int sqltype = db_get_column_sqltype(column);
142
int ctype = db_sqltype_to_Ctype(sqltype);
144
if (ctype != DB_C_TYPE_INT && ctype != DB_C_TYPE_DOUBLE) {
145
G_fatal_error(_("Column <%s> already exists but is not numeric"),
150
sprintf(buf, "alter table %s add column %s integer", Fi->table,
155
db_set_string(&sql, buf);
157
G_debug(3, "SQL: %s", db_get_string(&sql));
159
if (db_execute_immediate(Driver, &sql) != DB_OK) {
160
G_fatal_error(_("Unable to alter table: %s"), db_get_string(&sql));
165
Vect_set_open_level(1);
167
if (Vect_open_update_head(&Map, map_opt->answer, G_mapset()) < 1)
168
G_fatal_error(_("Unable to modify vector map stored in other mapset"));
169
Vect_map_add_dblink(&Map, layer, Fi->name, Fi->table, Fi->key,
202
170
Fi->database, Fi->driver);
205
* make histogram of number sites in each test partition since the
206
* number of sites will not always be a multiple of the number of
207
* partitions. make_histo() returns the maximum bin size.
209
n = make_histo(&p, np, nsites);
211
nlines = Vect_get_num_lines(&In);
212
pnt_part = (int *)G_calloc(nlines + 1, sizeof(int));
214
maxdist = 1.1 * sqrt(pow(box.E - box.W, 2.0) + pow(box.N - box.S, 2.0));
216
/* Assign fold to each point */
217
for (i = 0; i < np; ++i) { /* for each partition */
218
for (j = 0; j < p[i]; ++j) { /* create p[i] random points */
222
east = rng() / max * (box.E - box.W) + box.W;
223
north = rng() / max * (box.N - box.S) + box.S;
225
G_debug(3, "east = %f north = %f", east, north);
228
for (line = 1; line <= nlines; line++) {
232
if (pnt_part[line] > 0)
235
type = Vect_read_line(&In, Points, Cats, line);
237
if (!(type & GV_POINT))
240
cur_dist = hypot(Points->x[0] - east, Points->y[0] - north);
242
if (nearest < 1 || cur_dist < dist) {
248
G_debug(3, "nearest = %d", nearest);
251
if (nearest > 0) { /* shopuld be always */
254
Vect_read_line(&In, Points, Cats, nearest);
255
Vect_cat_get(Cats, 1, &cat);
258
sprintf(buf, "insert into %s (%s, %s) values (%d, %d)",
259
Fi->table, Fi->key, col_opt->answer, cat, i + 1);
261
sprintf(buf, "update %s set %s = %d where %s = %d",
262
Fi->table, col_opt->answer, i + 1, Fi->key, cat);
264
db_set_string(&sql, buf);
266
G_debug(3, "SQL: %s", db_get_string(&sql));
268
if (db_execute_immediate(Driver, &sql) != DB_OK) {
269
Vect_delete(Out.name);
270
G_fatal_error(_("Unable to insert row: %s"),
271
db_get_string(&sql));
273
pnt_part[nearest] = i + 1;
173
if (db_create_index2(Driver, Fi->table, Fi->key) !=
175
G_warning(_("Cannot create index"));
177
if (db_grant_on_table(Driver, Fi->table, DB_PRIV_SELECT,
178
DB_GROUP | DB_PUBLIC) != DB_OK)
179
G_warning(_("Cannot grant privileges on table %s"),
182
G_important_message(_("Select privileges were granted on the table"));
184
Vect_set_open_level(2);
185
if (Vect_open_old2(&Map, map_opt->answer, G_mapset(), field_opt->answer) < 2) {
186
G_fatal_error(_("Unable to open vector map <%s> at topological level %d"),
191
/* nlines - count of records */
192
nlines = Vect_get_num_lines(&Map);
194
/* last_pidx - current maximal index of partition array */
197
/* minimum number of features in one partition */
198
min_count = nsites / np;
199
G_debug(1, "min count: %d", min_count);
201
/* number of partitions that need min_count + 1 features */
202
spill = nsites - np * min_count; /* nsites % np, but modulus is evil */
203
G_debug(1, "spill: %d", spill);
205
/* array of available partitions */
206
part = (struct partition *)G_calloc(np, sizeof(struct partition));
208
G_fatal_error(_("Out of memory"));
211
line_idx = (int *)G_calloc(nlines, sizeof(int));
213
/* initialization of arrays */
214
for (p = 0; p < np; p++) {
215
part[p].id = p + 1; /* partition ids */
217
part[p].max = min_count;
219
for (p = 0; p < spill; p++) {
223
for (i = 0; i < nlines; i++)
226
/* proper randomization requires a 2-step randomization
228
* randomize partition assignment
229
* looping sequentially through all points creates a bias */
231
/* process all points */
232
db_begin_transaction(Driver);
237
G_percent(nlines - lines_left, nlines, 4);
241
idx = rng() * lines_left;
242
/* in case rng() returns 1.0: */
243
} while (idx >= lines_left);
245
line = line_idx[idx];
247
line_idx[idx] = line_idx[lines_left];
249
if (!Vect_line_alive(&Map, line))
252
type = Vect_read_line(&Map, Points, Cats, line);
254
if (!(type & GV_POINT))
257
/* random partition for current point */
259
p = rng() * (last_pidx + 1);
260
/* in case rng() returns 1.0: */
261
} while (p > last_pidx);
263
G_debug(3, "partition id = %d", part[p].id);
265
Vect_cat_get(Cats, 1, &cat);
267
G_warning(_("No category for line %d in layer %d"), line, layer);
271
sprintf(buf, "insert into %s (%s, %s) values (%d, %d)",
272
Fi->table, Fi->key, col_opt->answer, cat,
275
sprintf(buf, "update %s set %s = %d where %s = %d",
276
Fi->table, col_opt->answer, part[p].id,
279
db_set_string(&sql, buf);
281
G_debug(3, "SQL: %s", db_get_string(&sql));
283
if (db_execute_immediate(Driver, &sql) != DB_OK) {
284
G_fatal_error(_("Unable to insert row: %s"),
285
db_get_string(&sql));
288
/* increase count of features in partition */
290
/* if the partition is full */
291
if (part[p].count >= part[p].max) {
292
/* move last partition to current position */
294
part[p] = part[last_pidx];
296
/* disable last partition */
299
if (last_pidx < 0 && lines_left) {
300
G_fatal_error(_("internal error"));
306
db_commit_transaction(Driver);
281
307
db_close_database_shutdown_driver(Driver);
309
Vect_set_db_updated(&Map);
286
312
exit(EXIT_SUCCESS);