~ubuntu-branches/ubuntu/oneiric/postgis/oneiric-proposed

« back to all changes in this revision

Viewing changes to loader/shp2pgsql-core.c

  • Committer: Bazaar Package Importer
  • Author(s): Alan Boudreault
  • Date: 2010-09-29 09:16:10 UTC
  • mfrom: (1.1.12 upstream)
  • Revision ID: james.westby@ubuntu.com-20100929091610-vj4efw8woq34hdn7
Tags: 1.5.2-1
* New upstream release, with a few bug fixes.
* Added shp2pgsql-gui binary.
* Removed patches, applied upstream: getopt.    

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
/**********************************************************************
2
 
 * $Id: shp2pgsql-core.c 5195 2010-02-03 22:42:13Z pramsey $
 
2
 * $Id: shp2pgsql-core.c 5989 2010-09-19 16:54:59Z mcayland $
3
3
 *
4
4
 * PostGIS - Spatial Types for PostgreSQL
5
5
 * http://postgis.refractions.net
37
37
 
38
38
 
39
39
/*
40
 
 * Internal functions
41
 
 */
42
 
 
43
 
char *utf8(const char *fromcode, char *inputbuf);
44
 
void vasbappend(stringbuffer_t *sb, char *fmt, ... );
 
40
* Internal return values
 
41
*/
 
42
 
 
43
#define UTF8_GOOD_RESULT 0
 
44
#define UTF8_BAD_RESULT 1
 
45
#define UTF8_NO_RESULT 2
 
46
 
 
47
/*
 
48
* Only turn this on if you want to skip bad UTF8 characters. Not a good
 
49
* idea, generally, as "bad characters" usually indicate that the source
 
50
* encoding is *not* UTF8
 
51
*/
 
52
#define UTF8_DROP_BAD_CHARACTERS 0
 
53
 
 
54
 
 
55
int utf8(const char *fromcode, char *inputbuf, char **outputbuf);
45
56
char *escape_copy_string(char *str);
46
57
char *escape_insert_string(char *str);
47
58
 
76
87
}
77
88
 
78
89
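/* Convert inputbuf from encoding fromcode to UTF8. The converted string is placed in a newly allocated buffer stored in *outputbuf; the return value is UTF8_GOOD_RESULT, UTF8_BAD_RESULT or UTF8_NO_RESULT (no usable buffer is left in *outputbuf for UTF8_NO_RESULT). */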
79
 
char *
80
 
utf8(const char *fromcode, char *inputbuf)
 
90
int utf8(const char *fromcode, char *inputbuf, char **outputbuf)
81
91
{
82
92
        iconv_t cd;
83
93
        char *outputptr;
84
 
        char *outputbuf;
85
94
        size_t outbytesleft;
86
95
        size_t inbytesleft;
87
96
 
89
98
 
90
99
        cd = iconv_open("UTF-8", fromcode);
91
100
        if ( cd == ((iconv_t)(-1)) )
92
 
                return NULL;
 
101
                return UTF8_NO_RESULT;
93
102
 
94
103
        outbytesleft = inbytesleft * 3 + 1; /* UTF8 string can be 3 times larger */
95
 
        /* then local string */
96
 
        outputbuf = (char *)malloc(outbytesleft);
97
 
        if (!outputbuf)
98
 
                return NULL;
99
 
 
100
 
        memset(outputbuf, 0, outbytesleft);
101
 
        outputptr = outputbuf;
102
 
 
103
 
        if (-1 == iconv(cd, &inputbuf, &inbytesleft, &outputptr, &outbytesleft))
104
 
                return NULL;
105
 
 
106
 
        iconv_close (cd);
107
 
 
108
 
        return outputbuf;
 
104
 
 
105
        *outputbuf = (char *)malloc(outbytesleft);
 
106
        if (!(*outputbuf))
 
107
                return UTF8_NO_RESULT;
 
108
 
 
109
    /* Clean out the buffer */
 
110
        memset(*outputbuf, 0, outbytesleft);
 
111
        outputptr = *outputbuf;
 
112
 
 
113
    /* Does this string convert cleanly? */
 
114
        if ( iconv(cd, &inputbuf, &inbytesleft, &outputptr, &outbytesleft) == -1 )
 
115
        {
 
116
#ifdef HAVE_ICONVCTL
 
117
        int on = 1;
 
118
            /* No. Try to convert it while transliterating. */
 
119
        iconvctl(cd, ICONV_SET_TRANSLITERATE, &on);
 
120
        if ( iconv(cd, &inputbuf, &inbytesleft, &outputptr, &outbytesleft) == -1 )
 
121
        {
 
122
                /* No. Try to convert it while discarding errors. */
 
123
            iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &on);
 
124
                if ( iconv(cd, &inputbuf, &inbytesleft, &outputptr, &outbytesleft) == -1 )
 
125
                {
 
126
                /* Still no. Throw away the buffer and return. */
 
127
                free(*outputbuf);
 
128
                iconv_close(cd);
 
129
                return UTF8_NO_RESULT;
 
130
            }
 
131
        }
 
132
        iconv_close(cd);
 
133
        return UTF8_BAD_RESULT;
 
134
#else
 
135
        free(*outputbuf);
 
136
        iconv_close(cd);
 
137
        return UTF8_NO_RESULT;        
 
138
#endif
 
139
    }
 
140
    /* Return a good result, converted string is in buffer. */
 
141
        iconv_close(cd);
 
142
    return UTF8_GOOD_RESULT;
109
143
}
110
144
 
111
145
/**
217
251
 
218
252
 
219
253
/**
 * Escape a string that is to be used as part of a PostgreSQL connection
 * string: every apostrophe (') and every backslash in the input is preceded
 * by an extra backslash in the output.
 *
 * @param str NUL-terminated input string; it is never modified.
 * @return str itself when no character requires escaping (this includes a
 *         NULL str), otherwise a newly allocated escaped copy, or NULL if
 *         that copy could not be allocated.
 *
 * Only a result that differs from str was allocated by this function, so a
 * caller must compare the two pointers before freeing the result.
 */
char *
escape_connection_string(char *str)
{
        char *result;
        char *ptr, *optr;
        size_t toescape = 0;
        size_t size;

        /* Nothing to scan: hand the (NULL) input pointer straight back */
        if (str == NULL)
                return str;

        /* Count how many characters we need to escape so we know the size of the string we need to return */
        for (ptr = str; *ptr; ptr++)
        {
                if (*ptr == '\'' || *ptr == '\\')
                        toescape++;
        }

        /* If we don't have to escape anything, simply return the input pointer */
        if (toescape == 0)
                return str;

        /* Input length + one extra backslash per escaped character + terminator */
        size = (size_t)(ptr - str) + toescape + 1;
        result = calloc(1, size);

        /* Report allocation failure instead of writing through a NULL pointer */
        if (result == NULL)
                return NULL;

        optr = result;
        for (ptr = str; *ptr; ptr++)
        {
                if (*ptr == '\'' || *ptr == '\\')
                        *optr++ = '\\';

                *optr++ = *ptr;
        }

        *optr = '\0';

        return result;
}
 
307
 
 
308
 
 
309
/**
220
310
 * @brief Generate an allocated geometry string for shapefile object obj using the state parameters
221
311
 */
222
312
int
406
496
        if (state->config->simple_geometries == 0)
407
497
        {
408
498
                lwcollection = lwcollection_construct(MULTILINETYPE, state->config->sr_id, NULL, obj->nParts, lwmultilinestrings);
 
499
 
 
500
                /* When outputting wkt rather than wkb, we need to remove the SRID from the inner geometries */
 
501
                if (state->config->hwgeom)
 
502
                {
 
503
                        for (u = 0; u < obj->nParts; u++)
 
504
                                lwmultilinestrings[u]->SRID = -1;
 
505
                }
 
506
 
409
507
                serialized_lwgeom = lwgeom_serialize(lwcollection_as_lwgeom(lwcollection));
410
508
        }
411
509
        else
762
860
        if (state->config->simple_geometries == 0)
763
861
        {
764
862
                lwcollection = lwcollection_construct(MULTIPOLYGONTYPE, state->config->sr_id, NULL, polygon_total, lwpolygons);
 
863
 
 
864
                /* When outputting wkt rather than wkb, we need to remove the SRID from the inner geometries */
 
865
                if (state->config->hwgeom)
 
866
                {
 
867
                        for (u = 0; u < pi; u++)
 
868
                                lwpolygons[u]->SRID = -1;
 
869
                }
 
870
 
765
871
                serialized_lwgeom = lwgeom_serialize(lwcollection_as_lwgeom(lwcollection));
766
872
        }
767
873
        else
875
981
        state->hSHPHandle = NULL;
876
982
        state->hDBFHandle = NULL;
877
983
        state->wkbtype = 0;
 
984
    state->types = NULL;
 
985
    state->widths = NULL;
 
986
    state->precisions = NULL;
 
987
    state->col_names = NULL;
878
988
 
879
989
        return state;
880
990
}
1147
1257
 
1148
1258
                if (state->config->encoding)
1149
1259
                {
1150
 
                        /* If we are converting from another encoding to UTF8, convert the field name to UTF8 */
1151
 
                        utf8str = utf8(state->config->encoding, name);
1152
 
                        if (!utf8str)
 
1260
            static char *encoding_msg = "Try \"LATIN1\" (Western European), or one of the values described at http://www.postgresql.org/docs/current/static/multibyte.html.";
 
1261
 
 
1262
            int rv = utf8(state->config->encoding, name, &utf8str);
 
1263
                                                
 
1264
                        if (rv != UTF8_GOOD_RESULT)
1153
1265
                        {
1154
 
                                snprintf(state->message, SHPLOADERMSGLEN, "Unable to convert field name \"%s\" to UTF-8: iconv reports \"%s\"", name, strerror(errno));
 
1266
                if( rv == UTF8_BAD_RESULT )
 
1267
                                    snprintf(state->message, SHPLOADERMSGLEN, "Unable to convert field name \"%s\" to UTF-8 (iconv reports \"%s\"). Current encoding is \"%s\". %s", utf8str, strerror(errno), state->config->encoding, encoding_msg);
 
1268
                            else if( rv == UTF8_NO_RESULT )
 
1269
                                    snprintf(state->message, SHPLOADERMSGLEN, "Unable to convert field name to UTF-8 (iconv reports \"%s\"). Current encoding is \"%s\". %s", strerror(errno), state->config->encoding, encoding_msg);
 
1270
                                else 
 
1271
                                    snprintf(state->message, SHPLOADERMSGLEN, "Unexpected return value from utf8()");
 
1272
 
 
1273
                if( rv == UTF8_BAD_RESULT )
 
1274
                                free(utf8str);
 
1275
 
1155
1276
                                return SHPLOADERERR;
1156
1277
                        }
1157
1278
 
1463
1584
        stringbuffer_t *sbwarn;
1464
1585
        char val[MAXVALUELEN];
1465
1586
        char *escval;
1466
 
        char *geometry, *ret;
 
1587
        char *geometry=NULL, *ret;
1467
1588
        char *utf8str;
1468
1589
        int res, i;
1469
1590
 
1574
1695
 
1575
1696
                        if (state->config->encoding)
1576
1697
                        {
 
1698
                static char *encoding_msg = "Try \"LATIN1\" (Western European), or one of the values described at http://www.postgresql.org/docs/current/static/multibyte.html.";
1577
1699
                                /* If we are converting from another encoding to UTF8, convert the field value to UTF8 */
1578
 
                                utf8str = utf8(state->config->encoding, val);
1579
 
                                if (!utf8str)
1580
 
                                {
1581
 
                                        snprintf(state->message, SHPLOADERMSGLEN, "Unable to convert field value \"%s\" to UTF-8: iconv reports \"%s\"", val, strerror(errno));
1582
 
                                        return SHPLOADERERR;
1583
 
                                }
1584
 
 
1585
 
                                strncpy(val, utf8str, MAXVALUELEN);
1586
 
                                free(utf8str);
 
1700
                                int rv = utf8(state->config->encoding, val, &utf8str);
 
1701
                if ( !UTF8_DROP_BAD_CHARACTERS && rv != UTF8_GOOD_RESULT )
 
1702
                {
 
1703
                    if( rv == UTF8_BAD_RESULT )
 
1704
                                            snprintf(state->message, SHPLOADERMSGLEN, "Unable to convert data value \"%s\" to UTF-8 (iconv reports \"%s\"). Current encoding is \"%s\". %s", utf8str, strerror(errno), state->config->encoding, encoding_msg);
 
1705
                                    else if( rv == UTF8_NO_RESULT )
 
1706
                                            snprintf(state->message, SHPLOADERMSGLEN, "Unable to convert data value to UTF-8 (iconv reports \"%s\"). Current encoding is \"%s\". %s", strerror(errno), state->config->encoding, encoding_msg);
 
1707
                                        else 
 
1708
                                            snprintf(state->message, SHPLOADERMSGLEN, "Unexpected return value from utf8()");
 
1709
 
 
1710
                    if( rv == UTF8_BAD_RESULT )
 
1711
                                        free(utf8str);
 
1712
                            
 
1713
                        return SHPLOADERERR;
 
1714
                }
 
1715
                                /* Optionally (compile-time) suppress bad UTF8 values */
 
1716
                                if ( UTF8_DROP_BAD_CHARACTERS && rv != UTF8_GOOD_RESULT )
 
1717
                                {
 
1718
                                        val[0] = '.';
 
1719
                                        val[1] = '\0';
 
1720
                                }
 
1721
 
 
1722
                                
 
1723
                                /* The utf8str buffer is only alloc'ed if the UTF8 conversion works */
 
1724
                                if ( rv == UTF8_GOOD_RESULT )
 
1725
                                {
 
1726
                                        strncpy(val, utf8str, MAXVALUELEN);
 
1727
                                        free(utf8str);
 
1728
                                }
1587
1729
                        }
1588
1730
 
1589
1731
                        /* Escape attribute correctly according to dump format */
1689
1831
 
1690
1832
                                return SHPLOADERERR;
1691
1833
                        }
1692
 
                }
1693
 
 
1694
 
 
1695
 
                /* Now generate the geometry string according to the current configuration */
1696
 
                if (state->config->hwgeom)
1697
 
                {
1698
 
                        /* Old-style hwgeom (WKT) */
1699
 
                        if (!state->config->dump_format)
1700
 
                                vasbappend(sb, "GeomFromText('");
 
1834
 
 
1835
 
 
1836
                        /* Now generate the geometry string according to the current configuration */
 
1837
                        if (state->config->hwgeom)
 
1838
                        {
 
1839
                                /* Old-style hwgeom (WKT) */
 
1840
                                if (!state->config->dump_format)
 
1841
                                        vasbappend(sb, "GeomFromText('");
 
1842
                                else
 
1843
                                {
 
1844
                                        /* Output SRID if relevant */
 
1845
                                        if (state->config->sr_id != 0)
 
1846
                                                vasbappend(sb, "SRID=%d;", state->config->sr_id);
 
1847
                                }
 
1848
 
 
1849
                                vasbappend(sb, "%s", geometry);
 
1850
 
 
1851
                                if (!state->config->dump_format)
 
1852
                                {
 
1853
                                        vasbappend(sb, "'");
 
1854
 
 
1855
                                        /* Output SRID if relevant */
 
1856
                                        if (state->config->sr_id != 0)
 
1857
                                                vasbappend(sb, ", %d)", state->config->sr_id);
 
1858
                                        else
 
1859
                                                vasbappend(sb, ")");
 
1860
                                }
 
1861
                        }
1701
1862
                        else
1702
1863
                        {
1703
 
                                /* Output SRID if relevant */
1704
 
                                if (state->config->sr_id != 0)
1705
 
                                        vasbappend(sb, "SRID=%d;", state->config->sr_id);
1706
 
                        }
1707
 
 
1708
 
                        vasbappend(sb, "%s", geometry);
1709
 
 
1710
 
                        if (!state->config->dump_format)
1711
 
                        {
1712
 
                                vasbappend(sb, "'");
1713
 
 
1714
 
                                /* Output SRID if relevant */
1715
 
                                if (state->config->sr_id != 0)
1716
 
                                        vasbappend(sb, ", %d)", state->config->sr_id);
1717
 
                                else
1718
 
                                        vasbappend(sb, ")");
1719
 
                        }
1720
 
                }
1721
 
                else
1722
 
                {
1723
 
                        /* New style lwgeom (HEXEWKB) */
1724
 
                        if (!state->config->dump_format)
1725
 
                                vasbappend(sb, "'");
1726
 
 
1727
 
                        vasbappend(sb, "%s", geometry);
1728
 
 
1729
 
                        if (!state->config->dump_format)
1730
 
                                vasbappend(sb, "'");
 
1864
                                /* New style lwgeom (HEXEWKB) */
 
1865
                                if (!state->config->dump_format)
 
1866
                                        vasbappend(sb, "'");
 
1867
 
 
1868
                                vasbappend(sb, "%s", geometry);
 
1869
 
 
1870
                                if (!state->config->dump_format)
 
1871
                                        vasbappend(sb, "'");
 
1872
                        }
 
1873
 
 
1874
                        free(geometry);
1731
1875
                }
1732
1876
 
1733
1877
                /* Tidy up everything */
1734
1878
                SHPDestroyObject(obj);
1735
 
                free(geometry);
1736
1879
        }
1737
1880
 
1738
1881
        /* Close the line correctly for dump/insert format */