1
//////////////////////////////////////////////////////////////////////////
3
// pgAdmin III - PostgreSQL Tools
5
// Copyright (C) 2002 - 2009, The pgAdmin Development Team
6
// This software is released under the BSD Licence
8
// csvfiles.cpp - CSV file parsing
10
//////////////////////////////////////////////////////////////////////////
13
#include "utils/sysLogger.h"
14
#include "utils/csvfiles.h"
16
// PostgreSQL and GPDB now support CSV format logs.
17
// So, we need a way to parse the CSV files into lines, and lines into tokens (fields).
19
// Reports whether another field (token) can still be read from m_string,
// judged from the current scan offset m_pos. Fields are separated by ','.
//
// NOTE(review): in this excerpt the braces and the statements guarded by the
// conditions below are not visible, so the value returned by each branch is
// not documented here -- confirm against the complete file.
bool CSVTokenizer::HasMoreTokens() const
21
// Is there any text at all?
if ( m_string.length() > 0)
23
// Has the scan offset reached (or passed) the end of the text?
if ( m_pos >= m_string.length())
26
// Look for any character other than the ',' delimiter at or after m_pos.
if ( m_string.find_first_not_of(wxT(','), m_pos) != wxString::npos )
27
// there are non delimiter characters left, so we do have more tokens
30
// Is the character at the scan offset a line terminator?
if (m_string[m_pos] == wxT('\n'))
33
// This expression is true only when nothing has been consumed yet
// (m_pos == 0) and the string is not empty.
return m_pos == 0 && !m_string.empty();
36
// Extracts the next field from m_string starting at m_pos, and advances m_pos
// past the field and its delimiter.
//
// Visible handling:
//   - leading blanks before the field are skipped;
//   - a field that starts with '"' is treated as quoted: the surrounding
//     quotes are dropped and each doubled quote ("") becomes a single quote;
//   - a quoted field whose closing quote is never found is reported through
//     wxLogNotice.
//
// NOTE(review): the declarations of `token`, `pos` and `inquote`, the loop
// bodies, and the return statements are not visible in this excerpt --
// confirm details against the complete file.
wxString CSVTokenizer::GetNextToken()
40
// Nothing left to read.
if ( !HasMoreTokens() )
43
// skip leading blanks if not quoted.
44
while (m_pos < m_string.length() && m_string[m_pos] == wxT(' '))
47
// Are we a quoted field? Must handle this special.
// NOTE(review): the loop above can stop with m_pos == m_string.length()
// (e.g. when only blanks remain); no bounds check is visible before the index
// below -- verify that m_string[m_pos] cannot be read out of range.
48
bool quoted_string = (m_string[m_pos] == wxT('\"'));
53
// find the end of this token.
54
for (; pos < m_string.length(); pos++)
56
// A '"' is only significant when the field itself started with a quote.
if (quoted_string && m_string[pos] == wxT('\"'))
61
// Check to see if we have found the end of this token.
62
// Tokens normally end with a ',' delimiter.
63
if (m_string[pos] == wxT(','))
66
// Last token is delimited by '\n' or by end of string.
// Only a '\n' that is the very last character of the string is tested here.
67
if (m_string[pos] == wxT('\n') && pos == m_string.length()-1)
72
// Quoted field whose quotes are balanced.
if (quoted_string && !inquote)
74
// Copy starts one past the opening quote; the length is the span
// [m_pos, pos) minus the two quote characters.
token.assign(m_string, m_pos + 1, pos - m_pos - 2); // Remove leading and trailing quotes
76
// Remove double doublequote chars, replace with single doublequote chars
77
token.Replace(wxT("\"\""),wxT("\""),true);
80
// Copy of the raw span [m_pos, pos), quotes (if any) included.
token.assign(m_string, m_pos, pos - m_pos);
82
// Field opened with a quote that was never closed: log it.
if (quoted_string && inquote)
83
wxLogNotice(wxT("unterminated double quoted string: %s\n"), token.c_str());
85
m_pos = pos + 1; // Skip token and delimiter
87
if (m_pos > m_string.length()) // Perhaps no delimiter if at end of string if orig string didn't have '\n'.
88
// Clamp so the scan offset never points past the end of the string.
m_pos = m_string.length();
93
// Reports whether any line content remains in m_string at or after m_pos.
// The visible test looks for at least one character that is not '\n'.
//
// NOTE(review): the return statements are not visible in this excerpt --
// confirm against the complete file.
bool CSVLineTokenizer::HasMoreLines() const
95
if ( m_string.find_first_not_of(wxT('\n'), m_pos) != wxString::npos )
96
// there are non line-end characters left, so we do have more lines
101
// Extracts the next CSV record (line) from m_string starting at m_pos and
// advances m_pos past it. A '\n' only ends the record when it is outside a
// double-quoted section.
//
// partial: output flag. NOTE(review): the statements that assign it are not
// visible in this excerpt; presumably it tells the caller whether the returned
// text is an incomplete record -- confirm against the complete file.
//
// NOTE(review): the declaration of `token`, the loop body braces, and the
// return statements are also not visible here.
wxString CSVLineTokenizer::GetNextLine(bool & partial)
106
// Nothing left to read.
if ( !HasMoreLines() )
109
// find the end of this line. CSV lines end in "\n", but
110
// CSV lines may have "\n" chars inside double-quoted strings, so we need to find that out.
112
bool inquote = false;
113
for (size_t pos = m_pos; pos < m_string.length(); pos++)
115
// A double quote was found at this position.
// NOTE(review): presumably this flips `inquote`; the guarded statement is not visible.
if (m_string[pos] == wxT('\"'))
118
if (m_string[pos] == wxT('\n') && !inquote)
120
// Good, we found a complete log line terminated
121
// by "\n", and the "\n" wasn't in a quoted string.
123
size_t len = pos - m_pos + 1; // return the line, including the trailing "\n"
124
token.assign(m_string, m_pos, len);
125
m_pos = pos + 1; // point to next line.
131
// No more line terminators, so the line is everything up to the end of the
132
// string, but we don't have all of the CSV line yet... the rest must still be coming.
134
token.assign(m_string, m_pos, wxString::npos);
137
// Everything has been consumed.
m_pos = m_string.length();