1
// Copyright 2011 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
5
// Package csv reads and writes comma-separated values (CSV) files.
7
// A csv file contains zero or more records of one or more fields per record.
8
// Each record is separated by the newline character. The final record may
9
// optionally be followed by a newline character.
11
// field1,field2,field3
13
// White space is considered part of a field.
15
// Carriage returns before newline characters are silently removed.
17
// Blank lines are ignored. A line with only whitespace characters (excluding
18
// the ending newline character) is not considered a blank line.
20
// Fields which start and stop with the quote character " are called
21
// quoted-fields. The beginning and ending quote are not part of the
26
// normal string,"quoted-field"
28
// results in the fields
30
// {`normal string`, `quoted-field`}
32
// Within a quoted-field a quote character followed by a second quote
33
// character is considered a single quote.
35
// "the ""word"" is true","a ""quoted-field"""
39
// {`the "word" is true`, `a "quoted-field"`}
41
// Newlines and commas may be included in a quoted-field
44
// field","comma is ,"
49
// field`, `comma is ,`}
61
// A ParseError is returned for parsing errors.
62
// The first line is 1. The first column is 0.
63
type ParseError struct {
64
Line int // Line where the error occurred
65
Column int // Column (rune index) where the error occurred
66
Error os.Error // The actual error
69
func (e *ParseError) String() string {
70
return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Error)
73
// These are the errors that can be returned in ParseError.Error
75
ErrTrailingComma = os.NewError("extra delimiter at end of line")
76
ErrBareQuote = os.NewError("bare \" in non-quoted-field")
77
ErrQuote = os.NewError("extraneous \" in field")
78
ErrFieldCount = os.NewError("wrong number of fields in line")
81
// A Reader reads records from a CSV-encoded file.
83
// As returned by NewReader, a Reader expects input conforming to RFC 4180.
84
// The exported fields can be changed to customize the details before the
85
// first call to Read or ReadAll.
87
// Comma is the field delimiter. It defaults to ','.
89
// Comment, if not 0, is the comment character. Lines beginning with the
90
// Comment character are ignored.
92
// If FieldsPerRecord is positive, Read requires each record to
93
// have the given number of fields. If FieldsPerRecord is 0, Read sets it to
94
// the number of fields in the first record, so that future records must
95
// have the same field count.
97
// If LazyQuotes is true, a quote may appear in an unquoted field and a
98
// non-doubled quote may appear in a quoted field.
100
// If TrailingComma is true, the last field may be an unquoted empty field.
102
// If TrimLeadingSpace is true, leading white space in a field is ignored.
104
Comma int // Field delimiter (set to ',' by NewReader)
105
Comment int // Comment character for start of line
106
FieldsPerRecord int // Number of expected fields per record
107
LazyQuotes bool // Allow lazy quotes
108
TrailingComma bool // Allow trailing comma
109
TrimLeadingSpace bool // Trim leading space
116
// NewReader returns a new Reader that reads from r.
117
func NewReader(r io.Reader) *Reader {
120
r: bufio.NewReader(r),
124
// error creates a new ParseError based on err.
125
func (r *Reader) error(err os.Error) os.Error {
133
// Read reads one record from r. The record is a slice of strings with each
134
// string representing one field.
135
func (r *Reader) Read() (record []string, err os.Error) {
137
record, err = r.parseRecord()
146
if r.FieldsPerRecord > 0 {
147
if len(record) != r.FieldsPerRecord {
148
r.column = 0 // report at start of record
149
return record, r.error(ErrFieldCount)
151
} else if r.FieldsPerRecord == 0 {
152
r.FieldsPerRecord = len(record)
157
// ReadAll reads all the remaining records from r.
158
// Each record is a slice of fields.
159
func (r *Reader) ReadAll() (records [][]string, err os.Error) {
161
record, err := r.Read()
168
records = append(records, record)
173
// readRune reads one rune from r, folding \r\n to \n and keeping track
174
// of how far into the line we have read. r.column will point to the start
175
// of this rune, not the end of this rune.
176
func (r *Reader) readRune() (int, os.Error) {
177
rune, _, err := r.r.ReadRune()
179
// Handle \r\n here. We make the simplifying assumption that
180
// anytime \r is followed by \n that it can be folded to \n.
181
// We will not detect files which contain both \r\n and bare \n.
183
rune, _, err = r.r.ReadRune()
195
// unreadRune puts the last rune read from r back.
196
func (r *Reader) unreadRune() {
201
// skip reads runes up to and including the rune delim or until error.
202
func (r *Reader) skip(delim int) os.Error {
204
rune, err := r.readRune()
215
// parseRecord reads and parses a single csv record from r.
216
func (r *Reader) parseRecord() (fields []string, err os.Error) {
217
// Each record starts on a new line. We increment our line
218
// number (lines start at 1, not 0) and set column to -1
219
// so as we increment in readRune it points to the character we read.
223
// Peek at the first rune. If it is an error we are done.
224
// If we support comments and it is the comment character
225
// then skip to the end of the line.
227
rune, _, err := r.r.ReadRune()
232
if r.Comment != 0 && rune == r.Comment {
233
return nil, r.skip('\n')
237
// At this point we have at least one field.
239
haveField, delim, err := r.parseField()
241
fields = append(fields, r.field.String())
243
if delim == '\n' || err == os.EOF {
245
} else if err != nil {
253
// parseField parses the next field in the record. The read field is
254
// located in r.field. Delim is the first character not part of the field
255
// (r.Comma or '\n').
256
func (r *Reader) parseField() (haveField bool, delim int, err os.Error) {
259
rune, err := r.readRune()
261
// If we have EOF and are not at the start of a line
262
// then we return the empty field. We have already
263
// checked for trailing commas if needed.
264
if err == os.EOF && r.column != 0 {
270
if r.TrimLeadingSpace {
271
for unicode.IsSpace(rune) {
272
rune, err = r.readRune()
284
// We are a trailing empty field or a blank line
286
return false, rune, nil
288
return true, rune, nil
294
rune, err = r.readRune()
300
return false, 0, r.error(ErrQuote)
306
rune, err = r.readRune()
307
if err != nil || rune == r.Comma {
311
return true, rune, nil
316
return false, 0, r.error(ErrQuote)
318
// accept the bare quote
319
r.field.WriteRune('"')
325
r.field.WriteRune(rune)
331
r.field.WriteRune(rune)
332
rune, err = r.readRune()
333
if err != nil || rune == r.Comma {
337
return true, rune, nil
339
if !r.LazyQuotes && rune == '"' {
340
return false, 0, r.error(ErrBareQuote)
352
if !r.TrailingComma {
353
// We don't allow trailing commas. See if we
354
// are at the end of the line (being mindful
357
rune, err = r.readRune()
358
if r.TrimLeadingSpace {
359
for unicode.IsSpace(rune) {
360
rune, err = r.readRune()
366
if err == os.EOF || rune == '\n' {
367
r.column = c // report the comma
368
return false, 0, r.error(ErrTrailingComma)
372
return true, rune, nil