1
// Copyright 2016 Feather Developers
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
7
// http://www.apache.org/licenses/LICENSE-2.0
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
15
#include "feather/writer.h"
20
#include "feather/common.h"
21
#include "feather/status.h"
25
// Constructs a writer that has no header written to its stream yet:
// initialized_stream_ starts out false.
TableWriter::TableWriter() : initialized_stream_(false) {}
28
// Opens the writer on an already-constructed output stream.
//
// NOTE(review): the body of this definition is not visible in this excerpt.
// Presumably it stores `stream` as the writer's sink (OpenFile passes its
// freshly opened file stream here) -- confirm against the full source.
Status TableWriter::Open(const std::shared_ptr<OutputStream>& stream) {
33
// Creates a TableWriter that writes to the file at `abspath`.
//
// A FileOutputStream is opened on `abspath` first. If that fails, its Status
// is returned and `*out` is left untouched. Otherwise `*out` is reset to a new
// TableWriter and the Status of attaching the stream through Open() is
// returned.
//
// `out` must not be null.
Status TableWriter::OpenFile(const std::string& abspath,
    std::unique_ptr<TableWriter>* out) {
  // Allocate the stream under shared ownership from the start instead of
  // creating a unique_ptr and releasing it into a shared_ptr afterwards.
  auto file = std::make_shared<FileOutputStream>();
  RETURN_NOT_OK(file->Open(abspath));

  std::shared_ptr<OutputStream> sink(file);
  out->reset(new TableWriter());
  return (*out)->Open(sink);
}
42
void TableWriter::SetDescription(const std::string& desc) {
43
metadata_.SetDescription(desc);
46
void TableWriter::SetNumRows(int64_t num_rows) {
47
metadata_.SetNumRows(num_rows);
50
// Writes the leading FEATHER_MAGIC_BYTES to the output stream.
//
// Returns the Status of the write if it fails. On success the stream is marked
// as initialized so that callers guarding on `!initialized_stream_` do not
// write the magic bytes a second time.
Status TableWriter::Init() {
  RETURN_NOT_OK(stream_->Write(
      reinterpret_cast<const uint8_t*>(FEATHER_MAGIC_BYTES),
      strlen(FEATHER_MAGIC_BYTES)));

  // Without this, every `if (!initialized_stream_)` guard would call Init()
  // again and emit another copy of the magic bytes into the stream.
  initialized_stream_ = true;
  return Status::OK();
}
55
// Completes the file and closes the output stream.
//
// In order: calls Init() if initialized_stream_ is false, writes the metadata
// buffer obtained from metadata_.GetBuffer(), writes the footer (the metadata
// length as a uint32_t followed by FEATHER_MAGIC_BYTES), and finally returns
// the Status of stream_->Close(). Any failing step returns its Status early
// through RETURN_NOT_OK.
//
// NOTE(review): this excerpt is missing lines between some of the statements
// below (closing braces and at least one call argument); confirm against the
// full source before editing the code.
Status TableWriter::Finalize() {
56
if (!initialized_stream_) {
57
RETURN_NOT_OK(Init());
61
// Serialized table metadata; written after any column data already appended.
auto buffer = metadata_.GetBuffer();
64
RETURN_NOT_OK(stream_->Write(buffer->data(), buffer->size()));
66
// The metadata length is stored in the footer as a 32-bit unsigned value.
uint32_t buffer_size = buffer->size();
68
// Footer: metadata length, magic bytes
69
// NOTE(review): the byte-count argument of this Write call is not visible in
// this excerpt.
RETURN_NOT_OK(stream_->Write(reinterpret_cast<const uint8_t*>(&buffer_size),
71
RETURN_NOT_OK(stream_->Write(
72
reinterpret_cast<const uint8_t*>(FEATHER_MAGIC_BYTES),
73
strlen(FEATHER_MAGIC_BYTES)));
75
return stream_->Close();
78
// Writes the buffers of `values` to the output stream and fills in `*meta`.
//
// `meta` receives the array's type, PLAIN encoding, the stream position at
// which this array's data starts, its length and null count, and the total
// number of bytes written for it. Init() is called first if
// initialized_stream_ is false. Any failing stream write returns its Status
// early through RETURN_NOT_OK.
//
// Buffers are written in this order: the null bitmask (only when
// null_count > 0), the offsets (only for variable-length types), then the
// values.
//
// NOTE(review): this excerpt is missing several lines (closing braces, an
// `else`, a call argument and the declaration of `values_bytes`); confirm
// against the full source before editing the code.
Status TableWriter::AppendPrimitive(const PrimitiveArray& values,
79
ArrayMetadata* meta) {
80
if (!initialized_stream_) {
81
RETURN_NOT_OK(Init());
83
meta->type = values.type;
84
meta->encoding = Encoding::PLAIN;
85
// Position in the stream where this array's data begins.
meta->offset = stream_->Tell();
86
meta->length = values.length;
87
meta->null_count = values.null_count;
88
// Accumulated below as each buffer is written.
meta->total_bytes = 0;
90
// Write the null bitmask
91
if (values.null_count > 0) {
92
// We assume there is one bit for each value in values.nulls, aligned on a
93
// byte boundary, and we write this much data into the stream
94
size_t null_bytes = util::bytes_for_bits(values.length);
96
RETURN_NOT_OK(stream_->Write(values.nulls, null_bytes));
97
meta->total_bytes += null_bytes;
100
size_t value_byte_size = ByteSize(values.type);
103
if (IsVariableLength(values.type)) {
104
// One int32_t offset per value plus one trailing entry.
size_t offset_bytes = sizeof(int32_t) * (values.length + 1);
106
// The final offset entry, scaled by the per-value byte size, is used as the
// size of the values buffer.
values_bytes = values.offsets[values.length] * value_byte_size;
108
// Write the variable-length offsets
109
// NOTE(review): the byte-count argument of this Write call is not visible in
// this excerpt.
RETURN_NOT_OK(stream_->Write(reinterpret_cast<const uint8_t*>(values.offsets),
111
meta->total_bytes += offset_bytes;
113
if (values.type == PrimitiveType::BOOL) {
114
// Booleans are bit-packed
115
values_bytes = util::bytes_for_bits(values.length);
117
// Fixed-width case: length times the per-value byte size.
values_bytes = values.length * value_byte_size;
120
RETURN_NOT_OK(stream_->Write(values.values, values_bytes));
121
meta->total_bytes += values_bytes;
126
// Appends `values` as a plain column called `name`.
//
// The array data is written to the stream first. If that fails, the failing
// Status is returned and no column metadata is registered, so the table
// metadata never refers to data that was not fully written.
Status TableWriter::AppendPlain(const std::string& name,
    const PrimitiveArray& values) {
  // Prepare metadata payload
  ArrayMetadata meta;
  RETURN_NOT_OK(AppendPrimitive(values, &meta));

  // Append the metadata
  auto meta_builder = metadata_.AddColumn(name);
  meta_builder->SetValues(meta);
  meta_builder->Finish();

  return Status::OK();
}
140
// Appends a category column called `name`.
//
// `values` holds the integer codes and `levels` the category levels;
// `ordered` is passed through to the column metadata. Returns
// Status::Invalid if `values` is not of an integer type. Both arrays are
// written to the stream before any column metadata is registered; a failure
// writing either one is returned to the caller instead of being dropped.
Status TableWriter::AppendCategory(const std::string& name,
    const PrimitiveArray& values,
    const PrimitiveArray& levels, bool ordered) {
  if (!IsInteger(values.type)) {
    return Status::Invalid("Category values must be integers");
  }

  ArrayMetadata values_meta, levels_meta;

  RETURN_NOT_OK(AppendPrimitive(values, &values_meta));
  RETURN_NOT_OK(AppendPrimitive(levels, &levels_meta));

  auto meta_builder = metadata_.AddColumn(name);
  meta_builder->SetValues(values_meta);
  meta_builder->SetCategory(levels_meta, ordered);
  meta_builder->Finish();

  return Status::OK();
}
161
// Appends a timestamp column called `name`.
//
// `values` must be of type INT64, otherwise Status::Invalid is returned. The
// unit and timezone from `meta` are recorded in the column metadata. A
// failure writing the array data is returned to the caller and no column
// metadata is registered in that case.
Status TableWriter::AppendTimestamp(const std::string& name,
    const PrimitiveArray& values,
    const TimestampMetadata& meta) {
  if (values.type != PrimitiveType::INT64) {
    return Status::Invalid("Timestamp values must be INT64");
  }

  ArrayMetadata values_meta;
  RETURN_NOT_OK(AppendPrimitive(values, &values_meta));

  auto meta_builder = metadata_.AddColumn(name);
  meta_builder->SetValues(values_meta);
  meta_builder->SetTimestamp(meta.unit, meta.timezone);
  meta_builder->Finish();

  return Status::OK();
}
178
// Appends a time-of-day column called `name`.
//
// `values` must be of type INT64, otherwise Status::Invalid is returned. The
// unit from `meta` is recorded in the column metadata. A failure writing the
// array data is returned to the caller and no column metadata is registered
// in that case.
Status TableWriter::AppendTime(const std::string& name, const PrimitiveArray& values,
    const TimeMetadata& meta) {
  if (values.type != PrimitiveType::INT64) {
    // The message names the Time column kind; it previously said "Timestamp",
    // copied from AppendTimestamp.
    return Status::Invalid("Time values must be INT64");
  }

  ArrayMetadata values_meta;
  RETURN_NOT_OK(AppendPrimitive(values, &values_meta));

  auto meta_builder = metadata_.AddColumn(name);
  meta_builder->SetValues(values_meta);
  meta_builder->SetTime(meta.unit);
  meta_builder->Finish();

  return Status::OK();
}
194
// Appends a date column called `name`.
//
// `values` must be of type INT32, otherwise Status::Invalid is returned. A
// failure writing the array data is returned to the caller and no column
// metadata is registered in that case.
Status TableWriter::AppendDate(const std::string& name,
    const PrimitiveArray& values) {
  if (values.type != PrimitiveType::INT32) {
    return Status::Invalid("Date values must be INT32");
  }

  ArrayMetadata values_meta;
  RETURN_NOT_OK(AppendPrimitive(values, &values_meta));

  auto meta_builder = metadata_.AddColumn(name);
  meta_builder->SetValues(values_meta);
  meta_builder->SetDate();
  meta_builder->Finish();

  return Status::OK();
}
210
} // namespace feather