1
# Copyright 2016 Feather Developers
3
# Licensed under the Apache License, Version 2.0 (the "License");
4
# you may not use this file except in compliance with the License.
5
# You may obtain a copy of the License at
7
# http://www.apache.org/licenses/LICENSE-2.0
9
# Unless required by applicable law or agreed to in writing, software
10
# distributed under the License is distributed on an "AS IS" BASIS,
11
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
# See the License for the specific language governing permissions and
13
# limitations under the License.
20
from pandas.util.testing import assert_frame_equal
23
from feather.compat import guid
24
from feather import FeatherReader, FeatherWriter
29
return 'feather_{}'.format(guid())
32
class TestFeatherReader(unittest.TestCase):
38
for path in self.test_files:
44
def test_file_not_exist(self):
    # Constructing a reader on 'test_invalid_file' (a path this test never
    # creates) must raise feather.FeatherError.
    missing_path = 'test_invalid_file'
    self.assertRaises(feather.FeatherError, FeatherReader, missing_path)
48
def _check_pandas_roundtrip(self, df, expected=None):
# Helper: write `df` to `path` with feather.write_dataframe, read it back
# with feather.read_dataframe and compare the result with `expected`.
# NOTE(review): the assignment of `path` and any handling of the
# `expected=None` default are not visible in this excerpt -- confirm
# against the full file.
50
# Record the path on self.test_files (presumably for later cleanup --
# verify against the fixture code).
self.test_files.append(path)
51
feather.write_dataframe(df, path)
52
# Fail early with an explicit message if the writer produced no file.
if not os.path.exists(path):
53
raise Exception('file not written')
55
result = feather.read_dataframe(path)
59
assert_frame_equal(result, expected)
61
def test_num_rows_attr(self):
# Checks FeatherReader.num_rows for two written files.
# NOTE(review): the lines assigning `path` (and the second `df`) are not
# visible in this excerpt.
62
df = pd.DataFrame({'foo': [1, 2, 3, 4, 5]})
64
self.test_files.append(path)
65
feather.write_dataframe(df, path)
67
reader = feather.FeatherReader(path)
68
# First case: num_rows must equal the length of the five-row frame.
assert reader.num_rows == len(df)
72
self.test_files.append(path)
73
feather.write_dataframe(df, path)
75
reader = feather.FeatherReader(path)
76
# Second case: zero rows expected -- presumably `df` was rebound to an
# empty frame on a line not shown; TODO confirm.
assert reader.num_rows == 0
78
def test_float_no_nulls(self):
# Round-trips a frame with one random column per float dtype.
# NOTE(review): `data` and `num_values` are defined on lines not visible
# in this excerpt.
80
numpy_dtypes = ['f4', 'f8']
83
for dtype in numpy_dtypes:
84
values = np.random.randn(num_values)
85
# The dtype code doubles as the column name.
data[dtype] = values.astype(dtype)
87
df = pd.DataFrame(data)
88
self._check_pandas_roundtrip(df)
90
def test_float_nulls(self):
# Writes float arrays with a null mask through FeatherWriter and expects
# the masked positions to read back as NaN.
# NOTE(review): `path`, `num_values`, `dtypes`, `expected_cols`, the loop
# header binding `name`, and the tail of the DataFrame(...) call are not
# visible in this excerpt.
94
self.test_files.append(path)
95
writer = FeatherWriter(path)
97
# True where the draw from [0, 10) is below 3; passed to write_array as
# the null mask.
null_mask = np.random.randint(0, 10, size=num_values) < 3
101
values = np.random.randn(num_values).astype(name)
102
writer.write_array(name, values, null_mask)
104
# Mutated after writing: `values` now serves as the expected column.
values[null_mask] = np.nan
106
expected_cols.append(values)
110
ex_frame = pd.DataFrame(dict(zip(dtypes, expected_cols)),
113
result = feather.read_dataframe(path)
114
assert_frame_equal(result, ex_frame)
116
def test_integer_no_nulls(self):
# Round-trips a frame with one random column per signed/unsigned integer
# dtype.
# NOTE(review): `data` and the final argument(s) of the randint call are
# on lines not visible in this excerpt.
119
numpy_dtypes = ['i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8']
122
for dtype in numpy_dtypes:
123
info = np.iinfo(dtype)
124
values = np.random.randint(info.min,
125
# Upper bound is capped at the int64 maximum (relevant for 'u8', whose
# maximum is larger).
min(info.max, np.iinfo('i8').max),
127
data[dtype] = values.astype(dtype)
129
df = pd.DataFrame(data)
130
self._check_pandas_roundtrip(df)
132
def test_integer_with_nulls(self):
# Writes integer arrays with a null mask and expects float64 columns with
# NaN at the masked positions on read.
# NOTE(review): `path`, `num_values`, `expected_cols` and the tail of the
# DataFrame(...) call are on lines not visible in this excerpt.
133
# pandas requires upcast to float dtype
135
self.test_files.append(path)
137
int_dtypes = ['i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8']
140
writer = FeatherWriter(path)
142
# True where the draw from [0, 10) is below 3; the same mask is reused
# for every column.
null_mask = np.random.randint(0, 10, size=num_values) < 3
144
for name in int_dtypes:
145
# NOTE(review): `values` is not cast to the dtype named by `name` before
# being written, so `name` only serves as the column label here and every
# column appears to carry randint's default integer dtype -- confirm
# whether `.astype(name)` was intended.
values = np.random.randint(0, 100, size=num_values)
146
writer.write_array(name, values, null_mask)
148
expected = values.astype('f8')
149
expected[null_mask] = np.nan
151
expected_cols.append(expected)
153
ex_frame = pd.DataFrame(dict(zip(int_dtypes, expected_cols)),
158
result = feather.read_dataframe(path)
159
assert_frame_equal(result, ex_frame)
161
def test_boolean_no_nulls(self):
# Round-trips a single boolean column built by thresholding random normal
# draws at zero.
# NOTE(review): `num_values` is defined on a line not visible in this
# excerpt.
166
df = pd.DataFrame({'bools': np.random.randn(num_values) > 0})
167
self._check_pandas_roundtrip(df)
169
def test_boolean_nulls(self):
# Writes a boolean array with a null mask and expects an object column
# holding None at the masked positions on read.
# NOTE(review): `path` and `num_values` are defined on lines not visible
# in this excerpt.
170
# pandas requires upcast to object dtype
172
self.test_files.append(path)
177
writer = FeatherWriter(path)
179
# `mask` is passed to write_array as the null mask; `values` holds the
# boolean payload.
mask = np.random.randint(0, 10, size=num_values) < 3
180
values = np.random.randint(0, 10, size=num_values) < 5
181
writer.write_array('bools', values, mask)
183
# Build the expected column: object dtype so None can be stored.
expected = values.astype(object)
184
expected[mask] = None
188
ex_frame = pd.DataFrame({'bools': expected})
190
result = feather.read_dataframe(path)
191
assert_frame_equal(result, ex_frame)
193
def test_boolean_object_nulls(self):
    # Object-dtype column cycling through False, None, True (100 repeats),
    # round-tripped and compared against itself.
    pattern = [False, None, True]
    bools = np.array(pattern * 100, dtype=object)
    self._check_pandas_roundtrip(pd.DataFrame({'bools': bools}))
198
def test_strings(self):
# Round-trips a column mixing bytes, unicode and native strings with two
# kinds of missing value (None and NaN).
# NOTE(review): `repeats` is defined on a line not visible in this
# excerpt.
200
values = [b'foo', None, u'bar', 'qux', np.nan]
201
df = pd.DataFrame({'strings': values * repeats})
203
# Expected frame: b'foo' becomes 'foo' and NaN becomes None.
values = ['foo', None, u'bar', 'qux', None]
204
expected = pd.DataFrame({'strings': values * repeats})
205
self._check_pandas_roundtrip(df, expected)
207
def test_nan_as_null(self):
    # Create a nan that is not numpy.nan
    derived_nan = np.nan * 2
    cycle = ['foo', np.nan, derived_nan, 'bar']
    # NOTE(review): no dtype is given, so NumPy chooses a common dtype for
    # this mixed str/float list; that looks like a string array with the
    # NaNs stringified rather than kept as floats -- confirm whether
    # dtype=object was intended.
    strings = np.array(cycle * 10)
    frame = pd.DataFrame({'strings': strings})
    self._check_pandas_roundtrip(frame)
213
def test_category(self):
# Round-trips a string column (with None and NaN entries) after converting
# it to pandas' 'category' dtype.
# NOTE(review): `repeats` is defined on a line not visible in this
# excerpt.
215
values = ['foo', None, u'bar', 'qux', np.nan]
216
df = pd.DataFrame({'strings': values * repeats})
217
df['strings'] = df['strings'].astype('category')
218
self._check_pandas_roundtrip(df)
220
def test_timestamp(self):
    # Round-trip a naive timestamp column together with a tz-aware copy of
    # it (localized to UTC, then converted to America/Los_Angeles).
    stamps = pd.date_range('2016-03-28', periods=10)
    df = pd.DataFrame({'naive': stamps})
    as_utc = df.naive.dt.tz_localize('utc')
    df['with_tz'] = as_utc.dt.tz_convert('America/Los_Angeles')
    self._check_pandas_roundtrip(df)
227
def test_non_string_columns(self):
    # Integer column labels are expected to come back as their str() form.
    columns = {0: [1, 2, 3, 4],
               1: [True, False, True, False]}
    frame = pd.DataFrame(columns)
    stringified = frame.rename(columns=str)
    self._check_pandas_roundtrip(frame, stringified)