1
from datetime import datetime, timedelta
7
from pandas._libs import iNaT
22
import pandas._testing as tm
25
class TestSeriesMissingData:
26
def test_timedelta_fillna(self):
30
Timestamp("20130101"),
31
Timestamp("20130101"),
32
Timestamp("20130102"),
33
Timestamp("20130103 9:01:01"),
39
result = td.fillna(Timedelta(seconds=0))
45
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
48
tm.assert_series_equal(result, expected)
50
# interpreted as seconds, deprecated
51
with pytest.raises(TypeError, match="Passing integers to fillna"):
54
result = td.fillna(Timedelta(seconds=1))
60
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
63
tm.assert_series_equal(result, expected)
65
result = td.fillna(timedelta(days=1, seconds=1))
68
timedelta(days=1, seconds=1),
71
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
74
tm.assert_series_equal(result, expected)
76
result = td.fillna(np.timedelta64(int(1e9)))
82
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
85
tm.assert_series_equal(result, expected)
87
result = td.fillna(NaT)
93
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
97
tm.assert_series_equal(result, expected)
102
expected = td.fillna(Timedelta(seconds=0))
104
tm.assert_series_equal(result, expected)
109
expected = td.fillna(Timedelta(seconds=0))
110
expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1)
111
tm.assert_series_equal(result, expected)
113
def test_datetime64_fillna(self):
117
Timestamp("20130101"),
118
Timestamp("20130101"),
119
Timestamp("20130102"),
120
Timestamp("20130103 9:01:01"),
129
Timestamp("20130101"),
130
Timestamp("20130101"),
131
Timestamp("20130101"),
132
Timestamp("20130103 9:01:01"),
135
tm.assert_series_equal(result, expected)
141
Timestamp("20130101"),
142
Timestamp("20130101"),
143
Timestamp("20130103 9:01:01"),
144
Timestamp("20130103 9:01:01"),
147
tm.assert_series_equal(result, expected)
150
# make sure that we are treating as integer when filling
151
# this also tests inference of a datetime-like with NaT's
152
s = Series([pd.NaT, pd.NaT, "2013-08-05 15:30:00.000001"])
155
"2013-08-05 15:30:00.000001",
156
"2013-08-05 15:30:00.000001",
157
"2013-08-05 15:30:00.000001",
161
result = s.fillna(method="backfill")
162
tm.assert_series_equal(result, expected)
164
@pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"])
165
def test_datetime64_tz_fillna(self, tz):
169
Timestamp("2011-01-01 10:00"),
171
Timestamp("2011-01-03 10:00"),
175
null_loc = pd.Series([False, True, False, True])
177
result = s.fillna(pd.Timestamp("2011-01-02 10:00"))
180
Timestamp("2011-01-01 10:00"),
181
Timestamp("2011-01-02 10:00"),
182
Timestamp("2011-01-03 10:00"),
183
Timestamp("2011-01-02 10:00"),
186
tm.assert_series_equal(expected, result)
187
# check s is not changed
188
tm.assert_series_equal(pd.isna(s), null_loc)
190
result = s.fillna(pd.Timestamp("2011-01-02 10:00", tz=tz))
193
Timestamp("2011-01-01 10:00"),
194
Timestamp("2011-01-02 10:00", tz=tz),
195
Timestamp("2011-01-03 10:00"),
196
Timestamp("2011-01-02 10:00", tz=tz),
199
tm.assert_series_equal(expected, result)
200
tm.assert_series_equal(pd.isna(s), null_loc)
202
result = s.fillna("AAA")
205
Timestamp("2011-01-01 10:00"),
207
Timestamp("2011-01-03 10:00"),
212
tm.assert_series_equal(expected, result)
213
tm.assert_series_equal(pd.isna(s), null_loc)
217
1: pd.Timestamp("2011-01-02 10:00", tz=tz),
218
3: pd.Timestamp("2011-01-04 10:00"),
223
Timestamp("2011-01-01 10:00"),
224
Timestamp("2011-01-02 10:00", tz=tz),
225
Timestamp("2011-01-03 10:00"),
226
Timestamp("2011-01-04 10:00"),
229
tm.assert_series_equal(expected, result)
230
tm.assert_series_equal(pd.isna(s), null_loc)
233
{1: pd.Timestamp("2011-01-02 10:00"), 3: pd.Timestamp("2011-01-04 10:00")}
237
Timestamp("2011-01-01 10:00"),
238
Timestamp("2011-01-02 10:00"),
239
Timestamp("2011-01-03 10:00"),
240
Timestamp("2011-01-04 10:00"),
243
tm.assert_series_equal(expected, result)
244
tm.assert_series_equal(pd.isna(s), null_loc)
247
idx = pd.DatetimeIndex(
248
["2011-01-01 10:00", pd.NaT, "2011-01-03 10:00", pd.NaT], tz=tz
251
assert s.dtype == f"datetime64[ns, {tz}]"
252
tm.assert_series_equal(pd.isna(s), null_loc)
254
result = s.fillna(pd.Timestamp("2011-01-02 10:00"))
257
Timestamp("2011-01-01 10:00", tz=tz),
258
Timestamp("2011-01-02 10:00"),
259
Timestamp("2011-01-03 10:00", tz=tz),
260
Timestamp("2011-01-02 10:00"),
263
tm.assert_series_equal(expected, result)
264
tm.assert_series_equal(pd.isna(s), null_loc)
266
result = s.fillna(pd.Timestamp("2011-01-02 10:00", tz=tz))
267
idx = pd.DatetimeIndex(
276
expected = Series(idx)
277
tm.assert_series_equal(expected, result)
278
tm.assert_series_equal(pd.isna(s), null_loc)
280
result = s.fillna(pd.Timestamp("2011-01-02 10:00", tz=tz).to_pydatetime())
281
idx = pd.DatetimeIndex(
290
expected = Series(idx)
291
tm.assert_series_equal(expected, result)
292
tm.assert_series_equal(pd.isna(s), null_loc)
294
result = s.fillna("AAA")
297
Timestamp("2011-01-01 10:00", tz=tz),
299
Timestamp("2011-01-03 10:00", tz=tz),
304
tm.assert_series_equal(expected, result)
305
tm.assert_series_equal(pd.isna(s), null_loc)
309
1: pd.Timestamp("2011-01-02 10:00", tz=tz),
310
3: pd.Timestamp("2011-01-04 10:00"),
315
Timestamp("2011-01-01 10:00", tz=tz),
316
Timestamp("2011-01-02 10:00", tz=tz),
317
Timestamp("2011-01-03 10:00", tz=tz),
318
Timestamp("2011-01-04 10:00"),
321
tm.assert_series_equal(expected, result)
322
tm.assert_series_equal(pd.isna(s), null_loc)
326
1: pd.Timestamp("2011-01-02 10:00", tz=tz),
327
3: pd.Timestamp("2011-01-04 10:00", tz=tz),
332
Timestamp("2011-01-01 10:00", tz=tz),
333
Timestamp("2011-01-02 10:00", tz=tz),
334
Timestamp("2011-01-03 10:00", tz=tz),
335
Timestamp("2011-01-04 10:00", tz=tz),
338
tm.assert_series_equal(expected, result)
339
tm.assert_series_equal(pd.isna(s), null_loc)
341
# filling with a naive/other zone, coerce to object
342
result = s.fillna(Timestamp("20130101"))
345
Timestamp("2011-01-01 10:00", tz=tz),
346
Timestamp("2013-01-01"),
347
Timestamp("2011-01-03 10:00", tz=tz),
348
Timestamp("2013-01-01"),
351
tm.assert_series_equal(expected, result)
352
tm.assert_series_equal(pd.isna(s), null_loc)
354
result = s.fillna(Timestamp("20130101", tz="US/Pacific"))
357
Timestamp("2011-01-01 10:00", tz=tz),
358
Timestamp("2013-01-01", tz="US/Pacific"),
359
Timestamp("2011-01-03 10:00", tz=tz),
360
Timestamp("2013-01-01", tz="US/Pacific"),
363
tm.assert_series_equal(expected, result)
364
tm.assert_series_equal(pd.isna(s), null_loc)
366
def test_fillna_dt64tz_with_method(self):
369
ser = pd.Series([pd.Timestamp("2012-11-11 00:00:00+01:00"), pd.NaT])
372
pd.Timestamp("2012-11-11 00:00:00+01:00"),
373
pd.Timestamp("2012-11-11 00:00:00+01:00"),
376
tm.assert_series_equal(ser.fillna(method="pad"), exp)
378
ser = pd.Series([pd.NaT, pd.Timestamp("2012-11-11 00:00:00+01:00")])
381
pd.Timestamp("2012-11-11 00:00:00+01:00"),
382
pd.Timestamp("2012-11-11 00:00:00+01:00"),
385
tm.assert_series_equal(ser.fillna(method="bfill"), exp)
387
def test_fillna_consistency(self):
389
# fillna with a tz aware to a tz-naive, should result in object
391
s = Series([Timestamp("20130101"), pd.NaT])
393
result = s.fillna(Timestamp("20130101", tz="US/Eastern"))
395
[Timestamp("20130101"), Timestamp("2013-01-01", tz="US/Eastern")],
398
tm.assert_series_equal(result, expected)
400
# where (we ignore the errors=)
402
[True, False], Timestamp("20130101", tz="US/Eastern"), errors="ignore"
404
tm.assert_series_equal(result, expected)
407
[True, False], Timestamp("20130101", tz="US/Eastern"), errors="ignore"
409
tm.assert_series_equal(result, expected)
411
# with a non-datetime
412
result = s.fillna("foo")
413
expected = Series([Timestamp("20130101"), "foo"])
414
tm.assert_series_equal(result, expected)
419
tm.assert_series_equal(s2, expected)
421
def test_datetime64tz_fillna_round_issue(self):
425
[pd.NaT, pd.NaT, datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc)]
428
filled = data.fillna(method="bfill")
430
expected = pd.Series(
432
datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc),
433
datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc),
434
datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc),
438
tm.assert_series_equal(filled, expected)
440
def test_fillna_downcast(self):
442
# infer int64 from float64
443
s = pd.Series([1.0, np.nan])
444
result = s.fillna(0, downcast="infer")
445
expected = pd.Series([1, 0])
446
tm.assert_series_equal(result, expected)
448
# infer int64 from float64 when fillna value is a dict
449
s = pd.Series([1.0, np.nan])
450
result = s.fillna({1: 0}, downcast="infer")
451
expected = pd.Series([1, 0])
452
tm.assert_series_equal(result, expected)
454
def test_fillna_int(self):
455
s = Series(np.random.randint(-100, 100, 50))
456
return_value = s.fillna(method="ffill", inplace=True)
457
assert return_value is None
458
tm.assert_series_equal(s.fillna(method="ffill", inplace=False), s)
460
def test_categorical_nan_equality(self):
461
cat = Series(Categorical(["a", "b", "c", np.nan]))
462
exp = Series([True, True, True, False])
464
tm.assert_series_equal(res, exp)
466
def test_categorical_nan_handling(self):
468
# NaNs are represented as -1 in labels
469
s = Series(Categorical(["a", "b", np.nan, "a"]))
470
tm.assert_index_equal(s.cat.categories, Index(["a", "b"]))
471
tm.assert_numpy_array_equal(
472
s.values.codes, np.array([0, 1, -1, 0], dtype=np.int8)
475
def test_fillna_nat(self):
476
series = Series([0, 1, 2, iNaT], dtype="M8[ns]")
478
filled = series.fillna(method="pad")
479
filled2 = series.fillna(value=series.values[2])
481
expected = series.copy()
482
expected.values[3] = expected.values[2]
484
tm.assert_series_equal(filled, expected)
485
tm.assert_series_equal(filled2, expected)
487
df = DataFrame({"A": series})
488
filled = df.fillna(method="pad")
489
filled2 = df.fillna(value=series.values[2])
490
expected = DataFrame({"A": expected})
491
tm.assert_frame_equal(filled, expected)
492
tm.assert_frame_equal(filled2, expected)
494
series = Series([iNaT, 0, 1, 2], dtype="M8[ns]")
496
filled = series.fillna(method="bfill")
497
filled2 = series.fillna(value=series[1])
499
expected = series.copy()
500
expected[0] = expected[1]
502
tm.assert_series_equal(filled, expected)
503
tm.assert_series_equal(filled2, expected)
505
df = DataFrame({"A": series})
506
filled = df.fillna(method="bfill")
507
filled2 = df.fillna(value=series[1])
508
expected = DataFrame({"A": expected})
509
tm.assert_frame_equal(filled, expected)
510
tm.assert_frame_equal(filled2, expected)
512
def test_isna_for_inf(self):
513
s = Series(["a", np.inf, np.nan, pd.NA, 1.0])
514
with pd.option_context("mode.use_inf_as_na", True):
517
e = Series([False, True, True, True, False])
518
de = Series(["a", 1.0], index=[0, 4])
519
tm.assert_series_equal(r, e)
520
tm.assert_series_equal(dr, de)
522
def test_isnull_for_inf_deprecated(self):
524
s = Series(["a", np.inf, np.nan, 1.0])
525
with pd.option_context("mode.use_inf_as_null", True):
529
e = Series([False, True, True, False])
530
de = Series(["a", 1.0], index=[0, 3])
531
tm.assert_series_equal(r, e)
532
tm.assert_series_equal(dr, de)
534
def test_fillna(self, datetime_series):
535
ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5))
537
tm.assert_series_equal(ts, ts.fillna(method="ffill"))
541
exp = Series([0.0, 1.0, 1.0, 3.0, 4.0], index=ts.index)
542
tm.assert_series_equal(ts.fillna(method="ffill"), exp)
544
exp = Series([0.0, 1.0, 3.0, 3.0, 4.0], index=ts.index)
545
tm.assert_series_equal(ts.fillna(method="backfill"), exp)
547
exp = Series([0.0, 1.0, 5.0, 3.0, 4.0], index=ts.index)
548
tm.assert_series_equal(ts.fillna(value=5), exp)
550
msg = "Must specify a fill 'value' or 'method'"
551
with pytest.raises(ValueError, match=msg):
554
msg = "Cannot specify both 'value' and 'method'"
555
with pytest.raises(ValueError, match=msg):
556
datetime_series.fillna(value=0, method="ffill")
559
s1 = Series([np.nan])
561
result = s1.fillna(s2)
562
expected = Series([1.0])
563
tm.assert_series_equal(result, expected)
564
result = s1.fillna({})
565
tm.assert_series_equal(result, s1)
566
result = s1.fillna(Series((), dtype=object))
567
tm.assert_series_equal(result, s1)
568
result = s2.fillna(s1)
569
tm.assert_series_equal(result, s2)
570
result = s1.fillna({0: 1})
571
tm.assert_series_equal(result, expected)
572
result = s1.fillna({1: 1})
573
tm.assert_series_equal(result, Series([np.nan]))
574
result = s1.fillna({0: 1, 1: 1})
575
tm.assert_series_equal(result, expected)
576
result = s1.fillna(Series({0: 1, 1: 1}))
577
tm.assert_series_equal(result, expected)
578
result = s1.fillna(Series({0: 1, 1: 1}, index=[4, 5]))
579
tm.assert_series_equal(result, s1)
581
s1 = Series([0, 1, 2], list("abc"))
582
s2 = Series([0, np.nan, 2], list("bac"))
583
result = s2.fillna(s1)
584
expected = Series([0, 0, 2.0], list("bac"))
585
tm.assert_series_equal(result, expected)
588
s = Series(np.nan, index=[0, 1, 2])
589
result = s.fillna(999, limit=1)
590
expected = Series([999, np.nan, np.nan], index=[0, 1, 2])
591
tm.assert_series_equal(result, expected)
593
result = s.fillna(999, limit=2)
594
expected = Series([999, 999, np.nan], index=[0, 1, 2])
595
tm.assert_series_equal(result, expected)
598
# make sure a string representation of int/float values can be filled
599
# correctly without raising errors or being converted
600
vals = ["0", "1.5", "-0.3"]
602
s = Series([0, 1, np.nan, np.nan, 4], dtype="float64")
603
result = s.fillna(val)
604
expected = Series([0, 1, val, val, 4], dtype="object")
605
tm.assert_series_equal(result, expected)
607
def test_fillna_bug(self):
608
x = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"])
609
filled = x.fillna(method="ffill")
610
expected = Series([np.nan, 1.0, 1.0, 3.0, 3.0], x.index)
611
tm.assert_series_equal(filled, expected)
613
filled = x.fillna(method="bfill")
614
expected = Series([1.0, 1.0, 3.0, 3.0, np.nan], x.index)
615
tm.assert_series_equal(filled, expected)
617
def test_fillna_invalid_method(self, datetime_series):
619
datetime_series.fillna(method="ffil")
620
except ValueError as inst:
621
assert "ffil" in str(inst)
623
def test_ffill(self):
624
ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5))
626
tm.assert_series_equal(ts.ffill(), ts.fillna(method="ffill"))
628
def test_ffill_mixed_dtypes_without_missing_data(self):
    """ffill on a Series mixing a tz-aware datetime and an int is a no-op.

    The input holds no missing values, so the forward-filled result must
    compare equal to the input.
    """
    mixed = pd.Series([datetime(2015, 1, 1, tzinfo=pytz.utc), 1])
    forward_filled = mixed.ffill()
    tm.assert_series_equal(mixed, forward_filled)
634
def test_bfill(self):
635
ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5))
637
tm.assert_series_equal(ts.bfill(), ts.fillna(method="bfill"))
639
def test_timedelta64_nan(self):
641
td = Series([timedelta(days=i) for i in range(10)])
643
# nan ops on timedeltas
647
assert td1[0].value == iNaT
649
assert not isna(td1[0])
651
# GH#16674 iNaT is treated as an integer when given by the user
653
assert not isna(td1[1])
654
assert td1.dtype == np.object_
655
assert td1[1] == iNaT
657
assert not isna(td1[1])
661
assert td1[2].value == iNaT
663
assert not isna(td1[2])
665
# FIXME: don't leave commented-out
667
# this doesn't work, not sure numpy even supports it
668
# result = td[(td>np.timedelta64(timedelta(days=3))) &
669
# td<np.timedelta64(timedelta(days=7)))] = np.nan
670
# assert isna(result).sum() == 7
672
# NumPy limitation =(
674
# def test_logical_range_select(self):
675
# np.random.seed(12345)
676
# selector = -0.5 <= datetime_series <= 0.5
677
# expected = (datetime_series >= -0.5) & (datetime_series <= 0.5)
678
# tm.assert_series_equal(selector, expected)
680
def test_dropna_empty(self):
681
s = Series([], dtype=object)
683
assert len(s.dropna()) == 0
684
return_value = s.dropna(inplace=True)
685
assert return_value is None
689
msg = "No axis named 1 for object type Series"
690
with pytest.raises(ValueError, match=msg):
693
def test_datetime64_tz_dropna(self):
697
Timestamp("2011-01-01 10:00"),
699
Timestamp("2011-01-03 10:00"),
705
[Timestamp("2011-01-01 10:00"), Timestamp("2011-01-03 10:00")], index=[0, 2]
707
tm.assert_series_equal(result, expected)
710
idx = pd.DatetimeIndex(
711
["2011-01-01 10:00", pd.NaT, "2011-01-03 10:00", pd.NaT], tz="Asia/Tokyo"
714
assert s.dtype == "datetime64[ns, Asia/Tokyo]"
718
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
719
Timestamp("2011-01-03 10:00", tz="Asia/Tokyo"),
723
assert result.dtype == "datetime64[ns, Asia/Tokyo]"
724
tm.assert_series_equal(result, expected)
726
def test_dropna_no_nan(self):
727
for s in [Series([1, 2, 3], name="x"), Series([False, True, False], name="x")]:
730
tm.assert_series_equal(result, s)
731
assert result is not s
734
return_value = s2.dropna(inplace=True)
735
assert return_value is None
736
tm.assert_series_equal(s2, s)
738
def test_dropna_intervals(self):
741
IntervalIndex.from_arrays([np.nan, 0, 1, 2], [np.nan, 1, 2, 3]),
745
expected = s.iloc[1:]
746
tm.assert_series_equal(result, expected)
748
def test_valid(self, datetime_series):
749
ts = datetime_series.copy()
750
ts.index = ts.index._with_freq(None)
754
assert len(result) == ts.count()
755
tm.assert_series_equal(result, ts[1::2])
756
tm.assert_series_equal(result, ts[pd.notna(ts)])
759
ser = Series([0, 5.4, 3, np.nan, -0.001])
760
expected = Series([False, False, False, True, False])
761
tm.assert_series_equal(ser.isna(), expected)
763
ser = Series(["hi", "", np.nan])
764
expected = Series([False, False, True])
765
tm.assert_series_equal(ser.isna(), expected)
767
def test_notna(self):
768
ser = Series([0, 5.4, 3, np.nan, -0.001])
769
expected = Series([True, True, True, False, True])
770
tm.assert_series_equal(ser.notna(), expected)
772
ser = Series(["hi", "", np.nan])
773
expected = Series([True, True, False])
774
tm.assert_series_equal(ser.notna(), expected)
776
def test_pad_nan(self):
778
[np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"], dtype=float
781
return_value = x.fillna(method="pad", inplace=True)
782
assert return_value is None
785
[np.nan, 1.0, 1.0, 3.0, 3.0], ["z", "a", "b", "c", "d"], dtype=float
787
tm.assert_series_equal(x[1:], expected[1:])
788
assert np.isnan(x[0]), np.isnan(expected[0])
790
def test_pad_require_monotonicity(self):
791
rng = date_range("1/1/2000", "3/1/2000", freq="B")
793
# neither monotonic increasing or decreasing
794
rng2 = rng[[1, 0, 2]]
796
msg = "index must be monotonic increasing or decreasing"
797
with pytest.raises(ValueError, match=msg):
798
rng2.get_indexer(rng, method="pad")
800
def test_dropna_preserve_name(self, datetime_series):
801
datetime_series[:5] = np.nan
802
result = datetime_series.dropna()
803
assert result.name == datetime_series.name
804
name = datetime_series.name
805
ts = datetime_series.copy()
806
return_value = ts.dropna(inplace=True)
807
assert return_value is None
808
assert ts.name == name
810
def test_series_fillna_limit(self):
811
index = np.arange(10)
812
s = Series(np.random.randn(10), index=index)
814
result = s[:2].reindex(index)
815
result = result.fillna(method="pad", limit=5)
817
expected = s[:2].reindex(index).fillna(method="pad")
818
expected[-3:] = np.nan
819
tm.assert_series_equal(result, expected)
821
result = s[-2:].reindex(index)
822
result = result.fillna(method="bfill", limit=5)
824
expected = s[-2:].reindex(index).fillna(method="backfill")
825
expected[:3] = np.nan
826
tm.assert_series_equal(result, expected)
828
def test_series_pad_backfill_limit(self):
829
index = np.arange(10)
830
s = Series(np.random.randn(10), index=index)
832
result = s[:2].reindex(index, method="pad", limit=5)
834
expected = s[:2].reindex(index).fillna(method="pad")
835
expected[-3:] = np.nan
836
tm.assert_series_equal(result, expected)
838
result = s[-2:].reindex(index, method="backfill", limit=5)
840
expected = s[-2:].reindex(index).fillna(method="backfill")
841
expected[:3] = np.nan
842
tm.assert_series_equal(result, expected)