~pythonxy/pythonxy-upstream/python-pandas

Viewing changes to pandas/lib/src/tseries.pyx

Committer: Wes McKinney
Date: 2009-08-05 03:30:16 UTC
Revision ID: git-v1:c6b236db73ff81007909be6406f0e484edc4a9eb

first commit with cleaned up code

git-svn-id: http://pandas.googlecode.com/svn/trunk@5 d5231056-7de3-11de-ac95-d976489f1ece

files added:
LICENSE

MANIFEST.in

README

TODO

pandas

pandas/__init__.py

pandas/core

pandas/core/__init__.py

pandas/core/api.py

pandas/core/collection.py

pandas/core/daterange.py

pandas/core/datetools.py

pandas/core/frame.py

pandas/core/groupby.py

pandas/core/index.py

pandas/core/matrix.py

pandas/core/mixins.py

pandas/core/pytools.py

pandas/core/series.py

pandas/core/tests

pandas/core/tests/__init__.py

pandas/core/tests/test_dataframe.py

pandas/core/tests/test_datamatrix.py

pandas/core/tests/test_datetools.py

pandas/core/tests/test_groupby.py

pandas/core/tests/test_index.py

pandas/core/tests/test_series.py

pandas/io

pandas/io/__init__.py

pandas/io/parsers.py

pandas/lib

pandas/lib/__init__.py

pandas/lib/include

pandas/lib/include/Python.pxi

pandas/lib/include/datetime.pxi

pandas/lib/include/numpy.pxi

pandas/lib/include/python_datetime.pxd

pandas/lib/src

pandas/lib/src/tdates.c

pandas/lib/src/tseries.c

pandas/lib/src/tseries.pyx

setup.py

setupegg.py

Show diffs side-by-side

added added

removed removed

pandas/lib/src/tseries.pyx

include "numpy.pxi"

include "datetime.pxi"

include "Python.pxi"

# initialize numpy

import_array()

import numpy as np

cimport numpy as np

isnan = np.isnan

cdef double NaN = <double> np.NaN

from datetime import datetime as pydatetime

from python_dict cimport *

from numpy cimport ndarray, npy_float64, npy_int32, npy_int8, npy_float128

cimport cython

cdef inline object trycall(object func, object arg):
    '''
    Call ``func(arg)`` and return its result.

    If the call raises, the error is replaced by a generic ``Exception``
    whose message names the argument that was being processed.
    '''
    try:
        return func(arg)
    except Exception:
        # ``except Exception`` (not a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate unchanged.  ``arg`` is wrapped in a
        # 1-tuple so that a tuple-valued ``arg`` is formatted as one value
        # instead of being unpacked by the ``%`` operator.
        raise Exception('Error calling func on index %s' % (arg,))

cdef inline int int_max(int a, int b):
    # Larger of two C ints; ``a`` is returned when they are equal.
    if a >= b:
        return a
    return b

cdef inline int int_min(int a, int b):
    # Smaller of two C ints.  The comparison is ``<=``; the previous body
    # was a copy of int_max (``>=``) and therefore returned the maximum.
    return a if a <= b else b

def map_indices(ndarray index):
    '''
    Build a dict that maps each value of ``index`` to its position in the
    flattened array.

    Example:
        array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1}

    When a value occurs more than once, the position of its last occurrence
    is the one kept (later assignments overwrite earlier ones).

    Done in Cython for speed.
    '''
    cdef int pos, n
    cdef flatiter it
    cdef object label
    cdef dict mapping = {}

    n = PyArray_SIZE(index)
    it = PyArray_IterNew(index)

    for pos from 0 <= pos < n:
        # Box the element under the iterator as a Python object
        label = PyArray_GETITEM(index, <void *> it.dataptr)
        mapping[label] = pos
        PyArray_ITER_NEXT(it)

    return mapping

def match(ndarray A, ndarray B):
    '''
    --> match(a, b)

    Close equivalent of R's match function.

    For every value of A, look up the position of that value in B (positions
    come from map_indices(B)).

    Values of A that are absent from B are stored as NaN in an intermediate
    float64 buffer; the buffer is then cast with ``astype(int)``, so the
    integer returned for such values is whatever that NaN-to-int conversion
    produces.

    Example:

    >>> b
    array([[ 0.        ,  0.26929312],
           [ 1.        ,  0.49540359],
           [ 2.        ,  0.66389941],
           [ 3.        ,  0.66235806],
           [ 4.        ,  0.97993956],
           [ 5.        ,  0.83804732],
           [ 6.        ,  0.75033074],
           [ 7.        ,  0.10250388],
           [ 8.        ,  0.66591799],
           [ 9.        ,  0.18337242]])
    >>> a
    array([1, 3, 6, 8, 4, 5, 7, 0, 2, 9])

    # Now with match we can realign b based on a
    >>> b[match(a, b[:,0]),:]
    array([[ 1.        ,  0.49540359],
           [ 3.        ,  0.66235806],
           [ 6.        ,  0.75033074],
           [ 8.        ,  0.66591799],
           [ 4.        ,  0.97993956],
           [ 5.        ,  0.83804732],
           [ 7.        ,  0.10250388],
           [ 0.        ,  0.26929312],
           [ 2.        ,  0.66389941],
           [ 9.        ,  0.18337242]])
    '''
    cdef int pos, n
    cdef flatiter it
    cdef dict locations
    cdef double *out
    cdef double missing
    cdef object label
    cdef ndarray positions

    missing = <double> np.NaN
    locations = map_indices(B)

    it = PyArray_IterNew(A)
    n = PyArray_SIZE(A)

    positions = <ndarray> np.empty(n, np.float64)
    out = <double *> positions.data

    for pos from 0 <= pos < n:
        label = PyArray_GETITEM(A, <void *> it.dataptr)
        if label not in locations:
            out[pos] = missing
        else:
            out[pos] = <double> locations[label]
        PyArray_ITER_NEXT(it)

    return positions.astype(int)

128

129

def reindex(ndarray index, ndarray arr, dict idxMap):
    '''
    Return a new float64 ndarray with one entry per label of ``index``.

    For each label, ``idxMap`` gives the position of that label's value in
    ``arr``; labels missing from ``idxMap`` produce NaN.

    The element of ``arr`` is read through a ``double *`` cast.
    NOTE(review): this presumably requires ``arr`` to hold float64 data --
    confirm against callers.

    This is significantly faster than doing it in pure Python.
    '''
    cdef ndarray out
    cdef double *out_data
    cdef int pos, n
    cdef flatiter value_iter, label_iter
    cdef object label
    cdef double missing = <double> np.NaN

    n = PyArray_SIZE(index)
    out = <ndarray> np.empty(n, np.float64)
    out_data = <double *> out.data

    value_iter = PyArray_IterNew(arr)
    label_iter = PyArray_IterNew(index)

    for pos from 0 <= pos < n:
        label = PyArray_GETITEM(index, <void *> label_iter.dataptr)
        PyArray_ITER_NEXT(label_iter)
        if label in idxMap:
            # Jump the value iterator to the mapped position and copy
            PyArray_ITER_GOTO1D(value_iter, idxMap[label])
            out_data[pos] = (<double *> (value_iter.dataptr))[0]
        else:
            out_data[pos] = missing

    return out

165

166

def reindexObj(ndarray index, ndarray arr, dict idxMap):
    '''
    Object-dtype counterpart of reindex.

    Return a new object ndarray with one entry per label of ``index``.  For
    each label, ``idxMap`` gives the position of that label's value in
    ``arr``; labels missing from ``idxMap`` produce ``np.NaN``.

    This is significantly faster than doing it in pure Python.
    '''
    cdef ndarray out
    cdef int pos, n, status
    cdef flatiter value_iter, label_iter, out_iter
    cdef object label, missing, item

    missing = np.NaN
    n = PyArray_SIZE(index)

    out = <ndarray> np.empty(n, dtype=np.object_)

    value_iter = PyArray_IterNew(arr)
    label_iter = PyArray_IterNew(index)
    out_iter = PyArray_IterNew(out)

    for pos from 0 <= pos < n:
        label = PyArray_GETITEM(index, <void *> label_iter.dataptr)
        PyArray_ITER_NEXT(label_iter)

        if label in idxMap:
            PyArray_ITER_GOTO1D(value_iter, idxMap[label])
            item = PyArray_GETITEM(arr, <void *> value_iter.dataptr)
        else:
            item = missing

        # The output iterator advances in lock-step with the label iterator
        status = PyArray_SETITEM(out, <void *> out_iter.dataptr, item)
        PyArray_ITER_NEXT(out_iter)

    return out

206

207

@cython.boundscheck(False)
def reindexObject(ndarray[object, ndim=1] index,
                  ndarray[object, ndim=1] arr,
                  dict idxMap):
    '''
    Buffer-typed variant of reindexObj for 1-d object arrays.

    Return a new object ndarray with one entry per label of ``index``.  For
    each label, ``idxMap`` gives the position of that label's value in
    ``arr``; labels missing from ``idxMap`` produce ``np.NaN``.
    '''
    cdef int pos, n
    cdef object label
    cdef object missing = np.NaN

    n = index.shape[0]
    cdef ndarray[object, ndim = 1] out = np.empty(n, dtype=object)

    for pos from 0 <= pos < n:
        label = index[pos]
        if PyDict_Contains(idxMap, label):
            out[pos] = arr[idxMap[label]]
        else:
            out[pos] = missing
    return out

233

234

cdef tuple _nofill(ndarray oldIndex, ndarray newIndex, dict oldMap, dict newMap):
    '''
    Fill vector / mask for an exact-match reindex (no forward or backward
    filling).

    Returns (fillVec, maskVec), both of length len(newIndex):
      * fillVec (int32): for every new position whose label also occurs in
        oldIndex, the position of that label in oldIndex; -1 elsewhere.
      * maskVec (int8): 1 where a match was found, 0 elsewhere.

    ``oldMap`` is accepted for signature parity with _pad / _backfill and is
    not used here.
    '''
    cdef int *fill_data
    cdef char *mask_data
    cdef int old_pos, new_pos, n_old, n_new
    cdef flatiter old_iter
    cdef object label
    cdef ndarray fillVec
    cdef ndarray maskVec

    fillVec = <ndarray> np.empty(len(newIndex), dtype = np.int32)
    maskVec = <ndarray> np.zeros(len(newIndex), dtype = np.int8)

    fill_data = <int *> fillVec.data
    mask_data = <char *> maskVec.data

    n_new = PyArray_SIZE(fillVec)
    n_old = PyArray_SIZE(oldIndex)
    old_iter = PyArray_IterNew(oldIndex)

    for old_pos from 0 <= old_pos < n_old:
        label = PyArray_GETITEM(oldIndex, <void *> old_iter.dataptr)
        # Do not step the iterator past the last element
        if old_pos < n_old - 1:
            PyArray_ITER_NEXT(old_iter)
        if label in newMap:
            new_pos = newMap[label]
            fill_data[new_pos] = old_pos
            mask_data[new_pos] = 1

    # Unmatched slots still hold np.empty garbage; overwrite with -1
    for new_pos from 0 <= new_pos < n_new:
        if mask_data[new_pos] == 0:
            fill_data[new_pos] = -1

    return fillVec, maskVec

269

270

cdef tuple _backfill(ndarray oldIndex, ndarray newIndex, dict oldMap, dict newMap):
    '''
    Backfilling logic for generating fill vector

    Returns (fillVec, maskVec), both of length len(newIndex).  fillVec holds,
    for each new position, the location (taken from ``oldMap``) of the old
    label used to fill it, or -1 where maskVec is 0.

    ``newMap`` is not used.

    NOTE(review): the backwards walk compares labels with ``>`` / ``<=`` and
    so appears to assume both indexes are sorted in increasing order --
    confirm against callers.

    Diagram of what's going on

    Old      New    Fill vector    Mask
             .        0               1
             .        0               1
             .        0               1
    A        A        0               1
             .        1               1
             .        1               1
             .        1               1
             .        1               1
             .        1               1
    B        B        1               1
             .        2               1
             .        2               1
             .        2               1
    C        C        2               1
             .                        0
             .                        0
    '''
    cdef int i, oldLength, newLength, curLoc
    cdef int newPos, oldPos

    # Make empty vectors
    cdef ndarray fillVec
    cdef ndarray maskVec
    fillVec = <ndarray> np.empty(len(newIndex), dtype = np.int32)
    maskVec = <ndarray> np.zeros(len(newIndex), dtype = np.int8)

    # Get references
    cdef int *fillLocs
    cdef char *mask
    fillLocs = <int *> fillVec.data
    mask = <char *> maskVec.data

    # Create the iterators
    cdef flatiter iterold, iternew
    iterold = PyArray_IterNew(oldIndex)
    iternew = PyArray_IterNew(newIndex)

    # Get the size
    oldLength = PyArray_SIZE(oldIndex)
    newLength = PyArray_SIZE(newIndex)

    # Current positions: both indexes are walked from the end
    oldPos = oldLength - 1
    newPos = newLength - 1

    # References holding indices
    cdef object prevOld, curOld

    # ``oldPos >= 0`` guards an empty old index (the iterator would
    # otherwise be moved to position -1).
    while newPos >= 0 and oldPos >= 0:
        # Move to the current position
        PyArray_ITER_GOTO1D(iternew, newPos)
        PyArray_ITER_GOTO1D(iterold, oldPos)

        # Get the current index
        curOld = PyArray_GETITEM(oldIndex, <void *> iterold.dataptr)

        # Until we reach a point where we are before the curOld point
        while PyArray_GETITEM(newIndex, <void *> iternew.dataptr) > curOld:
            newPos -= 1
            if newPos < 0:
                break
            PyArray_ITER_GOTO1D(iternew, newPos)

        # Every remaining new label lies after curOld, so nothing is left
        # to fill.  Without this exit the scan below would still see the
        # stale iterator position and write to fillLocs[-1] / mask[-1].
        if newPos < 0:
            break

        # Get the location in the old index
        curLoc = oldMap[curOld]

        # At the beginning of the old index
        if oldPos == 0:

            # Make sure we are before the curOld index
            if PyArray_GETITEM(newIndex, <void *> iternew.dataptr) <= curOld:
                fillVec[:newPos + 1] = curLoc
                maskVec[:newPos + 1] = 1

            # Exit the main loop
            break

        else:
            # Move one position back
            PyArray_ITER_GOTO1D(iterold, oldPos - 1)

            # Get the index there
            prevOld = PyArray_GETITEM(oldIndex, <void *> iterold.dataptr)

            # Until we reach the previous index
            while PyArray_GETITEM(newIndex, <void *> iternew.dataptr) > prevOld:

                # Set the current fill location
                fillLocs[newPos] = curLoc
                mask[newPos] = 1

                newPos -= 1
                if newPos < 0:
                    break

                # Move the iterator back
                PyArray_ITER_GOTO1D(iternew, newPos)

        # Move one period back
        oldPos -= 1

    for i from 0 <= i < newLength:
        if mask[i] == 0:
            # Fill from some generic location
            fillLocs[i] = -1

    return (fillVec, maskVec)

384

385

cdef tuple _pad(ndarray oldIndex, ndarray newIndex, dict oldMap, dict newMap):
    '''
    Padding logic for generating fill vector

    Returns (fillVec, maskVec), both of length len(newIndex).  fillVec holds,
    for each new position, the location (taken from ``oldMap``) of the old
    label used to fill it, or -1 where maskVec is 0.

    ``newMap`` is not used.

    NOTE(review): the forward walk compares labels with ``<`` / ``>=`` and
    so appears to assume both indexes are sorted in increasing order --
    confirm against callers.

    Diagram of what's going on

    Old      New    Fill vector    Mask
             .                        0
             .                        0
             .                        0
    A        A        0               1
             .        0               1
             .        0               1
             .        0               1
             .        0               1
             .        0               1
    B        B        1               1
             .        1               1
             .        1               1
             .        1               1
    C        C        2               1
    '''

    # Declare variables
    cdef ndarray fillVec
    cdef ndarray maskVec
    cdef int *fillLocs
    cdef char *mask
    cdef int i, oldLength, newLength, curLoc, newPos, oldPos
    cdef flatiter iterold, iternew
    cdef object nextOld, curOld
    cdef char done

    # Make empty fill vector and mask vector, cast to ndarray
    fillVec = <ndarray> np.empty(len(newIndex), dtype = np.int32)
    maskVec = <ndarray> np.zeros(len(newIndex), dtype = np.int8)

    # Get reference to the arrays inside
    fillLocs = <int *> fillVec.data
    mask = <char *> maskVec.data

    # Create simple ndarray iterators using C API
    iterold = PyArray_IterNew(oldIndex)
    iternew = PyArray_IterNew(newIndex)

    # Length of each index
    oldLength = PyArray_SIZE(oldIndex)
    newLength = PyArray_SIZE(newIndex)

    oldPos = 0
    newPos = 0

    # ``oldPos < oldLength`` guards an empty old index (there would be no
    # element under the iterator to read).
    while newPos < newLength and oldPos < oldLength:
        curOld = PyArray_GETITEM(oldIndex, <void *> iterold.dataptr)

        # At beginning, keep going until we go exceed the
        # first OLD index in the NEW index
        while PyArray_GETITEM(newIndex, <void *> iternew.dataptr) < curOld:
            newPos += 1
            if newPos > newLength - 1:
                break
            PyArray_ITER_NEXT(iternew)

        # Every remaining new label lies before curOld, so nothing is left
        # to fill.  Without this exit the scan below would still see the
        # stale iterator position and write to fillLocs[newLength].
        if newPos > newLength - 1:
            break

        # We got there, get the current location in the old index
        curLoc = oldMap[curOld]

        # We're at the end of the road, need to propagate this value to the end
        if oldPos == oldLength - 1:
            if PyArray_GETITEM(newIndex, <void *> iternew.dataptr) >= curOld:
                fillVec[newPos:] = curLoc
                maskVec[newPos:] = 1
            break
        else:
            # Not at the end, need to go about filling

            # Get the next index so we know when to stop propagating this value
            PyArray_ITER_NEXT(iterold)
            nextOld = PyArray_GETITEM(oldIndex, <void *> iterold.dataptr)

            done = 0

            # Until we reach the next OLD value in the NEW index
            while PyArray_GETITEM(newIndex, <void *> iternew.dataptr) < nextOld:

                # Use this location to fill
                fillLocs[newPos] = curLoc

                # Set mask to be 1 so will not be NaN'd
                mask[newPos] = 1
                newPos += 1

                # We got to the end of the new index
                if newPos > newLength - 1:
                    done = 1
                    break

                # Advance the pointer
                PyArray_ITER_NEXT(iternew)

            # We got to the end of the new index
            if done:
                break

        # We already advanced the iterold pointer to the next value,
        # inc the count
        oldPos += 1

    # Places where the mask is 0, fill with an arbitrary value
    # (will be NA'd out)
    for i from 0 <= i < newLength:
        if mask[i] == 0:
            fillLocs[i] = -1

    return fillVec, maskVec

498

499

def getFillVec(ndarray oldIndex, ndarray newIndex, dict oldMap, dict newMap,
               object kind):
    '''
    Compute the fill vector and boolean mask for reindexing from oldIndex
    to newIndex.

    Parameters
    ----------
    oldIndex, newIndex: ndarray
    oldMap, newMap: dict
        label -> position mappings for the old and new index
    kind: '' (exact matches only, _nofill), 'PAD' (_pad) or
        'BACKFILL' (_backfill)

    Returns
    -------
    (fillVec, mask): int32 ndarray of old positions (-1 where nothing was
        found) and the matching boolean ndarray

    Raises
    ------
    ValueError if ``kind`` is not one of the values above
    '''
    if kind == '':
        fillVec, maskVec = _nofill(oldIndex, newIndex, oldMap, newMap)
    elif kind == 'PAD':
        fillVec, maskVec = _pad(oldIndex, newIndex, oldMap, newMap)
    elif kind == 'BACKFILL':
        fillVec, maskVec = _backfill(oldIndex, newIndex, oldMap, newMap)
    else:
        # Previously an unknown kind fell through with fillVec / maskVec
        # never assigned and failed obscurely on the return line.
        raise ValueError('unrecognized fill kind: %r' % (kind,))

    # Builtin ``bool`` (as used by the groupby functions in this module)
    # instead of the ``np.bool`` alias.
    return fillVec, maskVec.astype(bool)

510

511

def getMergeVec(ndarray values, dict indexMap):
    '''
    Look up every element of ``values`` in ``indexMap``.

    Returns
    -------
    (fillVec, mask): fillVec is an int32 ndarray holding indexMap[value] for
        every value found in ``indexMap`` and -1 otherwise; mask is the
        matching boolean ndarray (True where the value was found).  Both have
        one entry per element of ``values`` (flattened order).
    '''
    cdef int *fillLocs
    cdef char *mask
    cdef int i, j, length

    cdef flatiter itervals
    cdef object val
    cdef ndarray fillVec
    cdef ndarray maskVec

    # Size the outputs by the number of elements actually iterated.  They
    # used to be sized with len(values), which is smaller than the element
    # count for arrays with more than one dimension and let the loop below
    # write past the end of both buffers.
    length = PyArray_SIZE(values)

    fillVec = <ndarray> np.empty(length, dtype = np.int32)
    maskVec = <ndarray> np.zeros(length, dtype = np.int8)

    fillLocs = <int *> fillVec.data
    mask = <char *> maskVec.data

    itervals = PyArray_IterNew(values)

    for i from 0 <= i < length:
        val = PyArray_GETITEM(values, <void *> itervals.dataptr)
        if val in indexMap:
            j = indexMap[val]
            fillLocs[i] = j
            mask[i] = 1

        PyArray_ITER_NEXT(itervals)

    # Unmatched slots still hold np.empty garbage; overwrite with -1
    for i from 0 <= i < length:
        if mask[i] == 0:
            fillLocs[i] = -1

    # Builtin ``bool`` (as used by the groupby functions in this module)
    # instead of the ``np.bool`` alias.
    return fillVec, maskVec.astype(bool)

546

547

# Positive and negative infinity as C doubles; both count as "null" below.
cdef double INF = <double> np.inf
cdef double NEGINF = -INF

cdef inline _checknull(object val):
    # A value is treated as null when it is None, is not equal to itself
    # (NaN), or compares equal to +/- infinity.
    if val is None:
        return True
    return val != val or val == INF or val == NEGINF

552

553

cdef ndarray _isnullobj(input):
    '''
    Element-wise _checknull over ``input``.

    Returns an int8 ndarray with one entry per element of ``input``
    (flattened order): 1 where _checknull is true, 0 elsewhere.
    '''
    cdef int pos, n
    cdef object item
    cdef ndarray[npy_int8, ndim=1] flags
    cdef flatiter it

    n = PyArray_SIZE(input)
    flags = <ndarray> np.zeros(n, dtype=np.int8)
    it = PyArray_IterNew(input)

    for pos from 0 <= pos < n:
        item = PyArray_GETITEM(input, <void *> it.dataptr)
        if _checknull(item):
            flags[pos] = 1
        PyArray_ITER_NEXT(it)

    return flags

574

575

def isnull(input):
    '''
    Replacement for numpy.isnan / ~numpy.isfinite which is suitable
    for use on object arrays.

    Parameters
    ----------
    input: ndarray or object value

    Returns
    -------
    boolean ndarray or boolean
        * ndarray with dtype kind 'O' or 'S': element-wise _checknull
        * any other ndarray: the logical negation of numpy.isfinite
        * anything else: _checknull(input)
    '''
    cdef ndarray[npy_int8, ndim=1] result

    if isinstance(input, np.ndarray):
        if input.dtype.kind in ('O', 'S'):
            result = _isnullobj(input)

            # Builtin ``bool`` (as used by the groupby functions in this
            # module) instead of the ``np.bool`` alias.
            return result.astype(bool)
        else:
            # ``~`` is the logical NOT for boolean arrays; unary minus on
            # a boolean array is rejected by current NumPy.
            return ~np.isfinite(input)
    else:
        return _checknull(input)

599

600

def notnull(input):
    '''
    Replacement for numpy.isfinite / ~numpy.isnan which is suitable
    for use on object arrays.  Logical complement of isnull.

    Parameters
    ----------
    input: ndarray or object value

    Returns
    -------
    boolean ndarray or boolean
    '''
    if isinstance(input, np.ndarray):
        # isnull returns a boolean ndarray for ndarray input; ``~`` is the
        # logical NOT for boolean arrays (unary minus on a boolean array is
        # rejected by current NumPy).
        return ~isnull(input)
    else:
        return not bool(_checknull(input))

617

618

def reindexNew(ndarray index, ndarray arr, dict idxMap):
    '''
    Conform ``arr`` to a new index.

    Returns a float64 ndarray with one entry per label of ``index``:
    the element of ``arr`` at position ``idxMap[label]`` when the label is a
    key of ``idxMap``, NaN otherwise.

    The element of ``arr`` is read through a ``double *`` cast.
    NOTE(review): this presumably requires ``arr`` to hold float64 data --
    confirm against callers.

    This is significantly faster than doing it in pure Python.
    '''
    cdef ndarray conformed
    cdef double *dest
    cdef int k, count
    cdef flatiter src, labels
    cdef double fill_value
    cdef object label

    fill_value = <double> np.NaN
    count = PyArray_SIZE(index)

    conformed = <ndarray> np.empty(count, np.float64)
    dest = <double *> conformed.data

    src = PyArray_IterNew(arr)
    labels = PyArray_IterNew(index)

    for k from 0 <= k < count:
        label = PyArray_GETITEM(index, <void *> labels.dataptr)
        PyArray_ITER_NEXT(labels)
        if label in idxMap:
            PyArray_ITER_GOTO1D(src, idxMap[label])
            dest[k] = (<double *> (src.dataptr))[0]
        else:
            dest[k] = fill_value

    return conformed

654

655

# Binary operations on C doubles.  All share the ``double_func`` signature
# declared below so they can be handed to _applyFunc as function pointers.
# The comparison helpers return their truth value as a double (1.0 or 0.0).

cdef double __add(double a, double b):
    return (a + b)

cdef double __sub(double a, double b):
    return (a - b)

cdef double __div(double a, double b):
    return (a / b)

cdef double __mul(double a, double b):
    return (a * b)

cdef double __eq(double a, double b):
    return (a == b)

cdef double __ne(double a, double b):
    return (a != b)

cdef double __lt(double a, double b):
    return (a < b)

cdef double __gt(double a, double b):
    return (a > b)

cdef double __pow(double a, double b):
    return (a ** b)

# Pointer to a function taking two doubles and returning a double
ctypedef double (* double_func)(double a, double b)

675

676

cdef ndarray _applyFunc(double_func func, ndarray index, object ao,
                        object bo, dict aMap, dict bMap):
    '''
    C function taking a function pointer for quickly combining two Series
    objects.

    For every label of ``index`` that is a key of both ``aMap`` and ``bMap``
    the result holds func(a[aMap[label]], b[bMap[label]]); for every other
    label it holds NaN.  ``ao`` and ``bo`` are converted to contiguous 1-d
    double arrays first.

    Returns a float64 ndarray with one entry per label of ``index``.
    '''
    cdef ndarray A, B, result
    cdef double *result_data
    cdef double *a_data
    cdef double *b_data
    cdef int i, length
    cdef flatiter iteridx
    cdef double nan
    cdef object idx

    # This is EXTREMELY important, otherwise you will get very
    # undesired results: the values are read below through raw
    # ``double *`` pointers, so they must be contiguous doubles.
    A = PyArray_ContiguousFromAny(ao, NPY_DOUBLE, 1, 1)
    B = PyArray_ContiguousFromAny(bo, NPY_DOUBLE, 1, 1)

    nan = <double> np.NaN
    length = PyArray_SIZE(index)

    result = <ndarray> np.empty(length, np.float64)
    result_data = <double *> result.data

    # Data pointers are loop-invariant.  (The flat iterators that used to
    # be created over A and B were never read and have been dropped.)
    a_data = <double *> A.data
    b_data = <double *> B.data

    iteridx = PyArray_IterNew(index)

    for i from 0 <= i < length:
        idx = PyArray_GETITEM(index, <void *> iteridx.dataptr)
        PyArray_ITER_NEXT(iteridx)

        if idx not in aMap or idx not in bMap:
            result_data[i] = nan
            continue

        result_data[i] = func(a_data[aMap[idx]], b_data[bMap[idx]])

    return result

715

716

def combineFunc(object name, ndarray index, object ao,
                object bo, dict aMap, dict bMap):
    '''
    Combine two series (values and index maps for each passed in) using the
    indicated function.

    ``name`` selects the operation and must be one of "__add__", "__sub__",
    "__div__", "__mul__", "__eq__", "__ne__", "__lt__", "__gt__" or
    "__pow__"; any other value raises an Exception.  The work itself is done
    by _applyFunc.
    '''
    cdef double_func op

    # Resolve the C function pointer first, then make a single call
    if name == "__add__":
        op = __add
    elif name == "__sub__":
        op = __sub
    elif name == "__div__":
        op = __div
    elif name == "__mul__":
        op = __mul
    elif name == "__eq__":
        op = __eq
    elif name == "__ne__":
        op = __ne
    elif name == "__lt__":
        op = __lt
    elif name == "__gt__":
        op = __gt
    elif name == "__pow__":
        op = __pow
    else:
        raise Exception('bad funcname requested of Cython code')

    return _applyFunc(op, index, ao, bo, aMap, bMap)

742

743

#-------------------------------------------------------------------------------

744

# Groupby-related functions

745

746

@cython.boundscheck(False)
def arrmap(ndarray[object, ndim=1] index, object func):
    '''
    Apply ``func`` to every element of the 1-d object array ``index`` and
    return the results as a new object ndarray of the same length.
    '''
    cdef int n = index.shape[0]
    cdef int pos = 0

    cdef ndarray[object, ndim=1] mapped = np.empty(n, dtype=np.object_)

    for pos from 0 <= pos < n:
        mapped[pos] = func(index[pos])

    return mapped

757

758

@cython.boundscheck(False)
def groupby_withnull_old(ndarray[object, ndim = 1] index, object keyfunc):
    '''
    Group the values of ``index`` by ``keyfunc(value)``.

    Returns a dict mapping each key to the list of index values that
    produced it.  Index values whose mapped key is null (per _isnullobj) are
    skipped while scanning and, when ``null_values.any()`` is true, stored
    together as an ndarray under the key ``np.NaN``.

    NOTE(review): the first element is used as the starting key without
    consulting the null mask -- confirm whether a leading null key is
    expected by callers.
    '''
    cdef dict groups
    cdef int length = index.shape[0]
    cdef object idx
    cdef object curKey, key
    cdef list members
    cdef int i = 0

    groups = PyDict_New()

    # Bounds checking is off, so index[0] must not be read for empty input.
    # (The former ``length != index.shape[0]`` test compared the length with
    # itself and could never fire; it has been dropped.)
    if length == 0:
        return groups

    cdef ndarray[object, ndim=1] mapped_index = arrmap(index, keyfunc)

    cdef ndarray[npy_int8, ndim=1] null_mask = _isnullobj(mapped_index)

    bool_mask = null_mask.astype(bool)

    null_values = np.asarray(index)[bool_mask]

    if null_values.any():
        PyDict_SetItem(groups, np.NaN, null_values)

    idx = index[0]
    key = mapped_index[0]

    # Algorithm notes
    # - Tries to reduce the number of calls to PyDict_GetItem,
    #   'lazily' evaluates

    while i < length:
        if not PyDict_Contains(groups, key):
            members = [idx]
            PyDict_SetItem(groups, key, members)
        else:
            members = <list> PyDict_GetItem(groups, key)
            members.append(idx)
        i += 1
        curKey = key

        # Extend the current group for as long as the key repeats.  Null
        # entries are skipped here, after the ``i < length`` test; the old
        # separate ``while null_mask[i] and i < length`` loop read
        # null_mask[length] before checking the bound.
        while i < length:
            if null_mask[i]:
                i += 1
                continue

            idx = index[i]
            key = mapped_index[i]
            if key == curKey:
                members.append(idx)
                i += 1
            else:
                break

    return groups

830

831

@cython.boundscheck(False)
def groupby_withnull(ndarray[object, ndim = 1] index, object keyfunc):
    '''
    Group the values of ``index`` by ``keyfunc(value)``.

    Returns a dict mapping each key to the list of index values that
    produced it.  Index values whose mapped key is null (per _isnullobj) are
    skipped while scanning and, when ``null_values.any()`` is true, stored
    together as an ndarray under the key ``np.NaN``.

    NOTE(review): the first element is used as the starting key without
    consulting the null mask -- confirm whether a leading null key is
    expected by callers.
    '''
    cdef dict groups
    cdef int length = index.shape[0]
    cdef object idx
    cdef object curKey, key
    cdef list members
    cdef int i = 0

    groups = PyDict_New()

    # Bounds checking is off, so index[0] must not be read for empty input.
    # (The former ``length != index.shape[0]`` test compared the length with
    # itself and could never fire; it has been dropped.)
    if length == 0:
        return groups

    cdef ndarray[object, ndim=1] mapped_index = arrmap(index, keyfunc)

    cdef ndarray[npy_int8, ndim=1] null_mask = _isnullobj(mapped_index)

    bool_mask = null_mask.astype(bool)

    null_values = np.asarray(index)[bool_mask]

    if null_values.any():
        PyDict_SetItem(groups, np.NaN, null_values)

    idx = index[0]
    key = mapped_index[0]

    # Algorithm notes
    # - Tries to reduce the number of dict lookups,
    #   'lazily' evaluates

    while i < length:
        if key not in groups:
            members = [idx]
            groups[key] = members
        else:
            members = <list> groups[key]
            members.append(idx)
        i += 1
        curKey = key

        # Extend the current group for as long as the key repeats.  Null
        # entries are skipped here, after the ``i < length`` test; the old
        # separate ``while null_mask[i] and i < length`` loop read
        # null_mask[length] before checking the bound.
        while i < length:
            if null_mask[i]:
                i += 1
                continue

            idx = index[i]
            key = mapped_index[i]
            if key == curKey:
                members.append(idx)
                i += 1
            else:
                break

    return groups

903

904

@cython.boundscheck(False)
def groupby(ndarray[object, ndim = 1] index, object keyfunc):
    '''
    Group the values of ``index`` by ``keyfunc(value)``.

    Returns a dict mapping each key to the list of index values that
    produced it, in order of appearance.  An empty ``index`` gives an empty
    dict.

    The key of the first element is computed by calling ``keyfunc``
    directly; all later keys go through trycall, which replaces any error
    with a generic Exception naming the offending value.
    '''
    cdef dict groups
    cdef int length = index.shape[0]
    cdef object idx
    cdef object curKey, key
    cdef list members
    cdef int i = 0

    groups = PyDict_New()

    # Bounds checking is off, so index[0] must not be read for empty input.
    # (The former ``length != index.shape[0]`` test compared the length with
    # itself and could never fire; it has been dropped.)
    if length == 0:
        return groups

    idx = index[i]
    key = keyfunc(idx)

    # Algorithm notes
    # - Tries to reduce the number of calls to PyDict_GetItem, 'lazily' evaluates

    while i < length:
        # Start a new group or reopen an existing one for the pending key
        if not PyDict_Contains(groups, key):
            members = [idx]
            PyDict_SetItem(groups, key, members)
        else:
            members = <list> PyDict_GetItem(groups, key)
            members.append(idx)
        i += 1
        curKey = key

        # Keep appending without touching the dict while the key repeats
        while i < length:
            idx = index[i]
            key = trycall(keyfunc, idx)
            if key == curKey:
                members.append(idx)
                i += 1
            else:
                break

    return groups

953

954

@cython.boundscheck(False)
def groupbyfunc(ndarray[object, ndim = 1] index,
                ndarray[npy_float64, ndim = 1] values,
                object keyfunc, object applyfunc):
    '''
    Group ``values`` by ``keyfunc`` applied to the matching entry of
    ``index`` and reduce every group with ``applyfunc``.

    Returns a dict mapping each key to ``applyfunc(np.asarray(members))``,
    where ``members`` are the values whose index entry produced that key.
    Empty input gives an empty dict.

    Raises an Exception when ``index`` and ``values`` differ in length.
    '''
    cdef dict groups
    cdef int length = values.shape[0]
    cdef object idx
    cdef object curKey, key
    cdef list members, grouplist
    cdef int i = 0

    groups = PyDict_New()

    if length != index.shape[0]:
        raise Exception('Dates and values were not the same length!')

    # Bounds checking is off, so index[0] must not be read for empty input
    if length == 0:
        return groups

    idx = index[i]
    key = trycall(keyfunc, idx)

    # Algorithm notes
    # - Tries to reduce the number of calls to PyDict_GetItem,
    #   'lazily' evaluates

    while i < length:
        # Start a new group or reopen an existing one for the pending key
        if not PyDict_Contains(groups, key):
            members = [values[i]]
            PyDict_SetItem(groups, key, members)
        else:
            members = <list> PyDict_GetItem(groups, key)
            members.append(values[i])
        i += 1
        curKey = key

        # Keep appending without touching the dict while the key repeats
        while i < length:
            idx = index[i]
            key = trycall(keyfunc, idx)
            if key == curKey:
                members.append(values[i])
                i += 1
            else:
                break

    # Replace every list of members by the reduced value
    grouplist = PyDict_Keys(groups)

    i = 0
    length = len(grouplist)
    for i from 0 <= i < length:
        key = grouplist[i]
        members = <list> PyDict_GetItem(groups, key)
        PyDict_SetItem(groups, key, applyfunc(np.asarray(members)))

    return groups

1019

Older »