#define COMPUTE_SHIFT_MASKC(dest, source, mask)			\
    mask ## _mask = vec_lvsl (0, mask);				\
    source ## _mask = vec_lvsl (0, source);

#define LOAD_VECTOR(source)					\
do								\
{								\
    vector unsigned char tmp1, tmp2;				\
    tmp1 = (typeof(tmp1))vec_ld (0, source);			\
    tmp2 = (typeof(tmp2))vec_ld (15, source);			\
    v ## source = (typeof(v ## source))				\
	vec_perm (tmp1, tmp2, source ## _mask);			\
} while (0)

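/* A minimal usage sketch (illustrative): for a pointer named src, a
 * caller declares "vector unsigned int vsrc;" plus DECLARE_SRC_MASK_VAR
 * and then does
 *
 *     COMPUTE_SHIFT_MASK (src);
 *     LOAD_VECTOR (src);
 *
 * vec_lvsl builds a permute pattern from the low address bits, and
 * vec_perm merges the two aligned vec_ld reads that straddle the target;
 * this is how big-endian AltiVec performs an unaligned 16-byte load.
 * load_128_unaligned below is exactly this sequence. */
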
#define LOAD_VECTORS(dest, source)				\
do								\
{								\
    LOAD_VECTOR(source);					\
    v ## dest = (typeof(v ## dest))vec_ld (0, dest);		\
} while (0)

#define LOAD_VECTORSC(dest, source, mask)			\
do								\
{								\
    LOAD_VECTORS(dest, source);					\
    LOAD_VECTOR(mask);						\
} while (0)

#define DECLARE_SRC_MASK_VAR vector unsigned char src_mask
#define DECLARE_MASK_MASK_VAR vector unsigned char mask_mask

#else

/* Now the COMPUTE_SHIFT_{MASK, MASKS, MASKC} below are just no-ops.
 * They are defined that way because little-endian AltiVec can do unaligned
 * reads natively and has no need for constructing the permutation pattern
 * variables.
 */
#define COMPUTE_SHIFT_MASK(source)

#define COMPUTE_SHIFT_MASKS(dest, source)

#define COMPUTE_SHIFT_MASKC(dest, source, mask)

# define LOAD_VECTOR(source)				\
    v ## source = *((typeof(v ## source)*)source);

# define LOAD_VECTORS(dest, source)			\
    LOAD_VECTOR(source);				\
    LOAD_VECTOR(dest);

# define LOAD_VECTORSC(dest, source, mask)		\
    LOAD_VECTORS(dest, source);				\
    LOAD_VECTOR(mask);

#define DECLARE_SRC_MASK_VAR
#define DECLARE_MASK_MASK_VAR

#endif /* WORDS_BIGENDIAN */

#define LOAD_VECTORSM(dest, source, mask)			\
    LOAD_VECTORSC (dest, source, mask);				\
    v ## source = pix_multiply (v ## source,			\
				splat_alpha (v ## mask));

#define STORE_VECTOR(dest)					\
    vec_st ((vector unsigned int) v ## dest, 0, dest);

/* load 4 pixels from a 16-byte boundary aligned address */
static force_inline vector unsigned int
load_128_aligned (const uint32_t* src)
{
    return *((vector unsigned int *) src);
}

/* load 4 pixels from an unaligned address */
static force_inline vector unsigned int
load_128_unaligned (const uint32_t* src)
{
    vector unsigned int vsrc;
    DECLARE_SRC_MASK_VAR;

    COMPUTE_SHIFT_MASK (src);
    LOAD_VECTOR (src);

    return vsrc;
}

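/* Typical call pattern (an illustrative sketch): the fast paths below peel
 * leading pixels until the destination is 16-byte aligned and then run
 *
 *     while (w >= 4)
 *     {
 *         save_128_aligned (dst, load_128_unaligned (src));
 *         dst += 4; src += 4; w -= 4;
 *     }
 *
 * (save_128_aligned is defined next), so only the source ever takes the
 * unaligned vec_perm path. */
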
/* save 4 pixels on a 16-byte boundary aligned address */
static force_inline void
save_128_aligned (uint32_t* data,
		  vector unsigned int vdata)
{
    STORE_VECTOR(data);
}

static force_inline vector unsigned int
create_mask_16_128 (uint16_t mask)
{
    uint16_t *src;
    vector unsigned short vsrc;
    DECLARE_SRC_MASK_VAR;

    src = &mask;

    COMPUTE_SHIFT_MASK (src);
    LOAD_VECTOR (src);
    return (vector unsigned int) vec_splat(vsrc, 0);
}

static force_inline vector unsigned int
create_mask_1x32_128 (const uint32_t *src)
{
    vector unsigned int vsrc;
    DECLARE_SRC_MASK_VAR;

    COMPUTE_SHIFT_MASK (src);
    LOAD_VECTOR (src);
    return vec_splat(vsrc, 0);
}

static force_inline vector unsigned int
create_mask_32_128 (uint32_t mask)
{
    return create_mask_1x32_128(&mask);
}

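/* For example, create_mask_32_128 (0xff000000) yields a vector whose four
 * 32-bit lanes all hold 0xff000000; the mask_* globals initialized in
 * _pixman_implementation_create_vmx at the bottom of this file are built
 * this way. */
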
static force_inline vector unsigned int
unpack_32_1x128 (uint32_t data)
{
    vector unsigned int vdata = {0, 0, 0, data};
    vector unsigned short lo;

    lo = (vector unsigned short)
#ifdef WORDS_BIGENDIAN
	vec_mergel ((vector unsigned char) AVV(0),
		    (vector unsigned char) vdata);
#else
	vec_mergel ((vector unsigned char) vdata,
		    (vector unsigned char) AVV(0));
#endif

    return (vector unsigned int) lo;
}

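/* Example (big-endian): unpack_32_1x128 (0x80ff0040) returns the eight
 * 16-bit lanes {0, 0, 0, 0, 0x0080, 0x00ff, 0x0000, 0x0040}; each channel
 * is zero-extended to 16 bits so per-channel products cannot overflow. */
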
static force_inline vector unsigned int
unpacklo_128_16x8 (vector unsigned int data1, vector unsigned int data2)
{
    vector unsigned char lo;

    /* unpack to short */
    lo = (vector unsigned char)
#ifdef WORDS_BIGENDIAN
	vec_mergel ((vector unsigned char) data2,
		    (vector unsigned char) data1);
#else
	vec_mergel ((vector unsigned char) data1,
		    (vector unsigned char) data2);
#endif

    return (vector unsigned int) lo;
}

static force_inline vector unsigned int
unpackhi_128_16x8 (vector unsigned int data1, vector unsigned int data2)
{
    vector unsigned char hi;

    /* unpack to short */
    hi = (vector unsigned char)
#ifdef WORDS_BIGENDIAN
	vec_mergeh ((vector unsigned char) data2,
		    (vector unsigned char) data1);
#else
	vec_mergeh ((vector unsigned char) data1,
		    (vector unsigned char) data2);
#endif

    return (vector unsigned int) hi;
}

static force_inline vector unsigned int
unpacklo_128_8x16 (vector unsigned int data1, vector unsigned int data2)
{
    vector unsigned short lo;

    lo = (vector unsigned short)
#ifdef WORDS_BIGENDIAN
	vec_mergel ((vector unsigned short) data2,
		    (vector unsigned short) data1);
#else
	vec_mergel ((vector unsigned short) data1,
		    (vector unsigned short) data2);
#endif

    return (vector unsigned int) lo;
}

static force_inline vector unsigned int
unpackhi_128_8x16 (vector unsigned int data1, vector unsigned int data2)
{
    vector unsigned short hi;

    hi = (vector unsigned short)
#ifdef WORDS_BIGENDIAN
	vec_mergeh ((vector unsigned short) data2,
		    (vector unsigned short) data1);
#else
	vec_mergeh ((vector unsigned short) data1,
		    (vector unsigned short) data2);
#endif

    return (vector unsigned int) hi;
}

static force_inline void
unpack_128_2x128 (vector unsigned int data1, vector unsigned int data2,
		  vector unsigned int* data_lo, vector unsigned int* data_hi)
{
    *data_lo = unpacklo_128_16x8(data1, data2);
    *data_hi = unpackhi_128_16x8(data1, data2);
}

static force_inline void
unpack_128_2x128_16 (vector unsigned int data1, vector unsigned int data2,
		     vector unsigned int* data_lo, vector unsigned int* data_hi)
{
    *data_lo = unpacklo_128_8x16(data1, data2);
    *data_hi = unpackhi_128_8x16(data1, data2);
}

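/* Together these split four packed 8888 pixels into two vectors of eight
 * 16-bit channels each (the _16x8 pair) or widen 16-bit lanes further to
 * 32 bits (the _8x16 pair); pack_2x128_128 below performs the inverse
 * narrowing. */
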
static force_inline vector unsigned int
unpack_565_to_8888 (vector unsigned int lo)
{
    vector unsigned int r, g, b, rb, t;

    r = vec_and (vec_sl(lo, create_mask_32_128(8)), mask_red);
    g = vec_and (vec_sl(lo, create_mask_32_128(5)), mask_green);
    b = vec_and (vec_sl(lo, create_mask_32_128(3)), mask_blue);

    rb = vec_or (r, b);
    t  = vec_and (rb, mask_565_fix_rb);
    t  = vec_sr (t, create_mask_32_128(5));
    rb = vec_or (rb, t);

    t  = vec_and (g, mask_565_fix_g);
    t  = vec_sr (t, create_mask_32_128(6));
    g  = vec_or (g, t);

    return vec_or (rb, g);
}

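/* A scalar reference for the same conversion (an illustrative sketch; the
 * helper name is hypothetical and nothing in pixman calls it). Each 565
 * channel is shifted into place and its top bits are replicated into the
 * low bits, which is what the mask_565_fix_rb / mask_565_fix_g steps
 * above implement in vector form. */
static force_inline uint32_t
scalar_565_to_8888 (uint16_t p)
{
    uint32_t r5 = (p >> 11) & 0x1f;
    uint32_t g6 = (p >>  5) & 0x3f;
    uint32_t b5 =  p        & 0x1f;

    uint32_t r8 = (r5 << 3) | (r5 >> 2);	/* replicate top 3 bits */
    uint32_t g8 = (g6 << 2) | (g6 >> 4);	/* replicate top 2 bits */
    uint32_t b8 = (b5 << 3) | (b5 >> 2);

    return (r8 << 16) | (g8 << 8) | b8;
}
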
static force_inline uint32_t
pack_1x128_32 (vector unsigned int data)
{
    vector unsigned char vpack;

    vpack = vec_packsu((vector unsigned short) data,
		       (vector unsigned short) AVV(0));

    return vec_extract((vector unsigned int) vpack, 1);
}

static force_inline vector unsigned int
pack_2x128_128 (vector unsigned int lo, vector unsigned int hi)
{
    vector unsigned char vpack;

    vpack = vec_packsu((vector unsigned short) hi,
		       (vector unsigned short) lo);

    return (vector unsigned int) vpack;
}

static force_inline void
negate_2x128 (vector unsigned int  data_lo,
	      vector unsigned int  data_hi,
	      vector unsigned int* neg_lo,
	      vector unsigned int* neg_hi)
{
    *neg_lo = vec_xor (data_lo, mask_00ff);
    *neg_hi = vec_xor (data_hi, mask_00ff);
}

static force_inline int
is_opaque (vector unsigned int x)
{
    uint32_t cmp_result;
    vector bool int ffs = vec_cmpeq(x, x);

    cmp_result = vec_all_eq(x, ffs);

    return (cmp_result & 0x8888) == 0x8888;
}

static force_inline int
is_zero (vector unsigned int x)
{
    uint32_t cmp_result;

    cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0));

    return cmp_result == 0xffff;
}

static force_inline int
is_transparent (vector unsigned int x)
{
    uint32_t cmp_result;

    cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0));

    return (cmp_result & 0x8888) == 0x8888;
}

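/* These predicates test four pixels at once so the blend loops can take
 * shortcuts: is_opaque lets an OVER loop store the source unmodified,
 * is_zero lets it skip the store entirely, and is_transparent short-cuts
 * a fully zero mask, as in combine4 and the scaled-nearest loop below. */
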
static force_inline vector unsigned int
expand_pixel_8_1x128 (uint8_t data)
{
    vector unsigned int vdata;

    vdata = unpack_32_1x128 ((uint32_t) data);

#ifdef WORDS_BIGENDIAN
    return vec_perm (vdata, vdata,
		     (vector unsigned char)AVV (
			 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
			 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
#else
    return vec_perm (vdata, vdata,
		     (vector unsigned char)AVV (
			 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
			 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
#endif
}

static force_inline vector unsigned int
expand_alpha_1x128 (vector unsigned int data)
{
#ifdef WORDS_BIGENDIAN
    return vec_perm (data, data,
		     (vector unsigned char)AVV (
			 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
			 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
#else
    return vec_perm (data, data,
		     (vector unsigned char)AVV (
			 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
			 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
#endif
}

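/* expand_alpha_1x128 splats each pixel's 16-bit alpha lane across that
 * pixel's four channel lanes, e.g. an unpacked pixel {A, R, G, B} becomes
 * {A, A, A, A}; this is the per-channel alpha factor consumed by over
 * and in_over. */
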
static force_inline void
expand_alpha_2x128 (vector unsigned int  data_lo,
		    vector unsigned int  data_hi,
		    vector unsigned int* alpha_lo,
		    vector unsigned int* alpha_hi)
{
    *alpha_lo = expand_alpha_1x128(data_lo);
    *alpha_hi = expand_alpha_1x128(data_hi);
}

static force_inline void
expand_alpha_rev_2x128 (vector unsigned int  data_lo,
			vector unsigned int  data_hi,
			vector unsigned int* alpha_lo,
			vector unsigned int* alpha_hi)
{
#ifdef WORDS_BIGENDIAN
    *alpha_lo = vec_perm (data_lo, data_lo,
			  (vector unsigned char)AVV (
			      0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
			      0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));

    *alpha_hi = vec_perm (data_hi, data_hi,
			  (vector unsigned char)AVV (
			      0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
			      0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
#else
    *alpha_lo = vec_perm (data_lo, data_lo,
			  (vector unsigned char)AVV (
			      0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
			      0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));

    *alpha_hi = vec_perm (data_hi, data_hi,
			  (vector unsigned char)AVV (
			      0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
			      0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
#endif
}

static force_inline void
pix_multiply_2x128 (vector unsigned int* data_lo,
		    vector unsigned int* data_hi,
		    vector unsigned int* alpha_lo,
		    vector unsigned int* alpha_hi,
		    vector unsigned int* ret_lo,
		    vector unsigned int* ret_hi)
{
    *ret_lo = pix_multiply(*data_lo, *alpha_lo);
    *ret_hi = pix_multiply(*data_hi, *alpha_hi);
}

static force_inline void
over_2x128 (vector unsigned int* src_lo,
	    vector unsigned int* src_hi,
	    vector unsigned int* alpha_lo,
	    vector unsigned int* alpha_hi,
	    vector unsigned int* dst_lo,
	    vector unsigned int* dst_hi)
{
    vector unsigned int t1, t2;

    negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2);

    pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi);

    *dst_lo = (vector unsigned int)
	vec_adds ((vector unsigned char) *src_lo,
		  (vector unsigned char) *dst_lo);

    *dst_hi = (vector unsigned int)
	vec_adds ((vector unsigned char) *src_hi,
		  (vector unsigned char) *dst_hi);
}

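/* over_2x128 is the Porter-Duff OVER operator on unpacked channels:
 *
 *     dst = src + (255 - alpha) * dst / 255    (per 8-bit channel, rounded)
 *
 * negate_2x128 supplies (255 - alpha) as an xor with 0x00ff, pix_multiply
 * performs the rounded division by 255, and vec_adds saturates the final
 * sum to 8 bits. */
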
static force_inline void
in_over_2x128 (vector unsigned int* src_lo,
	       vector unsigned int* src_hi,
	       vector unsigned int* alpha_lo,
	       vector unsigned int* alpha_hi,
	       vector unsigned int* mask_lo,
	       vector unsigned int* mask_hi,
	       vector unsigned int* dst_lo,
	       vector unsigned int* dst_hi)
{
    vector unsigned int s_lo, s_hi;
    vector unsigned int a_lo, a_hi;

    pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
    pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi);

    over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
}

static force_inline uint32_t
core_combine_over_u_pixel_vmx (uint32_t src, uint32_t dst)
{
    uint8_t a;
    vector unsigned int vmxs;

    a = src >> 24;

    if (a == 0xff)
    {
	return src;
    }
    else if (src)
    {
	vmxs = unpack_32_1x128 (src);
	return pack_1x128_32(
	    over(vmxs, expand_alpha_1x128 (vmxs), unpack_32_1x128 (dst)));
    }

    return dst;
}

static force_inline uint32_t
combine1 (const uint32_t *ps, const uint32_t *pm)
{
    uint32_t s = *ps;

    if (pm)
    {
	vector unsigned int ms, mm;

	mm = unpack_32_1x128 (*pm);
	mm = expand_alpha_1x128 (mm);

	ms = unpack_32_1x128 (s);
	ms = pix_multiply (ms, mm);

	s = pack_1x128_32 (ms);
    }

    return s;
}

static force_inline vector unsigned int
combine4 (const uint32_t* ps, const uint32_t* pm)
{
    vector unsigned int vmx_src_lo, vmx_src_hi;
    vector unsigned int vmx_msk_lo, vmx_msk_hi;
    vector unsigned int s;

    if (pm)
    {
	vmx_msk_lo = load_128_unaligned(pm);

	if (is_transparent(vmx_msk_lo))
	    return (vector unsigned int) AVV(0);
    }

    s = load_128_unaligned(ps);

    if (pm)
    {
	unpack_128_2x128(s, (vector unsigned int) AVV(0),
			 &vmx_src_lo, &vmx_src_hi);

	unpack_128_2x128(vmx_msk_lo, (vector unsigned int) AVV(0),
			 &vmx_msk_lo, &vmx_msk_hi);

	expand_alpha_2x128(vmx_msk_lo, vmx_msk_hi, &vmx_msk_lo, &vmx_msk_hi);

	pix_multiply_2x128(&vmx_src_lo, &vmx_src_hi,
			   &vmx_msk_lo, &vmx_msk_hi,
			   &vmx_src_lo, &vmx_src_hi);

	s = pack_2x128_128(vmx_src_lo, vmx_src_hi);
    }

    return s;
}

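/* combine1/combine4 implement the "unified" source fetch: with a non-NULL
 * pm the source pixels are first multiplied by the mask's expanded alpha;
 * with pm == NULL they reduce to a plain load, which is how the nearest
 * scaling loop below uses them. */
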
static void
vmx_combine_over_u_no_mask (uint32_t *      dest,
			    const uint32_t *src,
			    int             width)

static pixman_bool_t
vmx_fill (pixman_implementation_t *imp,
	  uint32_t *               bits,
	  int                      stride,
	  int                      bpp,
	  int                      x,
	  int                      y,
	  int                      width,
	  int                      height,
	  uint32_t                 filler)
{
    uint32_t byte_width;
    uint8_t *byte_line;

    vector unsigned int vfiller;

    if (bpp == 8)
    {
	uint8_t b;
	uint16_t w;

	stride = stride * (int) sizeof (uint32_t) / 1;
	byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
	byte_width = width;
	stride *= 1;

	b = filler & 0xff;
	w = (b << 8) | b;
	filler = (w << 16) | w;
    }
    else if (bpp == 16)
    {
	stride = stride * (int) sizeof (uint32_t) / 2;
	byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
	byte_width = 2 * width;
	stride *= 2;

	filler = (filler & 0xffff) * 0x00010001;
    }
    else if (bpp == 32)
    {
	stride = stride * (int) sizeof (uint32_t) / 4;
	byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
	byte_width = 4 * width;
	stride *= 4;
    }
    else
    {
	return FALSE;
    }

    vfiller = create_mask_1x32_128(&filler);

    while (height--)
    {
	int w;
	uint8_t *d = byte_line;
	byte_line += stride;
	w = byte_width;

	if (w >= 1 && ((uintptr_t)d & 1))
	{
	    *(uint8_t *)d = filler;
	    w -= 1;
	    d += 1;
	}

	while (w >= 2 && ((uintptr_t)d & 3))
	{
	    *(uint16_t *)d = filler;
	    w -= 2;
	    d += 2;
	}

	while (w >= 4 && ((uintptr_t)d & 15))
	{
	    *(uint32_t *)d = filler;
	    w -= 4;
	    d += 4;
	}

	while (w >= 128)
	{
	    vec_st(vfiller, 0, (uint32_t *) d);
	    vec_st(vfiller, 0, (uint32_t *) d + 4);
	    vec_st(vfiller, 0, (uint32_t *) d + 8);
	    vec_st(vfiller, 0, (uint32_t *) d + 12);
	    vec_st(vfiller, 0, (uint32_t *) d + 16);
	    vec_st(vfiller, 0, (uint32_t *) d + 20);
	    vec_st(vfiller, 0, (uint32_t *) d + 24);
	    vec_st(vfiller, 0, (uint32_t *) d + 28);

	    d += 128;
	    w -= 128;
	}

	if (w >= 64)
	{
	    vec_st(vfiller, 0, (uint32_t *) d);
	    vec_st(vfiller, 0, (uint32_t *) d + 4);
	    vec_st(vfiller, 0, (uint32_t *) d + 8);
	    vec_st(vfiller, 0, (uint32_t *) d + 12);

	    d += 64;
	    w -= 64;
	}

	if (w >= 32)
	{
	    vec_st(vfiller, 0, (uint32_t *) d);
	    vec_st(vfiller, 0, (uint32_t *) d + 4);

	    d += 32;
	    w -= 32;
	}

	if (w >= 16)
	{
	    vec_st(vfiller, 0, (uint32_t *) d);

	    d += 16;
	    w -= 16;
	}

	while (w >= 4)
	{
	    *(uint32_t *)d = filler;
	    w -= 4;
	    d += 4;
	}

	if (w >= 2)
	{
	    *(uint16_t *)d = filler;
	    w -= 2;
	    d += 2;
	}

	if (w >= 1)
	{
	    *(uint8_t *)d = filler;
	}
    }

    return TRUE;
}

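/* The fill loop narrows progressively: peel single bytes/halfwords/words
 * until d is 16-byte aligned, stream 128/64/32/16-byte bursts of vec_st,
 * then finish the remainder with scalar stores, so every vec_st hits an
 * aligned address. */
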
static void
vmx_composite_src_x888_8888 (pixman_implementation_t *imp,
			     pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t *dst_line, *dst;
    uint32_t *src_line, *src;
    int32_t w;
    int dst_stride, src_stride;

    PIXMAN_IMAGE_GET_LINE (
	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (
	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w && (uintptr_t)dst & 15)
	{
	    *dst++ = *src++ | 0xff000000;
	    w--;
	}

	while (w >= 16)
	{
	    vector unsigned int vmx_src1, vmx_src2, vmx_src3, vmx_src4;

	    vmx_src1 = load_128_unaligned (src);
	    vmx_src2 = load_128_unaligned (src + 4);
	    vmx_src3 = load_128_unaligned (src + 8);
	    vmx_src4 = load_128_unaligned (src + 12);

	    save_128_aligned (dst, vec_or (vmx_src1, mask_ff000000));
	    save_128_aligned (dst + 4, vec_or (vmx_src2, mask_ff000000));
	    save_128_aligned (dst + 8, vec_or (vmx_src3, mask_ff000000));
	    save_128_aligned (dst + 12, vec_or (vmx_src4, mask_ff000000));

	    dst += 16;
	    src += 16;
	    w -= 16;
	}

	while (w)
	{
	    *dst++ = *src++ | 0xff000000;
	    w--;
	}
    }
}

static void
vmx_composite_over_8888_8888 (pixman_implementation_t *imp,
			      pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    int dst_stride, src_stride;
    uint32_t *dst_line, *dst;
    uint32_t *src_line, *src;

    PIXMAN_IMAGE_GET_LINE (
	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (
	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;

	vmx_combine_over_u (imp, op, dst, src, NULL, width);
    }
}

static void
vmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
				   pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src;
    uint32_t *dst_line, d;
    uint32_t *mask_line, m;
    uint32_t pack_cmp;
    int dst_stride, mask_stride;

    vector unsigned int vsrc, valpha, vmask, vdest;

    vector unsigned int vmx_dst, vmx_dst_lo, vmx_dst_hi;
    vector unsigned int vmx_mask, vmx_mask_lo, vmx_mask_hi;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (
	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (
	mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    vsrc = unpacklo_128_16x8(create_mask_1x32_128 (&src),
			     (vector unsigned int) AVV(0));

    valpha = expand_alpha_1x128(vsrc);

    while (height--)
    {
	int w = width;
	const uint32_t *pm = (uint32_t *)mask_line;
	uint32_t *pd = (uint32_t *)dst_line;

	dst_line += dst_stride;
	mask_line += mask_stride;

	while (w && (uintptr_t)pd & 15)
	{
	    m = *pm++;

	    if (m)
	    {
		d = *pd;
		vmask = unpack_32_1x128(m);
		vdest = unpack_32_1x128(d);

		*pd = pack_1x128_32(in_over (vsrc, valpha, vmask, vdest));
	    }

	    pd++;
	    w--;
	}

	while (w >= 4)
	{
	    /* pm is NOT necessarily 16-byte aligned */
	    vmx_mask = load_128_unaligned (pm);

	    pack_cmp = vec_all_eq(vmx_mask, (vector unsigned int) AVV(0));

	    /* if all bits in the mask are zero, pack_cmp is non-zero and
	     * the store can be skipped */
	    if (pack_cmp == 0)
	    {
		/* pd is 16-byte aligned */
		vmx_dst = load_128_aligned (pd);

		unpack_128_2x128 (vmx_mask, (vector unsigned int) AVV(0),
				  &vmx_mask_lo, &vmx_mask_hi);

		unpack_128_2x128 (vmx_dst, (vector unsigned int) AVV(0),
				  &vmx_dst_lo, &vmx_dst_hi);

		in_over_2x128 (&vsrc, &vsrc,
			       &valpha, &valpha,
			       &vmx_mask_lo, &vmx_mask_hi,
			       &vmx_dst_lo, &vmx_dst_hi);

		save_128_aligned(pd, pack_2x128_128(vmx_dst_lo, vmx_dst_hi));
	    }

	    pd += 4;
	    pm += 4;
	    w -= 4;
	}

	while (w)
	{
	    m = *pm++;

	    if (m)
	    {
		d = *pd;
		vmask = unpack_32_1x128(m);
		vdest = unpack_32_1x128(d);

		*pd = pack_1x128_32(in_over (vsrc, valpha, vmask, vdest));
	    }

	    pd++;
	    w--;
	}
    }
}

static void
vmx_composite_add_8_8 (pixman_implementation_t *imp,
		       pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t *dst_line, *dst;
    uint8_t *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;
    uint16_t t;

    PIXMAN_IMAGE_GET_LINE (
	src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (
	dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	src = src_line;

	dst_line += dst_stride;
	src_line += src_stride;
	w = width;

	/* Small head */
	while (w && (uintptr_t)dst & 3)
	{
	    t = (*dst) + (*src++);
	    *dst++ = t | (0 - (t >> 8));
	    w--;
	}

	vmx_combine_add_u (imp, op,
			   (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);

	dst += w & 0xfffc;
	src += w & 0xfffc;

	w &= 3;

	/* Small tail */
	while (w)
	{
	    t = (*dst) + (*src++);
	    *dst++ = t | (0 - (t >> 8));
	    w--;
	}
    }
}

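/* The scalar head and tail rely on a branch-free saturating add: t holds
 * a 9-bit sum, so (t >> 8) is 1 exactly on overflow, 0 - (t >> 8) is then
 * all ones, and the OR forces the stored byte to 0xff. */
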
static void
vmx_composite_add_8888_8888 (pixman_implementation_t *imp,
			     pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t *dst_line, *dst;
    uint32_t *src_line, *src;
    int dst_stride, src_stride;

    PIXMAN_IMAGE_GET_LINE (
	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (
	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;

	vmx_combine_add_u (imp, op, dst, src, NULL, width);
    }
}

static force_inline void
scaled_nearest_scanline_vmx_8888_8888_OVER (uint32_t*       pd,
					    const uint32_t* ps,
					    int32_t         w,
					    pixman_fixed_t  vx,
					    pixman_fixed_t  unit_x,
					    pixman_fixed_t  src_width_fixed,
					    pixman_bool_t   fully_transparent_src)
{
    uint32_t s, d;
    const uint32_t* pm = NULL;

    vector unsigned int vmx_dst_lo, vmx_dst_hi;
    vector unsigned int vmx_src_lo, vmx_src_hi;
    vector unsigned int vmx_alpha_lo, vmx_alpha_hi;

    if (fully_transparent_src)
	return;

    /* Align dst on a 16-byte boundary */
    while (w && ((uintptr_t)pd & 15))
    {
	d = *pd;
	s = combine1 (ps + pixman_fixed_to_int (vx), pm);
	vx += unit_x;
	while (vx >= 0)
	    vx -= src_width_fixed;

	*pd++ = core_combine_over_u_pixel_vmx (s, d);
	w--;
    }

    while (w >= 4)
    {
	vector unsigned int tmp;
	uint32_t tmp1, tmp2, tmp3, tmp4;

	tmp1 = *(ps + pixman_fixed_to_int (vx));
	vx += unit_x;
	while (vx >= 0)
	    vx -= src_width_fixed;
	tmp2 = *(ps + pixman_fixed_to_int (vx));
	vx += unit_x;
	while (vx >= 0)
	    vx -= src_width_fixed;
	tmp3 = *(ps + pixman_fixed_to_int (vx));
	vx += unit_x;
	while (vx >= 0)
	    vx -= src_width_fixed;
	tmp4 = *(ps + pixman_fixed_to_int (vx));
	vx += unit_x;
	while (vx >= 0)
	    vx -= src_width_fixed;

	tmp[0] = tmp1;
	tmp[1] = tmp2;
	tmp[2] = tmp3;
	tmp[3] = tmp4;

	vmx_src_hi = combine4 ((const uint32_t *) &tmp, pm);

	if (is_opaque (vmx_src_hi))
	{
	    save_128_aligned (pd, vmx_src_hi);
	}
	else if (!is_zero (vmx_src_hi))
	{
	    vmx_dst_hi = load_128_aligned (pd);

	    unpack_128_2x128 (vmx_src_hi, (vector unsigned int) AVV(0),
			      &vmx_src_lo, &vmx_src_hi);

	    unpack_128_2x128 (vmx_dst_hi, (vector unsigned int) AVV(0),
			      &vmx_dst_lo, &vmx_dst_hi);

	    expand_alpha_2x128 (
		vmx_src_lo, vmx_src_hi, &vmx_alpha_lo, &vmx_alpha_hi);

	    over_2x128 (&vmx_src_lo, &vmx_src_hi,
			&vmx_alpha_lo, &vmx_alpha_hi,
			&vmx_dst_lo, &vmx_dst_hi);

	    /* rebuild the 4 pixel data and save */
	    save_128_aligned (pd, pack_2x128_128 (vmx_dst_lo, vmx_dst_hi));
	}

	w -= 4;
	pd += 4;
    }

    while (w)
    {
	d = *pd;
	s = combine1 (ps + pixman_fixed_to_int (vx), pm);
	vx += unit_x;
	while (vx >= 0)
	    vx -= src_width_fixed;

	*pd++ = core_combine_over_u_pixel_vmx (s, d);
	w--;
    }
}

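/* vx is a 16.16 fixed-point source x coordinate: pixman_fixed_to_int
 * selects the nearest source pixel, and the repeated subtraction of
 * src_width_fixed keeps the coordinate within one source width for
 * repeating sources. */
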
FAST_NEAREST_MAINLOOP (vmx_8888_8888_cover_OVER,
		       scaled_nearest_scanline_vmx_8888_8888_OVER,
		       uint32_t, uint32_t, COVER)
FAST_NEAREST_MAINLOOP (vmx_8888_8888_none_OVER,
		       scaled_nearest_scanline_vmx_8888_8888_OVER,
		       uint32_t, uint32_t, NONE)
FAST_NEAREST_MAINLOOP (vmx_8888_8888_pad_OVER,
		       scaled_nearest_scanline_vmx_8888_8888_OVER,
		       uint32_t, uint32_t, PAD)
FAST_NEAREST_MAINLOOP (vmx_8888_8888_normal_OVER,
		       scaled_nearest_scanline_vmx_8888_8888_OVER,
		       uint32_t, uint32_t, NORMAL)

static const pixman_fast_path_t vmx_fast_paths[] =
{
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888),
3083
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888),
3084
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888),
3085
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, vmx_composite_over_8888_8888),
3086
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, vmx_composite_over_n_8888_8888_ca),
3087
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, vmx_composite_over_n_8888_8888_ca),
3088
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, vmx_composite_over_n_8888_8888_ca),
3089
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, vmx_composite_over_n_8888_8888_ca),
3092
PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, vmx_composite_add_8_8),
3093
PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, vmx_composite_add_8888_8888),
3094
PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, vmx_composite_add_8888_8888),
3097
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, vmx_composite_src_x888_8888),
3098
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, vmx_composite_src_x888_8888),
3100
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, vmx_8888_8888),
3101
SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, vmx_8888_8888),
3102
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, vmx_8888_8888),
3103
SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, vmx_8888_8888),
1991
3105
{ PIXMAN_OP_NONE },
static uint32_t *
vmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
{
    int w = iter->width;
    vector unsigned int ff000000 = mask_ff000000;
    uint32_t *dst = iter->buffer;
    uint32_t *src = (uint32_t *)iter->bits;

    iter->bits += iter->stride;

    while (w && ((uintptr_t)dst) & 0x0f)
    {
	*dst++ = (*src++) | 0xff000000;
	w--;
    }

    while (w >= 4)
    {
	save_128_aligned(dst, vec_or(load_128_unaligned(src), ff000000));

	dst += 4;
	src += 4;
	w -= 4;
    }

    while (w)
    {
	*dst++ = (*src++) | 0xff000000;
	w--;
    }

    return iter->buffer;
}

static uint32_t *
vmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
{
    int w = iter->width;
    uint32_t *dst = iter->buffer;
    uint8_t *src = iter->bits;
    vector unsigned int vmx0, vmx1, vmx2, vmx3, vmx4, vmx5, vmx6;

    iter->bits += iter->stride;

    while (w && (((uintptr_t)dst) & 15))
    {
	*dst++ = *(src++) << 24;
	w--;
    }

    while (w >= 16)
    {
	vmx0 = load_128_unaligned((uint32_t *) src);

	unpack_128_2x128((vector unsigned int) AVV(0), vmx0, &vmx1, &vmx2);
	unpack_128_2x128_16((vector unsigned int) AVV(0), vmx1, &vmx3, &vmx4);
	unpack_128_2x128_16((vector unsigned int) AVV(0), vmx2, &vmx5, &vmx6);

	save_128_aligned(dst, vmx6);
	save_128_aligned((dst +  4), vmx5);
	save_128_aligned((dst +  8), vmx4);
	save_128_aligned((dst + 12), vmx3);

	dst += 16;
	src += 16;
	w -= 16;
    }

    while (w)
    {
	*dst++ = *(src++) << 24;
	w--;
    }

    return iter->buffer;
}

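/* Each 16-byte run of a8 coverage is widened in two unpack stages
 * (8 -> 16 -> 32 bits) and stored as alpha-only 8888 pixels, i.e.
 * a << 24 for every source byte, matching the scalar head and tail
 * loops. */
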
#define IMAGE_FLAGS							\
    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\
     FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)

static const pixman_iter_info_t vmx_iters[] =
{
    { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
      _pixman_iter_init_bits_stride, vmx_fetch_x8r8g8b8, NULL
    },
    { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
      _pixman_iter_init_bits_stride, vmx_fetch_a8, NULL
    },
    { PIXMAN_null },
};

3200
pixman_implementation_t *
1995
3201
_pixman_implementation_create_vmx (pixman_implementation_t *fallback)
1997
3203
pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);
3206
mask_00ff = create_mask_16_128 (0x00ff);
3207
mask_ff000000 = create_mask_32_128 (0xff000000);
3208
mask_red = create_mask_32_128 (0x00f80000);
3209
mask_green = create_mask_32_128 (0x0000fc00);
3210
mask_blue = create_mask_32_128 (0x000000f8);
3211
mask_565_fix_rb = create_mask_32_128 (0x00e000e0);
3212
mask_565_fix_g = create_mask_32_128 (0x0000c000);
1999
3214
/* Set up function pointers */
2001
3216
imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;