32
31
#ifdef COMPILE_ALTIVEC_IS_OKAY
37
/* Paper over differences between official gcc and Apple's weird gcc */
39
/* NOTE(review): INIT_VECTOR/CONST_BUFFER are each defined twice below.
 * An #if/#else (presumably distinguishing Apple's gcc from FSF gcc, per
 * the comment above) has evidently been lost from this chunk, together
 * with the matching #endif -- TODO: restore from the original file.
 * The bare numeric lines interleaved throughout this chunk look like
 * line-number residue from a bad extraction; they are not code. */
#define INIT_VECTOR(v...) {v}
40
#define CONST_BUFFER(b) (b)
42
#define INIT_VECTOR(v...) (v)
43
#define CONST_BUFFER(b) ((guchar *)(b))
46
/* Constant vectors shared by the composite kernels below.
 * NOTE(review): the bare numeric lines between each declaration and its
 * INIT_VECTOR initializer are extraction residue, not code; code left
 * byte-identical pending recovery of the original file. */
/* 0xff in every 4th byte: selects the alpha byte of each RGBA8 pixel */
static const vector unsigned char alphamask = (const vector unsigned char)
47
INIT_VECTOR(0,0,0,0xff,0,0,0,0xff,0,0,0,0xff,0,0,0,0xff);
48
/* vec_perm pattern taking the high byte of each 16-bit lane,
 * alternating between the two source vectors (0,16,2,18,...) */
static const vector unsigned char combine_high_bytes = (const vector unsigned char)
49
INIT_VECTOR(0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30);
50
/* 0x80 in every 16-bit lane -- rounding bias for the divide-by-255 trick */
static const vector unsigned short ox0080 = (const vector unsigned short)
51
INIT_VECTOR(0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80);
52
/* shift count of 8 in every 16-bit lane, used with vec_sr/vec_sl */
static const vector unsigned short ox0008 = (const vector unsigned short)
53
INIT_VECTOR(0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8);
54
/* low-byte mask for every signed 16-bit lane */
static const vector signed short ox00ff = (const vector signed short)
55
INIT_VECTOR(0x00ff,0x00ff,0x00ff,0x00ff,0x00ff,0x00ff,0x00ff,0x00ff);
56
/* 0xff80 (-128 as signed short) in every lane -- grain merge/extract offset */
static const vector signed short oxff80 = (const vector signed short)
57
INIT_VECTOR(0xff80,0xff80,0xff80,0xff80,0xff80,0xff80,0xff80,0xff80);
59
/* Load a vector from an unaligned location in memory */
60
/* NOTE(review): fragment is corrupted -- the opening brace, the
 * alignment test (presumably `if ((long)v & 0x0f)`) and the else
 * branch structure are missing, as the jump in the residual line
 * numbers (61 -> 65, 68 -> 71) suggests.  Left byte-identical. */
static inline vector unsigned char
61
LoadUnaligned(const guchar *v)
65
/* lvsl + two aligned loads + perm: the classic AltiVec
 * misaligned-load sequence */
vector unsigned char permuteVector = vec_lvsl(0, v);
66
vector unsigned char low = vec_ld(0, v);
67
vector unsigned char high = vec_ld(16, v);
68
return vec_perm(low, high, permuteVector);
71
/* aligned case: a single load suffices and avoids reading past the end */
return vec_ld(0, v); /* don't want overflow */
74
/* Load less than a vector from an unaligned location in memory */
75
/* NOTE(review): fragment is corrupted -- the second parameter's
 * declaration (a byte count, judging by the `n` used below), the
 * opening brace and the if/else braces are missing. */
static inline vector unsigned char
76
LoadUnalignedLess(const guchar *v,
79
vector unsigned char permuteVector = vec_lvsl(0, v);
80
/* does the n-byte span cross a 16-byte boundary? */
if (((long)v&0x0f)+n > 15)
82
vector unsigned char low = vec_ld(0, v);
83
vector unsigned char high = vec_ld(16, v);
84
return vec_perm(low, high, permuteVector);
88
/* span fits in one aligned vector: single load, rotate into place */
vector unsigned char tmp = vec_ld(0, v);
89
return vec_perm(tmp, tmp, permuteVector); /* don't want overflow */
93
/* Store a vector to an unaligned location in memory */
95
/* NOTE(review): fragment is corrupted -- the return type line
 * (presumably `static inline void`), the `where` parameter
 * declaration and the braces are missing.  Left byte-identical. */
StoreUnaligned (vector unsigned char v,
98
if ((unsigned long)where & 0x0f)
100
/* Load the surrounding area */
101
vector unsigned char low = vec_ld(0, where);
102
vector unsigned char high = vec_ld(16, where);
103
/* Prepare the constants that we need */
104
vector unsigned char permuteVector = vec_lvsr(0, where);
105
vector signed char oxFF = vec_splat_s8(-1);
106
vector signed char ox00 = vec_splat_s8(0);
107
/* Make a mask for which parts of the vectors to swap out */
108
vector unsigned char mask = (vector unsigned char)vec_perm(ox00, oxFF, permuteVector);
109
v = vec_perm(v, v, permuteVector);
110
/* Insert our data into the low and high vectors */
111
low = vec_sel(low, v, mask);
112
high = vec_sel(v, high, mask);
113
/* Store the two aligned result vectors */
114
vec_st(low, 0, CONST_BUFFER(where));
115
vec_st(high, 16, CONST_BUFFER(where));
118
{ /* prevent overflow */
119
vec_st(v, 0, CONST_BUFFER(where));
123
/* Store less than a vector to an unaligned location in memory */
125
/* NOTE(review): fragment is corrupted -- the return type, the tail of
 * the parameter list (`where` and a byte count), the braces and the
 * loop header declaring `i` (used below with vec_ste) are missing. */
StoreUnalignedLess (vector unsigned char v,
130
vector unsigned char permuteVector = vec_lvsr(0, where);
131
v = vec_perm(v, v, permuteVector);
133
/* byte-wise store -- writes only the requested bytes */
vec_ste(v, i, CONST_BUFFER(where));
137
/* AltiVec 'addition' composite for RGBA8 buffers.
 * Visible logic: result alpha = min(alpha_a, alpha_b); colour bytes are
 * masked off before the (missing) arithmetic core.
 * NOTE(review): fragment is corrupted -- the return type, braces, the
 * main pixel loop, the `D` destination pointer and the actual
 * saturating-add of the colour channels are not visible here. */
gimp_composite_addition_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
139
const guchar *A = ctx->A;
140
const guchar *B = ctx->B;
142
guint length = ctx->n_pixels;
143
vector unsigned char a,b,d,alpha_a,alpha_b;
150
alpha_a=vec_and(a, alphamask);
151
alpha_b=vec_and(b, alphamask);
152
/* destination alpha = min of the two source alphas */
d=vec_min(alpha_a, alpha_b);
154
a=vec_andc(a, alphamask);
156
b=vec_andc(b, alphamask);
159
StoreUnaligned(d, D);
166
/* process last pixels */
168
a=LoadUnalignedLess(A, length);
169
b=LoadUnalignedLess(B, length);
171
alpha_a=vec_and(a,alphamask);
172
alpha_b=vec_and(b,alphamask);
173
d=vec_min(alpha_a,alpha_b);
175
a=vec_andc(a,alphamask);
177
b=vec_andc(b,alphamask);
180
StoreUnalignedLess(d, D, length);
184
/* AltiVec 'subtract' composite for RGBA8 buffers.
 * Same alpha handling as the addition kernel: result alpha =
 * min(alpha_a, alpha_b), colour bytes masked off beforehand.
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer and the saturating-subtract core are missing. */
gimp_composite_subtract_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
186
const guchar *A = ctx->A;
187
const guchar *B = ctx->B;
189
guint length = ctx->n_pixels;
190
vector unsigned char a,b,d,alpha_a,alpha_b;
197
alpha_a=vec_and(a, alphamask);
198
alpha_b=vec_and(b, alphamask);
199
d=vec_min(alpha_a, alpha_b);
201
a=vec_andc(a, alphamask);
203
b=vec_andc(b, alphamask);
206
StoreUnaligned(d, D);
213
/* process last pixels */
215
a=LoadUnalignedLess(A, length);
216
b=LoadUnalignedLess(B, length);
218
alpha_a=vec_and(a,alphamask);
219
alpha_b=vec_and(b,alphamask);
220
d=vec_min(alpha_a,alpha_b);
222
a=vec_andc(a,alphamask);
224
b=vec_andc(b,alphamask);
227
StoreUnalignedLess(d, D, length);
231
/* AltiVec 'swap' composite: exchanges the contents of buffers A and B
 * in place (note both loads happen before either store).
 * NOTE(review): fragment is corrupted -- return type, braces, the main
 * loop and the loads feeding the first pair of stores are missing. */
gimp_composite_swap_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
233
const guchar *A = ctx->A;
234
const guchar *B = ctx->B;
235
guint length = ctx->n_pixels;
236
vector unsigned char a,b;
242
StoreUnaligned(b, A);
243
StoreUnaligned(a, B);
248
/* process last pixels */
250
a=LoadUnalignedLess(A, length);
251
b=LoadUnalignedLess(B, length);
252
StoreUnalignedLess(a, B, length);
253
StoreUnalignedLess(b, A, length);
257
/* AltiVec 'difference' composite for RGBA8 buffers.
 * Result alpha = min(alpha_a, alpha_b); colour bytes masked off before
 * the (missing) |a-b| computation -- the unused-looking `e` temporary
 * presumably belonged to it.
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer and the difference core are missing. */
gimp_composite_difference_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
259
const guchar *A = ctx->A;
260
const guchar *B = ctx->B;
262
guint length = ctx->n_pixels;
263
vector unsigned char a,b,d,e,alpha_a,alpha_b;
270
alpha_a=vec_and(a, alphamask);
271
alpha_b=vec_and(b, alphamask);
272
d=vec_min(alpha_a, alpha_b);
274
a=vec_andc(a, alphamask);
276
b=vec_andc(b, alphamask);
281
StoreUnaligned(d, D);
288
/* process last pixels */
290
a=LoadUnalignedLess(A, length);
291
b=LoadUnalignedLess(B, length);
293
alpha_a=vec_and(a,alphamask);
294
alpha_b=vec_and(b,alphamask);
295
d=vec_min(alpha_a,alpha_b);
297
a=vec_andc(a,alphamask);
299
b=vec_andc(b,alphamask);
304
StoreUnalignedLess(d, D, length);
308
/* AltiVec 'darken' composite for RGBA8 buffers.
 * NOTE(review): fragment is corrupted -- return type, braces, the main
 * loop, the `D` pointer and the per-byte minimum computation producing
 * `d` (presumably vec_min(a, b)) are all missing from this view. */
gimp_composite_darken_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
310
const guchar *A = ctx->A;
311
const guchar *B = ctx->B;
313
guint length = ctx->n_pixels;
314
vector unsigned char a,b,d;
323
StoreUnaligned(d, D);
330
/* process last pixels */
332
a=LoadUnalignedLess(A, length);
333
b=LoadUnalignedLess(B, length);
337
StoreUnalignedLess(d, D, length);
341
/* AltiVec 'lighten' composite for RGBA8 buffers.
 * Result alpha = min(alpha_a, alpha_b); colour bytes masked off before
 * the (missing) per-byte maximum computation.
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer and the lighten core are missing. */
gimp_composite_lighten_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
343
const guchar *A = ctx->A;
344
const guchar *B = ctx->B;
346
guint length = ctx->n_pixels;
347
vector unsigned char a,b,d,alpha_a,alpha_b;
354
alpha_a=vec_and(a, alphamask);
355
alpha_b=vec_and(b, alphamask);
356
d=vec_min(alpha_a, alpha_b);
358
a=vec_andc(a, alphamask);
360
b=vec_andc(b, alphamask);
363
StoreUnaligned(d, D);
370
/* process last pixels */
372
a=LoadUnalignedLess(A, length);
373
b=LoadUnalignedLess(B, length);
375
alpha_a=vec_and(a,alphamask);
376
alpha_b=vec_and(b,alphamask);
377
d=vec_min(alpha_a,alpha_b);
379
a=vec_andc(a,alphamask);
381
b=vec_andc(b,alphamask);
384
StoreUnalignedLess(d, D, length);
388
/* AltiVec 'multiply' composite for RGBA8 buffers.
 * Visible logic: 16-bit products (whose vec_mule/vec_mulo computation
 * is missing from this fragment) are divided by 255 via the
 * add-0x80-then-fold trick, repacked to bytes with combine_high_bytes,
 * and the alpha lanes replaced with min(alpha_a, alpha_b).
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer, the multiply itself and the final merge of
 * `alpha` into `d` are missing. */
gimp_composite_multiply_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
390
const guchar *A = ctx->A;
391
const guchar *B = ctx->B;
393
guint length = ctx->n_pixels;
394
vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
395
vector unsigned short al,ah;
403
/* divide by 255: n/255 ~= ((n+0x80) + ((n+0x80)>>8)) >> 8 */
al=vec_add(al,ox0080);
405
ah=vec_add(ah,ox0080);
406
al=vec_add(al,vec_sr(al,ox0008));
407
ah=vec_add(ah,vec_sr(ah,ox0008));
408
d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);
410
alpha_a=vec_and(a, alphamask);
411
alpha_b=vec_and(b, alphamask);
412
alpha=vec_min(alpha_a, alpha_b);
414
d=vec_andc(d, alphamask);
417
StoreUnaligned(d, D);
424
/* process last pixels */
426
a=LoadUnalignedLess(A, length);
427
b=LoadUnalignedLess(B, length);
430
al=vec_add(al,ox0080);
432
ah=vec_add(ah,ox0080);
433
al=vec_add(al,vec_sr(al,ox0008));
434
ah=vec_add(ah,vec_sr(ah,ox0008));
435
d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);
437
alpha_a=vec_and(a, alphamask);
438
alpha_b=vec_and(b, alphamask);
439
alpha=vec_min(alpha_a, alpha_b);
441
d=vec_andc(d, alphamask);
444
StoreUnalignedLess(d, D, length);
448
/* AltiVec 'blend' composite: dest = (A*(255-blend) + B*blend) / 255,
 * with the blend factor taken from ctx->blend.blend and splatted into
 * a vector via the (partially missing) vblend union.
 * NOTE(review): fragment is corrupted -- the union declaration wrapping
 * the visible `v`/`u8[16]` members, the `tmp` loop variable, the `D`
 * pointer, the main loop, the additions combining al/bl and ah/bh, and
 * several statements of the divide-by-255 sequence are missing. */
gimp_composite_blend_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
450
const guchar *A = ctx->A;
451
const guchar *B = ctx->B;
453
guint length = ctx->n_pixels;
454
guchar blend = ctx->blend.blend;
457
vector unsigned char v;
458
unsigned char u8[16];
461
vector unsigned char vblendc;
462
vector unsigned char a,b,d;
463
vector unsigned short al,ah,bl,bh,one=vec_splat_u16(1);
466
/* splat the scalar blend factor into all 16 bytes of vblend */
for (tmp=0; tmp<16; tmp++ )
467
vblend.u8[tmp]=blend;
468
/* vblendc = ~vblend = 255 - blend per byte */
vblendc=vec_nor(vblend.v,vblend.v);
475
/* dest[b] = (src1[b] * blend2 + src2[b] * blend) / 255;
476
* to divide by 255 we use ((n+1)+(n+1)>>8)>>8
477
* It works for all value but 0xffff
478
* happily blending formula can't give this value */
480
al=vec_mule(a,vblendc);
481
ah=vec_mulo(a,vblendc);
483
bl=vec_mule(b,vblend.v);
484
bh=vec_mulo(b,vblend.v);
488
al=vec_add(al,vec_sr(al,ox0008));
492
ah=vec_add(ah,vec_sr(ah,ox0008));
494
d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);
496
StoreUnaligned(d, D);
503
/* process last pixels */
505
a=LoadUnalignedLess(A, length);
506
b=LoadUnalignedLess(B, length);
508
al=vec_mule(a,vblendc);
509
ah=vec_mulo(a,vblendc);
511
bl=vec_mule(b,vblend.v);
512
bh=vec_mulo(b,vblend.v);
516
al=vec_add(al,vec_sr(al,ox0008));
520
ah=vec_add(ah,vec_sr(ah,ox0008));
522
d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);
524
StoreUnalignedLess(d, D, length);
528
/* AltiVec 'screen' composite for RGBA8 buffers.
 * Visible logic: the complemented product (whose multiply is missing
 * here) is divided by 255 with the 0x80 rounding trick, repacked to
 * bytes, and the alpha lanes replaced with min(alpha_a, alpha_b).
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer, the nor/multiply core and the final merge of
 * `alpha` into `d` are missing. */
gimp_composite_screen_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
530
const guchar *A = ctx->A;
531
const guchar *B = ctx->B;
533
guint length = ctx->n_pixels;
534
vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
535
vector unsigned short ah,al;
542
alpha_a=vec_and(a, alphamask);
543
alpha_b=vec_and(b, alphamask);
544
alpha=vec_min(alpha_a, alpha_b);
549
/* divide by 255: n/255 ~= ((n+0x80) + ((n+0x80)>>8)) >> 8 */
al=vec_add(al,ox0080);
551
ah=vec_add(ah,ox0080);
553
al=vec_add(al,vec_sr(al,ox0008));
554
ah=vec_add(ah,vec_sr(ah,ox0008));
556
d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);
559
d=vec_andc(d, alphamask);
562
StoreUnaligned(d, D);
569
/* process last pixels */
571
a=LoadUnalignedLess(A, length);
572
b=LoadUnalignedLess(B, length);
574
alpha_a=vec_and(a, alphamask);
575
alpha_b=vec_and(b, alphamask);
576
alpha=vec_min(alpha_a, alpha_b);
581
al=vec_add(al,ox0080);
583
ah=vec_add(ah,ox0080);
585
al=vec_add(al,vec_sr(al,ox0008));
586
ah=vec_add(ah,vec_sr(ah,ox0008));
588
d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);
591
d=vec_andc(d, alphamask);
594
StoreUnalignedLess(d, D, length);
598
/* AltiVec 'grain merge' composite: bytes are widened to signed 16-bit
 * lanes, a and b are summed (the vec_add is missing from this
 * fragment), offset by -128 (oxff80), saturate-packed back to bytes,
 * and the alpha lanes replaced with min(alpha_a, alpha_b).
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer, the a+b additions, the bh/bl offset lines, the
 * vec_packs and the final alpha merge are missing. */
gimp_composite_grain_merge_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
600
const guchar *A = ctx->A;
601
const guchar *B = ctx->B;
603
guint length = ctx->n_pixels;
604
vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
605
vector signed short ah,al,bh,bl;
612
alpha_a=vec_and(a, alphamask);
613
alpha_b=vec_and(b, alphamask);
614
alpha=vec_min(alpha_a, alpha_b);
616
/* widen bytes to signed shorts; the vec_and with ox00ff undoes the
 * sign extension so each lane holds the original unsigned byte */
ah=vec_unpackh((vector signed char)a);
617
ah=vec_and(ah,ox00ff);
618
al=vec_unpackl((vector signed char)a);
619
al=vec_and(al,ox00ff);
620
bh=vec_unpackh((vector signed char)b);
621
bh=vec_and(bh,ox00ff);
622
bl=vec_unpackl((vector signed char)b);
623
bl=vec_and(bl,ox00ff);
627
/* grain merge offset: subtract 128 (add 0xff80) per lane */
ah=vec_add(ah,oxff80);
628
al=vec_add(al,oxff80);
632
d=vec_andc(d, alphamask);
635
StoreUnaligned(d, D);
642
/* process last pixels */
644
a=LoadUnalignedLess(A, length);
645
b=LoadUnalignedLess(B, length);
647
alpha_a=vec_and(a, alphamask);
648
alpha_b=vec_and(b, alphamask);
649
alpha=vec_min(alpha_a, alpha_b);
651
ah=vec_unpackh((vector signed char)a);
652
ah=vec_and(ah,ox00ff);
653
al=vec_unpackl((vector signed char)a);
654
al=vec_and(al,ox00ff);
655
bh=vec_unpackh((vector signed char)b);
656
bh=vec_and(bh,ox00ff);
657
bl=vec_unpackl((vector signed char)b);
658
bl=vec_and(bl,ox00ff);
662
ah=vec_add(ah,oxff80);
663
al=vec_add(al,oxff80);
667
d=vec_andc(d, alphamask);
670
StoreUnalignedLess(d, D, length);
674
/* AltiVec 'grain extract' composite: mirror image of grain merge --
 * widen to signed 16-bit, subtract (the a-b vec_sub is missing from
 * this fragment), remove the -128 offset via vec_sub(..., oxff80),
 * saturate-pack, and replace alpha with min(alpha_a, alpha_b).
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer, the a-b subtraction, the vec_packs and the final
 * alpha merge are missing. */
gimp_composite_grain_extract_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
676
const guchar *A = ctx->A;
677
const guchar *B = ctx->B;
679
guint length = ctx->n_pixels;
680
vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
681
vector signed short ah,al,bh,bl;
688
alpha_a=vec_and(a, alphamask);
689
alpha_b=vec_and(b, alphamask);
690
alpha=vec_min(alpha_a, alpha_b);
692
/* widen bytes to signed shorts, masking off the sign extension */
ah=vec_unpackh((vector signed char)a);
693
ah=vec_and(ah,ox00ff);
694
al=vec_unpackl((vector signed char)a);
695
al=vec_and(al,ox00ff);
696
bh=vec_unpackh((vector signed char)b);
697
bh=vec_and(bh,ox00ff);
698
bl=vec_unpackl((vector signed char)b);
699
bl=vec_and(bl,ox00ff);
703
/* grain extract offset: add 128 (subtract 0xff80) per lane */
ah=vec_sub(ah,oxff80);
704
al=vec_sub(al,oxff80);
708
d=vec_andc(d, alphamask);
711
StoreUnaligned(d, D);
718
/* process last pixels */
720
a=LoadUnalignedLess(A, length);
721
b=LoadUnalignedLess(B, length);
723
alpha_a=vec_and(a, alphamask);
724
alpha_b=vec_and(b, alphamask);
725
alpha=vec_min(alpha_a, alpha_b);
727
ah=vec_unpackh((vector signed char)a);
728
ah=vec_and(ah,ox00ff);
729
al=vec_unpackl((vector signed char)a);
730
al=vec_and(al,ox00ff);
731
bh=vec_unpackh((vector signed char)b);
732
bh=vec_and(bh,ox00ff);
733
bl=vec_unpackl((vector signed char)b);
734
bl=vec_and(bl,ox00ff);
738
ah=vec_sub(ah,oxff80);
739
al=vec_sub(al,oxff80);
743
d=vec_andc(d, alphamask);
746
StoreUnalignedLess(d, D, length);
750
/* AltiVec 'divide' composite: numerator lanes are widened and scaled
 * by 256 (vec_sl by ox0008); denominator lanes get +1 (ox0001) to
 * avoid division by zero; the divisions themselves drop to scalar
 * code through a union (AltiVec has no integer divide), then the
 * quotients are saturate-packed and alpha replaced with
 * min(alpha_a, alpha_b).
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer and the union declarations wrapping the visible
 * `v`/`vu` members (used below as ah.v, ah.u16, ah.vu, etc.) are
 * missing, as is the final merge of `alpha` into `d`. */
gimp_composite_divide_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
752
const guchar *A = ctx->A;
753
const guchar *B = ctx->B;
755
guint length = ctx->n_pixels;
756
vector unsigned char a,b,d;
757
vector unsigned char alpha_a,alpha_b,alpha;
758
vector signed short ox0001=vec_splat_s16(1);
761
vector signed short v;
762
vector unsigned short vu;
771
alpha_a=vec_and(a, alphamask);
772
alpha_b=vec_and(b, alphamask);
773
alpha=vec_min(alpha_a, alpha_b);
775
ah.v=vec_unpackh((vector signed char)a);
776
ah.v=vec_sl(ah.v,ox0008);
777
al.v=vec_unpackl((vector signed char)a);
778
al.v=vec_sl(al.v,ox0008);
780
bh.v=vec_unpackh((vector signed char)b);
781
bh.v=vec_and(bh.v,ox00ff);
782
bh.v=vec_add(bh.v,ox0001);
783
bl.v=vec_unpackl((vector signed char)b);
784
bl.v=vec_and(bl.v,ox00ff);
785
bl.v=vec_add(bl.v,ox0001);
787
/* scalar divides; lanes 3 and 7 are skipped -- presumably the alpha
 * lanes, which are overwritten afterwards */
ah.u16[0]=ah.u16[0]/bh.u16[0];
788
ah.u16[1]=ah.u16[1]/bh.u16[1];
789
ah.u16[2]=ah.u16[2]/bh.u16[2];
790
ah.u16[4]=ah.u16[4]/bh.u16[4];
791
ah.u16[5]=ah.u16[5]/bh.u16[5];
792
ah.u16[6]=ah.u16[6]/bh.u16[6];
794
al.u16[0]=al.u16[0]/bl.u16[0];
795
al.u16[1]=al.u16[1]/bl.u16[1];
796
al.u16[2]=al.u16[2]/bl.u16[2];
797
al.u16[4]=al.u16[4]/bl.u16[4];
798
al.u16[5]=al.u16[5]/bl.u16[5];
799
al.u16[6]=al.u16[6]/bl.u16[6];
801
d=vec_packs(ah.vu,al.vu);
803
d=vec_andc(d, alphamask);
806
StoreUnaligned(d, D);
813
a=LoadUnalignedLess(A, length);
814
b=LoadUnalignedLess(B, length);
816
alpha_a=vec_and(a, alphamask);
817
alpha_b=vec_and(b, alphamask);
818
alpha=vec_min(alpha_a, alpha_b);
820
ah.v=vec_unpackh((vector signed char)a);
821
ah.v=vec_sl(ah.v,ox0008);
822
al.v=vec_unpackl((vector signed char)a);
823
al.v=vec_sl(al.v,ox0008);
825
bh.v=vec_unpackh((vector signed char)b);
826
bh.v=vec_and(bh.v,ox00ff);
827
bh.v=vec_add(bh.v,ox0001);
828
bl.v=vec_unpackl((vector signed char)b);
829
bl.v=vec_and(bl.v,ox00ff);
830
bl.v=vec_add(bl.v,ox0001);
832
ah.u16[0]=ah.u16[0]/bh.u16[0];
833
ah.u16[1]=ah.u16[1]/bh.u16[1];
834
ah.u16[2]=ah.u16[2]/bh.u16[2];
835
ah.u16[4]=ah.u16[4]/bh.u16[4];
836
ah.u16[5]=ah.u16[5]/bh.u16[5];
837
ah.u16[6]=ah.u16[6]/bh.u16[6];
839
al.u16[0]=al.u16[0]/bl.u16[0];
840
al.u16[1]=al.u16[1]/bl.u16[1];
841
al.u16[2]=al.u16[2]/bl.u16[2];
842
al.u16[4]=al.u16[4]/bl.u16[4];
843
al.u16[5]=al.u16[5]/bl.u16[5];
844
al.u16[6]=al.u16[6]/bl.u16[6];
846
d=vec_packs(ah.vu,al.vu);
848
d=vec_andc(d, alphamask);
851
StoreUnalignedLess(d, D, length);
855
/* AltiVec 'dodge' composite.  The visible skeleton is identical to the
 * divide kernel: widen A's lanes scaled by 256, add 1 to B's lanes,
 * scalar-divide (no AltiVec integer divide), saturate-pack, and
 * replace alpha with min(alpha_a, alpha_b).  Whatever distinguishes
 * dodge from divide (presumably a complement of B before dividing) is
 * among the missing lines.
 * NOTE(review): fragment is corrupted -- return type, braces, main
 * loop, `D` pointer, the ah/al/bh/bl union declarations and the final
 * alpha merge are missing. */
gimp_composite_dodge_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
857
const guchar *A = ctx->A;
858
const guchar *B = ctx->B;
860
guint length = ctx->n_pixels;
861
vector unsigned char a,b,d;
862
vector unsigned char alpha_a,alpha_b,alpha;
863
vector signed short ox0001=vec_splat_s16(1);
866
vector signed short v;
867
vector unsigned short vu;
876
alpha_a=vec_and(a, alphamask);
877
alpha_b=vec_and(b, alphamask);
878
alpha=vec_min(alpha_a, alpha_b);
880
ah.v=vec_unpackh((vector signed char)a);
881
ah.v=vec_sl(ah.v,ox0008);
882
al.v=vec_unpackl((vector signed char)a);
883
al.v=vec_sl(al.v,ox0008);
886
bh.v=vec_unpackh((vector signed char)b);
887
bh.v=vec_and(bh.v,ox00ff);
888
bh.v=vec_add(bh.v,ox0001);
889
bl.v=vec_unpackl((vector signed char)b);
890
bl.v=vec_and(bl.v,ox00ff);
891
bl.v=vec_add(bl.v,ox0001);
893
/* scalar divides; alpha lanes 3 and 7 skipped (overwritten later) */
ah.u16[0]=ah.u16[0]/bh.u16[0];
894
ah.u16[1]=ah.u16[1]/bh.u16[1];
895
ah.u16[2]=ah.u16[2]/bh.u16[2];
896
ah.u16[4]=ah.u16[4]/bh.u16[4];
897
ah.u16[5]=ah.u16[5]/bh.u16[5];
898
ah.u16[6]=ah.u16[6]/bh.u16[6];
900
al.u16[0]=al.u16[0]/bl.u16[0];
901
al.u16[1]=al.u16[1]/bl.u16[1];
902
al.u16[2]=al.u16[2]/bl.u16[2];
903
al.u16[4]=al.u16[4]/bl.u16[4];
904
al.u16[5]=al.u16[5]/bl.u16[5];
905
al.u16[6]=al.u16[6]/bl.u16[6];
907
d=vec_packs(ah.vu,al.vu);
909
d=vec_andc(d, alphamask);
912
StoreUnaligned(d, D);
919
a=LoadUnalignedLess(A, length);
920
b=LoadUnalignedLess(B, length);
922
alpha_a=vec_and(a, alphamask);
923
alpha_b=vec_and(b, alphamask);
924
alpha=vec_min(alpha_a, alpha_b);
926
ah.v=vec_unpackh((vector signed char)a);
927
ah.v=vec_sl(ah.v,ox0008);
928
al.v=vec_unpackl((vector signed char)a);
929
al.v=vec_sl(al.v,ox0008);
932
bh.v=vec_unpackh((vector signed char)b);
933
bh.v=vec_and(bh.v,ox00ff);
934
bh.v=vec_add(bh.v,ox0001);
935
bl.v=vec_unpackl((vector signed char)b);
936
bl.v=vec_and(bl.v,ox00ff);
937
bl.v=vec_add(bl.v,ox0001);
939
ah.u16[0]=ah.u16[0]/bh.u16[0];
940
ah.u16[1]=ah.u16[1]/bh.u16[1];
941
ah.u16[2]=ah.u16[2]/bh.u16[2];
942
ah.u16[4]=ah.u16[4]/bh.u16[4];
943
ah.u16[5]=ah.u16[5]/bh.u16[5];
944
ah.u16[6]=ah.u16[6]/bh.u16[6];
946
al.u16[0]=al.u16[0]/bl.u16[0];
947
al.u16[1]=al.u16[1]/bl.u16[1];
948
al.u16[2]=al.u16[2]/bl.u16[2];
949
al.u16[4]=al.u16[4]/bl.u16[4];
950
al.u16[5]=al.u16[5]/bl.u16[5];
951
al.u16[6]=al.u16[6]/bl.u16[6];
953
d=vec_packs(ah.vu,al.vu);
955
d=vec_andc(d, alphamask);
958
StoreUnalignedLess(d, D, length);
961
#endif /* COMPILE_ALTIVEC_IS_OKAY */