/*
 * Copyright (C) James Bowman - May 2000
 * Copyright (C) Peter Schlaile - Jan 2001
 *
 * This file is part of libdv, a free DV (IEC 61834/SMPTE 314M)
 * codec.
 *
 * libdv is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1, or
 * (at your option) any later version.
 *
 * libdv is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libdv; see the file COPYING. If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * The libdv homepage is http://libdv.sourceforge.net/.
 */
.section .note.GNU-stack, "", @progbits
/* The pattern for dv_88_areas looks like this:

    0 0 0 1 1 1 2 2
    0 0 1 1 1 2 2 2
    0 1 1 1 2 2 2 3
    1 1 1 2 2 2 3 3
    1 1 2 2 2 3 3 3
    1 2 2 2 3 3 3 3
    2 2 2 3 3 3 3 3
    2 2 3 3 3 3 3 3

   Note:
     [1] matrix element [0][0] is untouched.
     [2] all values on the same (anti-)diagonal are equal.

   This implementation works by loading the four shift values in
   turn, and shifting all the appropriate array elements. */
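/* For reference, a C sketch of the routine implemented below. This is
   a hedged reconstruction from the comments and code in this file, not
   a verbatim copy of the portable version in quant.c; it assumes
   dv_88_areas[] maps a raster index to its area number as drawn above:

   void quant_88_inverse_sketch(dv_coeff_t *block, int qno, int class)
   {
       const uint8_t *pq = dv_quant_shifts[qno + dv_quant_offset[class]];
       int extra = (class == 3);              // 0 or 1
       int i;

       block[0] <<= DV_WEIGHT_BIAS;           // [0][0]: bias shift only
       for (i = 1; i < 64; i++)
           block[i] <<= pq[dv_88_areas[i]] + extra + DV_WEIGHT_BIAS;
   }
*/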
/* C prototype:
   void _dv_quant_88_inverse(dv_coeff_t *block, int qno, int class);
*/
.globl _dv_quant_88_inverse_x86_64
.hidden _dv_quant_88_inverse_x86_64
.type _dv_quant_88_inverse_x86_64,@function
_dv_quant_88_inverse_x86_64:
/* Arguments: block=%rdi, qno=%rsi, class=%rdx */
/* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
mov %rsi,%rax /* qno */
mov %rdx,%r12 /* class */
mov dv_quant_offset@GOTPCREL(%rip),%rcx
movzbq (%rcx,%r12,1),%rcx /* dv_quant_offset[class] */
add %rcx,%rax /* qno + dv_quant_offset[class] */
mov dv_quant_shifts@GOTPCREL(%rip),%r11
lea (%r11,%rax,4),%r11 /* r11 is pq = dv_quant_shifts[...] */
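/* Note: the movzbq loads assume both tables hold one-byte entries,
   and the scale of 4 in the lea assumes each dv_quant_shifts row is
   four such bytes (one shift per area). */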
/* extra = (class == 3); */
sub $3,%r12 /* -3 -2 -1 0 */
sar $31,%r12 /* -1 -1 -1 0 */
inc %r12 /* 0 0 0 1 */
add $DV_WEIGHT_BIAS,%r12
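/* The sub/sar/inc triple above is a branch-free (class == 3) test:
   class - 3 is negative for classes 0-2, so the arithmetic shift
   yields -1 for them and 0 for class 3, and the increment maps that
   to 0 or 1. After the add, %r12 holds extra + DV_WEIGHT_BIAS. */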
mov %rdi,%r14 /* r14 is block */
/* Pick up each of the factors into %rcx, then shift the
   appropriate coefficients. The pattern here is taken from
   dv_88_areas; it's arranged by diagonals for clarity. */
#define ADDR(row,col) (2*(8*row+col))(%r14)
#define MSHIFT(row,col) \
shlw %cl,ADDR(row,col)
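/* Coefficients are 16-bit words, so ADDR(row,col) is the byte offset
   of block[row][col] from %r14, and MSHIFT multiplies one element by
   2^%cl in place. */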
mov $DV_WEIGHT_BIAS,%rcx /* bias-only shift; cf. note [1] above */
movzbq 1(%r11,1),%rcx /* pq[1] */
movzbq 2(%r11,1),%rcx /* pq[2] */
movzbq 3(%r11,1),%rcx /* pq[3] */
.globl _dv_quant_x86_64
.hidden _dv_quant_x86_64
.type _dv_quant_x86_64,@function
_dv_quant_x86_64:
/* Arguments: block=%rdi, qno=%rsi, class=%rdx */
/* pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
mov %rsi,%rax /* qno */
mov %rdx,%r12 /* class */
mov dv_quant_offset@GOTPCREL(%rip),%rcx
movzbq (%rcx,%r12,1),%rcx /* dv_quant_offset[class] */
add %rcx,%rax /* qno + dv_quant_offset[class] */
mov dv_quant_shifts@GOTPCREL(%rip),%r11
lea (%r11,%rax,4),%r11 /* r11 is pq */
/* extra = (class == 3); same branch-free trick as above */
sub $3,%r12 /* -3 -2 -1 0 */
sar $31,%r12 /* -1 -1 -1 0 */
inc %r12 /* 0 0 0 1 */
mov %rdi,%r14 /* r14 is block */
/* Since we already reordered the coefficients, it's easy:
   shift between OFS0 and OFS1 by the first pq value,
   between OFS1 and OFS2 by the second pq value, and so on.
   Since we really want to divide, we have to compensate for
   negative values: a plain arithmetic shift rounds them toward
   minus infinity rather than toward zero.

   The remaining work is pipeline optimization,
   which results in obfuscated MMX code... */
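/* A C sketch of that compensation: to divide by 2^shift with
   round-toward-zero semantics, fold the sign out, shift, and fold it
   back. This branch-free sign-mask form is one standard way to do it
   and is only an assumption about the MMX arithmetic used here, not a
   literal transcription of it:

   static int16_t shift_div(int16_t x, int shift)
   {
       int16_t sign = x >> 15;        // 0 if x >= 0, -1 if x < 0
       x = (x ^ sign) - sign;         // absolute value
       x >>= shift;                   // truncating divide by 2^shift
       return (x ^ sign) - sign;      // restore the sign
   }

   In MMX the same dance maps onto psraw/pxor/psubw, four coefficients
   at a time. */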
# sarw %cl,ADDR(row,col)
#define OFS2 (1+2+3+4+5+6)
#define OFS3 (1+2+3+4+5+6+7+8+7)
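/* Each OFSn is the zigzag index of the first coefficient of area n;
   the summed terms are the lengths of successive anti-diagonals
   (1, 2, ..., 8, 7, ...), so OFS2 = 21 and OFS3 = 43. The areas are
   not multiples of four words long, so each pass overshoots into the
   next area; this is harmless because each area's first load is
   issued before the previous area's overlapping store, and the next
   pass rewrites the boundary words from their original values. The
   interleaving of loads and stores across %mm0-%mm5 also hides
   latency, which is the pipeline obfuscation mentioned above. */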
movq OFS0*2(%r14), %mm2 /* first quad of area 0 */
movq (OFS0+4)*2(%r14), %mm4 /* second quad of area 0 */
movq (OFS1*2)(%r14), %mm2 /* load area 1 before area-0 results land */
movq %mm3, OFS0*2(%r14)
movq %mm5, (OFS0+4)*2(%r14)
movzbq 1(%r11,1),%rcx /* pq[1] */
movq (OFS1+4)*2(%r14), %mm4
movq %mm3, OFS1*2(%r14)
movq (OFS1+8)*2(%r14), %mm2
movq %mm5, (OFS1+4)*2(%r14)
movq (OFS1+12)*2(%r14), %mm4
movq OFS2*2(%r14), %mm0 /* load area 2 before the last area-1 store */
movq %mm3, (OFS1+8)*2(%r14)
movq %mm5, (OFS1+12)*2(%r14)
movzbq 2(%r11,1),%rcx /* pq[2] */
movq (OFS2+4)*2(%r14), %mm2
movq (OFS2+8)*2(%r14), %mm4
movq %mm1, OFS2*2(%r14)
movq %mm3, (OFS2+4)*2(%r14)
movq %mm5, (OFS2+8)*2(%r14)
movq (OFS2+12)*2(%r14), %mm0
movq (OFS2+16)*2(%r14), %mm2
movq (OFS2+20)*2(%r14), %mm4
movq OFS3*2(%r14), %mm0 /* load area 3 before the last area-2 stores */
movq %mm1, (OFS2+12)*2(%r14)
movq %mm3, (OFS2+16)*2(%r14)
movq %mm5, (OFS2+20)*2(%r14)
movzbq 3(%r11,1),%rcx /* pq[3] */
movq (OFS3+4)*2(%r14), %mm2
movq (OFS3+8)*2(%r14), %mm4
movq %mm1, OFS3*2(%r14)
movq %mm3, (OFS3+4)*2(%r14)
movq %mm5, (OFS3+8)*2(%r14)
movq (OFS3+12)*2(%r14), %mm2
movq (OFS3+16)*2(%r14), %mm4
movq %mm3, (OFS3+12)*2(%r14)
movq %mm5, (OFS3+16)*2(%r14)
movw (OFS3+20)*2(%r14), %ax /* final coefficient, index 63 */
movw %ax, (OFS3+20)*2(%r14) /* store it after the scalar shift */
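/* The scalar tail exists because area 3 holds 21 coefficients
   (indices 43-63): five movq quads cover indices 43-62, leaving the
   single word at index 63 = OFS3+20 to plain integer code. */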