~ubuntu-branches/ubuntu/lucid/gavl/lucid

Viewing changes to gavl/sse2/scale_y_sse2.c

Committer: Bazaar Package Importer
Author(s): Romain Beauxis
Date: 2008-11-07 13:47:46 UTC
mfrom: (1.1.2 upstream)
Revision ID: james.westby@ubuntu.com-20081107134746-s4s970fo1bcde9zw

Tags: 1.0.1-1

* Adopted package for debian, with the blessing of previous maintainer.
* Based new package on Christian Marillat's package for debian-multimedia.
* Removed support for ccache until I figure out how to make it work
with cdbs.
* Changed library package name since ABI is not backward compatible, but
upstream did not bump major soversion.

files added:
debian/control.in

debian/libgavl-1.0-0.install

debian/libgavl-1.0-0.symbols

debian/libgavl-doc.doc-base

debian/libgavl-doc.install

doc/Doxyfile.in

doc/Makefile.am

doc/Makefile.in

doc/mainpage.incl

gavl/3dnow

gavl/3dnow/Makefile.am

gavl/3dnow/Makefile.in

gavl/3dnow/deinterlace_blend_3dnow.c

gavl/c/deinterlace_blend_c.c

gavl/c/dsp_c.c

gavl/c/gray_gray_c.c

gavl/c/gray_rgb_c.c

gavl/c/gray_yuv_c.c

gavl/c/rgb_gray_c.c

gavl/c/rgb_rgb_c.c

gavl/c/rgb_yuv_c.c

gavl/c/scale_macros.h

gavl/c/yuv_gray_c.c

gavl/c/yuv_rgb_c.c

gavl/c/yuv_yuv_c.c

gavl/deinterlace_blend.c

gavl/deinterlace_scale.c

gavl/dsp.c

gavl/dsputils.c

gavl/hq/rgb_rgb_hq.c

gavl/hq/rgb_yuv_hq.c

gavl/hq/yuv_rgb_hq.c

gavl/hq/yuv_yuv_hq.c

gavl/mmx/deinterlace_blend_mmx.c

gavl/mmx/dsp_mmx.c

gavl/mmx/rgb_rgb_mmx.c

gavl/mmx/rgb_yuv_mmx.c

gavl/mmx/scale_x_mmx.c

gavl/mmx/scale_y.h

gavl/mmx/scale_y_linear.h

gavl/mmx/scale_y_mmx.c

gavl/mmx/yuv_rgb_mmx.c

gavl/mmx/yuv_yuv_mmx.c

gavl/mmxext/deinterlace_blend_mmxext.c

gavl/mmxext/dsp_mmxext.c

gavl/mmxext/rgb_rgb_mmxext.c

gavl/mmxext/rgb_yuv_mmxext.c

gavl/mmxext/scale_x_mmxext.c

gavl/mmxext/scale_y_mmxext.c

gavl/mmxext/yuv_rgb_mmxext.c

gavl/mmxext/yuv_yuv_mmxext.c

gavl/sse

gavl/sse/Makefile.am

gavl/sse/Makefile.in

gavl/sse/dsp_sse.c

gavl/sse/rgb_yuv_sse.c

gavl/sse/scale_x_sse.c

gavl/sse/scale_y.h

gavl/sse/scale_y_linear.h

gavl/sse/scale_y_sse.c

gavl/sse/sse.h

gavl/sse2

gavl/sse2/Makefile.am

gavl/sse2/Makefile.in

gavl/sse2/scale_y.h

gavl/sse2/scale_y_sse2.c

gavl/sse3

gavl/sse3/Makefile.am

gavl/sse3/Makefile.in

gavl/sse3/rgb_yuv_sse3.c

gavl/sse3/scale_x_sse3.c

include/bswap.h

include/dsp.h

include/gavl/gavl_version.h

include/gavl/gavl_version.h.in

include/gavl/gavldsp.h

include/macros.h

m4/gavl_float.m4

src/benchmark.c

src/convolvetest.c

src/deinterlace_time.c

src/deinterlacetest.c

src/pixelformat_penalty.c

src/pngutil.c

src/pngutil.h

src/timeutils.c

src/timeutils.h

src/volume_test.c

utils

utils/Makefile.am

utils/Makefile.in

utils/cpuinfo.c

files removed:
debian/dirs

debian/docs

debian/libgavl-dev.dirs

debian/libgavl.substvars

debian/libgavl0.dirs

debian/libgavl0.install

debian/menu

debian/patches

debian/patches/00list

debian/patches/fpic.dpatch

gavl/c/_rgb_rgb_c.c

gavl/c/_rgb_yuv_c.c

gavl/c/_yuv_rgb_c.c

gavl/c/_yuv_yuv_c.c

gavl/c/colorspace_c.c

gavl/hq/colorspace_hq.c

gavl/mmx/_rgb_rgb_mmx.c

gavl/mmx/_rgb_yuv_mmx.c

gavl/mmx/_yuv_rgb_mmx.c

gavl/mmx/_yuv_yuv_mmx.c

gavl/mmx/colorspace_mmx.c

gavl/mmx/scale_mmx.c

gavl/mmxext/colorspace_mmxext.c

gavl/mmxext/scale_mmxext.c

include/gavlconfig.h

include/gavlconfig.h.in

files modified:
COPYING

Makefile.am

Makefile.in

aclocal.m4

autogen.sh

config.guess

config.sub

configure

configure.ac

cpuinfo.sh

debian/changelog

debian/control

debian/copyright

debian/libgavl-dev.install

debian/rules

debian/watch

depcomp

gavl.pc.in

gavl/Makefile.am

gavl/Makefile.in

gavl/arith128.c

gavl/audioconverter.c

gavl/audioformat.c

gavl/audioframe.c

gavl/audiooptions.c

gavl/blend.c

gavl/c/Makefile.am

gavl/c/Makefile.in

gavl/c/_interleave_c.c

gavl/c/_mix_c.c

gavl/c/_sampleformat_c.c

gavl/c/blend_c.c

gavl/c/colorspace_macros.h

gavl/c/colorspace_tables.c

gavl/c/colorspace_tables.h

gavl/c/interleave_c.c

gavl/c/mix_c.c

gavl/c/sampleformat_c.c

gavl/c/scale_bicubic_c.c

gavl/c/scale_bicubic_noclip_c.c

gavl/c/scale_bilinear_c.c

gavl/c/scale_bilinear_x.h

gavl/c/scale_bilinear_xy.h

gavl/c/scale_bilinear_y.h

gavl/c/scale_generic_c.c

gavl/c/scale_generic_x.h

gavl/c/scale_generic_y.h

gavl/c/scale_nearest_c.c

gavl/c/scale_quadratic_c.c

gavl/c/scale_x.h

gavl/c/scale_y.h

gavl/c/volume_c.c

gavl/colorspace.c

gavl/cputest.c

gavl/csp_packed_packed.h

gavl/csp_packed_planar.h

gavl/csp_planar_packed.h

gavl/csp_planar_planar.h

gavl/deinterlace.c

gavl/deinterlace_copy.c

gavl/hq/Makefile.am

gavl/hq/Makefile.in

gavl/interleave.c

gavl/libgdither/Makefile.in

gavl/libgdither/gdither.c

gavl/libgdither/gdither.h

gavl/libgdither/gdither_types.h

gavl/libgdither/gdither_types_internal.h

gavl/libgdither/noise.h

gavl/libsamplerate/Makefile.in

gavl/libsamplerate/common.h

gavl/libsamplerate/samplerate.c

gavl/libsamplerate/src_linear.c

gavl/libsamplerate/src_sinc.c

gavl/libsamplerate/src_zoh.c

gavl/memcpy.c

gavl/mix.c

gavl/mmx/Makefile.am

gavl/mmx/Makefile.in

gavl/mmx/interpolate.h

gavl/mmx/mmx.h

gavl/mmx/mmx_macros.h

gavl/mmxext/Makefile.am

gavl/mmxext/Makefile.in

gavl/peakdetector.c

gavl/rectangle.c

gavl/sampleformat.c

gavl/samplerate.c

gavl/scale.c

gavl/scale_context.c

gavl/scale_kernels.c

gavl/scale_table.c

gavl/time.c

gavl/timer.c

gavl/video.c

gavl/videoformat.c

gavl/videoframe.c

gavl/videooptions.c

gavl/volume.c

include/Makefile.am

include/Makefile.in

include/accel.h

include/arith128.h

include/attributes.h

include/audio.h

include/blend.h

include/colorspace.h

include/deinterlace.h

include/gavl/Makefile.am

include/gavl/Makefile.in

include/gavl/config.h.in

include/gavl/gavl.h

include/gavl/gavltime.h

include/interleave.h

include/mix.h

include/sampleformat.h

include/samplerate.h

include/scale.h

include/video.h

include/volume.h

install-sh

ltmain.sh

m4/Makefile.am

m4/Makefile.in

m4/check_funcs.m4

m4/gavl_simd.m4

m4/lqt_opt_cflags.m4

missing

src/Makefile.am

src/Makefile.in

src/blend_test.c

src/colorspace_test.c

src/colorspace_time.c

src/fill_test.c

src/plot_scale_kernels.c

src/scale_time.c

src/scaletest.c

src/timescale_test.c

Show diffs side-by-side

added added

removed removed

gavl/sse2/scale_y_sse2.c

/*****************************************************************

* gavl - a general purpose audio/video processing library

* gmerlin-general@lists.sourceforge.net

* http://gmerlin.sourceforge.net

* This program is free software: you can redistribute it and/or modify

* it under the terms of the GNU General Public License as published by

* the Free Software Foundation, either version 2 of the License, or

* (at your option) any later version.

* This program is distributed in the hope that it will be useful,

* but WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

* GNU General Public License for more details.

* You should have received a copy of the GNU General Public License

* along with this program. If not, see <http://www.gnu.org/licenses/>.

* *****************************************************************/

#include <config.h>

#include <attributes.h>

#include <stdio.h>

#include <gavl/gavl.h>

#include <video.h>

#include <scale.h>

#include "../mmx/mmx.h"

#include "../sse/sse.h"

/* Mask with only the lowest 16-bit word set; LOAD_FACTOR_8/16 AND it with
   the factor loaded by movd_m2r so that only that word survives. */
/* NOTE(review): these tables are read with movaps_m2r / pminsw_m2r /
   pmaxsw_m2r; presumably sse_t is declared 16-byte aligned in sse.h -
   confirm. */
static const sse_t factor_mask = { .uw = { 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 } };

/* Lower clamp bound (0 in every word) applied by OUTPUT_16. */
static const sse_t min_13 = { .uw = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 } };

/* Upper clamp bound (0x1FFF, i.e. 13 bits, in every word) applied by
   OUTPUT_16 before the result is shifted left by 3. */
static const sse_t max_13 = { .uw = { 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF } };

/* Disabled debugging aid: DUMP_MM stores a register into mm_tmp through
   MOVQ_R2M and prints it as a 64-bit hex value on stderr. */
#if 0

static mmx_t mm_tmp;

#define DUMP_MM(name, reg) MOVQ_R2M(reg, mm_tmp);\

fprintf(stderr, "%s: %016llx\n", name, mm_tmp.q);

#endif

/* 64-bit register-to-memory store: movntq_r2m when MMXEXT is defined,
   movq_r2m otherwise. Within this file it is referenced only by the
   disabled DUMP_MM helper. */
#ifdef MMXEXT

#define MOVQ_R2M(reg,mem) movntq_r2m(reg, mem)

#else

#define MOVQ_R2M(reg,mem) movq_r2m(reg, mem)

#endif

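/*
 * Register usage (as established by the macros below):
 *
 * xmm0: Input1
 * xmm1: Input2
 * xmm2: Factor as loaded from the table (lowest word only)
 * xmm3: Accumulator / Output1
 * xmm4: Accumulator / Output2
 * xmm5: Factor broadcast to all words
 * xmm6: Zero (used to unpack bytes to words)
 * xmm7: factor_mask
 */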

/*
 * SIMD building blocks. Further down in this file they are bound to
 * INIT_GLOBAL / INIT / ACCUM / OUTPUT before each inclusion of scale_y.h.
 *
 * The macros expect `ctx`, `src` and `dst` to be in scope at the point of
 * expansion. All register wrappers take (source, destination) operands.
 * The fixed-point notes below assume the mmx.h / sse.h wrappers map 1:1
 * onto the instructions of the same name - confirm in those headers.
 */

/* Once per call: declare the scalar accumulator used by the *_C_* macros,
   clear xmm6 and load factor_mask into xmm7. */
#define INIT_8_GLOBAL \
  int32_t tmp; \
  pxor_r2r(xmm6, xmm6);\
  movaps_m2r(factor_mask, xmm7);

/* Once per output group: clear both accumulators. */
#define INIT_8 \
  pxor_r2r(xmm3, xmm3);\
  pxor_r2r(xmm4, xmm4);

/* The 16-bit set-up is identical to the 8-bit one. */
#define INIT_16_GLOBAL INIT_8_GLOBAL

#define INIT_16 INIT_8

/* Load coefficient `num` of the current scanline, mask it down to its
   lowest word and replicate that word across xmm5. */
#define LOAD_FACTOR_8(num) \
  /* Load factor */ \
  movd_m2r(ctx->table_v.pixels[ctx->scanline].factor_i[num], xmm2);\
  pand_r2r(xmm7, xmm2);\
  pshuflw_r2ri(xmm2,xmm5,0x00);\
  pshufd_r2ri(xmm5,xmm5,0x00);

/* Same coefficient handling for the 16-bit path. */
#define LOAD_FACTOR_16(num) LOAD_FACTOR_8(num)

/* One tap for 16 bytes at *src: widen to words against the zero in xmm6,
   shift left by 7, multiply by the factor (high half kept) and add with
   signed saturation into xmm3 / xmm4.
   NOTE(review): *src is read with movaps_m2r, so scale_y.h presumably
   guarantees 16-byte alignment of src - confirm. */
#define ACCUM_8(num) \
  /* Load input */ \
  movaps_m2r(*src,xmm0);\
  movaps_r2r(xmm0,xmm1);\
  punpcklbw_r2r(xmm6, xmm0); \
  punpckhbw_r2r(xmm6, xmm1); \
  psllw_i2r(7, xmm0);\
  psllw_i2r(7, xmm1);\
  LOAD_FACTOR_8(num); \
  /* Accumulate xmm0 */ \
  pmulhw_r2r(xmm5, xmm0);\
  paddsw_r2r(xmm0, xmm3);\
  /* Accumulate xmm1 */ \
  pmulhw_r2r(xmm5, xmm1);\
  paddsw_r2r(xmm1, xmm4)

/* One tap for 32 bytes (two registers of words) at *src: the words are
   shifted right by 1 before the multiply; the rest matches ACCUM_8. */
#define ACCUM_16(num) \
  /* Load input */ \
  movaps_m2r(*src,xmm0);\
  movaps_m2r(*(src+16),xmm1);\
  psrlw_i2r(1, xmm0);\
  psrlw_i2r(1, xmm1);\
  LOAD_FACTOR_16(num); \
  /* Accumulate xmm0 */ \
  pmulhw_r2r(xmm5, xmm0);\
  paddsw_r2r(xmm0, xmm3);\
  /* Accumulate xmm1 */ \
  pmulhw_r2r(xmm5, xmm1);\
  paddsw_r2r(xmm1, xmm4)

/* Scale the accumulators back (>> 5), pack both to bytes with unsigned
   saturation and store 16 bytes to *dst (unaligned store). */
#define OUTPUT_8 \
  psraw_i2r(5, xmm3);\
  psraw_i2r(5, xmm4);\
  packuswb_r2r(xmm4, xmm3);\
  movups_r2m(xmm3, *dst)

/* 16-bit store without clamping: shift left by 3 and write 32 bytes. */
#define OUTPUT_16_NOCLIP \
  psllw_i2r(3, xmm3);\
  psllw_i2r(3, xmm4);\
  movups_r2m(xmm3, *dst);\
  movups_r2m(xmm4, *(dst+16));

/* 16-bit store with clamping: limit each word to [min_13, max_13] first so
   that the following shift left by 3 stays within 16 bits. */
#define OUTPUT_16 \
  pminsw_m2r(max_13, xmm3);\
  pminsw_m2r(max_13, xmm4);\
  pmaxsw_m2r(min_13, xmm3);\
  pmaxsw_m2r(min_13, xmm4);\
  psllw_i2r(3, xmm3);\
  psllw_i2r(3, xmm4);\
  movups_r2m(xmm3, *dst);\
  movups_r2m(xmm4, *(dst+16));

145

146

/*
 * Plain C counterparts of the SIMD macros. Further down in this file they
 * are bound to INIT_C / ACCUM_C / OUTPUT_C before each inclusion of
 * scale_y.h.
 *
 * They operate on one sample at a time and expect `tmp` (declared by
 * INIT_8_GLOBAL / INIT_16_GLOBAL), `ctx`, `src` and `dst` to be in scope.
 * The coefficients in factor_i carry 14 fractional bits, which is why the
 * OUTPUT_C_* macros shift the sum right by 14.
 */

/* Reset the scalar accumulator. */
#define INIT_C_8 \
  tmp = 0;

#define INIT_C_16 \
  tmp = 0;

/* Add coefficient `num` of the current scanline times the 8-bit sample
   at src. */
#define ACCUM_C_8(num) \
  tmp += ctx->table_v.pixels[ctx->scanline].factor_i[num] * *src

/* Same for a 16-bit sample; src is a byte pointer and is reinterpreted. */
#define ACCUM_C_16(num) \
  tmp += ctx->table_v.pixels[ctx->scanline].factor_i[num] * *(uint16_t*)src

/* Drop the fractional bits and store with clamping to 0..255. If any bit
   outside the low byte is set the value is out of range: (-tmp) >> 31 is
   then all ones for a positive tmp (stored as 0xFF) and 0 for a negative
   one. This relies on an arithmetic right shift of negative values. */
#define OUTPUT_C_8 \
  tmp >>= 14; \
  *dst = (uint8_t)((tmp & ~0xFF)?((-tmp) >> 31) : tmp);

/* 16-bit variant of the same clamp-and-store (range 0..65535). */
#define OUTPUT_C_16 \
  tmp >>= 14; \
  *(uint16_t*)dst = (uint16_t)((tmp & ~0xFFFF)?((-tmp) >> 31) : tmp);

165

166

/*
 * 8-bit bicubic scalers (NUM_TAPS 4), one per WIDTH_MUL 1..4.
 * Each group sets the parameters read by scale_y.h and then includes it;
 * presumably the header emits a function called FUNC_NAME and undefines
 * the parameters again - confirm in scale_y.h.
 */

/* scale_uint8_x_1_y_bicubic_sse2 */
#define FUNC_NAME   scale_uint8_x_1_y_bicubic_sse2
#define WIDTH_MUL   1
#define BYTES       1
#define NUM_TAPS    4
#define INIT_GLOBAL INIT_8_GLOBAL
#define INIT        INIT_8
#define ACCUM       ACCUM_8
#define OUTPUT      OUTPUT_8
#define INIT_C      INIT_C_8
#define ACCUM_C     ACCUM_C_8
#define OUTPUT_C    OUTPUT_C_8
#include "scale_y.h"

/* scale_uint8_x_2_y_bicubic_sse2 */
#define FUNC_NAME   scale_uint8_x_2_y_bicubic_sse2
#define WIDTH_MUL   2
#define BYTES       1
#define NUM_TAPS    4
#define INIT_GLOBAL INIT_8_GLOBAL
#define INIT        INIT_8
#define ACCUM       ACCUM_8
#define OUTPUT      OUTPUT_8
#define INIT_C      INIT_C_8
#define ACCUM_C     ACCUM_C_8
#define OUTPUT_C    OUTPUT_C_8
#include "scale_y.h"

/* scale_uint8_x_3_y_bicubic_sse2 */
#define FUNC_NAME   scale_uint8_x_3_y_bicubic_sse2
#define WIDTH_MUL   3
#define BYTES       1
#define NUM_TAPS    4
#define INIT_GLOBAL INIT_8_GLOBAL
#define INIT        INIT_8
#define ACCUM       ACCUM_8
#define OUTPUT      OUTPUT_8
#define INIT_C      INIT_C_8
#define ACCUM_C     ACCUM_C_8
#define OUTPUT_C    OUTPUT_C_8
#include "scale_y.h"

/* scale_uint8_x_4_y_bicubic_sse2 */
#define FUNC_NAME   scale_uint8_x_4_y_bicubic_sse2
#define WIDTH_MUL   4
#define BYTES       1
#define NUM_TAPS    4
#define INIT_GLOBAL INIT_8_GLOBAL
#define INIT        INIT_8
#define ACCUM       ACCUM_8
#define OUTPUT      OUTPUT_8
#define INIT_C      INIT_C_8
#define ACCUM_C     ACCUM_C_8
#define OUTPUT_C    OUTPUT_C_8
#include "scale_y.h"

233

234

235

/*
 * 8-bit quadratic scalers (NUM_TAPS 3), one per WIDTH_MUL 1..4.
 * Each group sets the parameters read by scale_y.h and then includes it.
 */

/* scale_uint8_x_1_y_quadratic_sse2 */
#define FUNC_NAME   scale_uint8_x_1_y_quadratic_sse2
#define WIDTH_MUL   1
#define BYTES       1
#define NUM_TAPS    3
#define INIT_GLOBAL INIT_8_GLOBAL
#define INIT        INIT_8
#define ACCUM       ACCUM_8
#define OUTPUT      OUTPUT_8
#define INIT_C      INIT_C_8
#define ACCUM_C     ACCUM_C_8
#define OUTPUT_C    OUTPUT_C_8
#include "scale_y.h"

/* scale_uint8_x_2_y_quadratic_sse2 */
#define FUNC_NAME   scale_uint8_x_2_y_quadratic_sse2
#define WIDTH_MUL   2
#define BYTES       1
#define NUM_TAPS    3
#define INIT_GLOBAL INIT_8_GLOBAL
#define INIT        INIT_8
#define ACCUM       ACCUM_8
#define OUTPUT      OUTPUT_8
#define INIT_C      INIT_C_8
#define ACCUM_C     ACCUM_C_8
#define OUTPUT_C    OUTPUT_C_8
#include "scale_y.h"

/* scale_uint8_x_3_y_quadratic_sse2 */
#define FUNC_NAME   scale_uint8_x_3_y_quadratic_sse2
#define WIDTH_MUL   3
#define BYTES       1
#define NUM_TAPS    3
#define INIT_GLOBAL INIT_8_GLOBAL
#define INIT        INIT_8
#define ACCUM       ACCUM_8
#define OUTPUT      OUTPUT_8
#define INIT_C      INIT_C_8
#define ACCUM_C     ACCUM_C_8
#define OUTPUT_C    OUTPUT_C_8
#include "scale_y.h"

/* scale_uint8_x_4_y_quadratic_sse2 */
#define FUNC_NAME   scale_uint8_x_4_y_quadratic_sse2
#define WIDTH_MUL   4
#define BYTES       1
#define NUM_TAPS    3
#define INIT_GLOBAL INIT_8_GLOBAL
#define INIT        INIT_8
#define ACCUM       ACCUM_8
#define OUTPUT      OUTPUT_8
#define INIT_C      INIT_C_8
#define ACCUM_C     ACCUM_C_8
#define OUTPUT_C    OUTPUT_C_8
#include "scale_y.h"

302

303

/*
 * 8-bit generic scalers for WIDTH_MUL 1, 2, 4 and 3 (in that order).
 * NUM_TAPS is -1 here; presumably scale_y.h then takes the tap count from
 * the scale table at run time - confirm in scale_y.h.
 */

/* scale_uint8_x_1_y_generic_sse2 */
#define FUNC_NAME   scale_uint8_x_1_y_generic_sse2
#define WIDTH_MUL   1
#define BYTES       1
#define NUM_TAPS    -1
#define INIT_GLOBAL INIT_8_GLOBAL
#define INIT        INIT_8
#define ACCUM       ACCUM_8
#define OUTPUT      OUTPUT_8
#define INIT_C      INIT_C_8
#define ACCUM_C     ACCUM_C_8
#define OUTPUT_C    OUTPUT_C_8
#include "scale_y.h"

/* scale_uint8_x_2_y_generic_sse2 */
#define FUNC_NAME   scale_uint8_x_2_y_generic_sse2
#define WIDTH_MUL   2
#define BYTES       1
#define NUM_TAPS    -1
#define INIT_GLOBAL INIT_8_GLOBAL
#define INIT        INIT_8
#define ACCUM       ACCUM_8
#define OUTPUT      OUTPUT_8
#define INIT_C      INIT_C_8
#define ACCUM_C     ACCUM_C_8
#define OUTPUT_C    OUTPUT_C_8
#include "scale_y.h"

/* scale_uint8_x_4_y_generic_sse2 */
#define FUNC_NAME   scale_uint8_x_4_y_generic_sse2
#define WIDTH_MUL   4
#define BYTES       1
#define NUM_TAPS    -1
#define INIT_GLOBAL INIT_8_GLOBAL
#define INIT        INIT_8
#define ACCUM       ACCUM_8
#define OUTPUT      OUTPUT_8
#define INIT_C      INIT_C_8
#define ACCUM_C     ACCUM_C_8
#define OUTPUT_C    OUTPUT_C_8
#include "scale_y.h"

/* scale_uint8_x_3_y_generic_sse2 */
#define FUNC_NAME   scale_uint8_x_3_y_generic_sse2
#define WIDTH_MUL   3
#define BYTES       1
#define NUM_TAPS    -1
#define INIT_GLOBAL INIT_8_GLOBAL
#define INIT        INIT_8
#define ACCUM       ACCUM_8
#define OUTPUT      OUTPUT_8
#define INIT_C      INIT_C_8
#define ACCUM_C     ACCUM_C_8
#define OUTPUT_C    OUTPUT_C_8
#include "scale_y.h"

370

371

372

/* 16 bits */

373

374

/*
 * 16-bit bicubic scalers (NUM_TAPS 4, BYTES 2) for WIDTH_MUL 1..4.
 * Every width comes in two flavours that differ only in OUTPUT:
 * the plain one clamps (OUTPUT_16), the _noclip one does not
 * (OUTPUT_16_NOCLIP).
 */

/* scale_uint16_x_1_y_bicubic_sse2 */
#define FUNC_NAME   scale_uint16_x_1_y_bicubic_sse2
#define WIDTH_MUL   1
#define BYTES       2
#define NUM_TAPS    4
#define INIT_GLOBAL INIT_16_GLOBAL
#define INIT        INIT_16
#define ACCUM       ACCUM_16
#define OUTPUT      OUTPUT_16
#define INIT_C      INIT_C_16
#define ACCUM_C     ACCUM_C_16
#define OUTPUT_C    OUTPUT_C_16
#include "scale_y.h"

/* scale_uint16_x_1_y_bicubic_noclip_sse2 */
#define FUNC_NAME   scale_uint16_x_1_y_bicubic_noclip_sse2
#define WIDTH_MUL   1
#define BYTES       2
#define NUM_TAPS    4
#define INIT_GLOBAL INIT_16_GLOBAL
#define INIT        INIT_16
#define ACCUM       ACCUM_16
#define OUTPUT      OUTPUT_16_NOCLIP
#define INIT_C      INIT_C_16
#define ACCUM_C     ACCUM_C_16
#define OUTPUT_C    OUTPUT_C_16
#include "scale_y.h"

/* scale_uint16_x_2_y_bicubic_sse2 */
#define FUNC_NAME   scale_uint16_x_2_y_bicubic_sse2
#define WIDTH_MUL   2
#define BYTES       2
#define NUM_TAPS    4
#define INIT_GLOBAL INIT_16_GLOBAL
#define INIT        INIT_16
#define ACCUM       ACCUM_16
#define OUTPUT      OUTPUT_16
#define INIT_C      INIT_C_16
#define ACCUM_C     ACCUM_C_16
#define OUTPUT_C    OUTPUT_C_16
#include "scale_y.h"

/* scale_uint16_x_2_y_bicubic_noclip_sse2 */
#define FUNC_NAME   scale_uint16_x_2_y_bicubic_noclip_sse2
#define WIDTH_MUL   2
#define BYTES       2
#define NUM_TAPS    4
#define INIT_GLOBAL INIT_16_GLOBAL
#define INIT        INIT_16
#define ACCUM       ACCUM_16
#define OUTPUT      OUTPUT_16_NOCLIP
#define INIT_C      INIT_C_16
#define ACCUM_C     ACCUM_C_16
#define OUTPUT_C    OUTPUT_C_16
#include "scale_y.h"

/* scale_uint16_x_3_y_bicubic_sse2 */
#define FUNC_NAME   scale_uint16_x_3_y_bicubic_sse2
#define WIDTH_MUL   3
#define BYTES       2
#define NUM_TAPS    4
#define INIT_GLOBAL INIT_16_GLOBAL
#define INIT        INIT_16
#define ACCUM       ACCUM_16
#define OUTPUT      OUTPUT_16
#define INIT_C      INIT_C_16
#define ACCUM_C     ACCUM_C_16
#define OUTPUT_C    OUTPUT_C_16
#include "scale_y.h"

/* scale_uint16_x_3_y_bicubic_noclip_sse2 */
#define FUNC_NAME   scale_uint16_x_3_y_bicubic_noclip_sse2
#define WIDTH_MUL   3
#define BYTES       2
#define NUM_TAPS    4
#define INIT_GLOBAL INIT_16_GLOBAL
#define INIT        INIT_16
#define ACCUM       ACCUM_16
#define OUTPUT      OUTPUT_16_NOCLIP
#define INIT_C      INIT_C_16
#define ACCUM_C     ACCUM_C_16
#define OUTPUT_C    OUTPUT_C_16
#include "scale_y.h"

/* scale_uint16_x_4_y_bicubic_sse2 */
#define FUNC_NAME   scale_uint16_x_4_y_bicubic_sse2
#define WIDTH_MUL   4
#define BYTES       2
#define NUM_TAPS    4
#define INIT_GLOBAL INIT_16_GLOBAL
#define INIT        INIT_16
#define ACCUM       ACCUM_16
#define OUTPUT      OUTPUT_16
#define INIT_C      INIT_C_16
#define ACCUM_C     ACCUM_C_16
#define OUTPUT_C    OUTPUT_C_16
#include "scale_y.h"

/* scale_uint16_x_4_y_bicubic_noclip_sse2 */
#define FUNC_NAME   scale_uint16_x_4_y_bicubic_noclip_sse2
#define WIDTH_MUL   4
#define BYTES       2
#define NUM_TAPS    4
#define INIT_GLOBAL INIT_16_GLOBAL
#define INIT        INIT_16
#define ACCUM       ACCUM_16
#define OUTPUT      OUTPUT_16_NOCLIP
#define INIT_C      INIT_C_16
#define ACCUM_C     ACCUM_C_16
#define OUTPUT_C    OUTPUT_C_16
#include "scale_y.h"

509

510

/* */

511

512

/*
 * 16-bit quadratic scalers (NUM_TAPS 3, BYTES 2) for WIDTH_MUL 1..4.
 * All of them store through OUTPUT_16_NOCLIP, i.e. without clamping.
 */

/* scale_uint16_x_1_y_quadratic_sse2 */
#define FUNC_NAME   scale_uint16_x_1_y_quadratic_sse2
#define WIDTH_MUL   1
#define BYTES       2
#define NUM_TAPS    3
#define INIT_GLOBAL INIT_16_GLOBAL
#define INIT        INIT_16
#define ACCUM       ACCUM_16
#define OUTPUT      OUTPUT_16_NOCLIP
#define INIT_C      INIT_C_16
#define ACCUM_C     ACCUM_C_16
#define OUTPUT_C    OUTPUT_C_16
#include "scale_y.h"

/* scale_uint16_x_2_y_quadratic_sse2 */
#define FUNC_NAME   scale_uint16_x_2_y_quadratic_sse2
#define WIDTH_MUL   2
#define BYTES       2
#define NUM_TAPS    3
#define INIT_GLOBAL INIT_16_GLOBAL
#define INIT        INIT_16
#define ACCUM       ACCUM_16
#define OUTPUT      OUTPUT_16_NOCLIP
#define INIT_C      INIT_C_16
#define ACCUM_C     ACCUM_C_16
#define OUTPUT_C    OUTPUT_C_16
#include "scale_y.h"

/* scale_uint16_x_3_y_quadratic_sse2 */
#define FUNC_NAME   scale_uint16_x_3_y_quadratic_sse2
#define WIDTH_MUL   3
#define BYTES       2
#define NUM_TAPS    3
#define INIT_GLOBAL INIT_16_GLOBAL
#define INIT        INIT_16
#define ACCUM       ACCUM_16
#define OUTPUT      OUTPUT_16_NOCLIP
#define INIT_C      INIT_C_16
#define ACCUM_C     ACCUM_C_16
#define OUTPUT_C    OUTPUT_C_16
#include "scale_y.h"

/* scale_uint16_x_4_y_quadratic_sse2 */
#define FUNC_NAME   scale_uint16_x_4_y_quadratic_sse2
#define WIDTH_MUL   4
#define BYTES       2
#define NUM_TAPS    3
#define INIT_GLOBAL INIT_16_GLOBAL
#define INIT        INIT_16
#define ACCUM       ACCUM_16
#define OUTPUT      OUTPUT_16_NOCLIP
#define INIT_C      INIT_C_16
#define ACCUM_C     ACCUM_C_16
#define OUTPUT_C    OUTPUT_C_16
#include "scale_y.h"

579

580

581

/*
 * 16-bit generic scalers (BYTES 2) for WIDTH_MUL 1..4, storing through the
 * clamping OUTPUT_16. NUM_TAPS is -1; presumably scale_y.h then takes the
 * tap count from the scale table at run time - confirm in scale_y.h.
 */

/* scale_uint16_x_1_y_generic_sse2 */
#define FUNC_NAME   scale_uint16_x_1_y_generic_sse2
#define WIDTH_MUL   1
#define BYTES       2
#define NUM_TAPS    -1
#define INIT_GLOBAL INIT_16_GLOBAL
#define INIT        INIT_16
#define ACCUM       ACCUM_16
#define OUTPUT      OUTPUT_16
#define INIT_C      INIT_C_16
#define ACCUM_C     ACCUM_C_16
#define OUTPUT_C    OUTPUT_C_16
#include "scale_y.h"

/* scale_uint16_x_2_y_generic_sse2 */
#define FUNC_NAME   scale_uint16_x_2_y_generic_sse2
#define WIDTH_MUL   2
#define BYTES       2
#define NUM_TAPS    -1
#define INIT_GLOBAL INIT_16_GLOBAL
#define INIT        INIT_16
#define ACCUM       ACCUM_16
#define OUTPUT      OUTPUT_16
#define INIT_C      INIT_C_16
#define ACCUM_C     ACCUM_C_16
#define OUTPUT_C    OUTPUT_C_16
#include "scale_y.h"

/* scale_uint16_x_3_y_generic_sse2 */
#define FUNC_NAME   scale_uint16_x_3_y_generic_sse2
#define WIDTH_MUL   3
#define BYTES       2
#define NUM_TAPS    -1
#define INIT_GLOBAL INIT_16_GLOBAL
#define INIT        INIT_16
#define ACCUM       ACCUM_16
#define OUTPUT      OUTPUT_16
#define INIT_C      INIT_C_16
#define ACCUM_C     ACCUM_C_16
#define OUTPUT_C    OUTPUT_C_16
#include "scale_y.h"

/* scale_uint16_x_4_y_generic_sse2 */
#define FUNC_NAME   scale_uint16_x_4_y_generic_sse2
#define WIDTH_MUL   4
#define BYTES       2
#define NUM_TAPS    -1
#define INIT_GLOBAL INIT_16_GLOBAL
#define INIT        INIT_16
#define ACCUM       ACCUM_16
#define OUTPUT      OUTPUT_16
#define INIT_C      INIT_C_16
#define ACCUM_C     ACCUM_C_16
#define OUTPUT_C    OUTPUT_C_16
#include "scale_y.h"

648

649

650

651

652

653

654

/*
 * Register the SSE2 vertical quadratic scalers in tab->funcs_y.
 *
 * tab:         function table to fill in.
 * src_advance: source advance; an 8-bit routine is only installed when it
 * dst_advance: equals the destination advance and lies in 1..4.
 *
 * The four 16-bit entries and bits_uint16 are always set. The value 14
 * written to the bits_* fields matches the ">> 14" of the scalar output
 * macros in this file.
 */
void gavl_init_scale_funcs_quadratic_y_sse2(gavl_scale_funcs_t * tab,
                                            int src_advance, int dst_advance)
{
  if(src_advance == dst_advance)
  {
    switch(src_advance)
    {
      case 1:
        tab->funcs_y.scale_uint8_x_1_noadvance = scale_uint8_x_1_y_quadratic_sse2;
        tab->funcs_y.bits_uint8_noadvance = 14;
        break;
      case 2:
        tab->funcs_y.scale_uint8_x_2 = scale_uint8_x_2_y_quadratic_sse2;
        tab->funcs_y.bits_uint8_noadvance = 14;
        break;
      case 3:
        tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_3_y_quadratic_sse2;
        tab->funcs_y.bits_uint8_noadvance = 14;
        break;
      case 4:
        /* With an advance of 4 the x_4 routine serves the x_3 slot too. */
        tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_4_y_quadratic_sse2;
        tab->funcs_y.scale_uint8_x_4 = scale_uint8_x_4_y_quadratic_sse2;
        tab->funcs_y.bits_uint8_noadvance = 14;
        break;
      default:
        break;
    }
  }

  /* 16-bit routines do not depend on the advance values. */
  tab->funcs_y.scale_uint16_x_1 = scale_uint16_x_1_y_quadratic_sse2;
  tab->funcs_y.scale_uint16_x_2 = scale_uint16_x_2_y_quadratic_sse2;
  tab->funcs_y.scale_uint16_x_3 = scale_uint16_x_3_y_quadratic_sse2;
  tab->funcs_y.scale_uint16_x_4 = scale_uint16_x_4_y_quadratic_sse2;
  tab->funcs_y.bits_uint16 = 14;
}

684

685

/*
 * Register the SSE2 vertical bicubic scalers (clamping 16-bit variants)
 * in tab->funcs_y.
 *
 * tab:         function table to fill in.
 * src_advance: source advance; an 8-bit routine is only installed when it
 * dst_advance: equals the destination advance and lies in 1..4.
 *
 * The four 16-bit entries and bits_uint16 are always set.
 */
void gavl_init_scale_funcs_bicubic_y_sse2(gavl_scale_funcs_t * tab,
                                          int src_advance, int dst_advance)
{
  if(src_advance == dst_advance)
  {
    switch(src_advance)
    {
      case 1:
        tab->funcs_y.scale_uint8_x_1_noadvance = scale_uint8_x_1_y_bicubic_sse2;
        tab->funcs_y.bits_uint8_noadvance = 14;
        break;
      case 2:
        tab->funcs_y.scale_uint8_x_2 = scale_uint8_x_2_y_bicubic_sse2;
        tab->funcs_y.bits_uint8_noadvance = 14;
        break;
      case 3:
        tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_3_y_bicubic_sse2;
        tab->funcs_y.bits_uint8_noadvance = 14;
        break;
      case 4:
        /* With an advance of 4 the x_4 routine serves the x_3 slot too. */
        tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_4_y_bicubic_sse2;
        tab->funcs_y.scale_uint8_x_4 = scale_uint8_x_4_y_bicubic_sse2;
        tab->funcs_y.bits_uint8_noadvance = 14;
        break;
      default:
        break;
    }
  }

  /* 16-bit routines do not depend on the advance values. */
  tab->funcs_y.scale_uint16_x_1 = scale_uint16_x_1_y_bicubic_sse2;
  tab->funcs_y.scale_uint16_x_2 = scale_uint16_x_2_y_bicubic_sse2;
  tab->funcs_y.scale_uint16_x_3 = scale_uint16_x_3_y_bicubic_sse2;
  tab->funcs_y.scale_uint16_x_4 = scale_uint16_x_4_y_bicubic_sse2;
  tab->funcs_y.bits_uint16 = 14;
}

716

717

/*
 * Register the SSE2 vertical bicubic scalers, using the non-clamping
 * 16-bit variants, in tab->funcs_y.
 *
 * The 8-bit entries are the same routines the clamping initializer
 * installs (this file has no separate 8-bit noclip versions); only the
 * 16-bit entries differ.
 *
 * tab:         function table to fill in.
 * src_advance: source advance; an 8-bit routine is only installed when it
 * dst_advance: equals the destination advance and lies in 1..4.
 */
void gavl_init_scale_funcs_bicubic_y_noclip_sse2(gavl_scale_funcs_t * tab,
                                                 int src_advance, int dst_advance)
{
  if(src_advance == dst_advance)
  {
    switch(src_advance)
    {
      case 1:
        tab->funcs_y.scale_uint8_x_1_noadvance = scale_uint8_x_1_y_bicubic_sse2;
        tab->funcs_y.bits_uint8_noadvance = 14;
        break;
      case 2:
        tab->funcs_y.scale_uint8_x_2 = scale_uint8_x_2_y_bicubic_sse2;
        tab->funcs_y.bits_uint8_noadvance = 14;
        break;
      case 3:
        tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_3_y_bicubic_sse2;
        tab->funcs_y.bits_uint8_noadvance = 14;
        break;
      case 4:
        /* With an advance of 4 the x_4 routine serves the x_3 slot too. */
        tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_4_y_bicubic_sse2;
        tab->funcs_y.scale_uint8_x_4 = scale_uint8_x_4_y_bicubic_sse2;
        tab->funcs_y.bits_uint8_noadvance = 14;
        break;
      default:
        break;
    }
  }

  /* 16-bit routines do not depend on the advance values. */
  tab->funcs_y.scale_uint16_x_1 = scale_uint16_x_1_y_bicubic_noclip_sse2;
  tab->funcs_y.scale_uint16_x_2 = scale_uint16_x_2_y_bicubic_noclip_sse2;
  tab->funcs_y.scale_uint16_x_3 = scale_uint16_x_3_y_bicubic_noclip_sse2;
  tab->funcs_y.scale_uint16_x_4 = scale_uint16_x_4_y_bicubic_noclip_sse2;
  tab->funcs_y.bits_uint16 = 14;
}

748

749

#ifdef MMXEXT

750

void gavl_init_scale_funcs_generic_y_mmxext(gavl_scale_funcs_t * tab,

751

int src_advance, int dst_advance)

752

#else

753

void gavl_init_scale_funcs_generic_y_sse2(gavl_scale_funcs_t * tab,

754

int src_advance, int dst_advance)

755

#endif

756

{

757

if((src_advance == 1) && (dst_advance == 1))

758

{

759

tab->funcs_y.scale_uint8_x_1_noadvance = scale_uint8_x_1_y_generic_sse2;

760

tab->funcs_y.bits_uint8_noadvance = 14;

761

}

762

else if((src_advance == 3) && (dst_advance == 3))

763

{

764

tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_3_y_generic_sse2;

765

tab->funcs_y.bits_uint8_noadvance = 14;

766

}

767

else if((src_advance == 2) && (dst_advance == 2))

768

{

769

tab->funcs_y.scale_uint8_x_2 = scale_uint8_x_2_y_generic_sse2;

770

tab->funcs_y.bits_uint8_noadvance = 14;

771

}

772

else if((src_advance == 4) && (dst_advance == 4))

773

{

774

tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_4_y_generic_sse2;

775

tab->funcs_y.scale_uint8_x_4 = scale_uint8_x_4_y_generic_sse2;

776

tab->funcs_y.bits_uint8_noadvance = 14;

777

}

778

tab->funcs_y.scale_uint16_x_1 = scale_uint16_x_1_y_generic_sse2;

779

tab->funcs_y.scale_uint16_x_2 = scale_uint16_x_2_y_generic_sse2;

780

tab->funcs_y.scale_uint16_x_3 = scale_uint16_x_3_y_generic_sse2;

781

tab->funcs_y.scale_uint16_x_4 = scale_uint16_x_4_y_generic_sse2;

782

tab->funcs_y.bits_uint16 = 14;

783

784

}

785

786

/* Disabled: 8-bit bilinear scalers built from scale_y_linear_8.h for
   WIDTH_MUL 1, 2, 4 and 3. Nothing in this block is compiled. */
#if 0

/* scale_uint8_x_1_y_bilinear_sse2 */
#define FUNC_NAME scale_uint8_x_1_y_bilinear_sse2
#define WIDTH_MUL 1
#define BYTES     1
#define NUM_TAPS  -1
#include "scale_y_linear_8.h"

/* scale_uint8_x_2_y_bilinear_sse2 */
#define FUNC_NAME scale_uint8_x_2_y_bilinear_sse2
#define WIDTH_MUL 2
#define BYTES     1
#define NUM_TAPS  -1
#include "scale_y_linear_8.h"

/* scale_uint8_x_4_y_bilinear_sse2 */
#define FUNC_NAME scale_uint8_x_4_y_bilinear_sse2
#define WIDTH_MUL 4
#define BYTES     1
#define NUM_TAPS  -1
#include "scale_y_linear_8.h"

/* scale_uint8_x_3_y_bilinear_sse2 */
#define FUNC_NAME scale_uint8_x_3_y_bilinear_sse2
#define WIDTH_MUL 3
#define BYTES     1
#define NUM_TAPS  -1
#include "scale_y_linear_8.h"

#endif

825

826

/*
 * Bilinear initializer for the SSE2 vertical scalers.
 *
 * Currently a no-op: the whole body is compiled out (marked "Too slow"),
 * so tab is left untouched and both advance arguments are ignored.
 */
void gavl_init_scale_funcs_bilinear_y_sse2(gavl_scale_funcs_t * tab,
                                           int src_advance, int dst_advance)
{
#if 0 // Too slow
  if(src_advance == dst_advance)
  {
    switch(src_advance)
    {
      case 1:
        tab->funcs_y.scale_uint8_x_1_noadvance = scale_uint8_x_1_y_bilinear_sse2;
        tab->funcs_y.bits_uint8_noadvance = 14;
        break;
      case 2:
        tab->funcs_y.scale_uint8_x_2 = scale_uint8_x_2_y_bilinear_sse2;
        tab->funcs_y.bits_uint8_noadvance = 14;
        break;
      case 3:
        tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_3_y_bilinear_sse2;
        tab->funcs_y.bits_uint8_noadvance = 14;
        break;
      case 4:
        tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_4_y_bilinear_sse2;
        tab->funcs_y.scale_uint8_x_4 = scale_uint8_x_4_y_bilinear_sse2;
        tab->funcs_y.bits_uint8_noadvance = 14;
        break;
      default:
        break;
    }
  }
#endif
}

Older »