~linaro-toolchain-dev/cortex-strings/trunk : revision 67

1

/*

2

3

4

*

5

* Redistribution and use in source and binary forms, with or without

6

* modification, are permitted provided that the following conditions

7

* are met:

8

* 1. Redistributions of source code must retain the above copyright

9

* notice, this list of conditions and the following disclaimer.

10

* 2. Redistributions in binary form must reproduce the above copyright

11

* notice, this list of conditions and the following disclaimer in the

12

* documentation and/or other materials provided with the distribution.

13

* 3. The name of the company may not be used to endorse or promote

14

* products derived from this software without specific prior written

15

* permission.

16

*

17

* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED

18

* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF

19

* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.

20

* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

21

* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED

22

* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

23

* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

24

* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

25

* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

26

* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27

*/

28

29

#include "arm_asm.h"

30

31

#ifdef __ARMEB__

32

#define S2LOMEM lsl

33

#define S2LOMEMEQ lsleq

34

#define S2HIMEM lsr

35

#define MSB 0x000000ff

36

#define LSB 0xff000000

37

#define BYTE0_OFFSET 24

38

#define BYTE1_OFFSET 16

39

#define BYTE2_OFFSET 8

40

#define BYTE3_OFFSET 0

41

#else /* not __ARMEB__ */

42

#define S2LOMEM lsr

43

#define S2LOMEMEQ lsreq

44

#define S2HIMEM lsl

45

#define BYTE0_OFFSET 0

46

#define BYTE1_OFFSET 8

47

#define BYTE2_OFFSET 16

48

#define BYTE3_OFFSET 24

49

#define MSB 0xff000000

50

#define LSB 0x000000ff

51

#endif /* not __ARMEB__ */

52

53

.syntax unified

54

55

#if defined (__thumb__)

56

.thumb

57

.thumb_func

58

#endif

59

.global strcmp

60

.type strcmp, %function

61

strcmp:

62

63

#if (defined (__thumb__) && !defined (__thumb2__))

64

1:

65

ldrb r2, [r0]

66

ldrb r3, [r1]

67

adds r0, r0, #1

68

adds r1, r1, #1

69

cmp r2, #0

70

beq 2f

71

cmp r2, r3

72

beq 1b

73

2:

74

subs r0, r2, r3

75

bx lr

76

#elif (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))

77

1:

78

ldrb r2, [r0], #1

79

ldrb r3, [r1], #1

80

cmp r2, #1

81

it cs

82

cmpcs r2, r3

83

beq 1b

84

subs r0, r2, r3

85

RETURN

86

87

88

#elif (defined (_ISA_THUMB_2) || defined (_ISA_ARM_6))

89

/* Use LDRD whenever possible. */

90

91

/* The main thing to look out for when comparing large blocks is that

92

the loads do not cross a page boundary when loading past the index

93

of the byte with the first difference or the first string-terminator.

94

95

For example, if the strings are identical and the string-terminator

96

is at index k, byte by byte comparison will not load beyond address

97

s1+k and s2+k; word by word comparison may load up to 3 bytes beyond

98

k; double word - up to 7 bytes. If the load of these bytes crosses

99

a page boundary, it might cause a memory fault (if the page is not mapped)

100

that would not have happened in byte by byte comparison.

101

102

If an address is (double) word aligned, then a load of a (double) word

103

from that address will not cross a page boundary.

104

Therefore, the algorithm below considers word and double-word alignment

105

of strings separately. */

106

107

/* High-level description of the algorithm.

108

109

* The fast path: if both strings are double-word aligned,

110

use LDRD to load two words from each string in every loop iteration.

111

* If the strings have the same offset from a word boundary,

112

use LDRB to load and compare byte by byte until

113

the first string is aligned to a word boundary (at most 3 bytes).

114

This is optimized for quick return on short unaligned strings.

115

* If the strings have the same offset from a double-word boundary,

116

use LDRD to load two words from each string in every loop iteration, as in the fast path.

117

* If the strings do not have the same offset from a double-word boundary,

118

load a word from the second string before the loop to initialize the queue.

119

Use LDRD to load two words from every string in every loop iteration.

120

Inside the loop, load the second word from the second string only after comparing

121

the first word, using the queued value, to guarantee safety across page boundaries.

122

* If the strings do not have the same offset from a word boundary,

123

use LDR and a shift queue. Order of loads and comparisons matters,

124

similarly to the previous case.

125

126

* Use UADD8 and SEL to compare words, and use REV and CLZ to compute the return value.

127

* The only difference between ARM and Thumb modes is the use of CBZ instruction.

128

* The only difference between big and little endian is the use of REV in little endian

129

to compute the return value, instead of MOV.

130

* No preload. [TODO.]

131

*/

132

133

.macro m_cbz reg label

134

#ifdef __thumb2__

135

cbz \reg, \label

136

#else /* not defined __thumb2__ */

137

cmp \reg, #0

138

beq \label

139

#endif /* not defined __thumb2__ */

140

.endm /* m_cbz */

141

142

.macro m_cbnz reg label

143

#ifdef __thumb2__

144

cbnz \reg, \label

145

#else /* not defined __thumb2__ */

146

cmp \reg, #0

147

bne \label

148

#endif /* not defined __thumb2__ */

149

.endm /* m_cbnz */

150

151

.macro init

152

/* Macro to save temporary registers and prepare magic values. */

153

subs sp, sp, #16

154

strd r4, r5, [sp, #8]

155

strd r6, r7, [sp]

156

mvn r6, #0 /* all F */

157

mov r7, #0 /* all 0 */

158

.endm /* init */

159

160

.macro magic_compare_and_branch w1 w2 label

161

/* Macro to compare registers w1 and w2 and conditionally branch to label. */

162

cmp \w1, \w2 /* Are w1 and w2 the same? */

163

magic_find_zero_bytes \w1

164

it eq

165

cmpeq ip, #0 /* Is there a zero byte in w1? */

166

bne \label

167

.endm /* magic_compare_and_branch */

168

169

.macro magic_find_zero_bytes w1

170

/* Macro to find all-zero bytes in w1, result is in ip. */

171

#if (defined (__ARM_FEATURE_DSP))

172

uadd8 ip, \w1, r6

173

sel ip, r7, r6

174

#else /* not defined (__ARM_FEATURE_DSP) */

175

/* __ARM_FEATURE_DSP is not defined for some Cortex-M processors.

176

Coincidently, these processors only have Thumb-2 mode, where we can use the

177

the (large) magic constant available directly as an immediate in instructions.

178

Note that we cannot use the magic constant in ARM mode, where we need

179

to create the constant in a register. */

180

sub ip, \w1, #0x01010101

181

bic ip, ip, \w1

182

and ip, ip, #0x80808080

183

#endif /* not defined (__ARM_FEATURE_DSP) */

184

.endm /* magic_find_zero_bytes */

185

186

.macro setup_return w1 w2

187

#ifdef __ARMEB__

188

mov r1, \w1

189

mov r2, \w2

190

#else /* not __ARMEB__ */

191

rev r1, \w1

192

rev r2, \w2

193

#endif /* not __ARMEB__ */

194

.endm /* setup_return */

195

196

/*

197

optpld r0, #0

198

optpld r1, #0

199

*/

200

201

/* Are both strings double-word aligned? */

202

orr ip, r0, r1

203

tst ip, #7

204

bne do_align

205

206

/* Fast path. */

207

init

208

209

doubleword_aligned:

210

211

/* Get here when the strings to compare are double-word aligned. */

212

/* Compare two words in every iteration. */

213

.p2align 2

214

2:

215

/*

216

optpld r0, #16

217

optpld r1, #16

218

*/

219

220

/* Load the next double-word from each string. */

221

ldrd r2, r3, [r0], #8

222

ldrd r4, r5, [r1], #8

223

224

magic_compare_and_branch w1=r2, w2=r4, label=return_24

225

magic_compare_and_branch w1=r3, w2=r5, label=return_35

226

b 2b

227

228

do_align:

229

/* Is the first string word-aligned? */

230

ands ip, r0, #3

231

beq word_aligned_r0

232

233

/* Fast compare byte by byte until the first string is word-aligned. */

234

/* The offset of r0 from a word boundary is in ip. Thus, the number of bytes

235

to read until the next word boudnary is 4-ip. */

236

bic r0, r0, #3

237

ldr r2, [r0], #4

238

lsls ip, ip, #31

239

beq byte2

240

bcs byte3

241

242

byte1:

243

ldrb ip, [r1], #1

244

uxtb r3, r2, ror #BYTE1_OFFSET

245

subs ip, r3, ip

246

bne fast_return

247

m_cbz reg=r3, label=fast_return

248

249

byte2:

250

ldrb ip, [r1], #1

251

uxtb r3, r2, ror #BYTE2_OFFSET

252

subs ip, r3, ip

253

bne fast_return

254

m_cbz reg=r3, label=fast_return

255

256

byte3:

257

ldrb ip, [r1], #1

258

uxtb r3, r2, ror #BYTE3_OFFSET

259

subs ip, r3, ip

260

bne fast_return

261

m_cbnz reg=r3, label=word_aligned_r0

262

263

fast_return:

264

mov r0, ip

265

bx lr

266

267

word_aligned_r0:

268

init

269

/* The first string is word-aligned. */

270

/* Is the second string word-aligned? */

271

ands ip, r1, #3

272

bne strcmp_unaligned

273

274

word_aligned:

275

/* The strings are word-aligned. */

276

/* Is the first string double-word aligned? */

277

tst r0, #4

278

beq doubleword_aligned_r0

279

280

/* If r0 is not double-word aligned yet, align it by loading

281

and comparing the next word from each string. */

282

ldr r2, [r0], #4

283

ldr r4, [r1], #4

284

magic_compare_and_branch w1=r2 w2=r4 label=return_24

285

286

doubleword_aligned_r0:

287

/* Get here when r0 is double-word aligned. */

288

/* Is r1 doubleword_aligned? */

289

tst r1, #4

290

beq doubleword_aligned

291

292

/* Get here when the strings to compare are word-aligned,

293

r0 is double-word aligned, but r1 is not double-word aligned. */

294

295

/* Initialize the queue. */

296

ldr r5, [r1], #4

297

298

/* Compare two words in every iteration. */

299

.p2align 2

300

3:

301

/*

302

optpld r0, #16

303

optpld r1, #16

304

*/

305

306

/* Load the next double-word from each string and compare. */

307

ldrd r2, r3, [r0], #8

308

magic_compare_and_branch w1=r2 w2=r5 label=return_25

309

ldrd r4, r5, [r1], #8

310

magic_compare_and_branch w1=r3 w2=r4 label=return_34

311

b 3b

312

313

.macro miscmp_word offsetlo offsethi

314

/* Macro to compare misaligned strings. */

315

/* r0, r1 are word-aligned, and at least one of the strings

316

is not double-word aligned. */

317

/* Compare one word in every loop iteration. */

318

/* OFFSETLO is the original bit-offset of r1 from a word-boundary,

319

OFFSETHI is 32 - OFFSETLO (i.e., offset from the next word). */

320

321

/* Initialize the shift queue. */

322

ldr r5, [r1], #4

323

324

/* Compare one word from each string in every loop iteration. */

325

.p2align 2

326

7:

327

ldr r3, [r0], #4

328

S2LOMEM r5, r5, #\offsetlo

329

magic_find_zero_bytes w1=r3

330

cmp r7, ip, S2HIMEM #\offsetlo

331

and r2, r3, r6, S2LOMEM #\offsetlo

332

it eq

333

cmpeq r2, r5

334

bne return_25

335

ldr r5, [r1], #4

336

cmp ip, #0

337

eor r3, r2, r3

338

S2HIMEM r2, r5, #\offsethi

339

it eq

340

cmpeq r3, r2

341

bne return_32

342

b 7b

343

.endm /* miscmp_word */

344

345

strcmp_unaligned:

346

/* r0 is word-aligned, r1 is at offset ip from a word. */

347

/* Align r1 to the (previous) word-boundary. */

348

bic r1, r1, #3

349

350

/* Unaligned comparison word by word using LDRs. */

351

cmp ip, #2

352

beq miscmp_word_16 /* If ip == 2. */

353

bge miscmp_word_24 /* If ip == 3. */

354

miscmp_word offsetlo=8 offsethi=24 /* If ip == 1. */

355

miscmp_word_16: miscmp_word offsetlo=16 offsethi=16

356

miscmp_word_24: miscmp_word offsetlo=24 offsethi=8

357

358

359

return_32:

360

setup_return w1=r3, w2=r2

361

b do_return

362

return_34:

363

setup_return w1=r3, w2=r4

364

b do_return

365

return_25:

366

setup_return w1=r2, w2=r5

367

b do_return

368

return_35:

369

setup_return w1=r3, w2=r5

370

b do_return

371

return_24:

372

setup_return w1=r2, w2=r4

373

374

do_return:

375

376

#ifdef __ARMEB__

377

mov r0, ip

378

#else /* not __ARMEB__ */

379

rev r0, ip

380

#endif /* not __ARMEB__ */

381

382

/* Restore temporaries early, before computing the return value. */

383

ldrd r6, r7, [sp]

384

ldrd r4, r5, [sp, #8]

385

adds sp, sp, #16

386

387

/* There is a zero or a different byte between r1 and r2. */

388

/* r0 contains a mask of all-zero bytes in r1. */

389

/* Using r0 and not ip here because cbz requires low register. */

390

m_cbz reg=r0, label=compute_return_value

391

clz r0, r0

392

/* r0 contains the number of bits on the left of the first all-zero byte in r1. */

393

rsb r0, r0, #24

394

/* Here, r0 contains the number of bits on the right of the first all-zero byte in r1. */

395

lsr r1, r1, r0

396

lsr r2, r2, r0

397

398

compute_return_value:

399

subs r0, r1, r2

400

bx lr

401

402

403

#else /* !(defined (_ISA_THUMB_2) || defined (_ISA_ARM_6)

404

defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) ||

405

(defined (__thumb__) && !defined (__thumb2__))) */

406

407

/* Use LDR whenever possible. */

408

409

#ifdef __thumb2__

410

#define magic1(REG) 0x01010101

411

#define magic2(REG) 0x80808080

412

#else

413

#define magic1(REG) REG

414

#define magic2(REG) REG, lsl #7

415

#endif

416

417

optpld r0

418

optpld r1

419

eor r2, r0, r1

420

tst r2, #3

421

/* Strings not at same byte offset from a word boundary. */

422

bne strcmp_unaligned

423

ands r2, r0, #3

424

bic r0, r0, #3

425

bic r1, r1, #3

426

ldr ip, [r0], #4

427

it eq

428

ldreq r3, [r1], #4

429

beq 1f

430

/* Although s1 and s2 have identical initial alignment, they are

431

not currently word aligned. Rather than comparing bytes,

432

make sure that any bytes fetched from before the addressed

433

bytes are forced to 0xff. Then they will always compare

434

equal. */

435

eor r2, r2, #3

436

lsl r2, r2, #3

437

mvn r3, MSB

438

S2LOMEM r2, r3, r2

439

ldr r3, [r1], #4

440

orr ip, ip, r2

441

orr r3, r3, r2

442

1:

443

#ifndef __thumb2__

444

/* Load the 'magic' constant 0x01010101. */

445

str r4, [sp, #-4]!

446

mov r4, #1

447

orr r4, r4, r4, lsl #8

448

orr r4, r4, r4, lsl #16

449

#endif

450

.p2align 2

451

4:

452

optpld r0, #8

453

optpld r1, #8

454

sub r2, ip, magic1(r4)

455

cmp ip, r3

456

itttt eq

457

/* check for any zero bytes in first word */

458

biceq r2, r2, ip

459

tsteq r2, magic2(r4)

460

ldreq ip, [r0], #4

461

ldreq r3, [r1], #4

462

beq 4b

463

2:

464

/* There's a zero or a different byte in the word */

465

S2HIMEM r0, ip, #24

466

S2LOMEM ip, ip, #8

467

cmp r0, #1

468

it cs

469

cmpcs r0, r3, S2HIMEM #24

470

it eq

471

S2LOMEMEQ r3, r3, #8

472

beq 2b

473

/* On a big-endian machine, r0 contains the desired byte in bits

474

0-7; on a little-endian machine they are in bits 24-31. In

475

both cases the other bits in r0 are all zero. For r3 the

476

interesting byte is at the other end of the word, but the

477

other bits are not necessarily zero. We need a signed result

478

representing the differnece in the unsigned bytes, so for the

479

little-endian case we can't just shift the interesting bits

480

up. */

481

#ifdef __ARMEB__

482

sub r0, r0, r3, lsr #24

483

#else

484

and r3, r3, #255

485

#ifdef __thumb2__

486

/* No RSB instruction in Thumb2 */

487

lsr r0, r0, #24

488

sub r0, r0, r3

489

#else

490

rsb r0, r3, r0, lsr #24

491

#endif

492

#endif

493

#ifndef __thumb2__

494

ldr r4, [sp], #4

495

#endif

496

RETURN

497

498

499

strcmp_unaligned:

500

501

#if 0

502

/* The assembly code below is based on the following alogrithm. */

503

#ifdef __ARMEB__

504

#define RSHIFT <<

505

#define LSHIFT >>

506

#else

507

#define RSHIFT >>

508

#define LSHIFT <<

509

#endif

510

511

#define body(shift) \

512

mask = 0xffffffffU RSHIFT shift; \

513

w1 = *wp1++; \

514

w2 = *wp2++; \

515

do \

516

{ \

517

t1 = w1 & mask; \

518

if (__builtin_expect(t1 != w2 RSHIFT shift, 0)) \

519

{ \

520

w2 RSHIFT= shift; \

521

break; \

522

} \

523

if (__builtin_expect(((w1 - b1) & ~w1) & (b1 << 7), 0)) \

524

{ \

525

/* See comment in assembler below re syndrome on big-endian */\

526

if ((((w1 - b1) & ~w1) & (b1 << 7)) & mask) \

527

w2 RSHIFT= shift; \

528

else \

529

{ \

530

w2 = *wp2; \

531

t1 = w1 RSHIFT (32 - shift); \

532

w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \

533

} \

534

break; \

535

} \

536

w2 = *wp2++; \

537

t1 ^= w1; \

538

if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0)) \

539

{ \

540

t1 = w1 >> (32 - shift); \

541

w2 = (w2 << (32 - shift)) RSHIFT (32 - shift); \

542

break; \

543

} \

544

w1 = *wp1++; \

545

} while (1)

546

547

const unsigned* wp1;

548

const unsigned* wp2;

549

unsigned w1, w2;

550

unsigned mask;

551

unsigned shift;

552

unsigned b1 = 0x01010101;

553

char c1, c2;

554

unsigned t1;

555

556

while (((unsigned) s1) & 3)

557

{

558

c1 = *s1++;

559

c2 = *s2++;

560

if (c1 == 0 || c1 != c2)

561

return c1 - (int)c2;

562

}

563

wp1 = (unsigned*) (((unsigned)s1) & ~3);

564

wp2 = (unsigned*) (((unsigned)s2) & ~3);

565

t1 = ((unsigned) s2) & 3;

566

if (t1 == 1)

567

{

568

body(8);

569

}

570

else if (t1 == 2)

571

{

572

body(16);

573

}

574

else

575

{

576

body (24);

577

}

578

579

do

580

{

581

#ifdef __ARMEB__

582

c1 = (char) t1 >> 24;

583

c2 = (char) w2 >> 24;

584

#else /* not __ARMEB__ */

585

c1 = (char) t1;

586

c2 = (char) w2;

587

#endif /* not __ARMEB__ */

588

t1 RSHIFT= 8;

589

w2 RSHIFT= 8;

590

} while (c1 != 0 && c1 == c2);

591

return c1 - c2;

592

#endif /* 0 */

593

594

595

wp1 .req r0

596

wp2 .req r1

597

b1 .req r2

598

w1 .req r4

599

w2 .req r5

600

t1 .req ip

601

@ r3 is scratch

602

603

/* First of all, compare bytes until wp1(sp1) is word-aligned. */

604

1:

605

tst wp1, #3

606

beq 2f

607

ldrb r2, [wp1], #1

608

ldrb r3, [wp2], #1

609

cmp r2, #1

610

it cs

611

cmpcs r2, r3

612

beq 1b

613

sub r0, r2, r3

614

RETURN

615

616

2:

617

str r5, [sp, #-4]!

618

str r4, [sp, #-4]!

619

//stmfd sp!, {r4, r5}

620

mov b1, #1

621

orr b1, b1, b1, lsl #8

622

orr b1, b1, b1, lsl #16

623

624

and t1, wp2, #3

625

bic wp2, wp2, #3

626

ldr w1, [wp1], #4

627

ldr w2, [wp2], #4

628

cmp t1, #2

629

beq 2f

630

bhi 3f

631

632

/* Critical inner Loop: Block with 3 bytes initial overlap */

633

.p2align 2

634

1:

635

bic t1, w1, MSB

636

cmp t1, w2, S2LOMEM #8

637

sub r3, w1, b1

638

bic r3, r3, w1

639

bne 4f

640

ands r3, r3, b1, lsl #7

641

it eq

642

ldreq w2, [wp2], #4

643

bne 5f

644

eor t1, t1, w1

645

cmp t1, w2, S2HIMEM #24

646

bne 6f

647

ldr w1, [wp1], #4

648

b 1b

649

4:

650

S2LOMEM w2, w2, #8

651

b 8f

652

653

5:

654

#ifdef __ARMEB__

655

/* The syndrome value may contain false ones if the string ends

656

with the bytes 0x01 0x00 */

657

tst w1, #0xff000000

658

itt ne

659

tstne w1, #0x00ff0000

660

tstne w1, #0x0000ff00

661

beq 7f

662

#else

663

bics r3, r3, #0xff000000

664

bne 7f

665

#endif

666

ldrb w2, [wp2]

667

S2LOMEM t1, w1, #24

668

#ifdef __ARMEB__

669

lsl w2, w2, #24

670

#endif

671

b 8f

672

673

6:

674

S2LOMEM t1, w1, #24

675

and w2, w2, LSB

676

b 8f

677

678

/* Critical inner Loop: Block with 2 bytes initial overlap */

679

.p2align 2

680

2:

681

S2HIMEM t1, w1, #16

682

sub r3, w1, b1

683

S2LOMEM t1, t1, #16

684

bic r3, r3, w1

685

cmp t1, w2, S2LOMEM #16

686

bne 4f

687

ands r3, r3, b1, lsl #7

688

it eq

689

ldreq w2, [wp2], #4

690

bne 5f

691

eor t1, t1, w1

692

cmp t1, w2, S2HIMEM #16

693

bne 6f

694

ldr w1, [wp1], #4

695

b 2b

696

697

5:

698

#ifdef __ARMEB__

699

/* The syndrome value may contain false ones if the string ends

700

with the bytes 0x01 0x00 */

701

tst w1, #0xff000000

702

it ne

703

tstne w1, #0x00ff0000

704

beq 7f

705

#else

706

lsls r3, r3, #16

707

bne 7f

708

#endif

709

ldrh w2, [wp2]

710

S2LOMEM t1, w1, #16

711

#ifdef __ARMEB__

712

lsl w2, w2, #16

713

#endif

714

b 8f

715

716

6:

717

S2HIMEM w2, w2, #16

718

S2LOMEM t1, w1, #16

719

4:

720

S2LOMEM w2, w2, #16

721

b 8f

722

723

/* Critical inner Loop: Block with 1 byte initial overlap */

724

.p2align 2

725

3:

726

and t1, w1, LSB

727

cmp t1, w2, S2LOMEM #24

728

sub r3, w1, b1

729

bic r3, r3, w1

730

bne 4f

731

ands r3, r3, b1, lsl #7

732

it eq

733

ldreq w2, [wp2], #4

734

bne 5f

735

eor t1, t1, w1

736

cmp t1, w2, S2HIMEM #8

737

bne 6f

738

ldr w1, [wp1], #4

739

b 3b

740

4:

741

S2LOMEM w2, w2, #24

742

b 8f

743

5:

744

/* The syndrome value may contain false ones if the string ends

745

with the bytes 0x01 0x00 */

746

tst w1, LSB

747

beq 7f

748

ldr w2, [wp2], #4

749

6:

750

S2LOMEM t1, w1, #8

751

bic w2, w2, MSB

752

b 8f

753

7:

754

mov r0, #0

755

//ldmfd sp!, {r4, r5}

756

ldr r4, [sp], #4

757

ldr r5, [sp], #4

758

RETURN

759

8:

760

and r2, t1, LSB

761

and r0, w2, LSB

762

cmp r0, #1

763

it cs

764

cmpcs r0, r2

765

itt eq

766

S2LOMEMEQ t1, t1, #8

767

S2LOMEMEQ w2, w2, #8

768

beq 8b

769

sub r0, r2, r0

770

//ldmfd sp!, {r4, r5}

771

ldr r4, [sp], #4

772

ldr r5, [sp], #4

773

RETURN

774

775

#endif /* !(defined (_ISA_THUMB_2) || defined (_ISA_ARM_6)

776

defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) ||

777

(defined (__thumb__) && !defined (__thumb2__))) */