~ubuntu-branches/ubuntu/hoary/indent/hoary : revision 1

1

2

*

3

4

*

5

6

7

8

*

9

* Redistribution and use in source and binary forms are permitted

10

* provided that

11

* the above copyright notice and this paragraph are duplicated in all such

12

* forms and that any documentation, advertising materials, and other

13

* materials related to such distribution and use acknowledge that the

14

* software was developed by the University of California, Berkeley, the

15

* University of Illinois, Urbana, and Sun Microsystems, Inc. The name of

16

* either University or Sun Microsystems may not be used to endorse or

17

* promote products derived from this software without specific prior written

18

* permission. THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR

19

* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES

20

* OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.

21

*/

22

23

24

/* Here we have the token scanner for indent. It scans off one token and

25

* puts it in the global variable "token". It returns a code, indicating the

26

* type of token scanned. */

27

28

#include "sys.h"

29

#include <ctype.h>

30

#if defined (HAVE_UNISTD_H)

31

#include <unistd.h>

32

#endif

33

#include <string.h>

34

#include "indent.h"

35

#include "globs.h"

36

37

RCSTAG_CC ("$Id: lexi.c,v 1.32 2001/11/26 21:50:49 david Exp $");

38

39

/* Stuff that needs to be shared with the rest of indent. Documented in

40

* indent.h. */

41

char *token;

42

char *token_end;

43

44

#define alphanum 1

45

#define opchar 3

46

47

struct templ

48

{

49

char *rwd;

50

enum rwcodes rwcode;

51

};

52

53

/* Pointer to a vector of keywords specified by the user. */

54

static struct templ *user_specials = 0;

55

56

/* Allocated size of user_specials. */

57

static unsigned int user_specials_max;

58

59

/* Index in user_specials of the first unused entry. */

60

static unsigned int user_specials_idx;

61

62

/* Lookup table, indexed by character code (0..255), used to decide what
 * type each character is:
 *   1 (alphanum) - may appear in an identifier or number
 *   3 (opchar)   - may appear in an operator
 *   0            - anything else
 * Callers index it with `0xff & (int) c'. */
char chartype[] = {
#ifndef CHARSET_EBCDIC

    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x00 - 0x07 */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x08 - 0x0f */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x10 - 0x17 */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x18 - 0x1f */
    0, 3, 0, 0, 1, 3, 3, 0,     /* 0x20 - 0x27: space ! " # $ % & ' */
    0, 0, 3, 3, 0, 3, 0, 3,     /* 0x28 - 0x2f: ( ) * + , - . / */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x30 - 0x37: 0 .. 7 */
    1, 1, 0, 0, 3, 3, 3, 3,     /* 0x38 - 0x3f: 8 9 : ; < = > ? */
    0, 1, 1, 1, 1, 1, 1, 1,     /* 0x40 - 0x47: @ A .. G */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x48 - 0x4f: H .. O */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x50 - 0x57: P .. W */
    1, 1, 1, 0, 0, 0, 3, 1,     /* 0x58 - 0x5f: X Y Z [ \ ] ^ _ */
    0, 1, 1, 1, 1, 1, 1, 1,     /* 0x60 - 0x67: ` a .. g */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x68 - 0x6f: h .. o */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x70 - 0x77: p .. w */
    1, 1, 1, 0, 3, 0, 3, 0,     /* 0x78 - 0x7f: x y z { | } ~ DEL */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x80 - 0xff: all zero */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,

#else /* CHARSET_EBCDIC */

    /*
     * The following table was generated by the program given at the end
     */

    0, 0, 0, 0, 0, 0, 0, 0,     /*|........| */
    0, 0, 0, 0, 0, 0, 0, 0,     /*|........| */
    0, 0, 0, 0, 0, 0, 0, 0,     /*|........| */
    0, 0, 0, 0, 0, 0, 0, 0,     /*|........| */
    0, 0, 0, 0, 0, 0, 0, 0,     /*|........| */
    0, 0, 0, 0, 0, 0, 0, 0,     /*|........| */
    0, 0, 0, 0, 0, 0, 0, 0,     /*|........| */
    0, 0, 0, 0, 0, 0, 0, 0,     /*|........| */
    0, 0, 0, 0, 0, 0, 0, 0,     /*| .......| */
    0, 0, 0, 0, 3, 0, 3, 3,     /*|..`.<(+|| */
    3, 0, 0, 0, 0, 0, 0, 0,     /*|&.......| */
    0, 0, 3, 1, 3, 0, 0, 0,     /*|..!$*);.| */
    3, 3, 0, 0, 0, 0, 0, 0,     /*|-/......| */
    0, 0, 3, 0, 3, 1, 3, 3,     /*|..^,%_>?| */
    0, 0, 0, 0, 0, 0, 0, 0,     /*|........| */
    0, 0, 0, 0, 0, 0, 3, 0,     /*|..:#@'="| */
    0, 1, 1, 1, 1, 1, 1, 1,     /*|.abcdefg| */
    1, 1, 0, 0, 0, 0, 0, 0,     /*|hi......| */
    0, 1, 1, 1, 1, 1, 1, 1,     /*|.jklmnop| */
    1, 1, 0, 0, 0, 0, 0, 0,     /*|qr......| */
    0, 0, 1, 1, 1, 1, 1, 1,     /*|..stuvwx| */
    1, 1, 0, 0, 0, 0, 0, 0,     /*|yz......| */
    0, 0, 0, 0, 0, 0, 0, 0,     /*|........| */
    0, 0, 0, 0, 0, 0, 0, 0,     /*|...[\]..| */
    0, 1, 1, 1, 1, 1, 1, 1,     /*|.ABCDEFG| */
    1, 1, 0, 0, 0, 0, 0, 0,     /*|HI......| */
    0, 1, 1, 1, 1, 1, 1, 1,     /*|.JKLMNOP| */
    1, 1, 0, 0, 0, 0, 0, 0,     /*|QR......| */
    0, 0, 1, 1, 1, 1, 1, 1,     /*|..STUVWX| */
    1, 1, 0, 0, 0, 0, 0, 0,     /*|YZ......| */
    1, 1, 1, 1, 1, 1, 1, 1,     /*|01234567| */
    1, 1, 0, 0, 0, 0, 0, 3,     /*|89.{.}.~| */
#endif /* CHARSET_EBCDIC */
};

140

141

/*
 * The EBCDIC variant of the chartype[] table above was generated by the
 * following program from the ASCII version of the chartype[] array,
 * where the unsigned char os_toascii[] array can be found in the source
 * for the Apache Web Server version 1.3.x, in the directories...
 *     src/os/bs2000/ebcdic.c for the SIEMENS BS2000 mainframes,
 *     src/os/tpf/ebcdic.c for the IBM TPF server line.
 * The table was created for the BS2000 EBCDIC character set,
 * but the few non-zero places should be compatible with IBM's EBCDIC.
 *
 * The program is kept for reference only and is never compiled (#if 0).
 */
#if 0
int
main (void)
{
    const int GAP = 8;          /* table entries printed per output row */
    int i, j;

    for (i = 0; i < 256; i += GAP)
    {
        printf (" ");

        /* The class of each native code point, looked up through its
         * ASCII equivalent. */
        for (j = 0; j < GAP; ++j)
        {
            printf ("%d, ", chartype[os_toascii[i + j]]);
        }

        printf (" /*|");

        /* The same GAP characters as a legend, '.' for unprintable ones. */
        for (j = 0; j < GAP; ++j)
        {
            printf ("%c", isprint (i + j) ? (i + j) : '.');
        }

        printf ("|*/\n");
    }
    return 0;
}
#endif

178

179

/* Include code generated by gperf.  The prototype is declared first so
 * that the definition inside gperf.c is preceded by a declaration. */
#ifdef PROTOTYPES
#ifdef __GNUC__
__inline
#endif
struct templ *is_reserved (register const char *str, register unsigned int len);
#endif
#include "gperf.c"

/* Include code generated by gperf for C++ keyword set.  Both generated
 * files define the same helper macros, so the first set is removed
 * before the second file is included. */
#undef MIN_HASH_VALUE           /* remove old defs */
#undef MAX_HASH_VALUE
#undef TOTAL_KEYWORDS
#undef MIN_WORD_LENGTH
#undef MAX_WORD_LENGTH

#ifdef PROTOTYPES
#ifdef __GNUC__
__inline
#endif
struct templ *is_reserved_cc (register const char *str, register unsigned int len);
#endif
#include "gperf-cc.c"

202

203

enum codes lexi (void)

204

{

205

int unary_delim; /* this is set to 1 if the current token

206

forces a following operator to be unary */

207

static enum codes last_code; /* the last token type returned */

208

static int l_struct; /* set to 1 if the last token was 'struct' */

209

static int l_enum; /* set to 1 if the last token was `enum' */

210

enum codes code; /* internal code to be returned */

211

char qchar; /* the delimiter character for a string */

212

213

static int count; /* debugging counter */

214

215

count++;

216

217

unary_delim = false;

218

/* tell world that this token started in column 1 iff the last

219

thing scanned was nl */

220

parser_state_tos->col_1 = parser_state_tos->last_nl;

221

parser_state_tos->last_saw_nl = parser_state_tos->last_nl;

222

parser_state_tos->last_nl = false;

223

224

if (buf_ptr >= buf_end)

225

{

226

fill_buffer ();

227

}

228

229

if (*buf_ptr == ' ' || *buf_ptr == TAB)

230

{

231

parser_state_tos->col_1 = false;

232

while (*buf_ptr == ' ' || *buf_ptr == TAB)

233

{

234

if (++buf_ptr >= buf_end)

235

{

236

fill_buffer ();

237

}

238

}

239

}

240

241

/* INCREDIBLY IMPORTANT WARNING!!!

242

*

243

* Note that subsequent calls to `fill_buffer ()' may switch `buf_ptr'

244

* to a different buffer. Thus when `token_end' gets set later, it

245

* may be pointing into a different buffer than `token'. */

246

247

token = buf_ptr;

248

249

/* Scan an alphanumeric token */

250

251

if ((!((buf_ptr[0] == 'L') &&

252

((buf_ptr[1] == '"') || (buf_ptr[1] == '\''))) &&

253

(chartype[0xff & (int) *buf_ptr] == alphanum)) ||

254

((buf_ptr[0] == '.') && isdigit (buf_ptr[1])))

255

{

256

/* we have a character or number */

257

struct templ *p;

258

259

if (isdigit (*buf_ptr) ||

260

((buf_ptr[0] == '.') && isdigit (buf_ptr[1])))

261

{

262

int seendot = 0, seenexp = 0;

263

264

if ((*buf_ptr == '0') && ((buf_ptr[1] == 'x') || (buf_ptr[1] == 'X')))

265

{

266

buf_ptr += 2;

267

while (isxdigit (*buf_ptr))

268

{

269

buf_ptr++;

270

}

271

}

272

else

273

while (1)

274

{

275

if (*buf_ptr == '.')

276

{

277

if (seendot)

278

{

279

break;

280

}

281

else

282

{

283

seendot++;

284

}

285

}

286

287

buf_ptr++;

288

289

if (!isdigit (*buf_ptr) && *buf_ptr != '.')

290

{

291

if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)

292

{

293

break;

294

}

295

else

296

{

297

seenexp++;

298

seendot++;

299

buf_ptr++;

300

301

if (*buf_ptr == '+' || *buf_ptr == '-')

302

{

303

buf_ptr++;

304

}

305

}

306

}

307

}

308

309

if (*buf_ptr == 'F' || *buf_ptr == 'f' || *buf_ptr == 'i' || *buf_ptr == 'j')

310

{

311

buf_ptr++;

312

}

313

else

314

{

315

while (*buf_ptr == 'U' || *buf_ptr == 'u' || *buf_ptr == 'L' || *buf_ptr == 'l')

316

{

317

buf_ptr++;

318

}

319

}

320

}

321

else

322

while (chartype[0xff & (int) *buf_ptr] == alphanum)

323

{ /* copy it over */

324

buf_ptr++;

325

326

if (buf_ptr >= buf_end)

327

{

328

fill_buffer ();

329

}

330

}

331

332

token_end = buf_ptr;

333

334

if (token_end - token == 13 && !strncmp (token, "__attribute__", 13))

335

{

336

last_code = decl;

337

parser_state_tos->last_u_d = true;

338

return (attribute);

339

}

340

341

while (*buf_ptr == ' ' || *buf_ptr == TAB)

342

{

343

if (++buf_ptr >= buf_end)

344

{

345

fill_buffer ();

346

}

347

}

348

349

/* Handle operator declarations. */

350

351

if (token_end - token == 8 && !strncmp (token, "operator", 8))

352

{

353

while (chartype[0xff & (int) *buf_ptr] == opchar)

354

{

355

buf_ptr++;

356

357

if (buf_ptr >= buf_end)

358

{

359

fill_buffer ();

360

}

361

}

362

363

token_end = buf_ptr;

364

365

while (*buf_ptr == ' ' || *buf_ptr == TAB)

366

{

367

if (++buf_ptr >= buf_end)

368

fill_buffer ();

369

}

370

}

371

372

parser_state_tos->its_a_keyword = false;

373

parser_state_tos->sizeof_keyword = false;

374

375

/* if last token was 'struct', then this token should be treated

376

as a declaration */

377

if (l_struct)

378

{

379

l_struct = false;

380

last_code = ident;

381

parser_state_tos->last_u_d = true;

382

383

if (parser_state_tos->last_token == cpp_operator)

384

{

385

return overloaded;

386

}

387

return (decl);

388

}

389

390

/* Operator after indentifier is binary */

391

392

parser_state_tos->last_u_d = false;

393

last_code = ident;

394

395

/* Check whether the token is a reserved word. Use perfect hashing... */

396

397

if (c_plus_plus)

398

{

399

p = is_reserved_cc (token, token_end - token);

400

}

401

else

402

{

403

p = is_reserved (token, token_end - token);

404

}

405

406

if (!p && user_specials != 0)

407

{

408

for (p = &user_specials[0]; p < &user_specials[0] + user_specials_idx; p++)

409

{

410

char *q = token;

411

char *r = p->rwd;

412

413

/* This string compare is a little nonstandard because token

414

* ends at the character before token_end and p->rwd is

415

* null-terminated. */

416

417

while (1)

418

{

419

/* If we have come to the end of both the keyword in

420

* user_specials and the keyword in token they are equal. */

421

422

if (q >= token_end && !*r)

423

{

424

goto found_keyword;

425

}

426

427

/* If we have come to the end of just one, they are not

428

* equal. */

429

430

if (q >= token_end || !*r)

431

{

432

break;

433

}

434

435

/* If the characters in corresponding characters are not

436

* equal, the strings are not equal. */

437

438

if (*q++ != *r++)

439

{

440

break;

441

}

442

}

443

}

444

445

/* Didn't find anything in user_specials. */

446

447

p = 0;

448

}

449

450

if (p)

451

{ /* we have a keyword */

452

enum codes value;

453

454

found_keyword:

455

value = ident;

456

parser_state_tos->its_a_keyword = true;

457

parser_state_tos->last_u_d = true;

458

parser_state_tos->last_rw = p->rwcode;

459

parser_state_tos->last_rw_depth = parser_state_tos->paren_depth;

460

461

switch (p->rwcode)

462

{

463

case rw_operator: /* C++ operator overloading. */

464

value = cpp_operator;

465

parser_state_tos->in_parameter_declaration = 1;

466

break;

467

case rw_switch: /* it is a switch */

468

value = (swstmt);

469

break;

470

case rw_case: /* a case or default */

471

value = (casestmt);

472

break;

473

case rw_enum:

474

l_enum = true; /* reset on '(' ')' '{' '}' or ';' */

475

/* fall through */

476

case rw_struct_like: /* a "struct" */

477

if (parser_state_tos->p_l_follow && !(parser_state_tos->noncast_mask & 1 << parser_state_tos->p_l_follow))

478

/* inside parens: cast */

479

{

480

parser_state_tos->cast_mask |= 1 << parser_state_tos->p_l_follow;

481

break;

482

}

483

484

l_struct = true;

485

486

/* Next time around, we will want to know that we have had a

487

'struct' */

488

case rw_decl: /* one of the declaration keywords */

489

if (parser_state_tos->p_l_follow && !(parser_state_tos->noncast_mask & 1 << parser_state_tos->p_l_follow))

490

/* inside parens: cast */

491

{

492

parser_state_tos->cast_mask |= 1 << parser_state_tos->p_l_follow;

493

break;

494

}

495

496

last_code = decl;

497

value = (decl);

498

break;

499

500

case rw_sp_paren: /* if, while, for */

501

value = (sp_paren);

502

if (*token == 'i' && parser_state_tos->last_token == sp_else)

503

{

504

parser_state_tos->i_l_follow -= ind_size;

505

}

506

507

break;

508

509

case rw_sp_nparen: /* do */

510

value = (sp_nparen);

511

break;

512

513

case rw_sp_else: /* else */

514

value = (sp_else);

515

break;

516

517

case rw_sizeof:

518

parser_state_tos->sizeof_keyword = true;

519

value = (ident);

520

break;

521

522

case rw_return:

523

case rw_break:

524

default: /* all others are treated like any other

525

identifier */

526

value = (ident);

527

} /* end of switch */

528

529

if (parser_state_tos->last_token == cpp_operator)

530

{

531

return overloaded;

532

}

533

534

return value;

535

} /* end of if (found_it) */

536

else if ((*buf_ptr == '(') && (parser_state_tos->tos <= 1) &&

537

(parser_state_tos->ind_level == 0) &&

538

(parser_state_tos->paren_depth == 0))

539

{

540

/* We have found something which might be the name in a function

541

* definition. */

542

543

char *tp;

544

int paren_count = 1;

545

546

/* If the return type of this function definition was not defined

547

* with a -T commandline option, then the output of indent would

548

* alternate on subsequent calls. In order to avoid that we try

549

* to detect that case here and make a minimal change to cause

550

* the correct behaviour.

551

*/

552

553

if (parser_state_tos->last_token == ident && parser_state_tos->last_saw_nl)

554

{

555

parser_state_tos->in_decl = 1;

556

}

557

558

/* Skip to the matching ')'. */

559

560

for (tp = buf_ptr + 1; paren_count > 0 && tp < in_prog + in_prog_size; tp++)

561

{

562

if (*tp == '(')

563

{

564

paren_count++;

565

}

566

567

if (*tp == ')')

568

{

569

paren_count--;

570

}

571

572

/* Can't occur in parameter list; this way we don't search the

573

* whole file in the case of unbalanced parens. */

574

575

if (*tp == ';')

576

{

577

goto not_proc;

578

}

579

}

580

581

if (paren_count == 0)

582

{

583

parser_state_tos->procname = token;

584

parser_state_tos->procname_end = token_end;

585

586

while (isspace (*tp))

587

{

588

tp++;

589

}

590

591

if ((*tp == '_') && (in_prog + in_prog_size - tp >= 13) &&

592

!strncmp (tp, "__attribute__", 13))

593

{

594

/* Found an __attribute__ after a function declaration */

595

goto not_proc; /* Must be a declaration */

596

}

597

598

/* If the next char is ';' or ',' or '(' we have a function

599

* declaration, not a definition.

600

*

601

* I've added '=' to this list to keep from breaking

602

* a non-valid C macro from libc. -jla */

603

604

if (*tp != ';' && *tp != ',' && *tp != '(' && *tp != '=')

605

{

606

parser_state_tos->in_parameter_declaration = 1;

607

}

608

}

609

610

not_proc:;

611

}

612

else if ((*buf_ptr == ':') && (*(buf_ptr + 1) == ':') &&

613

(parser_state_tos->tos <= 1) &&

614

(parser_state_tos->ind_level == 0) &&

615

(parser_state_tos->paren_depth == 0))

616

{

617

parser_state_tos->classname = token;

618

parser_state_tos->classname_end = token_end;

619

}

620

621

/* The following hack attempts to guess whether or not the

622

* current token is in fact a declaration keyword -- one that

623

* has been typedef'd */

624

625

else if ( ( ((*buf_ptr == '*') && (buf_ptr[1] != '=')) ||

626

isalpha (*buf_ptr) || (*buf_ptr == '_')) &&

627

!parser_state_tos->p_l_follow && !parser_state_tos->block_init &&

628

( (parser_state_tos->last_token == rparen) ||

629

(parser_state_tos->last_token == semicolon) ||

630

(parser_state_tos->last_token == rbrace) ||

631

(parser_state_tos->last_token == decl) ||

632

(parser_state_tos->last_token == lbrace) ||

633

(parser_state_tos->last_token == start_token)))

634

{

635

parser_state_tos->its_a_keyword = true;

636

parser_state_tos->last_u_d = true;

637

last_code = decl;

638

639

if (parser_state_tos->last_token == cpp_operator)

640

{

641

return overloaded;

642

}

643

644

return decl;

645

}

646

647

if (last_code == decl)

648

{

649

/* if this is a declared variable, then

650

following sign is unary */

651

parser_state_tos->last_u_d = true; /* will make "int a -1" work */

652

}

653

654

last_code = ident;

655

656

if (parser_state_tos->last_token == cpp_operator)

657

{

658

return overloaded;

659

}

660

661

return (ident); /* the ident is not in the list */

662

} /* end of procesing for alpanum character */

663

664

/* Scan a non-alphanumeric token */

665

666

/* If it is not a one character token, token_end will get changed later. */

667

668

token_end = buf_ptr + 1;

669

670

/* THIS MAY KILL YOU!!!

671

*

672

* Note that it may be possible for this to kill us--if `fill_buffer'

673

* at any time switches `buf_ptr' to the other input buffer, `token'

674

* and `token_end' will point to different storage areas!!! */

675

676

if (++buf_ptr >= buf_end)

677

{

678

fill_buffer ();

679

}

680

681

/* If it is a backslash new-line, just eat the backslash */

682

683

if ((*token == '\\') && (buf_ptr[0] == EOL))

684

{

685

token = buf_ptr;

686

687

if (++buf_ptr >= buf_end)

688

{

689

fill_buffer ();

690

}

691

}

692

693

switch (*token)

694

{

695

case '\0':

696

code = code_eof;

697

break;

698

699

case EOL:

700

parser_state_tos->matching_brace_on_same_line = -1;

701

unary_delim = parser_state_tos->last_u_d;

702

parser_state_tos->last_nl = true;

703

code = newline;

704

break;

705

706

/* Handle wide strings and chars. */

707

case 'L':

708

if (buf_ptr[0] != '"' && buf_ptr[0] != '\'')

709

{

710

token_end = buf_ptr;

711

code = ident;

712

break;

713

}

714

715

qchar = buf_ptr[0];

716

buf_ptr++;

717

goto handle_string;

718

719

case '\'': /* start of quoted character */

720

case '"': /* start of string */

721

qchar = *token;

722

723

handle_string:

724

/* Find out how big the literal is so we can set token_end. */

725

726

/* Invariant: before loop test buf_ptr points to the next

727

* character that we have not yet checked. */

728

729

while ((*buf_ptr != qchar) && (*buf_ptr != 0)) /* && *buf_ptr != EOL) */

730

{

731

if (*buf_ptr == EOL)

732

{

733

++line_no;

734

}

735

736

if (*buf_ptr == '\\')

737

{

738

buf_ptr++;

739

740

if (buf_ptr >= buf_end)

741

{

742

fill_buffer ();

743

}

744

745

if (*buf_ptr == EOL)

746

{

747

++line_no;

748

}

749

750

if (*buf_ptr == 0)

751

{

752

break;

753

}

754

}

755

756

buf_ptr++;

757

758

if (buf_ptr >= buf_end)

759

{

760

fill_buffer ();

761

}

762

}

763

764

if (*buf_ptr == EOL || *buf_ptr == 0)

765

{

766

WARNING ((qchar == '\'' ? _("Unterminated character constant") :

767

_("Unterminated string constant")), 0, 0);

768

}

769

else

770

{

771

/* Advance over end quote char. */

772

buf_ptr++;

773

774

if (buf_ptr >= buf_end)

775

{

776

fill_buffer ();

777

}

778

}

779

780

token_end = buf_ptr;

781

code = ident;

782

break;

783

784

case ('('):

785

l_enum = false;

786

unary_delim = true;

787

code = lparen;

788

break;

789

790

case ('['):

791

if (parser_state_tos->in_or_st)

792

parser_state_tos->in_or_st++;

793

unary_delim = true;

794

code = lparen;

795

break;

796

797

case (')'):

798

l_enum = false;

799

code = rparen;

800

break;

801

802

case (']'):

803

if (parser_state_tos->in_or_st > 1)

804

parser_state_tos->in_or_st--;

805

code = rparen;

806

break;

807

808

case '#':

809

unary_delim = parser_state_tos->last_u_d;

810

code = preesc;

811

812

/* Make spaces between '#' and the directive be part of

813

the token if user specified "-lps" */

814

if (leave_preproc_space)

815

{

816

while (*buf_ptr == ' ' && buf_ptr < buf_end)

817

{

818

buf_ptr++;

819

}

820

821

token_end = buf_ptr;

822

}

823

break;

824

825

case '?':

826

unary_delim = true;

827

code = question;

828

break;

829

830

case (':'):

831

/* Deal with C++ class::method */

832

833

if (*buf_ptr == ':')

834

{

835

code = doublecolon;

836

buf_ptr++;

837

token_end = buf_ptr;

838

break;

839

}

840

841

code = colon;

842

unary_delim = true;

843

if (squest && *e_com != ' ')

844

{

845

if (e_code == s_code)

846

{

847

parser_state_tos->want_blank = false;

848

}

849

850

else

851

{

852

parser_state_tos->want_blank = true;

853

}

854

}

855

break;

856

857

case (';'):

858

l_enum = false;

859

unary_delim = true;

860

code = semicolon;

861

break;

862

863

case ('{'):

864

if (parser_state_tos->matching_brace_on_same_line < 0)

865

{

866

parser_state_tos->matching_brace_on_same_line = 1;

867

}

868

else

869

{

870

parser_state_tos->matching_brace_on_same_line++;

871

}

872

873

if (l_enum)

874

{

875

/* Keep all variables in the same column:

876

* ONE,

877

* TWO, etc

878

* instead of

879

* ONE,

880

* TWO,

881

* Use a special code for `block_init' however, because we still

882

* want to do the line breaks when `braces_on_struct_decl_line'

883

* is not set.

884

*/

885

parser_state_tos->block_init = 2;

886

parser_state_tos->block_init_level = 0;

887

l_enum = false;

888

}

889

890

unary_delim = true;

891

code = lbrace;

892

break;

893

894

case ('}'):

895

parser_state_tos->matching_brace_on_same_line--;

896

l_enum = false;

897

unary_delim = true;

898

code = rbrace;

899

break;

900

901

case 014: /* a form feed */

902

unary_delim = parser_state_tos->last_u_d;

903

parser_state_tos->last_nl = true; /* remember this so we can set

904

'parser_state_tos->col_1' right */

905

code = form_feed;

906

break;

907

908

case (','):

909

unary_delim = true;

910

code = comma;

911

break;

912

913

case '.':

914

if (parser_state_tos->in_decl && *buf_ptr == '.' && buf_ptr[1] == '.')

915

{

916

/* check for '...' in a declaration */

917

if ((buf_ptr += 2) >= buf_end)

918

{

919

fill_buffer ();

920

}

921

922

unary_delim = true;

923

code = decl;

924

token_end = buf_ptr;

925

break;

926

}

927

unary_delim = false;

928

code = struct_delim;

929

930

if (*buf_ptr == '*') /* object .* pointer-to-member */

931

{

932

++buf_ptr;

933

token_end = buf_ptr;

934

}

935

break;

936

937

case '-':

938

case '+': /* check for -, +, --, ++ */

939

code = (parser_state_tos->last_u_d ? unary_op : binary_op);

940

unary_delim = true;

941

942

if (*buf_ptr == token[0])

943

{

944

/* check for doubled character */

945

buf_ptr++;

946

/* buffer overflow will be checked at end of loop */

947

if (last_code == ident || last_code == rparen)

948

{

949

code = (parser_state_tos->last_u_d ? unary_op : postop);

950

/* check for following ++ or -- */

951

unary_delim = false;

952

}

953

}

954

else if (*buf_ptr == '=')

955

{

956

/* check for operator += */

957

buf_ptr++;

958

}

959

else if (*buf_ptr == '>')

960

{

961

/* check for operator -> */

962

buf_ptr++;

963

code = struct_delim;

964

/* check for operator ->* */

965

966

if (*buf_ptr == '*')

967

{

968

buf_ptr++;

969

}

970

}

971

972

token_end = buf_ptr;

973

break; /* buffer overflow will be checked at end of

974

switch */

975

976

case '=':

977

if (parser_state_tos->in_or_st && parser_state_tos->last_token != cpp_operator)

978

{

979

parser_state_tos->block_init = 1;

980

parser_state_tos->block_init_level = 0;

981

}

982

983

if (*buf_ptr == '=') /* == */

984

{

985

buf_ptr++;

986

}

987

else if ((*buf_ptr == '-') ||

988

(*buf_ptr == '+') ||

989

(*buf_ptr == '*') ||

990

(*buf_ptr == '&'))

991

{

992

/* Something like x=-1, which can mean x -= 1 ("old style" in K&R1)

993

* or x = -1 (ANSI). Note that this is only an ambiguity if the

994

* character can also be a unary operator. If not, just produce

995

* output code that produces a syntax error (the theory being that

996

* people want to detect and eliminate old style assignments but

997

* they don't want indent to silently change the meaning of their

998

* code). */

999

1000

WARNING (_("old style assignment ambiguity in \"=%c\". Assuming \"= %c\"\n"),

1001

(unsigned long) *((unsigned char *) buf_ptr), (unsigned long) *((unsigned char *) buf_ptr));

1002

}

1003

1004

code = binary_op;

1005

unary_delim = true;

1006

token_end = buf_ptr;

1007

break;

1008

/* can drop thru!!! */

1009

1010

case '>':

1011

case '<':

1012

case '!':

1013

/* ops like <, <<, <=, !=, <<=, etc */

1014

/* This will of course scan sequences like "<=>", "!=>", "<<>", etc. as

1015

* one token, but I don't think that will cause any harm. */

1016

/* in C++ mode also scan <?[=], >?[=] GNU C++ operators

1017

* maybe some flag to them ? */

1018

1019

while (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=' || (c_plus_plus && *buf_ptr == '?'))

1020

{

1021

if (++buf_ptr >= buf_end)

1022

{

1023

fill_buffer ();

1024

}

1025

1026

if (*buf_ptr == '=')

1027

{

1028

if (++buf_ptr >= buf_end)

1029

{

1030

fill_buffer ();

1031

}

1032

}

1033

}

1034

1035

code = (parser_state_tos->last_u_d ? unary_op : binary_op);

1036

unary_delim = true;

1037

token_end = buf_ptr;

1038

break;

1039

1040

default:

1041

if (token[0] == '/' && (*buf_ptr == '*' || *buf_ptr == '/'))

1042

{

1043

/* A C or C++ comment */

1044

1045

if (*buf_ptr == '*')

1046

{

1047

code = comment;

1048

}

1049

else

1050

{

1051

code = cplus_comment;

1052

}

1053

1054

if (++buf_ptr >= buf_end)

1055

{

1056

fill_buffer ();

1057

}

1058

1059

if (code == comment)

1060

{

1061

/* Threat comments of type / *UPPERCASE* / not as comments */

1062

char *p = buf_ptr;

1063

1064

while (isupper (*p++))

1065

{

1066

/* There is always at least one

1067

* newline in the buffer; so no

1068

* need to check for buf_end. */

1069

}

1070

1071

if (p < buf_end && p[-1] == '*' && *p == '/')

1072

{

1073

buf_ptr = p + 1;

1074

code = ident;

1075

parser_state_tos->want_blank = true;

1076

}

1077

}

1078

1079

unary_delim = parser_state_tos->last_u_d;

1080

}

1081

else if (parser_state_tos->last_token == cpp_operator)

1082

{

1083

/* For C++ overloaded operators. */

1084

code = overloaded;

1085

last_code = overloaded;

1086

}

1087

else

1088

{

1089

while (*(buf_ptr - 1) == *buf_ptr || *buf_ptr == '=')

1090

{

1091

/* handle ||, &&, etc, and also things as in int *****i */

1092

if (++buf_ptr >= buf_end)

1093

{

1094

fill_buffer ();

1095

}

1096

}

1097

code = (parser_state_tos->last_u_d ? unary_op : binary_op);

1098

unary_delim = true;

1099

}

1100

1101

token_end = buf_ptr;

1102

1103

} /* end of switch */

1104

1105

if (code != newline)

1106

{

1107

l_struct = false;

1108

last_code = code;

1109

}

1110

1111

if (buf_ptr >= buf_end)

1112

{

1113

fill_buffer ();

1114

}

1115

1116

parser_state_tos->last_u_d = unary_delim;

1117

1118

if (parser_state_tos->last_token == cpp_operator)

1119

{

1120

return overloaded;

1121

}

1122

1123

return (code);

1124

}

1125

1126

/* Add the given keyword to the keyword table, using val as

1127

* the keyword type */

1128

1129

void

1130

addkey (

1131

char *key,

1132

enum rwcodes val)

1133

{

1134

struct templ *p;

1135

1136

/* Check to see whether key is a reserved word or not. */

1137

if ( (c_plus_plus && is_reserved_cc (key, strlen (key)) != 0) ||

1138

(!c_plus_plus && is_reserved (key, strlen (key)) != 0))

1139

{

1140

return;

1141

}

1142

1143

if (user_specials == 0)

1144

{

1145

user_specials = (struct templ *) xmalloc (5 * sizeof (struct templ));

1146

user_specials_max = 5;

1147

user_specials_idx = 0;

1148

}

1149

else if (user_specials_idx == user_specials_max)

1150

{

1151

user_specials_max += 5;

1152

user_specials = (struct templ *) xrealloc ((char *) user_specials, user_specials_max * sizeof (struct templ));

1153

}

1154

1155

p = &user_specials[user_specials_idx++];

1156

p->rwd = key;

1157

p->rwcode = val;

1158

return;

1159

}