1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
|
/*
* gawkapi.h -- Definitions for use by extension functions calling into gawk.
*/
/*
* Copyright (C) 2012-2019, 2021-2024, the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* GAWK is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
/*
* The following types and/or macros and/or functions are referenced
* in this file. For correct use, you must therefore include the
* corresponding standard header file BEFORE including this file.
*
* FILE - <stdio.h>
* NULL - <stddef.h>
* memset(), memcpy() - <string.h>
* size_t - <sys/types.h>
* struct stat - <sys/stat.h>
*
* Due to portability concerns, especially to systems that are not
* fully standards-compliant, it is your responsibility to include
* the correct files in the correct way. This requirement is necessary
* in order to keep this file clean, instead of becoming a portability
* hodge-podge as can be seen in the gawk source code.
*
* To pass reasonable integer values for ERRNO, you will also need to
* include <errno.h>.
*/
#ifndef _GAWK_API_H
#define _GAWK_API_H
/*
* General introduction:
*
* This API purposely restricts itself to ISO C 90 features. In particular, no
* bool, no // comments, no use of the restrict keyword, or anything else,
* in order to provide maximal portability.
*
* Exception: the "inline" keyword is used below in the "constructor"
* functions. If your compiler doesn't support it, you should either
* -Dinline='' on your command line, or use the autotools and include a
* config.h in your extensions.
*
* Additional important information:
*
* 1. ALL string values in awk_value_t objects need to come from api_malloc().
* Gawk will handle releasing the storage if necessary. This is slightly
* awkward, in that you can't take an awk_value_t that you got from gawk
* and reuse it directly, even for something that is conceptually pass
* by value.
*
* 2. Due to gawk internals, after using sym_update() to install an array
* into gawk, you have to retrieve the array cookie from the value
* passed in to sym_update(). Like so:
*
* new_array = create_array();
* val.val_type = AWK_ARRAY;
* val.array_cookie = new_array;
* sym_update("array", & val); // install array in the symbol table
*
* new_array = val.array_cookie; // MUST DO THIS
*
* // fill in new array with lots of subscripts and values
*
* Similarly, if installing a new array as a subarray of an existing
* array, you must add the new array to its parent before adding any
* elements to it.
*
* You must also retrieve the value of the array_cookie after the call
* to set_element().
*
* Thus, the correct way to build an array is to work "top down".
* Create the array, and immediately install it in gawk's symbol table
* using sym_update(), or install it as an element in a previously
* existing array using set_element().
*
* Thus the new array must ultimately be rooted in a global symbol. This is
* necessary before installing any subarrays in it, due to gawk's
* internal implementation. Strictly speaking, this is required only
* for arrays that will have subarrays as elements; however it is
* a good idea to always do this. This restriction may be relaxed
* in a subsequent revision of the API.
*
* 3. While each routine in the API has a few lines of summary for it
* in this header, said summaries are not standalone, adequate documentation. You
* should read the chapter in the gawk manual on writing extensions. Find it online
* at https://www.gnu.org/software/gawk/manual/html_node/Dynamic-Extensions.html,
* or in the Info files distributed with gawk.
*/
/* Allow use in C++ code. */
#ifdef __cplusplus
extern "C" {
#endif
/*
 * awk_const marks struct fields that extensions must not modify.
 * It expands to `const' unless GAWK is defined, in which case it
 * expands to nothing and the fields stay writable.
 */
#ifndef GAWK
#define awk_const const
#else /* GAWK */
#define awk_const
#endif /* GAWK */
/* Boolean type used throughout the API; <stdbool.h> is avoided on purpose. */
typedef enum awk_bool {
	awk_false = 0,
	awk_true = 1
} awk_bool_t;
/*
* If an input parser would like to specify the field positions in the input
* record, it may populate an awk_fieldwidth_info_t structure to indicate
* the location of each field. The use_chars boolean controls whether the
* field lengths are specified in terms of bytes or potentially multi-byte
* characters. Performance will be better if the values are supplied in
* terms of bytes. The fields[0].skip value indicates how many bytes (or
* characters) to skip before $1, and fields[0].len is the length of $1, etc.
*/
typedef struct {
awk_bool_t use_chars; /* false ==> skip and len count bytes; true ==> characters */
size_t nf; /* number of entries in fields[] */
struct awk_field_info {
size_t skip; /* amount to skip before field starts */
size_t len; /* length of field */
} fields[1]; /* actual dimension should be nf; see awk_fieldwidth_info_size() */
} awk_fieldwidth_info_t;
/*
 * Total number of bytes needed for an awk_fieldwidth_info_t that holds
 * NF field entries; handy as the size argument to malloc or realloc.
 * One entry is already part of the struct itself, so NF - 1 further
 * entries are added on top of it.
 */
#define awk_fieldwidth_info_size(NF) \
	((((NF) - 1) * sizeof(struct awk_field_info)) \
	 + sizeof(awk_fieldwidth_info_t))
/* The information about input files that input parsers need to know: */
typedef struct awk_input {
const char *name; /* filename */
int fd; /* file descriptor */
/* Value close_func may store in fd to keep gawk from closing it; see below. */
#define INVALID_HANDLE (-1)
void *opaque; /* private data for input parsers */
/*
* The get_record function is called to read the next record of data.
*
* It should return the length of the input record or EOF, and it
* should set *out to point to the contents of $0. The rt_start
* and rt_len arguments should be used to return RT to gawk.
* If EOF is not returned, the parser must set *rt_len (and
* *rt_start if *rt_len is non-zero).
*
* Note that gawk will make a copy of the record in *out, so the
* parser is responsible for managing its own memory buffer.
* Similarly, gawk will make its own copy of RT, so the parser
* is also responsible for managing this memory.
*
* It is guaranteed that errcode is a valid pointer, so there is
* no need to test for a NULL value. Gawk sets *errcode to 0,
* so there is no need to set it unless an error occurs.
*
* If an error does occur, the function should return EOF and set
* *errcode to a positive value. When *errcode is greater than
* zero, gawk automatically updates the ERRNO variable based
* on the value of *errcode (e.g., setting *errcode = errno should do
* the right thing).
*
* If field_width is non-NULL, then *field_width will be initialized
* to NULL, and the function may set it to point to a structure
* supplying field width information to override the default
* gawk field parsing mechanism. Note that this structure will not
* be copied by gawk; it must persist at least until the next call
* to get_record or close_func. Note also that field_width will
* be NULL when getline is assigning the results to a variable, thus
* field parsing is not needed.
*/
int (*get_record)(char **out, struct awk_input *iobuf, int *errcode,
char **rt_start, size_t *rt_len,
const awk_fieldwidth_info_t **field_width);
/*
* This replaces the POSIX read() system call. Use it if you want to
* manage reading raw bytes yourself, and let gawk parse the record.
* The parameters are unnamed here; presumably they are, as for
* read(), the file descriptor, the buffer, and the byte count --
* confirm against the manual.
*/
ssize_t (*read_func)(int, void *, size_t);
/*
* The close_func is called to allow the parser to free private data.
* Gawk itself will close the fd unless close_func first sets it to
* INVALID_HANDLE.
*/
void (*close_func)(struct awk_input *iobuf);
/* put last, for alignment. bleah */
struct stat sbuf; /* stat buf */
} awk_input_buf_t;
/* Describes one input parser; see api_register_input_parser. */
typedef struct awk_input_parser {
const char *name; /* name of parser */
/*
* The can_take_file function should return true if the parser
* would like to parse this file. It should not change any gawk
* state!
*/
awk_bool_t (*can_take_file)(const awk_input_buf_t *iobuf);
/*
* If this parser is selected, then take_control_of will be called.
* It can assume that a previous call to can_take_file was successful,
* and no gawk state has changed since that call. It should populate
* the awk_input_buf_t's get_record, close_func, and opaque values as needed.
* It should return true if successful.
*/
awk_bool_t (*take_control_of)(awk_input_buf_t *iobuf);
awk_const struct awk_input_parser *awk_const next; /* for use by gawk; const to extensions */
} awk_input_parser_t;
/*
* Similar for output wrapper.
*/
/* First the data structure */
typedef struct awk_output_buf {
const char *name; /* name of output file */
const char *mode; /* mode argument to fopen */
FILE *fp; /* stdio file pointer */
awk_bool_t redirected; /* true if a wrapper is active */
void *opaque; /* for use by output wrapper */
/*
* Replacement functions for I/O. Each one is just like the regular
* stdio function it is named after (fwrite, fflush, ferror, fclose),
* but also takes the opaque pointer as an additional, final argument.
*/
size_t (*gawk_fwrite)(const void *buf, size_t size, size_t count,
FILE *fp, void *opaque);
int (*gawk_fflush)(FILE *fp, void *opaque);
int (*gawk_ferror)(FILE *fp, void *opaque);
int (*gawk_fclose)(FILE *fp, void *opaque);
} awk_output_buf_t;
/* Next the output wrapper registered with gawk */
/* Describes one output wrapper; see api_register_output_wrapper. */
typedef struct awk_output_wrapper {
const char *name; /* name of the wrapper */
/*
* The can_take_file function should return true if the wrapper
* would like to process this file. It should not change any gawk
* state!
*/
awk_bool_t (*can_take_file)(const awk_output_buf_t *outbuf);
/*
* If this wrapper is selected, then take_control_of will be called.
* It can assume that a previous call to can_take_file was successful,
* and no gawk state has changed since that call. It should populate
* the awk_output_buf_t function pointers and opaque pointer as needed.
* It should return true if successful.
*/
awk_bool_t (*take_control_of)(awk_output_buf_t *outbuf);
awk_const struct awk_output_wrapper *awk_const next; /* for use by gawk; const to extensions */
} awk_output_wrapper_t;
/* A two-way processor combines an input parser and an output wrapper. */
typedef struct awk_two_way_processor {
const char *name; /* name of the two-way processor */
/*
* The can_take_two_way function should return true if the two-way
* processor would like to handle the file with the given name.
* It should not change any gawk state!
*/
awk_bool_t (*can_take_two_way)(const char *name);
/*
* If this processor is selected, then take_control_of will be called.
* It can assume that a previous call to can_take_two_way was successful,
* and no gawk state has changed since that call. It should populate
* the awk_input_buf_t and awk_output_buf_t structures as needed.
* It should return true if successful.
*/
awk_bool_t (*take_control_of)(const char *name, awk_input_buf_t *inbuf,
awk_output_buf_t *outbuf);
awk_const struct awk_two_way_processor *awk_const next; /* for use by gawk; const to extensions */
} awk_two_way_processor_t;
/*
 * Current version of the API. The numbers are given once as macros;
 * the enumeration constants below simply mirror those macros.
 */
#define gawk_api_major_version	4
#define gawk_api_minor_version	0
enum {
	GAWK_API_MINOR_VERSION = gawk_api_minor_version,
	GAWK_API_MAJOR_VERSION = gawk_api_major_version
};
/* A number of typedefs related to different types of values. */
/*
* A mutable string. Gawk owns the memory pointed to if it supplied
* the value. Otherwise, it takes ownership of the memory pointed to.
*
* The API deals exclusively with regular chars; these strings may
* be multibyte encoded in the current locale's encoding and character
* set. Gawk will convert internally to wide characters if necessary.
*
* Note that a string provided by gawk will always be terminated
* with a '\0' character.
*/
typedef struct awk_string {
char *str; /* data; '\0'-terminated when provided by gawk */
size_t len; /* length thereof, in chars (i.e. bytes, not multibyte characters) */
} awk_string_t;
/* Tag stored in the `type' member of an awk_number_t. */
enum AWK_NUMBER_TYPE {
	AWK_NUMBER_TYPE_DOUBLE = 0,
	AWK_NUMBER_TYPE_MPFR = 1,
	AWK_NUMBER_TYPE_MPZ = 2
};
/*
* When type is AWK_NUMBER_TYPE_MPFR or AWK_NUMBER_TYPE_MPZ, the memory
* pointed to by the ptr member belongs to gawk if it came from gawk.
* Otherwise the memory belongs to the extension and gawk copies it when
* it is received.
* See the manual for further discussion.
*/
typedef struct awk_number {
double d; /* always populated in data received from gawk */
enum AWK_NUMBER_TYPE type; /* AWK_NUMBER_TYPE_MPFR or AWK_NUMBER_TYPE_MPZ ==> ptr is in use */
void *ptr; /* either NULL or mpfr_ptr or mpz_ptr */
} awk_number_t;
/* Arrays are represented as an opaque type. */
typedef void *awk_array_t;
/* Scalars can be represented as an opaque type. */
typedef void *awk_scalar_t;
/* Any value can be stored as a cookie. */
typedef void *awk_value_cookie_t;
/*
* This tag defines the type of a value.
*
* Values are associated with regular variables and with array elements.
* Since arrays can be multidimensional (as can regular variables)
* it's valid to have a "value" that is actually an array.
*/
typedef enum {
	AWK_UNDEFINED = 0,
	AWK_NUMBER = 1,
	AWK_STRING = 2,
	AWK_REGEX = 3,
	AWK_STRNUM = 4,
	AWK_ARRAY = 5,
	AWK_SCALAR = 6,		/* opaque access to a variable */
	AWK_VALUE_COOKIE = 7,	/* for updating a previously created value */
	AWK_BOOL = 8
} awk_valtype_t;
/*
* An awk value. The val_type tag indicates what
* is in the union.
*/
typedef struct awk_value {
awk_valtype_t val_type; /* tag: says which union member below is meaningful */
union {
awk_string_t s;
awk_number_t n;
awk_array_t a;
awk_scalar_t scl;
awk_value_cookie_t vc;
awk_bool_t b;
} u;
/*
* Convenience accessors. Each macro expands to a path into the
* union, so that, e.g., `val.str_value' means `val.u.s' and
* `val.num_value' means `val.u.n.d'. The string, strnum, and regex
* accessors all name the same awk_string_t member.
*/
#define str_value u.s
#define strnum_value str_value
#define regex_value str_value
#define num_value u.n.d
#define num_type u.n.type
#define num_ptr u.n.ptr
#define array_cookie u.a
#define scalar_cookie u.scl
#define value_cookie u.vc
#define bool_value u.b
} awk_value_t;
/*
* A "flattened" array element. Gawk produces an array of these
* inside the awk_flat_array_t.
* ALL memory pointed to belongs to gawk. Individual elements may
* be marked for deletion. New elements must be added individually,
* one at a time, using the separate API for that purpose.
*/
typedef struct awk_element {
/* convenience linked list pointer, not used by gawk */
struct awk_element *next;
enum {
AWK_ELEMENT_DEFAULT = 0, /* set by gawk */
AWK_ELEMENT_DELETE = 1 /* set by extension if
should be deleted */
} flags;
awk_value_t index; /* the element's index (subscript) */
awk_value_t value; /* the element's value */
} awk_element_t;
/*
* A "flattened" array. See the description above for how
* to use the elements contained herein.
*/
typedef struct awk_flat_array {
awk_const void *awk_const opaque1; /* private data for use by gawk */
awk_const void *awk_const opaque2; /* private data for use by gawk */
awk_const size_t count; /* how many elements */
awk_element_t elements[1]; /* will be extended (presumably to count entries) */
} awk_flat_array_t;
/*
* A record describing an extension function. Upon being
* loaded, the extension should pass in one of these to gawk for
* each C function.
*
* Each called function must fill in the result with a scalar
* (number, string, or regex). Gawk takes ownership of any string memory.
*
* The called function must return the value of `result'.
* This is for the convenience of the calling code inside gawk.
*
* Each extension function may decide what to do if the number of
* arguments isn't what it expected. Following awk functions, it
* is likely OK to ignore extra arguments.
*
* 'min_required_args' indicates how many arguments MUST be passed.
* The API will throw a fatal error if not enough are passed.
*
* 'max_expected_args' is more benign; if more than that are passed,
* the API prints a lint message (IFF lint is enabled, of course).
*
* In any case, the extension function itself need not compare the
* actual number of arguments passed to those two values if it does
* not want to.
*/
typedef struct awk_ext_func {
const char *name; /* name of the function (presumably as called from awk code) */
/*
* The C function itself. It receives the number of arguments
* actually passed, the value to fill in, and a pointer to this
* record; it must return `result'.
*/
awk_value_t *(*const function)(int num_actual_args,
awk_value_t *result,
struct awk_ext_func *finfo);
const size_t max_expected_args; /* passing more draws a lint message */
const size_t min_required_args; /* passing fewer is a fatal error */
awk_bool_t suppress_lint; /* presumably disables that lint message -- see the manual */
void *data; /* opaque pointer to any extra state */
} awk_ext_func_t;
/* Handle that an extension passes as the `id' argument of API functions. */
typedef void *awk_ext_id_t; /* opaque type for extension id */
/*
* The API into gawk. Lots of functions here. We hope that they are
* logically organized.
*
* !!! If you make any changes to this structure, please remember to bump !!!
* !!! gawk_api_major_version and/or gawk_api_minor_version. !!!
*/
typedef struct gawk_api {
/* First, data fields. */
/* These are what gawk thinks the API version is. */
awk_const int major_version;
awk_const int minor_version;
/* GMP/MPFR versions, if extended-precision is available */
awk_const int gmp_major_version;
awk_const int gmp_minor_version;
awk_const int mpfr_major_version;
awk_const int mpfr_minor_version;
/*
* These can change on the fly as things happen within gawk.
* Currently only do_lint is prone to change, but we reserve
* the right to allow the others to do so also.
*
* N.B. If we ever again need to add an additional do_flags value,
* it would be wise to convert this from an array to a bitmask. If
* we add a new do_flags value and bump DO_FLAGS_SIZE, then it requires
* us to increment the ABI version. If we use a bitmask instead, then
* we will be free to add new flags without breaking ABI compatibility.
*/
#define DO_FLAGS_SIZE 7
awk_const int do_flags[DO_FLAGS_SIZE];
/* Use these as indices into do_flags[] array to check the values */
#define gawk_do_lint 0
#define gawk_do_traditional 1
#define gawk_do_profile 2
#define gawk_do_sandbox 3
#define gawk_do_debug 4
#define gawk_do_mpfr 5
#define gawk_do_csv 6
/* Next, registration functions: */
/*
* Add a function to the interpreter, returns true upon success.
* Gawk does not modify what func points to, but the extension
* function itself receives this pointer and can modify what it
* points to, thus it's not const.
*/
awk_bool_t (*api_add_ext_func)(awk_ext_id_t id, const char *name_space,
awk_ext_func_t *func);
/* Register an input parser; for opening files read-only */
void (*api_register_input_parser)(awk_ext_id_t id,
awk_input_parser_t *input_parser);
/* Register an output wrapper, for writing files */
void (*api_register_output_wrapper)(awk_ext_id_t id,
awk_output_wrapper_t *output_wrapper);
/* Register a processor for two way I/O */
void (*api_register_two_way_processor)(awk_ext_id_t id,
awk_two_way_processor_t *two_way_processor);
/*
* Add an exit call back.
*
* arg0 is a private data pointer for use by the extension;
* gawk saves it and passes it into the function pointed
* to by funcp at exit.
*
* Exit callback functions are called in LIFO order.
*/
void (*api_awk_atexit)(awk_ext_id_t id,
void (*funcp)(void *data, int exit_status),
void *arg0);
/* Register a version string for this extension with gawk. */
void (*api_register_ext_version)(awk_ext_id_t id, const char *version);
/* Functions to print messages */
void (*api_fatal)(awk_ext_id_t id, const char *format, ...);
void (*api_warning)(awk_ext_id_t id, const char *format, ...);
void (*api_lintwarn)(awk_ext_id_t id, const char *format, ...);
void (*api_nonfatal)(awk_ext_id_t id, const char *format, ...);
/* Functions to update ERRNO */
void (*api_update_ERRNO_int)(awk_ext_id_t id, int errno_val);
void (*api_update_ERRNO_string)(awk_ext_id_t id, const char *string);
void (*api_unset_ERRNO)(awk_ext_id_t id);
/*
* All of the functions that return a value from inside gawk
* (get a parameter, get a global variable, get an array element)
* behave in the same way.
*
* For a function parameter, the return is false if the argument
* count is out of range, or if the actual parameter does not match
* what is specified in wanted. In that case, result->val_type
* will hold the actual type of what was passed.
*
* Similarly for symbol table access to variables and array elements,
* the return is false if the actual variable or array element does
* not match what was requested, and result->val_type will hold
* the actual type.
Table entry is type returned:
+----------------------------------------------------------------+
| Type of Actual Value: |
+--------+--------+--------+--------+--------+-------+-----------+
| String | Strnum | Number | Regex | Bool | Array | Undefined |
+-----------+-----------+--------+--------+--------+--------+--------+-------+-----------+
| | String | String | String | String | String | String | false | false |
| +-----------+--------+--------+--------+--------+--------+-------+-----------+
| | Strnum | false | Strnum | Strnum | false | false | false | false |
| +-----------+--------+--------+--------+--------+--------+-------+-----------+
| | Number | Number | Number | Number | false | Number | false | false |
| +-----------+--------+--------+--------+--------+--------+-------+-----------+
| | Regex | false | false | false | Regex | false | false | false |
| +-----------+--------+--------+--------+--------+--------+-------+-----------+
| Type | Bool | false | false | false | false | Bool | false | false |
| Requested +-----------+--------+--------+--------+--------+--------+-------+-----------+
| | Array | false | false | false | false | false | Array | false |
| +-----------+--------+--------+--------+--------+--------+-------+-----------+
| | Scalar | Scalar | Scalar | Scalar | Scalar | Scalar | false | false |
| +-----------+--------+--------+--------+--------+--------+-------+-----------+
| | Undefined | String | Strnum | Number | Regex | Bool | Array | Undefined |
| +-----------+--------+--------+--------+--------+--------+-------+-----------+
| | Value | false | false | false | false | false | false | false |
| | Cookie | | | | | | | |
+-----------+-----------+--------+--------+--------+--------+--------+-------+-----------+
*/
/* Functions to handle parameters passed to the extension. */
/*
* Get the count'th parameter, zero-based.
* Returns false if count is out of range, or if actual parameter
* does not match what is specified in wanted. In that case,
* result->val_type is as described above.
*/
awk_bool_t (*api_get_argument)(awk_ext_id_t id, size_t count,
awk_valtype_t wanted,
awk_value_t *result);
/*
* Convert a parameter that was undefined into an array
* (provide call-by-reference for arrays). Returns false
* if count is too big, or if the argument's type is
* not undefined.
*/
awk_bool_t (*api_set_argument)(awk_ext_id_t id,
size_t count,
awk_array_t array);
/*
* Symbol table access:
* - Read-only access to special variables (NF, etc.)
* - One special exception: PROCINFO.
* - Use sym_update() to change a value, including from UNDEFINED
* to scalar or array.
*/
/*
* Lookup a variable, fill in value. No messing with the value
* returned.
* Returns false if the variable doesn't exist or if the wrong type
* was requested. In the latter case, result->val_type will have
* the real type, as described above.
*
* awk_value_t val;
* if (! api->sym_lookup(id, name, wanted, & val))
* error_code_here();
* else {
* // safe to use val
* }
*/
awk_bool_t (*api_sym_lookup)(awk_ext_id_t id,
const char *name_space,
const char *name,
awk_valtype_t wanted,
awk_value_t *result);
/*
* Update a value. Adds it to the symbol table if not there.
* Changing types (scalar <--> array) is not allowed.
* In fact, using this to update an array is not allowed, either.
* Such an attempt returns false.
*/
awk_bool_t (*api_sym_update)(awk_ext_id_t id,
const char *name_space,
const char *name,
awk_value_t *value);
/*
* A ``scalar cookie'' is an opaque handle that provides access
* to a global variable or array. It is an optimization that
* avoids looking up variables in gawk's symbol table every time
* access is needed.
*
* This function retrieves the current value of a scalar cookie.
* Once you have obtained a scalar_cookie using sym_lookup, you can
* use this function to get its value more efficiently.
*
* Return will be false if the value cannot be retrieved.
*
* Flow is thus
* awk_value_t val;
* awk_scalar_t cookie;
* api->sym_lookup(id, "variable", AWK_SCALAR, & val); // get the cookie
* cookie = val.scalar_cookie;
* ...
* api->sym_lookup_scalar(id, cookie, wanted, & val); // get the value
*/
awk_bool_t (*api_sym_lookup_scalar)(awk_ext_id_t id,
awk_scalar_t cookie,
awk_valtype_t wanted,
awk_value_t *result);
/*
* Update the value associated with a scalar cookie.
* Flow is
* sym_lookup with wanted == AWK_SCALAR
* if returns false
* sym_update with real initial value to install it
* sym_lookup again with AWK_SCALAR
* else
* use the scalar cookie
*
* Return will be false if the new value is not one of
* AWK_STRING, AWK_NUMBER, AWK_REGEX.
*
* Here too, the built-in variables may not be updated.
*/
awk_bool_t (*api_sym_update_scalar)(awk_ext_id_t id,
awk_scalar_t cookie, awk_value_t *value);
/* Cached values */
/*
* Create a cached string, regex, or numeric value for efficient later
* assignment. This improves performance when you want to assign
* the same value to one or more variables repeatedly. Only
* AWK_NUMBER, AWK_STRING, AWK_REGEX and AWK_STRNUM values are allowed.
* Any other type is rejected. We disallow AWK_UNDEFINED since that
* case would result in inferior performance.
*/
awk_bool_t (*api_create_value)(awk_ext_id_t id, awk_value_t *value,
awk_value_cookie_t *result);
/*
* Release the memory associated with a cookie from api_create_value.
* Please call this to free memory when the value is no longer needed.
*/
awk_bool_t (*api_release_value)(awk_ext_id_t id, awk_value_cookie_t vc);
/* Array management */
/*
* Retrieve total number of elements in array.
* Returns false if some kind of error.
*/
awk_bool_t (*api_get_element_count)(awk_ext_id_t id,
awk_array_t a_cookie, size_t *count);
/*
* Return the value of an element - read only!
* Use set_array_element() to change it.
* Behavior for value and return is same as for api_get_argument
* and sym_lookup.
*/
awk_bool_t (*api_get_array_element)(awk_ext_id_t id,
awk_array_t a_cookie,
const awk_value_t *const index,
awk_valtype_t wanted,
awk_value_t *result);
/*
* Change (or create) element in existing array with
* index and value.
*
* ARGV and ENVIRON may not be updated.
*/
awk_bool_t (*api_set_array_element)(awk_ext_id_t id, awk_array_t a_cookie,
const awk_value_t *const index,
const awk_value_t *const value);
/*
* Remove the element with the given index.
* Returns true if removed or false if element did not exist.
*/
awk_bool_t (*api_del_array_element)(awk_ext_id_t id,
awk_array_t a_cookie, const awk_value_t* const index);
/* Create a new array cookie to which elements may be added. */
awk_array_t (*api_create_array)(awk_ext_id_t id);
/* Clear out an array. */
awk_bool_t (*api_clear_array)(awk_ext_id_t id, awk_array_t a_cookie);
/*
* Flatten out an array with type conversions as requested.
* This supersedes the earlier api_flatten_array function that
* did not allow the caller to specify the requested types.
* (That API is still available as a macro, defined below.)
*/
awk_bool_t (*api_flatten_array_typed)(awk_ext_id_t id,
awk_array_t a_cookie,
awk_flat_array_t **data,
awk_valtype_t index_type, awk_valtype_t value_type);
/* When done, delete any marked elements, release the memory. */
awk_bool_t (*api_release_flattened_array)(awk_ext_id_t id,
awk_array_t a_cookie,
awk_flat_array_t *data);
/*
* Hooks to provide access to gawk's memory allocation functions.
* This ensures that memory passed between gawk and the extension
* is allocated and released by the same library.
*/
void *(*api_malloc)(size_t size);
void *(*api_calloc)(size_t nmemb, size_t size);
void *(*api_realloc)(void *ptr, size_t size);
void (*api_free)(void *ptr);
/*
* Obsolete function, should not be used. It remains only
* for binary compatibility. Any value it returns should be
* freed via api_free.
*/
void *(*api_get_mpfr)(awk_ext_id_t id);
/*
* Obsolete function, should not be used. It remains only
* for binary compatibility. Any value it returns should be
* freed via api_free.
*/
void *(*api_get_mpz)(awk_ext_id_t id);
/*
* Look up a file. If the name is NULL or name_len is 0, it returns
* data for the currently open input file corresponding to FILENAME
* (and it will not access the filetype argument, so that may be
* undefined).
*
* If the file is not already open, try to open it.
*
* The "filetype" argument should be one of:
*
* ">", ">>", "<", "|>", "|<", and "|&"
*
* If the file is not already open, and the fd argument is non-negative,
* gawk will use that file descriptor instead of opening the file
* in the usual way.
*
* If the fd is non-negative, but the file exists already, gawk
* ignores the fd and returns the existing file. It is the caller's
* responsibility to notice that the fd in the returned
* awk_input_buf_t does not match the requested value.
*
* Note that supplying a file descriptor is currently NOT supported
* for pipes. It should work for input, output, append, and two-way
* (coprocess) sockets. If the filetype is two-way, we assume that
* it is a socket!
*
* Note that in the two-way case, the input and output file descriptors
* may differ. To check for success, one must check that either of
* them matches.
*
* ibufp and obufp point at gawk's internal copies of the
* awk_input_buf_t and awk_output_buf_t associated with the open
* file. Treat these data structures as read-only!
*/
awk_bool_t (*api_get_file)(awk_ext_id_t id,
const char *name,
size_t name_len,
const char *filetype,
int fd,
/*
* Return values (on success, one or both should
* be non-NULL):
*/
const awk_input_buf_t **ibufp,
const awk_output_buf_t **obufp);
/* Destroy an array. */
awk_bool_t (*api_destroy_array)(awk_ext_id_t id, awk_array_t a_cookie);
} gawk_api_t;
#ifndef GAWK /* these are not for the gawk code itself! */
/*
 * Convenience macros for extension code.  Every macro expands to an
 * access through a variable named api, and most of them also pass a
 * variable named ext_id, so both names must be in scope wherever the
 * macros are used.  See the sample boilerplate code, below.
 */

/* Entries of api->do_flags */
#define do_lint		(api->do_flags[gawk_do_lint])
#define do_traditional	(api->do_flags[gawk_do_traditional])
#define do_profile	(api->do_flags[gawk_do_profile])
#define do_sandbox	(api->do_flags[gawk_do_sandbox])
#define do_debug	(api->do_flags[gawk_do_debug])
#define do_mpfr		(api->do_flags[gawk_do_mpfr])
#define do_csv		(api->do_flags[gawk_do_csv])

/* Function arguments */
#define get_argument(count, wanted, result) \
	(api->api_get_argument(ext_id, count, wanted, result))
#define set_argument(count, new_array) \
	(api->api_set_argument(ext_id, count, new_array))

/*
 * Messages.  These expand to the bare function pointer, so the caller
 * supplies ext_id explicitly as the first argument.
 */
#define fatal		api->api_fatal
#define nonfatal	api->api_nonfatal
#define warning		api->api_warning
#define lintwarn	api->api_lintwarn

/* Registration of I/O hooks */
#define register_input_parser(parser)	(api->api_register_input_parser(ext_id, parser))
#define register_output_wrapper(wrapper) (api->api_register_output_wrapper(ext_id, wrapper))
#define register_two_way_processor(processor) \
	(api->api_register_two_way_processor(ext_id, processor))

/* ERRNO handling */
#define update_ERRNO_int(e)	(api->api_update_ERRNO_int(ext_id, e))
#define update_ERRNO_string(str) \
	(api->api_update_ERRNO_string(ext_id, str))
#define unset_ERRNO()	(api->api_unset_ERRNO(ext_id))

/* Extension functions and exit hooks */
#define add_ext_func(ns, func)	(api->api_add_ext_func(ext_id, ns, func))
#define awk_atexit(funcp, arg0)	(api->api_awk_atexit(ext_id, funcp, arg0))

/* Symbol table access; the non-_ns forms pass "" as the namespace */
#define sym_lookup(name, wanted, result) \
	sym_lookup_ns("", name, wanted, result)
#define sym_update(name, value) \
	sym_update_ns("", name, value)

#define sym_lookup_ns(name_space, name, wanted, result) \
	(api->api_sym_lookup(ext_id, name_space, name, wanted, result))
#define sym_update_ns(name_space, name, value) \
	(api->api_sym_update(ext_id, name_space, name, value))

#define sym_lookup_scalar(scalar_cookie, wanted, result) \
	(api->api_sym_lookup_scalar(ext_id, scalar_cookie, wanted, result))
/* Parenthesized like its siblings: the whole call is one expression. */
#define sym_update_scalar(scalar_cookie, value) \
	(api->api_sym_update_scalar(ext_id, scalar_cookie, value))

/* Array management */
#define get_array_element(array, index, wanted, result) \
	(api->api_get_array_element(ext_id, array, index, wanted, result))

#define set_array_element(array, index, value) \
	(api->api_set_array_element(ext_id, array, index, value))

#define set_array_element_by_elem(array, elem) \
	(api->api_set_array_element(ext_id, array, & (elem)->index, & (elem)->value))

#define del_array_element(array, index) \
	(api->api_del_array_element(ext_id, array, index))

#define get_element_count(array, count_p) \
	(api->api_get_element_count(ext_id, array, count_p))

#define create_array()		(api->api_create_array(ext_id))

#define destroy_array(array)	(api->api_destroy_array(ext_id, array))

#define clear_array(array)	(api->api_clear_array(ext_id, array))

#define flatten_array_typed(array, data, index_type, value_type) \
	(api->api_flatten_array_typed(ext_id, array, data, index_type, value_type))

/* Older interface: string indices, values left as they are */
#define flatten_array(array, data) \
	flatten_array_typed(array, data, AWK_STRING, AWK_UNDEFINED)

#define release_flattened_array(array, data) \
	(api->api_release_flattened_array(ext_id, array, data))

/* Memory allocation through gawk; these take no ext_id */
#define gawk_malloc(size)		(api->api_malloc(size))
#define gawk_calloc(nmemb, size)	(api->api_calloc(nmemb, size))
#define gawk_realloc(ptr, size)		(api->api_realloc(ptr, size))
#define gawk_free(ptr)			(api->api_free(ptr))

/* Cached values */
#define create_value(value, result) \
	(api->api_create_value(ext_id, value,result))

#define release_value(value) \
	(api->api_release_value(ext_id, value))

/* File lookup */
#define get_file(name, namelen, filetype, fd, ibuf, obuf) \
	(api->api_get_file(ext_id, name, namelen, filetype, fd, ibuf, obuf))

/* These two are obsolete and should not be used. */
#define get_mpfr_ptr()	(api->api_get_mpfr(ext_id))
#define get_mpz_ptr()	(api->api_get_mpz(ext_id))

#define register_ext_version(version) \
	(api->api_register_ext_version(ext_id, version))
/*
 * Checked allocation helpers built on gawk_malloc(), gawk_calloc() and
 * gawk_realloc().  Each stores the (cast) result in pointer and calls
 * fatal() with message as a prefix when the result is null.
 *
 * size is printed with %lu after an explicit cast to unsigned long,
 * so that both int and size_t arguments match the conversion.
 * Note that size is evaluated more than once; avoid side effects.
 *
 * NOTE(review): fatal() is presumably expected not to return; if it
 * did, erealloc() would already have overwritten pointer -- confirm.
 */
#define emalloc(pointer, type, size, message) \
	do { \
		if ((pointer = (type) gawk_malloc(size)) == 0) \
			fatal(ext_id, "%s: malloc of %lu bytes failed", \
				message, (unsigned long) (size)); \
	} while(0)

#define ezalloc(pointer, type, size, message) \
	do { \
		if ((pointer = (type) gawk_calloc(1, size)) == 0) \
			fatal(ext_id, "%s: calloc of %lu bytes failed", \
				message, (unsigned long) (size)); \
	} while(0)

#define erealloc(pointer, type, size, message) \
	do { \
		if ((pointer = (type) gawk_realloc(pointer, size)) == 0) \
			fatal(ext_id, "%s: realloc of %lu bytes failed", \
				message, (unsigned long) (size)); \
	} while(0)
/* Constructor functions */
/*
 * r_make_string_type --- fill in result as a string-like value
 *
 * result is zeroed first, then given val_type and the supplied length.
 * With duplicate true, the text is copied into a freshly emalloc'ed
 * buffer of length + 1 bytes and NUL-terminated; otherwise result
 * simply points at the caller's string.  Returns result.
 */

static inline awk_value_t *
r_make_string_type(const gawk_api_t *api,	/* needed for emalloc */
		   awk_ext_id_t ext_id,		/* ditto */
		   const char *string,
		   size_t length,
		   awk_bool_t duplicate,
		   awk_value_t *result,
		   awk_valtype_t val_type)
{
	char *copy = NULL;

	memset(result, 0, sizeof(*result));
	result->val_type = val_type;
	result->str_value.len = length;

	/* no copy wanted: share the caller's buffer */
	if (! duplicate) {
		result->str_value.str = (char *) string;
		return result;
	}

	/* private, NUL-terminated copy */
	emalloc(copy, char *, length + 1, "r_make_string");
	memcpy(copy, string, length);
	copy[length] = '\0';
	result->str_value.str = copy;

	return result;
}
/*
 * r_make_string --- fill in result as an AWK_STRING value
 *
 * Forwards every argument to r_make_string_type() with the value type
 * fixed at AWK_STRING, and returns what that function returns.
 */

static inline awk_value_t *
r_make_string(const gawk_api_t *api,	/* needed for emalloc */
	      awk_ext_id_t ext_id,	/* ditto */
	      const char *string,
	      size_t length,
	      awk_bool_t duplicate,
	      awk_value_t *result)
{
	return r_make_string_type(api, ext_id,
				  string, length, duplicate,
				  result, AWK_STRING);
}
/*
 * Shorthands for r_make_string() and r_make_string_type() that supply
 * api and ext_id from the enclosing scope.  The make_const_xxx forms
 * ask for the text to be duplicated; the make_malloced_xxx forms store
 * the caller's pointer without copying it.
 */
#define make_const_string(str, len, result)	r_make_string(api, ext_id, str, len, awk_true, result)
#define make_malloced_string(str, len, result)	r_make_string(api, ext_id, str, len, awk_false, result)

#define make_const_regex(str, len, result)	r_make_string_type(api, ext_id, str, len, awk_true, result, AWK_REGEX)
#define make_malloced_regex(str, len, result)	r_make_string_type(api, ext_id, str, len, awk_false, result, AWK_REGEX)

/*
 * Note: The caller may not create a STRNUM, but it can create a string that is
 * flagged as user input that MAY be a STRNUM. Gawk will decide whether it's a
 * STRNUM or a string by checking whether the string is numeric.
 *
 * The duplicate flag is spelled awk_true / awk_false here, as in the
 * macros above, so that an awk_bool_t value is passed for the
 * awk_bool_t parameter instead of a bare integer literal.
 */
#define make_const_user_input(str, len, result)	r_make_string_type(api, ext_id, str, len, awk_true, result, AWK_STRNUM)
#define make_malloced_user_input(str, len, result)	r_make_string_type(api, ext_id, str, len, awk_false, result, AWK_STRNUM)
/*
 * make_null_string --- make a null string value
 *
 * Every byte of result is cleared and its type is then set to
 * AWK_UNDEFINED.  Returns result.
 */

static inline awk_value_t *
make_null_string(awk_value_t *result)
{
	awk_value_t *const out = result;

	memset(out, 0, sizeof(*out));
	out->val_type = AWK_UNDEFINED;

	return out;
}
/*
 * make_number --- store a double in result as an AWK_NUMBER
 *
 * Only the value type, the number type and the numeric value are
 * written; result is not cleared first.  Returns result.
 */

static inline awk_value_t *
make_number(double num, awk_value_t *result)
{
	result->num_type = AWK_NUMBER_TYPE_DOUBLE;
	result->num_value = num;
	result->val_type = AWK_NUMBER;

	return result;
}
/*
 * make_number_mpz --- store an mpz number in result as an AWK_NUMBER.
 * The mpz_ptr must be from a call to get_mpz_ptr.
 *
 * The pointer itself is recorded in result; nothing is copied.
 * Returns result.
 */

static inline awk_value_t *
make_number_mpz(void *mpz_ptr, awk_value_t *result)
{
	result->num_ptr = mpz_ptr;
	result->num_type = AWK_NUMBER_TYPE_MPZ;
	result->val_type = AWK_NUMBER;

	return result;
}
/*
 * make_number_mpfr --- store an mpfr number in result as an AWK_NUMBER.
 * The mpfr_ptr must be from a call to get_mpfr_ptr.
 *
 * The pointer itself is recorded in result; nothing is copied.
 * Returns result.
 */

static inline awk_value_t *
make_number_mpfr(void *mpfr_ptr, awk_value_t *result)
{
	result->num_ptr = mpfr_ptr;
	result->num_type = AWK_NUMBER_TYPE_MPFR;
	result->val_type = AWK_NUMBER;

	return result;
}
/*
 * make_bool --- store a boolean in result as an AWK_BOOL
 *
 * Only the value type and the boolean value are written.
 * Returns result.
 */

static inline awk_value_t *
make_bool(awk_bool_t boolval, awk_value_t *result)
{
	result->bool_value = boolval;
	result->val_type = AWK_BOOL;

	return result;
}
/*
 * Each extension must define a function with this prototype:
 *
 *	int dl_load(const gawk_api_t *const api_p, awk_ext_id_t id)
 *
 * The return value should be zero on failure and non-zero on success.
 *
 * For the macros to work, the function should save api_p in a global
 * variable named 'api' and save id in a global variable named 'ext_id'.
 * In addition, a global function pointer named 'init_func' should be
 * defined and set to either NULL or an initialization function that
 * returns non-zero on success and zero upon failure.
 *
 * The dl_load_func() macro, below, generates such a function.  It
 * additionally expects a global 'ext_version' that is either NULL or
 * a version string to register.
 */
extern int dl_load(const gawk_api_t *const api_p, awk_ext_id_t id);
#if 0
/*
 * Boilerplate code: the globals and tables that dl_load_func() and the
 * convenience macros refer to by name.  This text is never compiled;
 * copy it into the extension and adapt it.
 */
int plugin_is_GPL_compatible;

/*
 * api must be assignable, since dl_load() stores api_p in it, and it
 * must point to const data, since api_p does.
 */
static const gawk_api_t *api;
static awk_ext_id_t ext_id;
static const char *ext_version = NULL; /* or ... = "some string" */

static awk_ext_func_t func_table[] = {
	{ "name", do_name, 1 },
	/* ... */
};

/* EITHER: */

static awk_bool_t (*init_func)(void) = NULL;

/* OR: */

static awk_bool_t
init_my_extension(void)
{
	...
}

static awk_bool_t (*init_func)(void) = init_my_extension;

dl_load_func(func_table, some_name, "name_space_in_quotes")
#endif
/*
 * dl_load_func --- expand to a complete definition of dl_load()
 *
 * The generated function saves api_p and id in the globals api and
 * ext_id, exits with status 1 if the API major version differs or
 * gawk's minor version is older than the one compiled against, runs
 * check_mpfr_version(), adds each entry of func_table (stopping at an
 * entry whose name is NULL) under name_space, calls init_func when it
 * is not NULL, and registers ext_version when it is not NULL.
 * It returns non-zero only if no function failed to load and the
 * initialization function did not fail.
 */
#define dl_load_func(func_table, extension, name_space) \
int dl_load(const gawk_api_t *const api_p, awk_ext_id_t id)  \
{  \
	size_t i, j; \
	int errors = 0; \
\
	api = api_p; \
	ext_id = (void **) id; \
\
	/* refuse to continue with an incompatible gawk */ \
	if (! (api->major_version == GAWK_API_MAJOR_VERSION \
	       && api->minor_version >= GAWK_API_MINOR_VERSION)) { \
		fprintf(stderr, #extension ": version mismatch with gawk!\n"); \
		fprintf(stderr, "\tmy version (API %d.%d), gawk version (API %d.%d)\n", \
			GAWK_API_MAJOR_VERSION, GAWK_API_MINOR_VERSION, \
			api->major_version, api->minor_version); \
		exit(1); \
	} \
\
	check_mpfr_version(extension); \
\
	/* load functions, up to the table's end or a NULL name */ \
	j = sizeof(func_table) / sizeof(func_table[0]); \
	for (i = 0; i < j && func_table[i].name != NULL; i++) { \
		if (add_ext_func(name_space, & func_table[i])) \
			continue; \
		warning(ext_id, #extension ": could not add %s", \
				func_table[i].name); \
		errors++; \
	} \
\
	/* optional one-time initialization */ \
	if (init_func != NULL && ! init_func()) { \
		warning(ext_id, #extension ": initialization function failed"); \
		errors++; \
	} \
\
	if (ext_version != NULL) \
		register_ext_version(ext_version); \
\
	return (errors == 0); \
}
/*
 * check_mpfr_version --- compare the GMP and MPFR versions
 *
 * When both the GMP and the MPFR version macros are defined at the
 * point of inclusion, the macro compares them with the versions
 * recorded in api.  It prints a message to stderr and exits with
 * status 1 if a major version differs or gawk's minor version is
 * older.  Otherwise the macro expands to nothing.
 */
#if defined(__GNU_MP_VERSION) && defined(MPFR_VERSION_MAJOR)
#define check_mpfr_version(extension) do { \
	if (! (api->gmp_major_version == __GNU_MP_VERSION \
	       && api->gmp_minor_version >= __GNU_MP_VERSION_MINOR)) { \
		fprintf(stderr, #extension ": GMP version mismatch with gawk!\n"); \
		fprintf(stderr, "\tmy version (%d, %d), gawk version (%d, %d)\n", \
			__GNU_MP_VERSION, __GNU_MP_VERSION_MINOR, \
			api->gmp_major_version, api->gmp_minor_version); \
		exit(1); \
	} \
	if (! (api->mpfr_major_version == MPFR_VERSION_MAJOR \
	       && api->mpfr_minor_version >= MPFR_VERSION_MINOR)) { \
		fprintf(stderr, #extension ": MPFR version mismatch with gawk!\n"); \
		fprintf(stderr, "\tmy version (%d, %d), gawk version (%d, %d)\n", \
			MPFR_VERSION_MAJOR, MPFR_VERSION_MINOR, \
			api->mpfr_major_version, api->mpfr_minor_version); \
		exit(1); \
	} \
} while (0)
#else
#define check_mpfr_version(extension) /* nothing */
#endif
#endif /* GAWK */
#ifdef __cplusplus
}
#endif /* C++ */
#endif /* _GAWK_API_H */
|