2
Copyright (c) 1991-2005 Thomas T. Wetmore IV
4
Permission is hereby granted, free of charge, to any person
5
obtaining a copy of this software and associated documentation
6
files (the "Software"), to deal in the Software without
7
restriction, including without limitation the rights to use, copy,
8
modify, merge, publish, distribute, sublicense, and/or sell copies
9
of the Software, and to permit persons to whom the Software is
10
furnished to do so, subject to the following conditions:
12
The above copyright notice and this permission notice shall be
13
included in all copies or substantial portions of the Software.
15
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
/*==============================================================
25
* dateparse.c -- Code to parse a string into a date
26
* Copyright(c) 1992-2005 by T. T. Wetmore IV; all rights reserved
27
*============================================================*/
37
/*********************************************
38
* external/imported variables
39
*********************************************/
42
/*********************************************
43
* local types used in local function prototypes
44
*********************************************/
46
/* token types, used in parsing; numbering starts at 1, so a token value
   of 0 is never a valid type (extract_date's token loop stops on 0) */
47
enum { MONTH_TOK=1, CHAR_TOK, WORD_TOK, ICONS_TOK, CALENDAR_TOK, YEAR_TOK };
50
/* used in parsing dates -- 1st, 2nd, & 3rd numbers found;
   a slot whose val is BAD_YEAR is still empty */
51
struct tag_nums { struct tag_dnum num1; struct tag_dnum num2; struct tag_dnum num3; };
53
/*********************************************
54
* local function prototypes
55
*********************************************/
58
static void analyze_numbers(GDATEVAL, struct tag_gdate *, struct tag_nums *);
59
static void analyze_word(GDATEVAL gdv, struct tag_gdate * pdate
60
, struct tag_nums * nums, INT ival, BOOLEAN * newdate);
61
static void assign_dnum(struct tag_dnum * dest, struct tag_dnum * src);
62
static void clear_dnum(struct tag_dnum * dnum);
63
static void clear_numbers(struct tag_nums * nums);
64
static void free_gdate(struct tag_gdate *);
65
static INT get_date_tok(struct tag_dnum*);
66
static BOOLEAN is_date_delim(char c);
67
static BOOLEAN is_valid_day(struct tag_gdate * pdate, struct tag_dnum day);
68
static BOOLEAN is_valid_month(struct tag_gdate * pdate, struct tag_dnum month);
69
static void mark_freeform(GDATEVAL gdv);
70
static void mark_invalid(GDATEVAL gdv);
71
static void set_date_string(STRING);
73
/*********************************************
74
* local types & variables
75
*********************************************/
77
/* sstr: current read position in the string being parsed (advanced by get_date_tok);
   sstr_start: compared with sstr to detect the beginning of the buffer */
static STRING sstr, sstr_start;
79
/*********************************************
80
* local & exported function definitions
82
*********************************************/
84
/*=====================================================
* mark_invalid -- Set a gdate_val to invalid
* gdv: [I/O] date_val we are building
* A date_val already flagged GDV_V_PHRASE is left untouched;
* any other validity state becomes GDV_V_INVALID.
*===================================================*/
89
mark_invalid (GDATEVAL gdv)
91
if (gdv->valid != GDV_V_PHRASE)
92
gdv->valid = GDV_V_INVALID;
94
/*=====================================================
* mark_freeform -- Set a gdate_val to freeform (unless invalid)
* gdv: [I/O] date_val we are building
* Only a date_val currently marked GDV_V_GOOD is changed;
* every other validity state is kept as it is.
*===================================================*/
99
mark_freeform (GDATEVAL gdv)
101
if (gdv->valid == GDV_V_GOOD)
102
gdv->valid = GDV_V_FREEFORM;
104
/*=====================================================
105
* extract_date -- Extract date from free format string
106
* str: [IN] date to parse
107
* returns new date_val
108
*===================================================*/
110
extract_date (STRING str)
112
/* we accumulate numbers to figure when we finish a
113
date (with a full period or range, we may finish the
114
first date partway thru) */
116
struct tag_dnum dnum = {BAD_YEAR, 0, 0};
117
struct tag_nums nums = { {BAD_YEAR, 0, 0}, {BAD_YEAR, 0, 0}, {BAD_YEAR, 0, 0} };
118
GDATEVAL gdv = create_gdateval();
119
struct tag_gdate * pdate = &gdv->date1;
122
/* GEDCOM DATE PHRASE, eg, "(one 1srt January)" */
123
if (str[0] == '(' && str[strlen(str)-1] == ')') {
124
gdv->valid = GDV_V_PHRASE;
126
set_date_string(str);
127
while ((tok = get_date_tok(&dnum))) {
130
if (!pdate->month.val) {
131
assign_dnum(&pdate->month, &dnum);
132
if (nums.num1.val != BAD_YEAR) {
133
/* if single number before month, it is a day if legal */
134
if (nums.num2.val == BAD_YEAR
135
&& is_valid_day(pdate, nums.num1)) {
136
assign_dnum(&pdate->day, &nums.num1);
146
if (!pdate->calendar)
147
pdate->calendar = dnum.val;
152
if (pdate->year.val == BAD_YEAR) {
153
assign_dnum(&pdate->year, &dnum);
159
/* this was anything unrecognized, including unusable
165
BOOLEAN newdate=FALSE;
166
analyze_word(gdv, pdate, &nums, dnum.val, &newdate);
168
analyze_numbers(gdv, pdate, &nums);
169
clear_numbers(&nums);
176
if (nums.num1.val == BAD_YEAR)
177
assign_dnum(&nums.num1, &dnum);
178
else if (nums.num2.val == BAD_YEAR)
179
assign_dnum(&nums.num2, &dnum);
180
else if (nums.num3.val == BAD_YEAR)
181
assign_dnum(&nums.num3, &dnum);
189
/* now analyze what numbers we got */
190
analyze_numbers(gdv, pdate, &nums);
191
clear_numbers(&nums);
192
gdv->text = strsave(str);
195
/*===============================================
196
* analyze_word -- Interpret word found in date parsing
197
* gdv: [I/O] current date_val we are building
198
* pdate: [I/O] points to which date we're on; its era may be set here
* (&gdv->date1, unless finishing a range or period)
* nums: [I/O] accumulated potential numbers
201
* ival: [IN] word enum value (eg, GD_AFT)
202
* newdate: [OUT] flag we set if we are switching to 2nd date now
203
* Created: 2001/12/28 (Perry Rapp)
204
*=============================================*/
206
analyze_word (GDATEVAL gdv, struct tag_gdate * pdate, struct tag_nums * nums
207
, INT ival, BOOLEAN * newdate)
209
/* GEDCOM word modifiers */
212
/* already have a modifier -- very few 2nd modifiers are allowed */
215
if (gdv->type==GDV_RANGE && gdv->subtype==GDVR_BET) {
216
gdv->subtype = GDVR_BET_AND;
223
if (gdv->type==GDV_PERIOD && gdv->subtype==GDVP_FROM) {
224
gdv->subtype = GDVP_FROM_TO;
234
mark_freeform(gdv); /* AD is not in GEDCOM */
235
pdate->eratime = GDV_AD;
242
pdate->eratime = GDV_BC;
252
gdv->type = GDV_APPROX;
253
gdv->subtype = GDVA_ABT;
256
gdv->type = GDV_APPROX;
257
gdv->subtype = GDVA_EST;
260
gdv->type = GDV_APPROX;
261
gdv->subtype = GDVA_CAL;
264
gdv->type = GDV_RANGE;
265
gdv->subtype = GDVR_BEF;
268
gdv->type = GDV_RANGE;
269
gdv->subtype = GDVR_AFT;
272
gdv->type = GDV_RANGE;
273
gdv->subtype = GDVR_BET;
275
/* AND is not a legal first modifier */
277
gdv->type = GDV_PERIOD;
278
gdv->subtype = GDVP_FROM;
281
if (pdate->day.val || pdate->month.val || pdate->year.val != BAD_YEAR
282
|| nums->num1.val != BAD_YEAR) {
283
/* if we have a date before TO, switch to 2nd date */
284
/* (This is not legal GEDCOM syntax, however) */
286
gdv->type = GDV_PERIOD;
287
gdv->subtype = GDVP_FROM_TO;
288
analyze_numbers(gdv, pdate, nums);
291
gdv->type = GDV_PERIOD;
292
gdv->subtype = GDVP_TO;
299
mark_freeform(gdv); /* AD is not in GEDCOM */
300
pdate->eratime = GDV_AD;
307
pdate->eratime = GDV_BC;
315
/*===============================================
316
* analyze_numbers -- Parse numbers found in date
317
* gdv: [I/O] date_val we are building
318
* pdate: [I/O] pointer to current date (usually &gdv->date1); its day, month & year are filled in here
* nums: [I/O] unassigned numbers found in date line (assign_dnum takes the string out of each one used)
320
* This function does not clear the numbers -- caller must do so.
321
* Created: 2001/12/28 (Perry Rapp)
322
*=============================================*/
324
analyze_numbers (GDATEVAL gdv, struct tag_gdate * pdate, struct tag_nums * nums)
326
if (nums->num1.val == BAD_YEAR) {
327
/* if we have no numbers, we're done */
330
/* we have at least 1 number */
331
if (pdate->day.val && pdate->month.val && pdate->year.val != BAD_YEAR) {
332
/* if we already have day & month & year, we're done */
335
/* we need something */
336
if (nums->num2.val == BAD_YEAR) {
337
/* if we only have 1 number */
338
if (pdate->year.val == BAD_YEAR) {
339
/* if we need year, it is year */
340
assign_dnum(&pdate->year, &nums->num1);
343
if (pdate->month.val && is_valid_day(pdate, nums->num1)) {
344
/* if we only need day, it is day (if legal) */
345
assign_dnum(&pdate->day, &nums->num1);
348
/* otherwise give up (ignore it) */
351
/* we have at least 2 numbers */
352
if (pdate->day.val && pdate->month.val) {
353
/* if all we need is year, then it is year */
354
assign_dnum(&pdate->year, &nums->num1);
357
/* we need at least day or month */
358
/* and we have at least 2 numbers */
360
if (pdate->month.val && pdate->year.val != BAD_YEAR) {
361
/* if all we need is day, see if it can be day */
362
if (is_valid_day(pdate, nums->num1)) {
363
assign_dnum(&pdate->day, &nums->num1);
367
if (pdate->month.val) {
368
/* if we get here, we need day & year */
369
/* prefer first num for day, if legal */
370
if (is_valid_day(pdate, nums->num1)) {
371
assign_dnum(&pdate->day, &nums->num1);
372
assign_dnum(&pdate->year, &nums->num2);
374
assign_dnum(&pdate->year, &nums->num1);
375
if (is_valid_day(pdate, nums->num2))
376
assign_dnum(&pdate->day, &nums->num2);
381
if we get here, we need at least month and have 2+ numbers
382
if we don't know month, then we don't know day either, as
383
we only recognize day during parsing if we see it before month
385
ASSERT(!pdate->day.val);
386
/* so we need at least day & month, & have 2+ numbers */
388
if (pdate->year.val != BAD_YEAR) {
389
/* we need day & month, but not year, and have 2+ numbers */
390
/* can we interpret them unambiguously ? */
391
if (is_valid_month(pdate, nums->num1)
392
&& !is_valid_month(pdate, nums->num2)
393
&& is_valid_day(pdate, nums->num2))
395
assign_dnum(&pdate->month, &nums->num1);
396
assign_dnum(&pdate->day, &nums->num2);
399
if (is_valid_month(pdate, nums->num2)
400
&& !is_valid_month(pdate, nums->num1)
401
&& is_valid_day(pdate, nums->num1))
403
assign_dnum(&pdate->month, &nums->num2);
404
assign_dnum(&pdate->day, &nums->num1);
407
/* not unambiguous, so don't guess */
410
/* if we get here, we need day, month, & year, and have 2+ numbers */
411
if (nums->num3.val == BAD_YEAR) {
412
/* we need day, month, & year, and have 2 numbers */
413
/* how about day, year ? */
414
if (is_valid_day(pdate, nums->num1)) {
415
assign_dnum(&pdate->day, &nums->num1);
416
assign_dnum(&pdate->year, &nums->num2);
418
/* how about year, day ? */
419
if (is_valid_day(pdate, nums->num2)) {
420
assign_dnum(&pdate->day, &nums->num2);
421
assign_dnum(&pdate->year, &nums->num1);
426
/* we need day, month, & year, and have 3 numbers */
427
/* how about day, month, year ? */
428
if (is_valid_day(pdate, nums->num1) && is_valid_month(pdate, nums->num2)) {
429
assign_dnum(&pdate->day, &nums->num1);
430
assign_dnum(&pdate->month, &nums->num2);
431
assign_dnum(&pdate->year, &nums->num3);
433
/* how about month, day, year ? */
434
if (is_valid_month(pdate, nums->num1) && is_valid_day(pdate, nums->num2)) {
435
assign_dnum(&pdate->day, &nums->num2);
436
assign_dnum(&pdate->month, &nums->num1);
437
assign_dnum(&pdate->year, &nums->num3);
439
/* how about year, month, day ? */
440
if (is_valid_day(pdate, nums->num3) && is_valid_month(pdate, nums->num2)) {
441
assign_dnum(&pdate->day, &nums->num3);
442
assign_dnum(&pdate->month, &nums->num2);
443
assign_dnum(&pdate->year, &nums->num1);
447
/*===============================================
* clear_dnum -- Empty a struct tag_dnum
* dnum: [I/O] number we are clearing
* Resets both val and val2 to BAD_YEAR.
* NOTE(review): what happens to dnum->str is not visible
* in this excerpt -- confirm that it is released here.
* Created: 2002/02/03 (Perry Rapp)
*=============================================*/
453
clear_dnum (struct tag_dnum * dnum)
455
dnum->val = dnum->val2 = BAD_YEAR;
461
/*===============================================
* clear_numbers -- Empty a struct tag_nums
* nums: [I/O] set of accumulated numbers we are clearing
* Applies clear_dnum to each of the three slots.
* Created: 2002/02/03 (Perry Rapp)
*=============================================*/
467
clear_numbers (struct tag_nums * nums)
469
clear_dnum(&nums->num1);
470
clear_dnum(&nums->num2);
471
clear_dnum(&nums->num3);
473
/*===============================================
* assign_dnum -- Move dnum from one variable to another
* dest: [OUT] receives val, val2 and str from src
* src: [I/O] number being moved; it gives up its string
* This is a move, not a copy: the str pointer is handed to dest
* and nulled in src, so src no longer refers to the string.
* NOTE(review): whatever dest->str pointed to before the call is
* overwritten without being released here.
* Created: 2002/02/03 (Perry Rapp)
*=============================================*/
478
assign_dnum (struct tag_dnum * dest, struct tag_dnum * src)
480
dest->val = src->val;
481
dest->val2 = src->val2;
482
dest->str = src->str;
483
src->str = 0; /* transferring string to dest */
485
src->val2 = BAD_YEAR; /* src no longer holds a second value */
487
/*===============================================
* create_gdateval -- Create new, empty GEDCOM date_val
* The structure is obtained from stdalloc and zero-filled; both
* dates then get BAD_YEAR as their year and the whole value is
* marked GDV_V_GOOD.
* Created: 2001/12/28 (Perry Rapp)
*=============================================*/
492
create_gdateval (void)
494
GDATEVAL gdv = (GDATEVAL)stdalloc(sizeof(*gdv));
495
memset(gdv, 0, sizeof(*gdv));
496
/* an unset year is represented by BAD_YEAR, so it is set explicitly after the memset */
gdv->date1.year.val = BAD_YEAR;
497
gdv->date2.year.val = BAD_YEAR;
498
gdv->valid = GDV_V_GOOD;
502
/*===============================================
* free_gdate -- Delete existing GEDCOM date
* gdate: [I/O] date whose year, month and day numbers are cleared
* In the lines shown, only the three contained numbers are cleared
* (via clear_dnum); the struct tag_gdate itself is not released.
* Created: 2001/12/28 (Perry Rapp)
*=============================================*/
507
free_gdate (struct tag_gdate * gdate)
509
clear_dnum(&gdate->year);
510
clear_dnum(&gdate->month);
511
clear_dnum(&gdate->day);
513
/*===============================================
* free_gdateval -- Delete existing GEDCOM date_val
* gdv: [I/O] date_val to dispose of
* Clears both contained dates with free_gdate.
* NOTE(review): release of gdv itself, and of gdv->text (which
* extract_date fills via strsave), is not visible in this
* excerpt -- confirm.
* Created: 2001/12/28 (Perry Rapp)
*=============================================*/
518
free_gdateval (GDATEVAL gdv)
521
free_gdate(&gdv->date1);
522
free_gdate(&gdv->date2);
527
/*===============================================
* set_date_string -- Store date extraction string
* in static buffer for use during subsequent parsing
* str: [IN] string to be parsed
* NOTE(review): the statements that record str are not visible
* in this excerpt; only the initialize_if_needed call is shown.
*=============================================*/
532
set_date_string (STRING str)
536
initialize_if_needed();
538
/*==================================================
539
* get_date_tok -- Return next date extraction token
540
* pdnum: [OUT] numeric value of token, if day/year number
541
* or numeric value of month or calendar or keyword
542
* pdnum->str: [OUT] copy (made with strsave) of the original token text,
* set only for compound numbers (eg, slash years) and decade forms (eg, 1850s)
544
*================================================*/
546
get_date_tok (struct tag_dnum *pdnum)
548
static char scratch[90];
551
/* flag if token preceded by whitespace (or at start of buffer) */
552
BOOLEAN white_before = FALSE;
554
if (strlen(sstr) > sizeof(scratch)-1) return 0;
555
while (iswhite((uchar)*sstr++))
558
white_before = (sstr==sstr_start || iswhite((uchar)sstr[-1]));
559
if (sstr[0]=='@' && sstr[1]=='#' && sstr[2]=='D') {
561
/* collect calendar escape to closing @ (or end of string) */
564
} while (sstr[0] && sstr[0]!='@');
568
*p++ = *sstr++; /* consume the '@' */
571
/* look it up in our big table of GEDCOM keywords */
572
i = valueof_int(keywordtbl, upperascii_s(scratch));
573
if (i >= 2001 && i < 2000 + GDV_CALENDARS_IX) {
574
pdnum->val = i - 2000;
577
/* unrecognized word */
580
if (isletter((uchar)*sstr)) {
582
/* collect all letters (to end or whitespace or closeparen) */
585
} while (sstr[0] && sstr[0]!=')' && !iswhite((uchar)sstr[0]));
587
/* look it up in our big table of GEDCOM keywords */
588
i = valueof_int(keywordtbl, upperascii_s(scratch));
590
/* unrecognized word */
593
if (i > 0 && i <= 999) {
594
pdnum->val = i % 100;
595
/* TODO: we need to use the fact that calendar is i/100 */
596
/* That is, now we know what calendar this is in */
600
if (i >= 1001 && i < 1000 + GD_END2) {
601
pdnum->val = i - 1000;
604
FATAL(); /* something unexpected is in the keywordtbl ? Find out what! */
607
if (chartype((uchar)*sstr) == DIGIT) {
608
INT i=0; /* primary numeric value */
609
INT j=BAD_YEAR; /* secondary numeric value (for compound number) */
610
while (chartype(c = (uchar)(*p++ = *sstr++)) == DIGIT)
613
/* 5+ digit numbers are not recognized */
616
/* c is the char after the last digit,
617
and sstr is the next char after that */
618
/* check for compound number, if preceding whitespace */
619
if ((c=='/' || c=='-') && white_before) {
622
STRING saves = sstr, savep = p;
625
while (chartype(c = (uchar)(*p++ = *sstr++)) == DIGIT) {
629
/* 2nd number must be larger than first (subject to mod)
630
eg, 1953-54 is ok, but not 1953-52
631
also must be followed by whitespace (or be at end) */
632
delta = j - i % modnum;
633
if (delta > 0 && (!c || iswhite((uchar)c))
634
&& (csave == '-' || delta == 1)) {
638
pdnum->val2 = i + delta;
639
pdnum->str = strsave(scratch);
640
if (is_valid_day(NULL, *pdnum))
645
/* pop back to before slash/hyphen, so it can be handled as
646
a number before a date delimiter */
649
} else if ((c == 's' || c == 'S')
651
&& (!sstr[0] || iswhite((uchar)sstr[0]))) {
652
/* eg, 1850s -- this is English-specific */
656
pdnum->str = strsave(scratch);
657
if (is_valid_day(NULL, *pdnum))
664
if (*sstr && !is_date_delim(*sstr)) {
665
/* number only valid if followed by date delimiter */
680
/*=============================
* get_todays_date -- Get today's date
* returns static buffer
* The text is formatted as "<day> <month keyword> <year>"
* from the current local time.
* NOTE(review): the localtime result is used without a NULL
* check, and the size of dat is not visible in this excerpt.
*===========================*/
685
get_todays_date (void)
691
curtime = time(NULL);
692
pt = localtime(&curtime);
693
initialize_if_needed();
694
/* TODO: Should this be one of the customizable formats ? */
695
/* tm_mon counts from 0; assumes gedkeys starts with the month keywords in calendar order -- TODO confirm */
month = gedkeys[pt->tm_mon].keyword;
696
/* tm_year is years since 1900 */
sprintf(dat, "%d %s %d", pt->tm_mday, month, 1900 + pt->tm_year);
699
/*=============================
* gdateval_isdual -- Does gdateval contain
* two dates ?
* gdv: [IN] date_val to examine
* A period counts as dual when its subtype is GDVP_FROM_TO,
* a range when its subtype is GDVR_BET_AND.
* NOTE(review): the result for other types is not visible
* in this excerpt.
*===========================*/
704
gdateval_isdual (GDATEVAL gdv)
706
if (gdv->type == GDV_PERIOD)
707
return (gdv->subtype == GDVP_FROM_TO);
708
else if (gdv->type == GDV_RANGE)
709
return (gdv->subtype == GDVR_BET_AND);
712
/*=============================
* is_valid_day -- Is this day legal for this date ?
* pdate: [IN] date in which day occurred (may be NULL)
* day: [IN] candidate day number
* The lower bound (1) is tested on day.val and the upper
* bound (31) on day.val2.
* Created: 2001/12/28 (Perry Rapp)
*===========================*/
719
is_valid_day (struct tag_gdate * pdate, struct tag_dnum day)
721
/* To consider: Fancy code with calendars */
722
/* for now, use max (all cals all months), which is 31 */
723
pdate=pdate; /* unused -- the self-assignment presumably silences an unused-parameter warning */
724
return (day.val>=1 && day.val2<=31);
726
/*=============================
* is_valid_month -- Is this month legal for this date ?
* pdate: [IN] date in which month occurred (may be NULL)
* month: [IN] candidate month number
* The lower bound (1) is tested on month.val and the upper
* bound on month.val2; the upper bound is 13 or 12.
* NOTE(review): the test on cal that chooses between the two
* limits is not visible in this excerpt.
* Created: 2001/12/28 (Perry Rapp)
*===========================*/
733
is_valid_month (struct tag_gdate * pdate, struct tag_dnum month)
735
/* a NULL pdate is treated as calendar 0 */
INT cal = pdate ? pdate->calendar : 0;
739
return (month.val>=1 && month.val2<=13);
741
return (month.val>=1 && month.val2<=12);
744
/*=============================
745
* is_date_delim -- Is this a valid character to end
746
* a number in a date ?
747
* Created: 2001/12/28 (Perry Rapp)
748
*===========================*/
750
is_date_delim (char c)
752
if (iswhite((uchar)c))
754
/* TODO: Any other characters here ? Do we internationalize it ? */
755
if (c=='/' || c=='-' || c=='.' || c==',')