2
######################################################################
3
# Copyright (C) 1999-2001, International Business Machines
4
# Corporation and others. All Rights Reserved.
5
######################################################################
6
# See: ftp://elsie.nci.nih.gov/pub/tzdata<year>
7
# where <year> is "1999b" or a similar string.
8
######################################################################
9
# This script takes time zone data in elsie.nci.nih.gov format and
10
# parses it into a form usable by ICU. The raw data contains more
11
# data than we need, since it contains historical zone data. We
12
# parse out the current zones and create a listing of current zones.
14
######################################################################
15
# This script reads an alias table, $TZ_ALIAS, and creates clones of
16
# standard UNIX zones with alias names.
17
######################################################################
18
# To update the zone data, download the latest data from the NIH URL
19
# listed above into a directory. Run this script with the directory
20
# name as an argument. THE DIRECTORY NAME MUST END IN tzdataYYYYR.
21
######################################################################
22
# OUTPUT FILE FORMAT (filename $OUT)
24
# As a matter of policy, this script wants to do as much of
25
# the parsing, data processing, and error checking as possible,
26
# leaving the C++ program that parses this file to just do the binary
29
# - The file is line based, with one record per line.
30
# - Lines may be followed by a comment; the parser must ignore
31
# anything of the form /\s+#.*$/ in each line.
32
# |3065,14400 # Asia/Dubai GMT+4:00
33
# - The file contains a header and 4 lists.
34
# - The header contains the version of this data file:
35
# 2 original version, without equivalency groups
36
# 3 current version, described here
37
# then the version of the unix data, and other counts:
38
# | 3 # format version number of this file
39
# | 1999 # (tzdata1999j) version of Olson zone
40
# | 10 # data from ftp://elsie.nci.nih.gov
41
# | 402 # total zone count
42
# | 40 # maximum zones per offset (used by gentz)
43
# - Lists start with a count of the records to follow, the records
44
# themselves (one per line), and a single line with the keyword
46
# - The first list is the name table:
47
# | 387 # count of names to follow
52
# Each name is terminated by a newline (like all lines in the file).
53
# The zone numbers in other lists refer to this table. The
54
# integer that precedes the name is an index into the equivalency
55
# table, with the first table entry being entry 0.
56
# - The second list is the equivalency table. It lists, in sorted
57
# order, the equivalency groups. Each group represents a
58
# set of one or more zones that have the same GMT offset and the
59
# same rules. While there are about 400 zones, there are fewer than
60
# 120 equivalency groups (as of this writing).
61
# | 120 # count of equivalency groups to follow
62
# | s,0,1,0 # GMT+0:00
63
# | d,0,8,1,0,0,w,11,31,0,0,w,20,4,15,16,17,18 # GMT+0:00 Sep 1...
66
# Entries start with 's' for standard zones, or 'd' for DST zones.
67
# Both zone descriptors start with the GMT offset in SECONDS. DST
68
# zones contain, in addition, data for the onset rule and the cease
69
# rule. Each rule is described by the following integers:
71
# dowim } These two values are in SimpleTimeZone encoded
72
# dow } format for DOM, DOWIM, DOW>=DOM, or DOW<=DOM.
74
# time mode ('w', 's', 'u')
75
# The last rule integer in the record is the DST savings in MINUTES,
78
# After either a standard or a DST zone, there is a list of the
79
# members of the equivalency group. This consists of a number of
80
# entries to follow (>=1), then the zone numbers themselves.
81
# - The third list is an index by GMT offset. Each line lists the
82
# zones with the same offset. The first number on the line is the
83
# GMT offset in seconds. The second number is the default zone
84
# number in the following list, taken from tz.default. The list
85
# consists of a number of entries to follow (>=1), then the zone
87
# | 39 # index by offset entries to follow
88
# | -43200,280,1,280 # -12:00 d=Etc/GMT+12 Etc/GMT+12
89
# | -39600,374,6,279,366,374,394,396,399 # -11:00 d=Pacific/Apia Etc/GMT+11 MIT Pacific/Apia Pacific/Midway Pacific/Niue Pacific/Pago_Pago
92
# - The fourth list is an index by ISO 3166 country code. Each line
93
# lists a country and the zones mapped into that country by the
94
# zone.tab file. Zones not mapped into any country are listed on the
95
# first line. The first number on each line is the intcode for the
96
# country code. The intcode for 'US' for example is ('U'-'A') * 32
97
# + ('S' - 'A') == 658. The second number is the count of list
98
# items, and the following numbers are the zone indices.
99
# | 238 # index by country entries to follow
100
# | 0,38,230,231,232,276,282,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,364,380,429,431 # (None) Asia/Riyadh87 Asia/Riyadh88 Asia/Riyadh89 CET EET Etc/GMT Etc/GMT+1 Etc/GMT+10 Etc/GMT+11 Etc/GMT+12 Etc/GMT+2 Etc/GMT+3 Etc/GMT+4 Etc/GMT+5 Etc/GMT+6 Etc/GMT+7 Etc/GMT+8 Etc/GMT+9 Etc/GMT-1 Etc/GMT-10 Etc/GMT-11 Etc/GMT-12 Etc/GMT-13 Etc/GMT-14 Etc/GMT-2 Etc/GMT-3 Etc/GMT-4 Etc/GMT-5 Etc/GMT-6 Etc/GMT-7 Etc/GMT-8 Etc/GMT-9 Etc/UCT Etc/UTC GMT MET UTC WET
101
# | 3,1,314 # AD (Andorra) Europe/Andorra
102
# | 4,1,199 # AE (United Arab Emirates) Asia/Dubai
104
# | 822,2,28,275 # ZW (Zimbabwe) Africa/Harare CAT
106
######################################################################
107
# As of 1999j, here are the various possible values taken by the
108
# rule fields. See code below that generates this data.
110
# at: 0:00, 0:00s, 1:00, 1:00s, 1:00u, 23:00s, 2:00, 2:00s, 2:30, 2:45s,
112
# in: Apr, Dec, Feb, Jan, Jun, Mar, May, Nov, Oct, Sep
113
# letter: -, D, GHST, GMT, HS, S, SLST
114
# on: 1, 12, 15, 18, 2, 20, 21, 22, 23, 25, 28, 3, 30, 31, 4, 7, Fri>=1,
115
# Fri>=15, Sat>=1, Sat>=15, Sun<=14, Sun>=1, Sun>=10, Sun>=11, Sun>=15,
116
# Sun>=16, Sun>=23, Sun>=8, Sun>=9, lastFri, lastSun, lastThu
117
# save: 0, 0:20, 0:30, 1:00
120
require 5; # Minimum version of perl needed
123
use vars qw(@FILES $YEAR $DATA_DIR $OUT $SEP @MONTH
124
$VERSION_YEAR $VERSION_SUFFIX $RAW_VERSION
125
$TZ_ALIAS $TZ_DEFAULT $URL $TXT_FILE $HTML_FILE $JAVA_FILE
126
$TZ_TXT_VERSION %ZONE_ID_TO_INDEX $END_MARKER
128
require 'dumpvar.pl';
132
# Current version of the data file. Matches formatVersion[0] in the
133
# binary data file. SEE tzdat.h
134
# 1 - unreleased version (?)
135
# 2 - original version
136
# 3 - added equivalency groups
137
# 4 - added country code index
141
$TZ_ALIAS = 'tz.alias';
142
$TZ_DEFAULT = 'tz.default';
145
$URL = "ftp://elsie.nci.nih.gov/pub";
147
# Separator between fields in the output file
148
$SEP = ','; # Don't use ':'!
150
# Marker between sections
168
# We get the current year from the system here. Later
169
# we double check this against the zone data version.
170
$YEAR = 1900+@{[localtime]}[5]; # Get the current year
174
if (!$DATA_DIR || ! -d $DATA_DIR) {
175
print STDERR "No data directory or invalid directory specified\n\n";
186
print STDERR "Error: Multiple java files specified\n";
190
} elsif (/\.html?$/i) {
192
print STDERR "Error: Multiple html files specified\n";
196
} elsif (/\.txt$/i) {
198
print STDERR "Error: Multiple txt files specified\n";
203
print STDERR "Error: Unexpected command line parameter \"$_\"\n";
208
if (!($TXT_FILE || $JAVA_FILE || $HTML_FILE)) {
209
print STDERR "Nothing to do! Please specify one or more output files.\n";
213
if ($DATA_DIR =~ /(tzdata(\d{4})(\w?))/) {
216
$VERSION_SUFFIX = $3;
217
if ($YEAR != $VERSION_YEAR) {
218
print STDERR "WARNING: You appear to be building $VERSION_YEAR data. Don't you want to use current $YEAR data?\n\n";
219
#usage(); # Add an override option for this check, if needed
221
$VERSION_SUFFIX =~ tr/a-z/A-Z/;
222
if ($VERSION_SUFFIX =~ /[A-Z]/) {
223
$VERSION_SUFFIX = ord($VERSION_SUFFIX) - ord('A') + 1;
225
if ($VERSION_SUFFIX) {
226
print STDERR "Warning: Ignoring version suffix '$VERSION_SUFFIX' for \"$DATA_DIR\"\n";
230
print "Time zone version $RAW_VERSION = $VERSION_YEAR($VERSION_SUFFIX)\n";
232
print STDERR "The directory specified doesn't contain \"tzdataNNNNR\", so I can't tell what version the data is. Please rename the directory and try again.\n";
236
@MONTH = qw(jan feb mar apr may jun
237
jul aug sep oct nov dec);
243
print STDERR "Usage: $0 data_dir [txt_out] [html_out] [java_out]\n\n";
244
print STDERR " data_dir contains the unpacked files from\n";
245
print STDERR " $URL/tzdataYYYYR,\n";
246
print STDERR " where YYYY is the year and R is the revision\n";
247
print STDERR " letter.\n";
249
print STDERR " Files that are expected to be present are:\n";
250
print STDERR " ", join(", ", @FILES), "\n";
252
print STDERR " [txt_out] optional name of .txt file to output\n";
253
print STDERR " [html_out] optional name of .htm|.html file to output\n";
254
print STDERR " [java_out] optional name of .java file to output\n";
259
my (%ZONES, %RULES, @EQUIV, %LINKS, %COUNTRIES);
263
if (! -e "$DATA_DIR/$_") {
264
print STDERR "\nMissing file $DATA_DIR/$_\n\n";
268
TZ::ParseFile("$DATA_DIR/$_", \%ZONES, \%RULES, \%LINKS, $YEAR);
272
# Add country data from zone.tab
273
TZ::ParseZoneTab("$DATA_DIR/zone.tab", \%ZONES, \%LINKS);
275
# We'll also read the iso3166.tab file here. We don't really need
276
# this except for documentation purposes (in generated files)
277
# and for the HTML file.
279
open(FILE, "$DATA_DIR/iso3166.tab") or die "Can't open $DATA_DIR/iso3166.tab";
284
if (/^([A-Z]{2})\s+(\S.*)/) {
285
$COUNTRY_CODES{$1} = $2; # Map from code to country name
287
print STDERR "Ignoring $DATA_DIR/iso3166.tab line: $_";
292
TZ::Postprocess(\%ZONES, \%RULES);
294
my $aliases = incorporateAliases($TZ_ALIAS, \%ZONES, \%LINKS);
297
"Read ", scalar keys %ZONES, " current zones and ",
298
scalar keys %RULES, " rules for $YEAR\n";
300
# Make sure we have a zone named GMT from either the
301
# UNIX data or the alias table. If not, add one.
302
if (!exists $ZONES{GMT}) {
303
print "Adding GMT zone\n";
304
my %GMT = ('format' => 'GMT',
306
'rule' => $TZ::STANDARD,
312
foreach my $z (keys %ZONES) {
313
# Make sure zone IDs only contain invariant chars
314
assertInvariantChars($z);
317
# Create the offset index table, that includes the zones
318
# for each offset and the default zone for each offset.
319
# This is a hash{$name -> array ref}. Element [0] of
320
# the array is the default name. Elements [1..n] are the
321
# zones for the offset, in sorted order, including the default.
322
my $offsetIndex = createOffsetIndex(\%ZONES, $TZ_DEFAULT);
324
# Group zones into equivalency groups
325
TZ::FormZoneEquivalencyGroups(\%ZONES, \%RULES, \@EQUIV);
327
"Equivalency groups (including unique zones): ",
330
# Sort equivalency table first by GMT offset, then by
331
# alphabetic order of encoded rule string.
332
@EQUIV = sort { my $x = $ZONES{$a->[0]};
333
my $y = $ZONES{$b->[0]};
334
TZ::ParseOffset($x->{gmtoff}) <=>
335
TZ::ParseOffset($y->{gmtoff}) ||
336
TZ::ZoneCompare($x, $y, \%RULES); } @EQUIV;
338
# Sort the zones in each equivalency table entry
339
foreach my $eg (@EQUIV) {
340
next unless (@$eg > 1); # Skip single-zone entries
341
my @zoneList = sort @$eg;
345
# Create an index from zone ID to index #
347
foreach my $z (sort keys %ZONES) {
348
$ZONE_ID_TO_INDEX{$z} = $i++;
351
# Create the country -> zone array hash
352
# This hash has the form:
353
# $COUNTRIES{'US'}->{zones}->[13] == "America/Los_Angeles"
354
# $COUNTRIES{'US'}->{intcode} == 658
356
# Some zones are not affiliated with any country (e.g., UTC). We
357
# use a fake country code for these, chosen to precede any real
358
# country code. 'A' or 'AA' work.
360
foreach (sort keys %ZONES) {
361
my $country = $ZONES{$_}->{country};
362
$country = $NONE unless ($country);
363
push @{$COUNTRIES{$country}->{zones}}, $_;
365
foreach my $country (keys %COUNTRIES) {
366
# Compute the int code, which is just a numerical
367
# rep. of the two letters. Use 0 to represent no
368
# country; this MUST BE CHANGED if AA ever becomes
369
# a valid country code.
371
if ($country ne $NONE) {
372
if ($country =~ /^([A-Z])([A-Z])$/) {
373
$intcode = ((ord($1) - ord('A')) << 5) |
374
(ord($2) - ord('A'));
376
die "Can't parse country code $country";
379
$COUNTRIES{$country}->{intcode} = $intcode;
384
emitText($TXT_FILE, \%ZONES, \%RULES, \@EQUIV, $offsetIndex, $aliases,
386
print "$TXT_FILE written.\n";
391
emitJava($JAVA_FILE, \%ZONES, \%RULES, \@EQUIV, $offsetIndex, $aliases,
393
print "$JAVA_FILE written.\n";
398
emitHTML($HTML_FILE, \%ZONES, \%RULES, \@EQUIV, $offsetIndex, $aliases,
400
print "$HTML_FILE written.\n";
403
#::dumpValue($ZONES{"America/Los_Angeles"});
404
#::dumpValue($RULES{"US"});
405
#::dumpValue($RULES{"Tonga"});
407
# Find all the different values of rule fields:
408
# in, at, on, save, type, letter
411
foreach my $ruleName (keys %RULES) {
412
for (my $i=0; $i<2; ++$i) {
413
foreach my $key (qw(in on at save type letter)) {
414
if (@{$RULES{$ruleName}} < 2) {
415
print $ruleName, ":";
416
::dumpValue($RULES{$ruleName});
418
my $x = $RULES{$ruleName}->[$i]->{$key};
419
$RULEVALS{$key}->{$x} = 1;
423
foreach my $key (sort keys %RULEVALS) {
424
print "$key: ", join(", ", sort keys %{$RULEVALS{$key}}), "\n";
429
# Create an index of all the zones by GMT offset. This index will
430
# list the zones for each offset and also the default zone for that
433
# Param: Ref to zone table
434
# Param: Name of default file
436
# Return: ref to hash; the hash has offset integers as keys and arrays
437
# of zone names as values. If there are n zone names at an offset,
438
# the array contains n+1 items. The first item, [0], is the default
439
# zone. Items [1..n] are the zones sorted lexically. Thus the
440
# default appears twice, once in slot [0], and once somewhere in
442
sub createOffsetIndex {
444
my $defaultFile = shift;
446
# Create an index by gmtoff.
448
foreach (sort keys %{$zones}) {
449
my $offset = TZ::ParseOffset($zones->{$_}->{gmtoff});
450
push @{$offsetMap{$offset}}, $_;
453
# Select defaults. We do this by reading the file $defaultFile.
454
# If there are multiple errors, we want to report them all,
455
# so we set a flag and die at the end if there are problems.
456
my %defaults; # key=offset integer, value=zone name
458
open(IN, $defaultFile) or die "Can't open $defaultFile: $!";
461
s/\#.*//; # Trim comments
462
next unless (/\S/); # Skip blank lines
463
if (/^\s*(\S+)\s*$/) {
465
if (! exists $zones->{$z}) {
466
print "Error: Nonexistent zone $z listed in $defaultFile line: $raw";
470
my $offset = TZ::ParseOffset($zones->{$z}->{gmtoff});
471
if (exists $defaults{$offset}) {
473
"Error: Offset ", formatOffset($offset), " has both ",
474
$defaults{$offset}, " and ", $z,
475
" specified as defaults\n";
479
$defaults{$offset} = $z;
481
print "Error: Can't parse line in $defaultFile: $raw";
486
die "Error: Aborting due to errors in $defaultFile\n" unless ($ok);
487
print "Incorporated ", scalar keys %defaults, " defaults from $defaultFile\n";
489
# Go through and record the default for each GMT offset, and unshift
491
# Fill in the blanks, since the default table will typically
492
# not list a default for every single offset.
494
foreach my $gmtoff (keys %offsetMap) {
495
my $aref = $offsetMap{$gmtoff};
497
if (exists $defaults{$gmtoff}) {
498
$def = $defaults{$gmtoff};
500
# If there is an offset for which we have no listed default
501
# in $defaultFile, we try to figure out a reasonable default
502
# ourselves. We ignore any zone named Etc/ because that's not
503
# a "real" zone; it's just one listed as a POSIX convenience.
504
# We take the first (alphabetically) zone of what's left,
505
# and if there is more than one of those, we emit a warning.
508
# Ignore zones named Etc/ and take the first one we otherwise see;
509
# if there is more than one of those, emit a warning.
510
foreach (sort @{$aref}) {
518
$def = $aref->[0] unless ($def);
522
"Warning: No default for GMT", formatOffset($gmtoff),
523
", using ", $def, "\n";
526
# Push $def onto front of list
527
unshift @{$aref}, $def;
529
print "Defaults may be specified in $TZ_DEFAULT\n" if ($missing);
534
# Given a zone and an offset index, return true (1) if the name
535
# is a default zone, otherwise return ''.
537
# Param: zone offset, as a string (that is, raw {gmtoff})
538
# Param: ref to offset index hash
542
my $offsetIndex = shift;
543
my $aref = $offsetIndex->{TZ::ParseOffset($offset)};
544
return ($aref->[0] eq $name);
547
# Emit a text file that contains data for the system time zones.
549
# Param: ref to zone hash
550
# Param: ref to rule hash
551
# Param: ref to equiv table
552
# Param: ref to offset index
553
# Param: ref to alias hash
559
my $offsetIndex = shift;
561
my $countries = shift;
563
# Find the maximum number of zones with the same value of
565
my %perOffset; # Hash of offset -> count
566
foreach my $z (keys %$zones) {
567
# Use TZ::ParseOffset to normalize values - probably unnecessary
568
++$perOffset{TZ::ParseOffset($zones->{$z}->{gmtoff})};
570
my $maxPerOffset = 0;
571
foreach (values %perOffset) {
572
$maxPerOffset = $_ if ($_ > $maxPerOffset);
575
# Count maximum number of zones per equivalency group
577
foreach my $eg (@$equiv) {
578
$maxPerEquiv = @$eg if (@$eg > $maxPerEquiv);
581
# Count total name size
583
foreach my $z (keys %$zones) {
584
$name_size += 1 + length($z);
588
open(OUT,">$file") or die "Can't open $file for writing: $!";
590
############################################################
592
############################################################
594
print OUT "#--- Header --- Generated by tz.pl\n";
595
print OUT $TZ_TXT_VERSION, " # format version number of this file\n";
596
print OUT $VERSION_YEAR, " # ($RAW_VERSION) version of Olson zone\n";
597
print OUT $VERSION_SUFFIX, " # data from $URL\n";
598
print OUT scalar keys %$zones, " # total zone count\n";
599
# The following counts are all used by gentz during its parse
600
# of the tz.txt file and creation of the tz.dat file, even
601
# if they don't show up in the tz.dat file header. For example,
602
# gentz needs the maxPerOffset to preallocate the offset index
603
# entries. It needs the $name_size to allocate the big buffer
604
# that will receive all the names.
605
print OUT scalar @$equiv, " # equivalency groups count\n";
606
print OUT $maxPerOffset, " # max zones with same gmtOffset\n";
607
print OUT $maxPerEquiv, " # max zones in an equivalency group\n";
608
print OUT $name_size, " # length of name table in bytes\n";
609
print OUT $END_MARKER, "\n\n";
611
############################################################
613
############################################################
614
# Output the name table, followed by 'end' keyword
615
print OUT "#--- Zone table ---\n";
616
print OUT "#| equiv_index,name\n";
617
print OUT scalar keys %$zones, " # count of zones to follow\n";
619
# IMPORTANT: This sort must correspond to the sort
620
# order of UnicodeString::compare. That
621
# is, it must be a plain sort.
622
foreach my $z (sort keys %$zones) {
623
# Make sure zone IDs only contain invariant chars
624
assertInvariantChars($z);
626
print OUT equivIndexOf($z, $equiv), ',', $z, "\n";
628
print OUT $END_MARKER, "\n\n";
630
############################################################
631
# EMIT EQUIVALENCY TABLE
632
############################################################
633
print OUT "#--- Equivalency table ---\n";
634
print OUT "#| ('s'|'d'),zone_spec,id_count,id_list\n";
635
print OUT scalar @$equiv, " # count of equivalency groups to follow\n";
637
foreach my $aref (@$equiv) {
638
# $aref is an array ref; the array is full of zone IDs
639
# Use the ID of the first array element
642
# Output either 's' or 'd' to indicate standard or DST
643
my $isStd = ($zones->{$z}->{rule} eq $TZ::STANDARD);
644
print OUT $isStd ? 's,' : 'd,';
647
my ($spec, $notes) = formatZone($z, $zones->{$z}, $rules);
649
# Now add the equivalency list
650
push @$spec, scalar @$aref;
653
foreach $z (@$aref) {
654
my $index = $ZONE_ID_TO_INDEX{$z};
655
# Make sure they are in order
656
die("Unsorted equiv table indices") if ($index <= $min);
663
unshift @$notes, $i++; # Insert index of this group at front
664
print OUT join($SEP, @$spec) . " # " . join(' ', @$notes), "\n";
666
print OUT $END_MARKER, "\n\n";
668
############################################################
669
# EMIT INDEX BY GMT OFFSET
670
############################################################
671
# Create a hash mapping zone name -> integer, from 0..n-1.
672
# Create an array mapping zone number -> name.
676
foreach (sort keys %$zones) {
678
$zoneNumber{$_} = $i++;
682
print OUT "#--- Offset INDEX ---\n";
683
print OUT "#| gmt_offset,default_id,id_count,id_list\n";
684
print OUT scalar keys %{$offsetIndex}, " # index by offset entries to follow\n";
685
foreach (sort {$a <=> $b} keys %{$offsetIndex}) {
686
my $aref = $offsetIndex->{$_};
687
my $def = $aref->[0];
688
# Make a slice of 1..n
689
my @b = @{$aref}[1..$#{$aref}];
691
$_, ",", $zoneNumber{$def}, ",",
693
join(",", map($zoneNumber{$_}, @b)),
694
" # ", formatOffset($_), " d=", $def, " ",
698
print OUT $END_MARKER, "\n\n";
700
############################################################
701
# EMIT INDEX BY COUNTRY
702
############################################################
703
print OUT "#--- Country INDEX ---\n";
704
print OUT "#| country_int_code,id_count,id_list\n";
705
print OUT scalar keys %$countries, " # index by country entries to follow\n";
706
foreach my $country (sort keys %$countries) {
707
my $intcode = $countries->{$country}->{intcode};
708
my $aref = $countries->{$country}->{zones};
710
$intcode, ",", scalar @$aref, ",",
711
join(",", map($zoneNumber{$_}, @$aref)), " # ",
712
($intcode ? ($country . " (" . $COUNTRY_CODES{$country} . ") ") : "(None) "),
713
join(" ", @$aref), "\n";
716
print OUT $END_MARKER, "\n";
718
############################################################
720
############################################################
724
# Emit a Java file that contains data for the system time zones.
726
# Param: ref to zone hash
727
# Param: ref to rule hash
728
# Param: ref to equiv table
729
# Param: ref to offset index
730
# Param: ref to alias hash
736
my $offsetIndex = shift;
738
my $countries = shift;
742
#############################################################
745
foreach my $z (sort keys %$zones) {
746
$_IDS .= "$_indent\"$z\",\n";
749
#############################################################
751
# - While we output this, keep track of a mapping from equivalency table ID
752
# (a value from, e.g., 0..114) to equivalency int[] array index (e.g.,
753
# 0, 15, 30, 34, etc.).
756
my %equiv_id_to_index;
759
foreach my $aref (@$equiv) {
760
$equiv_id_to_index{$i} = $index;
762
# $aref is an array ref; the array is full of zone IDs
763
# Use the ID of the first array element
766
$_DATA .= $_indent; # Indent
768
# Output either 's' or 'd' to indicate standard or DST
769
my $isStd = ($zones->{$z}->{rule} eq $TZ::STANDARD);
770
$_DATA .= $isStd ? '0/*s*/,' : '1/*d*/,';
773
my ($spec, $notes) = formatZone($z, $zones->{$z}, $rules);
775
# Now add the equivalency list
776
push @$spec, scalar @$aref;
779
foreach $z (@$aref) {
780
my $index = $ZONE_ID_TO_INDEX{$z};
781
# Make sure they are in order
782
die("Unsorted equiv table indices") if ($index <= $min);
789
unshift @$notes, $i++; # Insert index of this group at front
791
# Convert to Java constants:
792
# 'w' -> 0, 's' -> 1, 'u' -> 2
803
$_DATA .= join($SEP, @$spec) . ", // " . join(' ', @$notes) . "\n";
804
$index += (scalar @$spec) + 1; # +1 for s/d
807
#############################################################
808
# Zone->Equivalency mapping
810
foreach my $z (sort keys %$zones) {
813
$equiv_id_to_index{equivIndexOf($z, $equiv)} .
817
#############################################################
819
# Create a hash mapping zone name -> integer, from 0..n-1.
820
# Create an array mapping zone number -> name.
821
my $_INDEX_BY_OFFSET;
825
foreach (sort keys %$zones) {
827
$zoneNumber{$_} = $i++;
830
foreach (sort {$a <=> $b} keys %{$offsetIndex}) {
831
my $aref = $offsetIndex->{$_};
832
my $def = $aref->[0];
833
# Make a slice of 1..n
834
my @b = @{$aref}[1..$#{$aref}];
836
$_indent . $_ . "," . $zoneNumber{$def} . "," .
838
join(",", map($zoneNumber{$_}, @b)) .
839
", // " . formatOffset($_) . " d=" . $def . " " .
840
join(" ", @b) . "\n";
843
############################################################
845
my $_INDEX_BY_COUNTRY;
846
foreach my $country (sort keys %$countries) {
847
my $intcode = $countries->{$country}->{intcode};
848
my $aref = $countries->{$country}->{zones};
849
# Emit int code (n1*32 + n0), #of zones,
851
$_INDEX_BY_COUNTRY .=
852
$_indent . $intcode . ", " .
853
scalar(@$aref) . ", " .
854
join(", ", map($zoneNumber{$_}, @$aref)) . ", // " .
855
($intcode ? ($country . " (" . $COUNTRY_CODES{$country} . ")") : "(None)") . ": " .
860
############################################################
861
# BEGIN JAVA TEMPLATE
862
############################################################
864
// Instructions: Build against icu4j. Run and save output.
865
// Paste output into icu4j/src/com/ibm/util/TimeZoneData.java
866
import com.ibm.util.Utility;
867
import java.util.Date;
869
public static void main(String[] args) {
870
System.out.println(" // BEGIN GENERATED SOURCE CODE");
871
System.out.println(" // Date: " + new Date());
872
System.out.println(" // Version: $RAW_VERSION from $URL");
873
System.out.println(" // Tool: icu/source/tools/gentz");
874
System.out.println(" // See: icu/source/tools/gentz/readme.txt");
875
System.out.println(" // DO NOT EDIT THIS SECTION");
876
System.out.println();
878
System.out.println(" /**");
879
System.out.println(" * Array of IDs in lexicographic order. The INDEX_BY_OFFSET and DATA");
880
System.out.println(" * arrays refer to zones using indices into this array. To map from ID");
881
System.out.println(" * to equivalency group, use the INDEX_BY_NAME Hashtable.");
882
System.out.println(" * >> GENERATED DATA: DO NOT EDIT <<");
883
System.out.println(" */");
884
System.out.println(" static final String[] IDS = {");
885
for (int i=0;i<IDS.length;++i) {
886
System.out.println(" \\\"" + IDS[i] + "\\\",");
888
System.out.println(" };\\n");
890
System.out.println(" /**");
891
System.out.println(" * RLE encoded form of DATA.");
892
System.out.println(" * \@see com.ibm.util.Utility.RLEStringToIntArray");
893
System.out.println(" * >> GENERATED DATA: DO NOT EDIT <<");
894
System.out.println(" */");
895
System.out.println(" static final String DATA_RLE =");
896
System.out.println(Utility.formatForSource(Utility.arrayToRLEString(DATA)));
897
System.out.println(" ;\\n");
899
System.out.println(" /**");
900
System.out.println(" * RLE encoded form of INDEX_BY_NAME_ARRAY.");
901
System.out.println(" * \@see com.ibm.util.Utility.RLEStringToIntArray");
902
System.out.println(" * >> GENERATED DATA: DO NOT EDIT <<");
903
System.out.println(" */");
904
System.out.println(" static final String INDEX_BY_NAME_ARRAY_RLE =");
905
System.out.println(Utility.formatForSource(Utility.arrayToRLEString(INDEX_BY_NAME_ARRAY)));
906
System.out.println(" ;\\n");
908
System.out.println(" /**");
909
System.out.println(" * RLE encoded form of INDEX_BY_OFFSET.");
910
System.out.println(" * \@see com.ibm.util.Utility.RLEStringToIntArray");
911
System.out.println(" * >> GENERATED DATA: DO NOT EDIT <<");
912
System.out.println(" */");
913
System.out.println(" static final String INDEX_BY_OFFSET_RLE =");
914
System.out.println(Utility.formatForSource(Utility.arrayToRLEString(INDEX_BY_OFFSET)));
915
System.out.println(" ;\\n");
917
System.out.println(" /**");
918
System.out.println(" * RLE encoded form of INDEX_BY_COUNTRY.");
919
System.out.println(" * \@see com.ibm.util.Utility.RLEStringToIntArray");
920
System.out.println(" * >> GENERATED DATA: DO NOT EDIT <<");
921
System.out.println(" */");
922
System.out.println(" static final String INDEX_BY_COUNTRY_RLE =");
923
System.out.println(Utility.formatForSource(Utility.arrayToRLEString(INDEX_BY_COUNTRY)));
924
System.out.println(" ;\\n");
926
System.out.println(" // END GENERATED SOURCE CODE");
929
static final String[] IDS = {
933
static final int[] DATA = {
937
static final int[] INDEX_BY_NAME_ARRAY = {
941
static final int[] INDEX_BY_OFFSET = {
942
// gmt_offset,default_id,id_count,id_list
946
static final int[] INDEX_BY_COUNTRY = {
951
############################################################
953
############################################################
955
open(OUT, ">$file") or die "Can't open $file for writing: $!";
960
# Emit an HTML file that contains a description of the system zones.
962
# Param: ref to zone hash
963
# Param: ref to rule hash
964
# Param: ref to equiv table
965
# Param: ref to offset index
966
# Param: ref to alias hash
972
my $offsetIndex = shift;
974
my $countries = shift;
976
# These are variables for the template
977
my $_count = scalar keys %{$zones};
978
my $_equiv = scalar @$equiv;
980
# Build table in order of zone offset
981
my $_offsetTable = "<p><table>\n";
982
foreach (sort {$a <=> $b} keys %{$offsetIndex}) {
983
my $aref = $offsetIndex->{$_};
984
my $def = $aref->[0];
985
# Make a slice of 1..n
986
my @b = @{$aref}[1..$#{$aref}];
987
my $gmtoff = "GMT" . formatOffset($_);
990
"<td><a name=\"" . bookmark($gmtoff) . "\">$gmtoff</a></td>" .
992
join(", ", map($_ eq $def ?
993
"<a href=\"#" . bookmark($_) . "\"><b>$_</b></a>" :
994
"<a href=\"#" . bookmark($_) . "\">$_</a>", @b)) .
998
$_offsetTable .= "</table>\n";
1000
# Build table in alphabetical order of zone name
1001
my $_nameTable = "<p><table>\n";
1002
$_nameTable .= "<tr><td>ID</td>";
1003
$_nameTable .= "<td>Offset</td><td>DST Begins</td><td>DST Ends</td>";
1004
$_nameTable .= "<td>Savings</td><td></td></tr>\n";
1006
$_nameTable .= "<tr><td><hr></td>";
1007
$_nameTable .= "<td><hr></td><td><hr></td>";
1008
$_nameTable .= "<td><hr></td><td><hr></td><td></td></tr>\n";
1009
# Need a reverse alias table
1010
my %revaliases = reverse(%$aliases);
1011
foreach my $z (sort keys %$zones) {
1012
$_nameTable .= emitHTMLZone($z, $zones->{$z}, $rules, $offsetIndex,
1013
$aliases, \%revaliases);
1015
$_nameTable .= "</table>\n";
1017
# Build equivalency group table
1018
my $_equivTable = "<p><table>\n";
1019
$_equivTable .= "<tr><td>Offset</td><td>DST Begins</td><td>DST Ends</td>";
1020
$_equivTable .= "<td>Savings</td><td>Zones</td></tr>\n";
1022
$_equivTable .= "<tr><td><hr></td>";
1023
$_equivTable .= "<td><hr></td><td><hr></td>";
1024
$_equivTable .= "<td><hr></td><td><hr></td><td><hr></td></tr>\n";
1026
# Equiv table is sorted elsewhere -- output it in native order
1027
foreach my $eg (@$equiv) {
1028
$_equivTable .= emitHTMLEquiv($eg, $zones, $rules);
1030
$_equivTable .= "</table>\n";
1032
# Build country table
1034
$_countryTable .= "<p><table>\n";
1035
$_countryTable .= "<tr><td>Country</td><td>Zones</td></tr>\n";
1036
$_countryTable .= "<tr><td><hr></td><td><hr></td></tr>\n";
1038
foreach my $country (sort keys %$countries) {
1040
"<tr valign=top><td nowrap>" .
1041
(($country ne 'A') ? ($country . " (" . $COUNTRY_CODES{$country} . ")") : "(None)") .
1043
join(", ", map("<a href=\"#" . bookmark($_) . "\">$_</a>", @{$countries->{$country}->{zones}})) .
1044
#join(", ", @{$countries->{$country}->{zones}}) .
1047
$_countryTable .= "</table>\n";
1050
my $_timeStamp = localtime;
1052
############################################################
1053
# BEGIN HTML TEMPLATE
1054
############################################################
1059
<title>ICU System Time Zones</title>
1064
<h1>ICU System Time Zones</h1>
1069
<td><strong>$RAW_VERSION</strong> ($VERSION_YEAR.$VERSION_SUFFIX)</td>
1072
<td>Total zone count</td>
1073
<td><strong>$_count</strong> in <strong>$_equiv</strong> equivalency groups</td>
1076
<td>Original source</td>
1077
<td><strong><a href="$URL">$URL</a></strong></td>
1081
<td><strong>Alan Liu <a href="mailto:liuas\@us.ibm.com"><liuas\@us.ibm.com></a></strong></td>
1084
<td>This document generated</td>
1085
<td><strong>$_timeStamp</strong></td>
1091
<p>A time zone represents an offset applied to Greenwich Mean Time
1092
(GMT) to obtain local time. The offset may vary throughout the year,
1093
if daylight savings time (DST) is used, or may be the same all year
1094
long. Typically, regions closer to the equator do not use DST. If DST
1095
is in use, then specific rules define the point at which the offset
1096
changes, and the amount by which it changes. Thus, a time zone is
1097
described by the following information:
1100
<li><a name="cols">An</a> identifying string, or ID. This consists only of invariant characters (see the file <code>utypes.h</code>).
1101
It typically has the format <em>continent</em> / <em>city</em>. The city chosen is
1102
not the only city in which the zone applies, but rather a representative city for the
1103
region. Some IDs consist of three or four uppercase letters; these are legacy zone
1104
names that are aliases to standard zone names.</li>
1105
<li>An offset from GMT, either positive or negative. Offsets range from approximately minus
1106
half a day to plus half a day.</li>
1109
<p>If DST is observed, then three additional pieces of information are needed:
1112
<li>The precise date and time during the year when DST begins. This is in the first
1113
half of the year in the northern hemisphere, and in the second half of the year in the
1114
southern hemisphere.</li>
1115
<li>The precise date and time during the year when DST ends. This is in the first half
1116
of the year in the southern hemisphere, and in the second half of the year in the northern
1118
<li>The amount by which the GMT offset changes when DST is in effect. This is almost
1119
always one hour.</li>
1122
<h3>System and User Time Zones</h3>
1124
<p>ICU supports local time zones through the classes
1125
<code>TimeZone</code> and <code>SimpleTimeZone</code> in the C++
1126
API. In the C API, time zones are designated by their ID strings.</p>
1128
<p>Users may construct their own time zone objects by specifying the
1129
above information to the C++ API. However, it is more typical for
1130
users to use a pre-existing system time zone, since these represent
1131
all current international time zones in use. This document lists the
1132
system time zones, both in order of GMT offset, and in alphabetical
1135
<p>Since this list changes one or more times a year, <em>this document
1136
only represents a snapshot</em>. For the current list of ICU system
1137
zones, use the method <code>TimeZone::getAvailableIDs()</code>.</p>
1141
<p><a name="order">The</a> zones are listed in binary sort order. That is, 'A' through
1142
'Z' come before 'a' through 'z'. This is the same order in which the
1143
zones are stored internally, and the same order in which they are
1144
returned by <code>TimeZone::getAvailableIDs()</code>. The reason for
1145
this is that ICU locates zones using a binary search, and the binary
1146
search relies on this sort order.</p>
1148
<p>You may notice that zones such as <a href="#EtcGMTp1">Etc/GMT+1</a>
1149
appear to have the wrong sign for their GMT offset. In fact, their
1150
sign is inverted because the Etc zones follow the POSIX sign
1151
conventions. This is the way the original Olson data is set up, and
1152
ICU reproduces the Olson data faithfully, including this confusing
1153
aspect. See the Olson files for more details.
1157
<p>The ICU system time zones are derived from the Olson data at <a
1158
href="$URL">$URL</a>. This is the data used by UNIX systems and is
1159
updated one or more times each year. Unlike the Olson zone data, ICU
1160
only contains data for current zone usage. There is no support for
1161
historical zone data in ICU at this time.</p>
1165
<h2>Time Zones in order of GMT offset</h2>
1167
<p>Each zone listed in <strong>bold</strong> is the default zone for a
1168
given GMT offset. This default is used by ICU if it cannot identify
1169
the host OS time zone by name. In that case, it uses the default zone
1170
for the host zone offset.</p>
1175
<h2>Time Zones in order of ID</h2>
1177
<p>Zones listed in <strong>bold</strong> are the default zone for their
1178
GMT offset. This default is used by ICU if it cannot identify the host
1179
OS time zone by name. In that case, it uses the default zone for the
1180
host zone offset. See above for a description of <a
1181
href="#cols">columns</a>. See note above for an explanation of the
1182
sort <a href="#order">order</a>.</p>
1184
<p>Times suffixed with 's' are in standard time. Times suffixed with 'u' are in UTC time.
1185
Times without suffixes are in wall time (that is, either standard time or daylight savings
1186
time, depending on which is in effect).</p>
1191
<h2>Time Zone Equivalency Groups</h2>
1193
<p>ICU groups zones into <em>equivalency groups</em>. These are
1194
groups of zones that are identical in GMT offset and in rules, but
1195
that have different IDs. Knowledge of equivalency groups allows ICU
1196
to reduce the amount of data stored. More importantly, it allows ICU
1197
to apply data for one zone to other equivalent zones when appropriate
1198
(e.g., in formatting). Equivalency groups are formed at build time,
1199
not at runtime, so the runtime cost to lookup the equivalency group of
1200
a given zone is negligible.</p>
1205
<h2>Time Zones by Country</h2>
1207
<p>ICU captures and exports the country data from the Olson database.
1208
The country code is the ISO 3166 two-letter code. Some zones have no
1209
associated country; these are listed under the entry "(None)".
1215
############################################################
1217
############################################################
1219
open(HTML, ">$file") or die "Can't open $file for writing: $!";
1224
# Make a bookmark name out of a string. This just means normalizing
1225
# non-word characters.
1234
# Emit an equivalency group as an HTML table row. Return the string.
1235
# Param: ref to array of zone IDs
1236
# Param: ref to zone hash
1237
# Param: ref to rule hash
1242
local $_ = "<tr valign=top>";
1243
$_ .= _emitHTMLZone($zone->{$eg->[0]}, $rule);
1244
# Don't sort @$eg -- output in native order
1245
$_ .= "<td>" . join(" ", @$eg) . "</td>";
1250
# Emit a zone description without ID, alias info etc.
1251
# Param: zone OBJECT hash ref
1252
# Param: rule hash ref
1254
my ($zone, $rules) = @_;
1255
my $gmtoff = "GMT" . formatOffset(TZ::ParseOffset($zone->{gmtoff}));
1256
local $_ = "<td><a href=\"#" . bookmark($gmtoff) . "\">$gmtoff</a></td>";
1257
if ($zone->{rule} ne $TZ::STANDARD) {
1258
my $rule = $rules->{$zone->{rule}};
1259
$_ .= "<td nowrap>" . emitHTMLRule($rule->[0]) . "</td>";
1260
$_ .= "<td nowrap>" . emitHTMLRule($rule->[1]) . "</td>";
1261
$_ .= "<td>" . $rule->[0]->{save} . "</td>";
1263
$_ .= "<td colspan=3></td>";
1268
# Emit a single zone description as HTML table row. Return the string.
1270
# Param: Zone hash object ref
1271
# Param: Ref to rules hash
1272
# Param: ref to offset index
1273
# Param: ref to alias hash
1274
# Param: ref to reverse alias hash
1276
my ($name, $zone, $rules, $offsetIndex, $aliases, $revaliases) = @_;
1277
my $isDefault = isDefault($name, $zone->{gmtoff}, $offsetIndex);
1278
my $alias = exists $aliases->{$name} ? $aliases->{$name} : '';
1279
my $revalias = exists $revaliases->{$name} ? $revaliases->{$name} : '';
1280
local $_ = "<tr><td>" . ($isDefault?"<b>":"") .
1281
"<a name=\"" . bookmark($name) . "\">$name</a>" . ($isDefault?"</b>":"") . "</td>";
1282
$_ .= _emitHTMLZone($zone, $rules);
1284
$_ .= "<td><em>alias for</em> <a href=\"#" .
1285
bookmark($alias) . "\">$alias</a></td>";
1286
} elsif ($revalias) {
1287
$_ .= "<td><em>alias </em> <a href=\"#" .
1288
bookmark($revalias) . "\">$revalias</a></td>";
1296
# Emit a zone rule as HTML. Return the string.
1297
# Param: Rule hash object ref
1300
$rule->{in} ." ". $rule->{on} ." ". $rule->{at};
1303
# Read the alias list and create clones with alias names. This
1304
# sub should be called AFTER all standard zones have been read in.
1305
# Param: File name of alias list
1306
# Param: Ref to zone hash
1307
# Param: Ref to LINK hash
1308
# Return: Ref to hash of {alias name -> zone name}
1309
sub incorporateAliases {
1310
my $aliasFile = shift;
1316
open(IN,$aliasFile) or die "Can't open $aliasFile: $!";
1318
s/\#.*//; # Trim comments
1319
next unless (/\S/); # Skip blank lines
1320
if (/^\s*(\S+)\s+(\S+)\s*$/) {
1321
my ($alias, $original) = ($1, $2);
1322
if (exists $zones->{$alias}) {
1323
die "Bad alias in $aliasFile: $alias is a standard UNIX zone. " .
1324
"Please remove $alias from the alias table.\n";
1326
if (!exists $zones->{$original}) {
1327
die "Bad alias in $aliasFile: $alias maps to the nonexistent " .
1328
"zone $original. Please fix this entry in the alias table.\n";
1330
if (exists $links->{$alias} &&
1331
$links->{$alias} ne $original) {
1332
print STDERR "Warning: Alias $alias for $original exists as link for ",
1333
$links->{$alias}, "\n";
1336
$zones->{$alias} = $zones->{$original};
1337
$hash{$alias} = $original;
1340
die "Bad line in alias table $aliasFile: $_\n";
1343
print "Incorporated $n aliases from $aliasFile\n";
1348
# Format a time zone as a machine-readable line of text. Another
1349
# tool will read this line to construct a binary data structure
1350
# representing this zone.
1353
# Param: Ref to hash of all rules
1354
# Return: Two array refs, one to the specs, one to the notes
1355
sub formatZone { # ($z, $ZONES{$z}, \%RULES)
1361
#my @notes = ( $name );
1365
push @notes, ($zone->{gmtoff}=~/^-/?"GMT":"GMT+") . $zone->{gmtoff};
1366
push @spec, TZ::ParseOffset($zone->{gmtoff});
1368
#|rawOffset The new SimpleTimeZone's raw GMT offset
1369
#|ID The new SimpleTimeZone's time zone ID.
1370
#|startMonth The daylight savings starting month. Month is
1371
#| 0-based. eg, 0 for January.
1372
#|startDay The daylight savings starting
1373
#| day-of-week-in-month. See setStartRule() for a
1374
#| complete explanation.
1375
#|startDayOfWeek The daylight savings starting day-of-week. See
1376
#| setStartRule() for a complete explanation.
1377
#|startTime The daylight savings starting time, expressed as the
1378
#| number of milliseconds after midnight.
1379
#|endMonth The daylight savings ending month. Month is
1380
#| 0-based. eg, 0 for January.
1381
#|endDay The daylight savings ending day-of-week-in-month.
1382
#| See setStartRule() for a complete explanation.
1383
#|endDayOfWeek The daylight savings ending day-of-week. See
1384
#| setStartRule() for a complete explanation.
1385
#|endTime The daylight savings ending time, expressed as the
1386
#| number of milliseconds after midnight.
1388
my $rule = $zone->{rule};
1389
if ($rule ne $TZ::STANDARD) {
1390
$rule = $rules->{$rule};
1391
# $rule is now an array ref, with [0] being the onset and
1392
# [1] being the cease.
1394
formatRule($rule->[0], \@spec, \@notes); # Onset
1395
formatRule($rule->[1], \@spec, \@notes); # Cease
1397
my @a = parseTime($rule->[0]->{save});
1399
die "Strange DST savings value: \"$rule->[0]->{save}\"";
1401
push @notes, $rule->[0]->{save};
1408
# Format a rule and return the string
1409
# Param: reference to rule hash
1410
# Param: ref to spec array (this is a result param)
1411
# Param: ref to annotation array (this is a result param)
1416
push @$notes, $rule->{in}, $rule->{on}, $rule->{at};
1417
push @$spec, parseMonth($rule->{in}); # Month
1418
push @$spec, parseDaySpecifier($rule->{on}); # Day
1419
push @$spec, parseTime($rule->{at}); # Time
1422
# Format an offset in seconds and return a string of the form
1423
# /[+-]\d{1,2}:\d\d(:\d\d)?/.
1424
# Param: Offset in seconds
1428
my $result = $_<0 ? "-":"+";
1429
$_ = -$_ if ($_ < 0);
1430
my $sec = $_ % 60; $_ = ($_ - $sec) / 60;
1431
my $min = $_ % 60; $_ = ($_ - $min) / 60;
1432
$min = "0$min" if ($min < 10);
1433
$sec = $sec ? ($sec < 10 ? ":0$sec" : ":$sec") : "";
1434
$result . $_ . ":" . $min . $sec;
1437
# Parse a time of the format dd:dds, where s is a suffix character.
1438
# Return the time, in minutes, and the suffix, in an array.
1439
# Only the suffixes 's' and 'u' are recognized.
1440
# Param: String, with optional suffix
1441
# Return: Array ( minutes, suffix ). If no suffix, 'w' is used.
1444
if (/^(\d{1,2}):(\d\d)([su])?$/) {
1445
my $a = ($1*60) + $2;
1446
my $s = defined $3?$3:'w';
1449
die "Cannot parse time \"$_\"";
1453
# Given a month string, return an integer from 0 (Jan) to 11 (Dec).
1455
# Return: Int 0..11.
1458
for (my $i=0; $i<12; $i++) {
1459
return $i if (/$MONTH[$i]/i);
1461
die "Can't parse month \"$_\"";
1464
# Given a specifier for the day of the month on which a rule triggers,
1465
# return an array of two integers encoding that information. We use
1466
# the ICU/java.util.SimpleTimeZone encoding scheme using two integers.
1467
# We return the two integers in an array of ( dowim dow ).
1468
# Param: String, such as
1469
# 1, 12, 15, 18, 2, 20, 21, 22, 23, 25, 28, 3, 30, 31, 4, 7, Fri>=1,
1470
# Fri>=15, Sat>=1, Sat>=15, Sun<=14, Sun>=1, Sun>=10, Sun>=11, Sun>=15,
1471
# Sun>=16, Sun>=23, Sun>=8, Sun>=9, lastFri, lastSun, lastThu
1472
# This is the {on} field of the rule hash.
1473
# Return: Array of two integers, ( dowim dow ).
1474
# The dow has Sunday = 1 .. Saturday = 7.
1475
sub parseDaySpecifier {
1478
#|+If both dayOfWeekInMonth and dayOfWeek are positive, they specify the
1479
#| day of week in the month (e.g., (2, WEDNESDAY) is the second Wednesday
1481
#|+If dayOfWeek is positive and dayOfWeekInMonth is negative, they specify
1482
#| the day of week in the month counting backward from the end of the month.
1483
#| (e.g., (-1, MONDAY) is the last Monday in the month)
1484
#|+If dayOfWeek is zero and dayOfWeekInMonth is positive, dayOfWeekInMonth
1485
#| specifies the day of the month, regardless of what day of the week it is.
1486
#| (e.g., (10, 0) is the tenth day of the month)
1487
#|+If dayOfWeek is zero and dayOfWeekInMonth is negative, dayOfWeekInMonth
1488
#| specifies the day of the month counting backward from the end of the
1489
#| month, regardless of what day of the week it is (e.g., (-2, 0) is the
1490
#| next-to-last day of the month).
1491
#|+If dayOfWeek is negative and dayOfWeekInMonth is positive, they specify the
1492
#| first specified day of the week on or after the specified day of the month.
1493
#| (e.g., (15, -SUNDAY) is the first Sunday after the 15th of the month
1494
#| [or the 15th itself if the 15th is a Sunday].)
1495
#|+If dayOfWeek and dayOfWeekInMonth are both negative, they specify the
1496
#| last specified day of the week on or before the specified day of the month.
1497
#| (e.g., (-20, -TUESDAY) is the last Tuesday before the 20th of the month
1498
#| [or the 20th itself if the 20th is a Tuesday].)
1501
# >0 >0 day of week in month
1502
# <0 >0 day of week in month (from end)
1504
# <0 0 day of month (from end; -1 is last dom)
1505
# >0 <0 first dow on or after dom
1506
# <0 <0 last dow on or before dom
1511
# Check for straight DOM
1515
return ( $dowim, $dow );
1518
# Anything else must have a dow embedded in it; parse it out
1519
my @DOW = ( 'Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat' );
1520
for (my $i=0; $i<@DOW; $i++) {
1527
die "Cannot parse day specifier \"$_\"";
1530
# Now we have either >=n, <=n, last, or first.
1533
} elsif (/^first$/) {
1535
} elsif (/^>=(\d+)$/) {
1538
} elsif (/^<=(\d+)$/) {
1542
die "Cannot parse day specifier \"$_\"";
1548
# Confirm that the given ID contains only invariant characters.
1549
# See utypes.h for an explanation.
1550
# Param: string to be checked
1551
sub assertInvariantChars {
1553
if (/[^A-Za-z0-9 \"%&\'()*+,-.\/:;<=>?_]/) {
1554
die "Error: Zone ID \"$_\" contains non-invariant characters\n";
1558
# Map ID to equivalency table index. Return the index of the given ID
1559
# in the equivalency array. The array contains array refs. Each ref
1560
# points to an array of strings.
1562
# Param: Ref to equiv array (ref to array of refs to arrays of IDs)
1563
# Return: Index into array where ID is found, or -1 if not found
1564
# NOTE: This function can be eliminated by generating a reverse
1565
# mapping hash when we create the equivalency table.
1569
for (my $i=0; $i < scalar @{$a}; ++$i) {
1572
return $i if ($_ eq $id);