1
# Copyright (C) 2008-2009 Sun Microsystems, Inc. All rights reserved.
2
# Use is subject to license terms.
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; version 2 of the License.
8
# This program is distributed in the hope that it will be useful, but
9
# WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
# General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19
# This grammar is an example on how to create grammars that operate against tables
20
# which have different structure and it only makes sense to join them in certain
21
# ways and on certain fields. The following principles apply:
23
# * The first table is always the ontime table, with alias a1. This provies a fixed reference
24
# point for subsequent joins
26
# * The SELECT list is either SELECT * , SELECT COUNT(*) , or may use the fact that the fields
27
# in the a1 table are always known
29
# * There is always a join, therefore there is always an a2 table, however it may be one of
32
# * Each potential join is listed separately with a specific join condition that should be realistic
34
# * In the WHERE clause, if we generate a condition for which the table is not present, the condition is
35
# commented out in order to avoid semantic errors
41
# The queries from this grammar may produce resultsets of varying length
42
# to avoid excessive memory usage, we reduce all queries to a COUNT(*)
43
# This way, we also do not have to tinker with any field names in the SELECT list
46
{ $alias_count = 0 ; %tables = () ; %aliases = () ; return undef ; }
50
SELECT aggregate_item FROM join_list WHERE where |
51
SELECT a1 . * FROM join_list WHERE where ORDER BY a1 . `id` LIMIT _digit ;
54
COUNT(*) | MIN(a1 . dep_time) | SUM( distinct a1 . distance) | MAX(a1 . id) | COUNT( distinct a1 . tail_num ) ;
60
# We divide the joins into "big", those containing the `ontime` table, and
61
# small, those containing stuff like states and ZIP codes.
66
( big_join_item ) third_join_item |
67
( big_join_item ) CROSS JOIN small_join_item ;
70
ontime2carrier | ontime2airport | ontime2aircraft ;
73
airport2state | airport2zipcode | aircraft2engine | airport2remark;
76
first2carrier | first2airport | first2aircraft ;
79
# Here we define only joins that are meaningful, useful and very likely to use indexes.
83
ontime_table INNER JOIN carrier_table ON ( previous_table . `carrier` = current_table . `code` );
86
INNER JOIN carrier_table ON ( a1 . `carrier` = current_table . `code` );
89
ontime_table INNER JOIN airport_table ON ( previous_table . origin_destination = current_table .`code` ) ;
92
INNER JOIN airport_table ON ( a1 . origin_destination = current_table .`code` );
95
`origin` | `destination` ;
98
ontime_table INNER JOIN aircraft_table ON ( current_table .`tail_num` = previous_table .`tail_num` ) ;
101
INNER JOIN aircraft_table ON ( a1 .`tail_num` = current_table .`tail_num` );
104
airport_table INNER JOIN state_table ON ( previous_table . `state` = current_table . `state_code` );
107
airport_table INNER JOIN zipcode_table ON ( previous_table . `state` = current_table . `state_code` );
110
airport_table INNER JOIN remark_table USING ( `site_number` ) ;
113
aircraft_table INNER JOIN engine_table USING ( `aircraft_engine_code` ) ;
116
`ontime_mysiam` { $table_name = 'ontime'; return undef; } new_table;
119
`carriers` { $table_name = 'carriers'; return undef; } new_table ;
122
`airports` { $table_name = 'airports'; return undef; } new_table ;
125
`airport_remarks` { $table_name = 'airport_remarks' ; return undef; } new_table ;
128
`aircraft` { $table_name = 'aircraft'; return undef; } new_table;
131
`aircraft_engines` { $table_name = 'aircraft_engines' ; return undef ; } new_table;
134
`states` { $table_name = 'states' ; return undef; } new_table ;
137
`zipcodes` { $table_name = 'zipcodes' ; return undef; } new_table ;
140
# We always have a WHERE and it contains a lot of expressions combined with an AND in order to provide
141
# numerous opportunities for optimization and reduce calculation times.
142
# In addition, we always define at least one condition against the `ontime` table.
146
{ $condition_table = 'ontime' ; return undef; } start_condition ontime_condition end_condition AND
150
where_condition AND where_condition AND where_condition AND where_condition AND where_condition AND where_condition ;
153
# Each of the conditions described below are valid and meaningful for the particular table in question
154
# They are likely to use indexes and/or zero down on a smaller number of records
158
{ $condition_table = 'ontime' ; return undef; } start_condition ontime_condition end_condition |
159
{ $condition_table = 'carriers' ; return undef; } start_condition carrier_condition end_condition |
160
{ $condition_table = 'aircraft'; return undef; } start_condition aircraft_condition end_condition |
161
{ $condition_table = 'airports'; return undef; } start_condition airport_condition end_condition |
162
{ $condition_table = 'states'; return undef; } start_condition state_condition end_condition |
163
{ $condition_table = 'zipcodes'; return undef; } start_condition zipcode_condition end_condition |
164
{ $condition_table = 'airport_remarks'; return undef; } start_condition remark_condition end_condition |
165
{ $condition_table = 'aircraft_engines'; return undef; } start_condition engine_condition end_condition ;
168
table_alias . `carrier` generic_carrier_expression |
169
table_alias . `origin` generic_code_expression |
170
table_alias . `destination` generic_code_expression |
171
table_alias . `origin` generic_code_expression AND table_alias . `destination` generic_code_expression |
172
table_alias . `origin` generic_code_expression OR table_alias . `destination` generic_code_expression |
173
table_alias . `tail_num` generic_char_expression ;
176
table_alias . `state_code` generic_state_expression |
177
table_alias . `name` generic_char_expression ;
180
table_alias . `zipcode` BETWEEN 10000 + ( _tinyint_unsigned * 100) AND 10000 + ( _tinyint_unsigned * 100) ;
183
{ my $alias = shift @{$aliases{$condition_table}}; push @{$aliases{$condition_table}} , $alias ; return $alias } ;
186
table_alias . `code` generic_carrier_expression;
188
generic_carrier_expression:
190
IN ( carrier_list ) ;
193
table_alias . `code` generic_code_expression |
194
table_alias . `state` generic_state_expression |
195
( table_alias . `state` generic_state_expression ) AND ( table_alias . `city` generic_char_expression) |
196
table_alias . `longitude` BETWEEN _tinyint AND _tinyint_unsigned ;
199
table_alias . `tail_num` generic_char_expression |
200
table_alias . `state` generic_state_expression ;
203
table_alias . `manufacturer` generic_char_expression ;
206
table_alias . `airport_remark_id` BETWEEN _tinyint_unsigned AND _smallint_unsigned ;
208
generic_char_expression:
209
BETWEEN _char[invariant] AND CHAR(ASCII( _char[invariant] ) + one_two ) |
210
LIKE 'N10%' | # 6098 aircraft with tail num starting with N10
211
LIKE 'N9Q%' ; # 10 aircraft starting with N9Q
216
generic_code_expression:
217
# BETWEEN _char[invariant] AND CHAR(ASCII( _char[invariant] ) + one_two ) |
219
IN ( airport_list ) ;
222
'ORD' | # busiest airport
223
'AKN' | # un-busiest airport
224
'BIS' | # 100 flights
225
'LIT' | # 1000 flights
226
'MSP' ; # 10000 flights
230
single_airport , airport_list ;
232
generic_state_expression:
235
BETWEEN _char(2) AND _char(2) ;
239
single_state , state_list ;
243
single_carrier , carrier_list ;
246
'AK' | 'AL' | 'AR' | 'AS' | 'AZ' | 'CA' | 'CO' | 'CQ' | 'CT' | 'DC' | 'DE' | 'FL' | 'GA' | 'GU' | 'HI' | 'IA' | 'ID' | 'IL' | 'IN' | 'KS' | 'KY' | 'LA' | 'MA' | 'MD' | 'ME' | 'MI' | 'MN' | 'MO' | 'MQ' | 'MS' | 'MT' | 'NC' | 'ND' | 'NE' | 'NH' | 'NJ' | 'NM' | 'NV' | 'NY' | 'OH' | 'OK' | 'OR' | 'PA' | 'PR' | 'RI' | 'SC' | 'SD' | 'TN' | 'TX' | 'UT' | 'VA' | 'VI' | 'VT' | 'WA' | 'WI' | 'WQ' | 'WV' | 'WY' ;
249
'AA'|'AQ'|'AS'|'B6'|'CO'|'DH'|'DL'|'EV'|'FL'|'HA'|'HP'|'MQ'|'NW'|'OH'|'OO'|'RU'|'TW'|'TZ'|'UA'|'US'|'WN';
252
# When we define a condition, we check if the table for which this condition would apply is present in
253
# the list of the tables we selected for joining. If the table is not present, the condition is still
254
# generated, but it is commented out in order to avoid "unknown table" errors.
258
{ ((exists $tables{$condition_table}) ? '' : '/* ') } ;
261
{ ((exists $tables{$condition_table}) ? '' : '*/ 1 = 1 ') };
264
AS { $alias_count++ ; $tables{$table_name}++ ; push @{$aliases{$table_name}}, 'a'.$alias_count ; return 'a'.$alias_count } ;
267
{ 'a'.$alias_count };
270
{ 'a'.($alias_count - 1) };