1
/* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
2
* vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
4
* Copyright (C) 2008 Sun Microsystems
6
* This program is free software; you can redistribute it and/or modify
7
* it under the terms of the GNU General Public License as published by
8
* the Free Software Foundation; version 2 of the License.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
#ifndef DRIZZLED_ITEM_SUM_H
21
#define DRIZZLED_ITEM_SUM_H
23
/* classes for sum functions */
26
#include "drizzled/my_tree.h"
27
#include <drizzled/hybrid_type.h>
28
#include <drizzled/item.h>
29
#include <drizzled/item/field.h>
30
#include <drizzled/item/bin_string.h>
35
int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
38
int group_concat_key_cmp_with_order(void* arg, const void* key1,
45
Class Item_sum is the base class used for special expressions that SQL calls
46
'set functions'. These expressions are formed with the help of aggregate
47
functions such as SUM, MAX, GROUP_CONCAT etc.
51
A set function cannot be used in certain positions where expressions are
52
accepted. There are some quite explicable restrictions for the usage of
56
SELECT AVG(b) FROM t1 WHERE SUM(b) > 20 GROUP by a
57
the usage of the set function AVG(b) is legal, while the usage of SUM(b)
58
is illegal. A WHERE condition must contain expressions that can be
59
evaluated for each row of the table. Yet the expression SUM(b) can be
60
evaluated only for each group of rows with the same value of column a.
62
SELECT AVG(b) FROM t1 WHERE c > 30 GROUP BY a HAVING SUM(b) > 20
63
both set function expressions AVG(b) and SUM(b) are legal.
65
We can say that in a query without nested selects an occurrence of a
66
set function in an expression of the SELECT list or/and in the HAVING
67
clause is legal, while in the WHERE clause it's illegal.
69
The general rule to detect whether a set function is legal in a query with
70
nested subqueries is much more complicated.
72
Consider the the following query:
73
SELECT t1.a FROM t1 GROUP BY t1.a
74
HAVING t1.a > ALL (SELECT t2.c FROM t2 WHERE SUM(t1.b) < t2.c).
75
The set function SUM(b) is used here in the WHERE clause of the subquery.
76
Nevertheless it is legal since it is under the HAVING clause of the query
77
to which this function relates. The expression SUM(t1.b) is evaluated
78
for each group defined in the main query, not for groups of the subquery.
80
The problem of finding the query where to aggregate a particular
81
set function is not so simple as it seems to be.
84
SELECT t1.a FROM t1 GROUP BY t1.a
85
HAVING t1.a > ALL(SELECT t2.c FROM t2 GROUP BY t2.c
86
HAVING SUM(t1.a) < t2.c)
87
the set function can be evaluated for both outer and inner selects.
88
If we evaluate SUM(t1.a) for the outer query then we get the value of t1.a
89
multiplied by the cardinality of a group in table t1. In this case
90
in each correlated subquery SUM(t1.a) is used as a constant. But we also
91
can evaluate SUM(t1.a) for the inner query. In this case t1.a will be a
92
constant for each correlated subquery and summation is performed
93
for each group of table t2.
94
(Here it makes sense to remind that the query
95
SELECT c FROM t GROUP BY a HAVING SUM(1) < a
96
is quite legal in our SQL).
98
So depending on what query we assign the set function to we
99
can get different result sets.
101
The general rule to detect the query where a set function is to be
102
evaluated can be formulated as follows.
103
Consider a set function S(E) where E is an expression with occurrences
104
of column references C1, ..., CN. Resolve these column references against
105
subqueries that contain the set function S(E). Let Q be the innermost
106
subquery of those subqueries. (It should be noted here that S(E)
107
in no way can be evaluated in the subquery embedding the subquery Q,
108
otherwise S(E) would refer to at least one unbound column reference)
109
If S(E) is used in a construct of Q where set functions are allowed then
110
we evaluate S(E) in Q.
111
Otherwise we look for a innermost subquery containing S(E) of those where
112
usage of S(E) is allowed.
114
Let's demonstrate how this rule is applied to the following queries.
116
1. SELECT t1.a FROM t1 GROUP BY t1.a
117
HAVING t1.a > ALL(SELECT t2.b FROM t2 GROUP BY t2.b
118
HAVING t2.b > ALL(SELECT t3.c FROM t3 GROUP BY t3.c
119
HAVING SUM(t1.a+t2.b) < t3.c))
120
For this query the set function SUM(t1.a+t2.b) depends on t1.a and t2.b
121
with t1.a defined in the outermost query, and t2.b defined for its
122
subquery. The set function is in the HAVING clause of the subquery and can
123
be evaluated in this subquery.
125
2. SELECT t1.a FROM t1 GROUP BY t1.a
126
HAVING t1.a > ALL(SELECT t2.b FROM t2
127
WHERE t2.b > ALL (SELECT t3.c FROM t3 GROUP BY t3.c
128
HAVING SUM(t1.a+t2.b) < t3.c))
129
Here the set function SUM(t1.a+t2.b)is in the WHERE clause of the second
130
subquery - the most upper subquery where t1.a and t2.b are defined.
131
If we evaluate the function in this subquery we violate the context rules.
132
So we evaluate the function in the third subquery (over table t3) where it
133
is used under the HAVING clause.
135
3. SELECT t1.a FROM t1 GROUP BY t1.a
136
HAVING t1.a > ALL(SELECT t2.b FROM t2
137
WHERE t2.b > ALL (SELECT t3.c FROM t3
138
WHERE SUM(t1.a+t2.b) < t3.c))
139
In this query evaluation of SUM(t1.a+t2.b) is not legal neither in the second
140
nor in the third subqueries. So this query is invalid.
142
Mostly set functions cannot be nested. In the query
143
SELECT t1.a from t1 GROUP BY t1.a HAVING AVG(SUM(t1.b)) > 20
144
the expression SUM(b) is not acceptable, though it is under a HAVING clause.
145
Yet it is acceptable in the query:
146
SELECT t.1 FROM t1 GROUP BY t1.a HAVING SUM(t1.b) > 20.
148
An argument of a set function does not have to be a reference to a table
149
column as we saw it in examples above. This can be a more complex expression
150
SELECT t1.a FROM t1 GROUP BY t1.a HAVING SUM(t1.b+1) > 20.
151
The expression SUM(t1.b+1) has a very clear semantics in this context:
152
we sum up the values of t1.b+1 where t1.b varies for all values within a
153
group of rows that contain the same t1.a value.
155
A set function for an outer query yields a constant within a subquery. So
156
the semantics of the query
157
SELECT t1.a FROM t1 GROUP BY t1.a
158
HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c
159
HAVING AVG(t2.c+SUM(t1.b)) > 20)
160
is still clear. For a group of the rows with the same t1.a values we
161
calculate the value of SUM(t1.b). This value 's' is substituted in the
163
SELECT t2.c FROM t2 GROUP BY t2.c HAVING AVG(t2.c+s)
164
than returns some result set.
166
By the same reason the following query with a subquery
167
SELECT t1.a FROM t1 GROUP BY t1.a
168
HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c
169
HAVING AVG(SUM(t1.b)) > 20)
174
Three methods were added to the class to check the constraints specified
175
in the previous section. These methods utilize several new members.
177
The field 'nest_level' contains the number of the level for the subquery
178
containing the set function. The main SELECT is of level 0, its subqueries
179
are of levels 1, the subqueries of the latter are of level 2 and so on.
181
The field 'aggr_level' is to contain the nest level of the subquery
182
where the set function is aggregated.
184
The field 'max_arg_level' is for the maximun of the nest levels of the
185
unbound column references occurred in the set function. A column reference
186
is unbound within a set function if it is not bound by any subquery
187
used as a subexpression in this function. A column reference is bound by
188
a subquery if it is a reference to the column by which the aggregation
189
of some set function that is used in the subquery is calculated.
190
For the set function used in the query
191
SELECT t1.a FROM t1 GROUP BY t1.a
192
HAVING t1.a > ALL(SELECT t2.b FROM t2 GROUP BY t2.b
193
HAVING t2.b > ALL(SELECT t3.c FROM t3 GROUP BY t3.c
194
HAVING SUM(t1.a+t2.b) < t3.c))
195
the value of max_arg_level is equal to 1 since t1.a is bound in the main
196
query, and t2.b is bound by the first subquery whose nest level is 1.
197
Obviously a set function cannot be aggregated in the subquery whose
198
nest level is less than max_arg_level. (Yet it can be aggregated in the
199
subqueries whose nest level is greater than max_arg_level.)
201
SELECT t.a FROM t1 HAVING AVG(t1.a+(SELECT MIN(t2.c) FROM t2))
202
the value of the max_arg_level for the AVG set function is 0 since
203
the reference t2.c is bound in the subquery.
205
The field 'max_sum_func_level' is to contain the maximum of the
206
nest levels of the set functions that are used as subexpressions of
207
the arguments of the given set function, but not aggregated in any
208
subquery within this set function. A nested set function s1 can be
209
used within set function s0 only if s1.max_sum_func_level <
210
s0.max_sum_func_level. Set function s1 is considered as nested
211
for set function s0 if s1 is not calculated in any subquery
214
A set function that is used as a subexpression in an argument of another
215
set function refers to the latter via the field 'in_sum_func'.
217
The condition imposed on the usage of set functions are checked when
218
we traverse query subexpressions with the help of the recursive method
219
fix_fields. When we apply this method to an object of the class
220
Item_sum, first, on the descent, we call the method init_sum_func_check
221
that initialize members used at checking. Then, on the ascent, we
222
call the method check_sum_func that validates the set function usage
223
and reports an error if it is illegal.
224
The method register_sum_func serves to link the items for the set functions
225
that are aggregated in the embedding (sub)queries. Circular chains of such
226
functions are attached to the corresponding Select_Lex structures
227
through the field inner_sum_func_list.
229
Exploiting the fact that the members mentioned above are used in one
230
recursive function we could have allocated them on the thread stack.
231
Yet we don't do it now.
233
We assume that the nesting level of subquries does not exceed 127.
234
TODO: to catch queries where the limit is exceeded to make the
239
class Item_sum :public Item_result_field
243
{ COUNT_FUNC, COUNT_DISTINCT_FUNC, SUM_FUNC, SUM_DISTINCT_FUNC, AVG_FUNC,
244
AVG_DISTINCT_FUNC, MIN_FUNC, MAX_FUNC, STD_FUNC,
245
VARIANCE_FUNC, SUM_BIT_FUNC, GROUP_CONCAT_FUNC
248
Item **args, *tmp_args[2];
249
Item **ref_by; /* pointer to a ref to the object used to register it */
250
Item_sum *next; /* next in the circular chain of registered objects */
252
Item_sum *in_sum_func; /* embedding set function if any */
253
Select_Lex * aggr_sel; /* select where the function is aggregated */
254
int8_t nest_level; /* number of the nesting level of the set function */
255
int8_t aggr_level; /* nesting level of the aggregating subquery */
256
int8_t max_arg_level; /* max level of unbound column references */
257
int8_t max_sum_func_level;/* max level of aggregation for embedded functions */
258
bool quick_group; /* If incremental update of fields */
260
This list is used by the check for mixing non aggregated fields and
261
sum functions in the ONLY_FULL_GROUP_BY_MODE. We save all outer fields
262
directly or indirectly used under this function it as it's unclear
263
at the moment of fixing outer field whether it's aggregated or not.
265
List<Item_field> outer_fields;
268
table_map used_tables_cache;
273
void mark_as_sum_func();
274
Item_sum() :arg_count(0), quick_group(1), forced_const(false)
278
Item_sum(Item *a) :args(tmp_args), arg_count(1), quick_group(1),
284
Item_sum( Item *a, Item *b ) :args(tmp_args), arg_count(2), quick_group(1),
287
args[0]=a; args[1]=b;
290
Item_sum(List<Item> &list);
291
//Copy constructor, need to perform subselects with temporary tables
292
Item_sum(Session *session, Item_sum *item);
293
enum Type type() const { return SUM_FUNC_ITEM; }
294
virtual enum Sumfunctype sum_func () const=0;
297
This method is similar to add(), but it is called when the current
298
aggregation group changes. Thus it performs a combination of
301
inline bool reset() { clear(); return add(); };
304
Prepare this item for evaluation of an aggregate value. This is
305
called by reset() when a group changes, or, for correlated
306
subqueries, between subquery executions. E.g. for COUNT(), this
307
method should set count= 0;
309
virtual void clear()= 0;
312
This method is called for the next row in the same group. Its
313
purpose is to aggregate the new value to the previous values in
314
the group (i.e. since clear() was called last time). For example,
315
for COUNT(), do count++.
317
virtual bool add()=0;
320
Called when new group is started and results are being saved in
321
a temporary table. Similar to reset(), but must also store value in
322
result_field. Like reset() it is supposed to reset start value to
324
This set of methods (reult_field(), reset_field, update_field()) of
325
Item_sum is used only if quick_group is not null. Otherwise
326
copy_or_same() is used to obtain a copy of this item.
328
virtual void reset_field()=0;
330
Called for each new value in the group, when temporary table is in use.
331
Similar to add(), but uses temporary table field to obtain current value,
332
Updated value is then saved in the field.
334
virtual void update_field()=0;
335
virtual bool keep_field_type(void) const { return 0; }
336
virtual void fix_length_and_dec() { maybe_null=1; null_value=1; }
338
This method is used for debug purposes to print the name of an
339
item to the debug log. The second use of this method is as
340
a helper function of print(), where it is applicable.
341
To suit both goals it should return a meaningful,
342
distinguishable and sintactically correct string. This method
343
should not be used for runtime type identification, use enum
344
{Sum}Functype and Item_func::functype()/Item_sum::sum_func()
347
NOTE: for Items inherited from Item_sum, func_name() return part of
348
function name till first argument (including '(') to make difference in
349
names for functions with 'distinct' clause and without 'distinct' and
350
also to make printing of items inherited from Item_sum uniform.
352
virtual const char *func_name() const= 0;
353
virtual Item *result_item(Field *field)
354
{ return new Item_field(field); }
355
table_map used_tables() const { return used_tables_cache; }
356
void update_used_tables ();
362
bool is_null() { return null_value; }
365
used_tables_cache= 0;
368
virtual bool const_item() const { return forced_const; }
369
void make_field(SendField *field);
370
virtual void print(String *str, enum_query_type query_type);
371
void fix_num_length_and_dec();
374
This function is called by the execution engine to assign 'NO ROWS
375
FOUND' value to an aggregate item, when the underlying result set
376
has no rows. Such value, in a general case, may be different from
377
the default value of the item after 'clear()': e.g. a numeric item
378
may be initialized to 0 by clear() and to NULL by
381
void no_rows_in_result() { clear(); }
383
virtual bool setup(Session *) {return 0;}
384
virtual void make_unique(void) {}
385
Item *get_tmp_table_item(Session *session);
386
virtual Field *create_tmp_field(bool group, Table *table,
387
uint32_t convert_blob_length);
388
bool walk(Item_processor processor, bool walk_subquery, unsigned char *argument);
389
bool init_sum_func_check(Session *session);
390
bool check_sum_func(Session *session, Item **ref);
391
bool register_sum_func(Session *session, Item **ref);
392
Select_Lex *depended_from()
393
{ return (nest_level == aggr_level ? 0 : aggr_sel); }
397
class Item_sum_num :public Item_sum
401
val_xxx() functions may be called several times during the execution of a
402
query. Derived classes that require extensive calculation in val_xxx()
403
maintain cache of aggregate value. This variable governs the validity of
408
Item_sum_num() :Item_sum(),is_evaluated(false) {}
409
Item_sum_num(Item *item_par)
410
:Item_sum(item_par), is_evaluated(false) {}
411
Item_sum_num(Item *a, Item* b) :Item_sum(a,b),is_evaluated(false) {}
412
Item_sum_num(List<Item> &list)
413
:Item_sum(list), is_evaluated(false) {}
414
Item_sum_num(Session *session, Item_sum_num *item)
415
:Item_sum(session, item),is_evaluated(item->is_evaluated) {}
416
bool fix_fields(Session *, Item **);
418
String *val_str(String*str);
419
my_decimal *val_decimal(my_decimal *);
424
class Item_sum_int :public Item_sum_num
427
Item_sum_int(Item *item_par) :Item_sum_num(item_par) {}
428
Item_sum_int(List<Item> &list) :Item_sum_num(list) {}
429
Item_sum_int(Session *session, Item_sum_int *item) :Item_sum_num(session, item) {}
430
double val_real() { assert(fixed == 1); return (double) val_int(); }
431
String *val_str(String*str);
432
my_decimal *val_decimal(my_decimal *);
433
enum Item_result result_type () const { return INT_RESULT; }
434
void fix_length_and_dec()
435
{ decimals=0; max_length=21; maybe_null=null_value=0; }
439
class Item_sum_sum :public Item_sum_num
442
Item_result hybrid_type;
444
my_decimal dec_buffs[2];
445
uint32_t curr_dec_buff;
446
void fix_length_and_dec();
449
Item_sum_sum(Item *item_par) :Item_sum_num(item_par) {}
450
Item_sum_sum(Session *session, Item_sum_sum *item);
451
enum Sumfunctype sum_func () const {return SUM_FUNC;}
456
String *val_str(String*str);
457
my_decimal *val_decimal(my_decimal *);
458
enum Item_result result_type () const { return hybrid_type; }
461
void no_rows_in_result() {}
462
const char *func_name() const { return "sum("; }
463
Item *copy_or_same(Session* session);
468
/* Common class for SUM(DISTINCT), AVG(DISTINCT) */
472
class Item_sum_distinct :public Item_sum_num
475
/* storage for the summation result */
478
/* storage for unique elements */
481
enum enum_field_types table_field_type;
482
uint32_t tree_key_length;
484
Item_sum_distinct(Session *session, Item_sum_distinct *item);
486
Item_sum_distinct(Item *item_par);
487
~Item_sum_distinct();
489
bool setup(Session *session);
494
my_decimal *val_decimal(my_decimal *);
496
String *val_str(String *str);
498
/* XXX: does it need make_unique? */
500
enum Sumfunctype sum_func () const { return SUM_DISTINCT_FUNC; }
501
void reset_field() {} // not used
502
void update_field() {} // not used
503
virtual void no_rows_in_result() {}
504
void fix_length_and_dec();
505
enum Item_result result_type () const;
506
virtual void calculate_val_and_count();
507
virtual bool unique_walk_function(void *elem);
512
Item_sum_sum_distinct - implementation of SUM(DISTINCT expr).
513
See also: MySQL manual, chapter 'Adding New Functions To MySQL'
514
and comments in item_sum.cc.
517
class Item_sum_sum_distinct :public Item_sum_distinct
520
Item_sum_sum_distinct(Session *session, Item_sum_sum_distinct *item)
521
:Item_sum_distinct(session, item) {}
523
Item_sum_sum_distinct(Item *item_arg) :Item_sum_distinct(item_arg) {}
525
enum Sumfunctype sum_func () const { return SUM_DISTINCT_FUNC; }
526
const char *func_name() const { return "sum(distinct "; }
527
Item *copy_or_same(Session* session) { return new Item_sum_sum_distinct(session, this); }
531
/* Item_sum_avg_distinct - SELECT AVG(DISTINCT expr) FROM ... */
533
class Item_sum_avg_distinct: public Item_sum_distinct
536
Item_sum_avg_distinct(Session *session, Item_sum_avg_distinct *original)
537
:Item_sum_distinct(session, original) {}
539
uint32_t prec_increment;
540
Item_sum_avg_distinct(Item *item_arg) : Item_sum_distinct(item_arg) {}
542
void fix_length_and_dec();
543
virtual void calculate_val_and_count();
544
enum Sumfunctype sum_func () const { return AVG_DISTINCT_FUNC; }
545
const char *func_name() const { return "avg(distinct "; }
546
Item *copy_or_same(Session* session) { return new Item_sum_avg_distinct(session, this); }
550
class Item_sum_count :public Item_sum_int
555
Item_sum_count(Item *item_par)
556
:Item_sum_int(item_par),count(0)
558
Item_sum_count(Session *session, Item_sum_count *item)
559
:Item_sum_int(session, item), count(item->count)
561
enum Sumfunctype sum_func () const { return COUNT_FUNC; }
563
void no_rows_in_result() { count=0; }
565
void make_const_count(int64_t count_arg)
568
Item_sum::make_const();
574
const char *func_name() const { return "count("; }
575
Item *copy_or_same(Session* session);
579
class Tmp_Table_Param;
581
class Item_sum_count_distinct :public Item_sum_int
584
uint32_t *field_lengths;
585
Tmp_Table_Param *tmp_table_param;
586
bool force_copy_fields;
588
If there are no blobs, we can use a tree, which
589
is faster than heap table. In that case, we still use the table
590
to help get things set up, but we insert nothing in it
594
Storage for the value of count between calls to val_int() so val_int()
595
will not recalculate on each call. Validitiy of the value is stored in
600
Following is 0 normal object and pointer to original one for copy
601
(to correctly free resources)
603
Item_sum_count_distinct *original;
604
uint32_t tree_key_length;
607
bool always_null; // Set to 1 if the result is always NULL
610
friend int composite_key_cmp(void* arg, unsigned char* key1, unsigned char* key2);
611
friend int simple_str_key_cmp(void* arg, unsigned char* key1, unsigned char* key2);
614
Item_sum_count_distinct(List<Item> &list)
615
:Item_sum_int(list), table(0), field_lengths(0), tmp_table_param(0),
616
force_copy_fields(0), tree(0), count(0),
617
original(0), always_null(false)
619
Item_sum_count_distinct(Session *session, Item_sum_count_distinct *item)
620
:Item_sum_int(session, item), table(item->table),
621
field_lengths(item->field_lengths),
622
tmp_table_param(item->tmp_table_param),
623
force_copy_fields(0), tree(item->tree), count(item->count),
624
original(item), tree_key_length(item->tree_key_length),
625
always_null(item->always_null)
627
~Item_sum_count_distinct();
631
enum Sumfunctype sum_func () const { return COUNT_DISTINCT_FUNC; }
635
void reset_field() { return ;} // Never called
636
void update_field() { return ; } // Never called
637
const char *func_name() const { return "count(distinct "; }
638
bool setup(Session *session);
640
Item *copy_or_same(Session* session);
641
void no_rows_in_result() {}
645
/* Item to get the value of a stored sum function */
649
class Item_avg_field :public Item_result_field
653
Item_result hybrid_type;
654
uint32_t f_precision, f_scale, dec_bin_size;
655
uint32_t prec_increment;
656
Item_avg_field(Item_result res_type, Item_sum_avg *item);
657
enum Type type() const { return FIELD_AVG_ITEM; }
660
my_decimal *val_decimal(my_decimal *);
661
bool is_null() { update_null_value(); return null_value; }
662
String *val_str(String*);
663
enum_field_types field_type() const
665
return hybrid_type == DECIMAL_RESULT ?
666
DRIZZLE_TYPE_DECIMAL : DRIZZLE_TYPE_DOUBLE;
668
void fix_length_and_dec() {}
669
enum Item_result result_type () const { return hybrid_type; }
673
class Item_sum_avg :public Item_sum_sum
677
uint32_t prec_increment;
678
uint32_t f_precision, f_scale, dec_bin_size;
680
Item_sum_avg(Item *item_par) :Item_sum_sum(item_par), count(0) {}
681
Item_sum_avg(Session *session, Item_sum_avg *item)
682
:Item_sum_sum(session, item), count(item->count),
683
prec_increment(item->prec_increment) {}
685
void fix_length_and_dec();
686
enum Sumfunctype sum_func () const {return AVG_FUNC;}
690
// In SPs we might force the "wrong" type with select into a declare variable
692
my_decimal *val_decimal(my_decimal *);
693
String *val_str(String *str);
696
Item *result_item(Field *)
697
{ return new Item_avg_field(hybrid_type, this); }
698
void no_rows_in_result() {}
699
const char *func_name() const { return "avg("; }
700
Item *copy_or_same(Session* session);
701
Field *create_tmp_field(bool group, Table *table, uint32_t convert_blob_length);
705
Item_sum_sum::cleanup();
709
class Item_sum_variance;
711
class Item_variance_field :public Item_result_field
715
Item_result hybrid_type;
716
uint32_t f_precision0, f_scale0;
717
uint32_t f_precision1, f_scale1;
718
uint32_t dec_bin_size0, dec_bin_size1;
720
uint32_t prec_increment;
721
Item_variance_field(Item_sum_variance *item);
722
enum Type type() const {return FIELD_VARIANCE_ITEM; }
725
String *val_str(String *str)
726
{ return val_string_from_real(str); }
727
my_decimal *val_decimal(my_decimal *dec_buf)
728
{ return val_decimal_from_real(dec_buf); }
729
bool is_null() { update_null_value(); return null_value; }
730
enum_field_types field_type() const
732
return hybrid_type == DECIMAL_RESULT ?
733
DRIZZLE_TYPE_DECIMAL : DRIZZLE_TYPE_DOUBLE;
735
void fix_length_and_dec() {}
736
enum Item_result result_type () const { return hybrid_type; }
743
= sum (ai - avg(a))^2 / count(a) )
744
= sum (ai^2 - 2*ai*avg(a) + avg(a)^2) / count(a)
745
= (sum(ai^2) - sum(2*ai*avg(a)) + sum(avg(a)^2))/count(a) =
746
= (sum(ai^2) - 2*avg(a)*sum(a) + count(a)*avg(a)^2)/count(a) =
747
= (sum(ai^2) - 2*sum(a)*sum(a)/count(a) + count(a)*sum(a)^2/count(a)^2 )/count(a) =
748
= (sum(ai^2) - 2*sum(a)^2/count(a) + sum(a)^2/count(a) )/count(a) =
749
= (sum(ai^2) - sum(a)^2/count(a))/count(a)
751
But, this falls prey to catastrophic cancellation. Instead, use the recurrence formulas
753
M_{1} = x_{1}, ~ M_{k} = M_{k-1} + (x_{k} - M_{k-1}) / k newline
754
S_{1} = 0, ~ S_{k} = S_{k-1} + (x_{k} - M_{k-1}) times (x_{k} - M_{k}) newline
755
for 2 <= k <= n newline
756
ital variance = S_{n} / (n-1)
760
class Item_sum_variance : public Item_sum_num
762
void fix_length_and_dec();
765
Item_result hybrid_type;
767
double recurrence_m, recurrence_s; /* Used in recurrence relation. */
769
uint32_t f_precision0, f_scale0;
770
uint32_t f_precision1, f_scale1;
771
uint32_t dec_bin_size0, dec_bin_size1;
773
uint32_t prec_increment;
775
Item_sum_variance(Item *item_par, uint32_t sample_arg) :Item_sum_num(item_par),
776
hybrid_type(REAL_RESULT), count(0), sample(sample_arg)
778
Item_sum_variance(Session *session, Item_sum_variance *item);
779
enum Sumfunctype sum_func () const { return VARIANCE_FUNC; }
784
my_decimal *val_decimal(my_decimal *);
787
Item *result_item(Field *)
788
{ return new Item_variance_field(this); }
789
void no_rows_in_result() {}
790
const char *func_name() const
791
{ return sample ? "var_samp(" : "variance("; }
792
Item *copy_or_same(Session* session);
793
Field *create_tmp_field(bool group, Table *table, uint32_t convert_blob_length);
794
enum Item_result result_type () const { return REAL_RESULT; }
798
Item_sum_num::cleanup();
804
class Item_std_field :public Item_variance_field
807
Item_std_field(Item_sum_std *item);
808
enum Type type() const { return FIELD_STD_ITEM; }
810
my_decimal *val_decimal(my_decimal *);
811
enum Item_result result_type () const { return REAL_RESULT; }
812
enum_field_types field_type() const { return DRIZZLE_TYPE_DOUBLE;}
816
standard_deviation(a) = sqrt(variance(a))
819
class Item_sum_std :public Item_sum_variance
822
Item_sum_std(Item *item_par, uint32_t sample_arg)
823
:Item_sum_variance(item_par, sample_arg) {}
824
Item_sum_std(Session *session, Item_sum_std *item)
825
:Item_sum_variance(session, item)
827
enum Sumfunctype sum_func () const { return STD_FUNC; }
829
Item *result_item(Field *)
830
{ return new Item_std_field(this); }
831
const char *func_name() const { return "std("; }
832
Item *copy_or_same(Session* session);
833
enum Item_result result_type () const { return REAL_RESULT; }
834
enum_field_types field_type() const { return DRIZZLE_TYPE_DOUBLE;}
837
// This class is a string or number function depending on num_func
839
class Item_sum_hybrid :public Item_sum
842
String value,tmp_value;
846
Item_result hybrid_type;
847
enum_field_types hybrid_field_type;
849
bool was_values; // Set if we have found at least one row (for max/min only)
852
Item_sum_hybrid(Item *item_par,int sign)
853
:Item_sum(item_par), sum(0.0), sum_int(0),
854
hybrid_type(INT_RESULT), hybrid_field_type(DRIZZLE_TYPE_LONGLONG),
855
cmp_sign(sign), was_values(true)
856
{ collation.set(&my_charset_bin); }
857
Item_sum_hybrid(Session *session, Item_sum_hybrid *item);
858
bool fix_fields(Session *, Item **);
862
my_decimal *val_decimal(my_decimal *);
864
String *val_str(String *);
865
bool keep_field_type(void) const { return 1; }
866
enum Item_result result_type () const { return hybrid_type; }
867
enum enum_field_types field_type() const { return hybrid_field_type; }
869
void min_max_update_str_field();
870
void min_max_update_real_field();
871
void min_max_update_int_field();
872
void min_max_update_decimal_field();
874
bool any_value() { return was_values; }
875
void no_rows_in_result();
876
Field *create_tmp_field(bool group, Table *table,
877
uint32_t convert_blob_length);
881
class Item_sum_min :public Item_sum_hybrid
884
Item_sum_min(Item *item_par) :Item_sum_hybrid(item_par,1) {}
885
Item_sum_min(Session *session, Item_sum_min *item) :Item_sum_hybrid(session, item) {}
886
enum Sumfunctype sum_func () const {return MIN_FUNC;}
889
const char *func_name() const { return "min("; }
890
Item *copy_or_same(Session* session);
894
class Item_sum_max :public Item_sum_hybrid
897
Item_sum_max(Item *item_par) :Item_sum_hybrid(item_par,-1) {}
898
Item_sum_max(Session *session, Item_sum_max *item) :Item_sum_hybrid(session, item) {}
899
enum Sumfunctype sum_func () const {return MAX_FUNC;}
902
const char *func_name() const { return "max("; }
903
Item *copy_or_same(Session* session);
907
class Item_sum_bit :public Item_sum_int
910
uint64_t reset_bits,bits;
913
Item_sum_bit(Item *item_par,uint64_t reset_arg)
914
:Item_sum_int(item_par),reset_bits(reset_arg),bits(reset_arg) {}
915
Item_sum_bit(Session *session, Item_sum_bit *item):
916
Item_sum_int(session, item), reset_bits(item->reset_bits), bits(item->bits) {}
917
enum Sumfunctype sum_func () const {return SUM_BIT_FUNC;}
922
void fix_length_and_dec()
923
{ decimals= 0; max_length=21; unsigned_flag= 1; maybe_null= null_value= 0; }
927
Item_sum_int::cleanup();
932
class Item_sum_or :public Item_sum_bit
935
Item_sum_or(Item *item_par) :Item_sum_bit(item_par,0) {}
936
Item_sum_or(Session *session, Item_sum_or *item) :Item_sum_bit(session, item) {}
938
const char *func_name() const { return "bit_or("; }
939
Item *copy_or_same(Session* session);
943
class Item_sum_and :public Item_sum_bit
946
Item_sum_and(Item *item_par) :Item_sum_bit(item_par, UINT64_MAX) {}
947
Item_sum_and(Session *session, Item_sum_and *item) :Item_sum_bit(session, item) {}
949
const char *func_name() const { return "bit_and("; }
950
Item *copy_or_same(Session* session);
953
class Item_sum_xor :public Item_sum_bit
956
Item_sum_xor(Item *item_par) :Item_sum_bit(item_par,0) {}
957
Item_sum_xor(Session *session, Item_sum_xor *item) :Item_sum_bit(session, item) {}
959
const char *func_name() const { return "bit_xor("; }
960
Item *copy_or_same(Session* session);
967
class Item_func_group_concat : public Item_sum
969
Tmp_Table_Param *tmp_table_param;
970
DRIZZLE_ERROR *warning;
977
If DISTINCT is used with this GROUP_CONCAT, this member is used to filter
979
@see Item_func_group_concat::setup
980
@see Item_func_group_concat::add
981
@see Item_func_group_concat::clear
983
Unique *unique_filter;
986
Name_resolution_context *context;
987
/** The number of ORDER BY items. */
988
uint32_t arg_count_order;
989
/** The number of selected items, aka the expr list. */
990
uint32_t arg_count_field;
991
uint32_t count_cut_values;
993
bool warning_for_row;
995
bool force_copy_fields;
998
Following is 0 normal object and pointer to original one for copy
999
(to correctly free resources)
1001
Item_func_group_concat *original;
1003
friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
1005
friend int group_concat_key_cmp_with_order(void* arg, const void* key1,
1007
friend int dump_leaf_key(unsigned char* key, uint32_t,
1008
Item_func_group_concat *group_concat_item);
1011
Item_func_group_concat(Name_resolution_context *context_arg,
1012
bool is_distinct, List<Item> *is_select,
1013
SQL_LIST *is_order, String *is_separator);
1015
Item_func_group_concat(Session *session, Item_func_group_concat *item);
1016
~Item_func_group_concat();
1019
enum Sumfunctype sum_func () const {return GROUP_CONCAT_FUNC;}
1020
const char *func_name() const { return "group_concat"; }
1021
virtual Item_result result_type () const { return STRING_RESULT; }
1022
enum_field_types field_type() const
1024
if (max_length/collation.collation->mbmaxlen > CONVERT_IF_BIGGER_TO_BLOB )
1025
return DRIZZLE_TYPE_BLOB;
1027
return DRIZZLE_TYPE_VARCHAR;
1031
void reset_field() { assert(0); } // not used
1032
void update_field() { assert(0); } // not used
1033
bool fix_fields(Session *,Item **);
1034
bool setup(Session *session);
1038
my_decimal *val_decimal(my_decimal *decimal_value)
1040
return val_decimal_from_string(decimal_value);
1042
String* val_str(String* str);
1043
Item *copy_or_same(Session* session);
1044
void no_rows_in_result() {}
1045
virtual void print(String *str, enum_query_type query_type);
1046
virtual bool change_context_processor(unsigned char *cntx)
1047
{ context= (Name_resolution_context *)cntx; return false; }
1050
} /* namespace drizzled */
1052
#endif /* DRIZZLED_ITEM_SUM_H */