26
25
* Stored in T_hitlog as hit_ID, gets set when {@link log()} is called and the hit is logged.
31
* Is the hit already logged?
37
* The referer/referrer.
44
* The type of referer.
33
* Stored in T_hitlog as hit_referer_type, possible values: 'search'|'blacklist'|
34
* 'spam'|'referer'|'direct'|'self'|'admin'. By default this is not set to anything.
46
35
* Note: "spam" referers do not get logged.
47
* 'search'|'blacklist'|'referer'|'direct'|'spam'
54
* The ID of the referer's base domain in T_basedomains
43
* Stored in T_hitlog as hit_referer, from either HTTP_REFERER or _SERVER or getenv().
44
* The stored value is truncated to 500 characters, which should be more than enough.
52
* Stored in T_hitlog as hit_referer_dom_ID, the ID of the referer's base domain in T_basedomains.
58
55
var $referer_domain_ID;
59
* Stored in T_hitlog as hit_serprank, If it is a search hit then the referer's SERP rank, else NULL
63
var $_serprank = NULL;
67
* Stored in T_hitlog as hit_remote_addr, the remote address (IP) of the hit.
75
* The user agent string, cut to 500 characters. Stored in T_hitlog as hit_agent_signature,
76
* the user agent string is truncated to 500 characters, which should be more than enough.
78
* @see Hit::get_user_agent()
85
* Stored in T_hitlog as hit_agent_type, the default setting ('unknown') is
86
* for hits that are not detected as 'rss'|'robot'|'browser'.
88
* @see Hit::detect_useragent()
95
* Stored in T_hitlog as hit_agent_platform, the user agent platform (operating system really)
97
* @see Hit::detect_useragent()
61
107
* Is this a reload?
62
109
* This gets lazy-filled by {@link is_new_view()}.
66
113
var $_is_new_view;
115
* Is the hit already logged?
75
* Remote address (IP).
82
* @see Hit::get_user_agent()
89
120
* The user agent name, eg "safari"
90
121
* @see Hit::get_agent_name()
97
* The user agent platform, eg "mac"
98
* @see Hit::get_agent_platform()
106
126
* The user's remote host.
107
127
* Use {@link get_remote_host()} to access it (lazy filled).
111
130
var $_remoteHost;
114
* The user agent type.
116
* The default setting ('unknown') is taken for new entries (into T_useragents),
117
* that are not detected as 'rss', 'robot' or 'browser'.
118
* 'rss'|'robot'|'browser'|'unknown'
120
* @see Hit::get_agent_type()
127
132
* The ID of the user agent entry in T_useragents.
128
133
* @see Hit::get_agent_ID()
136
138
* Extracted from search referers:
138
140
var $_extracted_keyphrase = false;
139
145
var $_keyphrase = NULL;
140
150
var $_extracted_serprank = false;
141
var $_serprank = NULL;
151
160
// Get the first IP in the list of REMOTE_ADDR and HTTP_X_FORWARDED_FOR
152
161
$this->IP = get_ip_list( true );
154
// Check the REFERER and determine referer_type:
155
// TODO: dh> move this out of here, too, only if "antispam_block_spam_referers" is true,
156
// do something about it (here or somewhere else, but early).
163
// Check the REFERER and determine referer_type
164
// @todo (0000) dh> move this out of here, too, only if "antispam_block_spam_referers"
165
// is true, do something about it (here or somewhere else, but early).
157
166
$this->detect_referer(); // May EXIT if we are set up to block referer spam.
162
171
* Detect admin page
173
* We are inside of admin, this supersedes 'direct' access.
174
* NOTE: this is not really a referer type but more a hit type
164
176
function detect_admin_page()
166
178
if( is_admin_page() )
168
// We are inside of admin, this supersedes 'direct' access
169
// NOTE: this is not really a referer type but more a hit type
170
180
$this->referer_type = 'admin';
178
* Detect Referer (sic!).
179
* Due to potential non-thread safety with getenv() (fallback), we'd better do this early.
188
* Detect Referer (sic!)
181
* referer_type: enum('search', 'blacklist', 'referer', 'direct'); 'spam' gets used internally
190
* Due to potential non-thread safety with getenv() (fallback), we'd better do
191
* this early. referer_type: enum('search', 'blacklist', 'referer', 'direct');
192
* 'spam' gets used internally
183
194
function detect_referer()
185
global $HTTP_REFERER; // might be set by PHP (give highest priority)
187
global $self_referer_list, $blackList, $search_engines; // used to detect $referer_type
198
global $HTTP_REFERER; // might be set by PHP
199
global $search_engines;
200
global $self_referer_list;
188
202
global $templates_path;
204
// detect the referer if possible
191
205
if( isset( $HTTP_REFERER ) )
193
207
// Referer provided by PHP
198
if( isset($_SERVER['HTTP_REFERER']) )
212
// use _SERVER if we can get something
213
if( isset( $_SERVER['HTTP_REFERER'] ) )
200
215
$this->referer = $_SERVER['HTTP_REFERER'];
204
// Fallback method (not thread safe :[[ ) - this function does not work in ISAPI mode
219
// Fallback method (not thread safe), this function does not work in ISAPI mode
205
220
$this->referer = getenv('HTTP_REFERER');
209
if( empty($this->referer) )
225
if( empty( $this->referer ) )
212
227
// This type may be superseded and set to 'admin'
213
228
if( ! $this->detect_admin_page() )
230
// Not an admin page, assume 'direct'
216
231
$this->referer_type = 'direct';
221
// ANALYZE referer...
223
// Check self referer list, see {@link $self_referer_list}
224
// fplanque: we log these (again), because if we didn't we wouldn't detect
225
// reloads on these... and that would be a problem!
236
// Check self referer list (see $self_referer_list)
237
// fplanque: we log these (again), because if we didn't we wouldn't detect
238
// reloads on these ... and that would be a problem!
226
239
foreach( $self_referer_list as $self_referer )
228
241
$pos = strpos( $this->referer, $self_referer );
229
242
// If not starting within the first 12 chars it's probably an url param as in &url=http://this_blog.com
230
if( $pos !== false && $pos <= 12
231
&& ! ($debug && strpos( $this->referer, '/search.html' ) ) ) // search simulation
243
if( $pos !== false && $pos <= 12 && ! ( $debug && strpos( $this->referer, '/search.html' ) ) )
233
// This type may be superseded by admin page
245
// This type may be superseded by admin page
234
246
if( ! $this->detect_admin_page() )
248
// Not an admin page, must be self
237
249
$this->referer_type = 'self';
244
// Check blacklist, see {@link $blackList}
245
// NOTE: This is NOT the antispam!!
255
// Check blacklist, see {@link $blackList} NOTE: This is NOT the antispam!!
246
256
// fplanque: we log these (again), because if we didn't we wouldn't detect
247
// reloads on these... and that would be a problem!
257
// reloads on these ... and that would be a problem!
248
258
foreach( $blackList as $lBlacklist )
250
260
$pos = strpos( $this->referer, $lBlacklist );
251
261
// If not starting within the first 12 chars it's probably an url param as in &url=http://this_blog.com
252
262
if( $pos !== false && $pos <= 12 )
254
// This type may be superseded by admin page
255
// fp> 2009-05-10: because of the 12 char limit above the following is probably no longer needed. Please enable it back if anyone has a problem with admin being detected as blacklist
256
// if( ! $this->detect_admin_page() )
264
if( ! $this->detect_admin_page() )
266
// Not an admin page:
259
267
$this->referer_type = 'blacklist';
266
273
// Check if the referer is valid and does not match the antispam blacklist:
267
// NOTE: requests to admin pages should not arrive here, because they should be matched above through $self_referer_list!
268
if( $error = validate_url( $this->referer, 'commenting' ) )
274
// NOTE: requests to admin pages should not arrive here, because they
275
// should be matched above through $self_referer_list!
276
if( $error = validate_url( $this->referer, 'commenting', true, 'referers' ) )
270
278
// This is most probably referer spam!!
271
279
$this->referer_type = 'spam';
273
if( $Settings->get('antispam_block_spam_referers') )
280
// In order to preserve server resources, we're going to stop processing immediately (no logging)!!
281
if( $Settings->get( 'antispam_block_spam_referers' ) )
275
// In order to preserve server resources, we're going to stop processing immediately (no logging)!!
276
283
require $templates_path.'_403_referer_spam.main.php'; // error & exit
277
284
exit(0); // just in case.
281
return; // type "spam"
285
289
// Is the referer a search engine?
286
290
// Note: for debug simulation, you may need to add sth like $search_engines[] = '/credits.html'; into the conf
287
291
foreach( $search_engines as $lSearchEngine )
289
if( stristr($this->referer, $lSearchEngine) ) // search simulation
293
if( stristr( $this->referer, $lSearchEngine ) ) // search simulation
291
295
$this->referer_type = 'search';
446
450
* Detect requests for XML feeds by $template / $viewmode param.
448
* Use $template, if not empty (may be set in /qp_srvc/atom.php
449
* for example), otherwise $viewmode (and if that isn't set, $tempskin
450
* for b2evolution backward compatibility).
452
* Use $template, if not empty (may be set in /qp_srvc/atom.php for example),
453
* otherwise $viewmode (and if that isn't set, $tempskin for b2evolution
454
* backward compatibility).
452
* @todo (0000): fp> this is WEAK! Do we really need to know before going into the template?
453
* dh> not necessary, but only where ->agent_type gets used (logging).
454
* @todo (0000): It would be less "weak" to call Template::get_default_type(), but that
456
* @todo (0000) fp> this is WEAK! Do we really need to know before going into
457
* the template? dh> not necessary, but only where ->agent_type gets used (logging).
458
* @todo (0000) It would be less "weak" to call Template::get_default_type(), but that
455
459
* would mean having to instantiate a Template object! Another possibility: Use PHP's
456
460
* magic __get() method. I think combining both approaches would be a good solution
457
461
* (perhaps we could simply check whether the template name begins with an underscore?
458
462
* That would avoid having to instantiate a Template object (hmm, isn't that done
459
* in qp_inc/_blog_main.inc.php anyway?).
463
* in qp_inc/_blog_main.inc.php anyway?).
461
465
if( ! empty( $template ) )
490
494
* Log a hit on a blog page / rss feed
492
* This function should be called at the end of the page, otherwise if the page is
493
* displaying previous hits, it may display the current one too. The hit will not be
494
* logged in special occasions, see {@link $ignore} and {@link is_good_hit()}. It will
495
* call {@link Hitlist::dbprune()} to do the automatic pruning of old hits in case of
496
* auto_prune_stats_mode == "page".
496
* This function should be called at the end of the page, otherwise if the page
497
* is displaying previous hits, it may display the current one too. The hit will
498
* not be logged in special occasions, see {@link $ignore} and {@link is_good_hit()}.
499
* It will call {@link Hitlist::dbprune()} to do the automatic pruning of old
500
* hits in case of auto_prune_stats_mode == "page".
497
502
* @uses Log::should_be_logged()
498
503
* @uses Log::record_the_hit()
499
504
* @return boolean true if the hit gets logged; false if not
604
$hit_uri = substr( $ReqURI, 0, 250 ); // VARCHAR(250) and likely to be longer
605
$hit_referer = substr( $this->referer, 0, 250 ); // VARCHAR(250) and likely to be longer
610
// VARCHAR(250) and likely to be longer
611
$hit_uri = substr( $ReqURI, 0, 250 );
612
// VARCHAR(250) and likely to be longer
613
$hit_referer = substr( $this->referer, 0, 250 );
607
// Extract the keyphrase from search referers:
615
// Extract the keyphrase from search referers
608
616
$keyphrase = $this->get_keyphrase();
626
// Extract the serprank from search referers:
634
// Extract the serprank from search referers
627
635
$serprank = $this->get_serprank();
629
// insert hit into DB table:
631
INSERT INTO T_hitlog(
632
hit_sess_ID, hit_datetime, hit_uri, hit_referer_type,
633
hit_referer, hit_referer_dom_ID, hit_keyphrase_keyp_ID, hit_serprank, hit_blog_ID, hit_remote_addr, hit_agnt_signature, hit_agnt_type )
637
// insert hit into DB table
638
$sql = "INSERT INTO T_hitlog(
639
hit_sess_ID, hit_datetime, hit_uri, hit_referer_type,
640
hit_referer, hit_referer_dom_ID, hit_keyphrase_keyp_ID, hit_serprank, hit_blog_ID, hit_remote_addr, hit_agnt_signature, hit_agnt_type )
634
641
VALUES( '".$Session->ID."', FROM_UNIXTIME(".$localtimenow."), '".$DB->escape( $hit_uri )."', '".$this->referer_type
635
."', '".$DB->escape( $hit_referer )."', ".$DB->null( $this->get_referer_domain_ID() ).', '.$DB->null( $keyp_ID )
636
.', '.$DB->null( $serprank ).', '.$DB->null( $blog_ID ).", '".$DB->escape( $this->IP )."', ".$DB->quote( $this->get_user_agent() ).", ".$DB->quote( $this->get_agent_type() ).'
642
."', '".$DB->escape( $hit_referer )."', ".$DB->null( $this->get_referer_domain_ID() ).', '.$DB->null( $keyp_ID )
643
.', '.$DB->null( $serprank ).', '.$DB->null( $blog_ID ).", '".$DB->escape( $this->IP )."', ".$DB->quote( $this->get_user_agent() ).", ".$DB->quote( $this->get_agent_type() ).'
639
646
$DB->query( $sql, 'Record the hit' );
794
801
elseif( $allow_nslookup )
796
// We allowed reverse DNS lookup
797
// This can be terribly time consuming (4/5 seconds!) when there is no reverse dns available!
798
// This is the case on many intranets and many users' first time installs!!!
799
// Some people end up considering the application core is very slow just because of this line!
800
// This cannot be enabled by default.
803
// We allowed reverse DNS lookup. This can be terribly time consuming (4/5 seconds!)
804
// when there is no reverse dns available! This is the case on many intranets
805
// and many users' first time installs!!! Some people end up considering
806
// the application core is very slow just because of this line! This cannot
807
// be enabled by default.
801
808
$this->_remoteHost = @gethostbyaddr( $this->IP );
816
* Determine if a hit is a new view (not reloaded or from a robot).
818
* 'Reloaded' means: visited before from the same user (in a session) or from same
819
* IP/user_agent in the last {@link $Settings reloadpage_timeout} seconds.
821
* This gets queried by the Item objects before incrementing its view count (if the Item
822
* gets viewed in total ({@link $dispmore})).
824
* @todo (0000): if this is only useful to display who's online or view counts, provide
825
* option to disable all those resource consuming gadgets. (Those gadgets should be
826
* plugins actually, and they should enable this query only if needed)
823
* Determine if a hit is a new view (not reloaded or from a robot)
825
* 'Reloaded' means: visited before from the same user (in a session) or from
826
* same IP/user_agent in the last {@link $Settings reloadpage_timeout} seconds.
828
* This gets queried by the Item objects before incrementing its view count (if
829
* the Item gets viewed in total ({@link $dispmore})).
831
* @todo (0000) if this is only useful to display who's online or view counts,
832
* provide option to disable all those resource consuming gadgets. (Those gadgets
833
* should be plugins actually, and they should enable this query only if needed)
827
834
* blueyed>> Move functionality to Plugin (with a hook in Item::content())?!
828
835
* @return boolean
846
853
// Restrict to current user if logged in
847
854
if( ! empty( $current_User->ID ) )
849
// select by user ID: one user counts really just once. May be even faster than the anonymous query below..!?
856
// select by user ID: one user counts really just once. May be even faster
857
// than the anonymous query below!?
851
859
SELECT hit_ID FROM T_hitlog INNER JOIN T_sessions ON hit_sess_ID = sess_ID
852
WHERE sess_user_ID = ".$current_User->ID."
853
AND hit_uri = '".$DB->escape( substr( $ReqURI, 0, 250 ) )."'
860
WHERE sess_user_ID = ".$current_User->ID."
861
AND hit_uri = '".$DB->escape( substr( $ReqURI, 0, 250 ) )."'
858
866
// select by remote_addr/agnt_signature
862
WHERE hit_datetime > '".date( 'Y-m-d H:i:s', $localtimenow - $Settings->get('reloadpage_timeout') )."'
863
AND hit_remote_addr = ".$DB->quote( $this->IP )."
864
AND hit_uri = '".$DB->escape( substr( $ReqURI, 0, 250 ) )."'
865
AND hit_agnt_signature = ".$DB->quote( $this->get_user_agent() )."
870
WHERE hit_datetime > '".date( 'Y-m-d H:i:s', $localtimenow - $Settings->get('reloadpage_timeout') )."'
871
AND hit_remote_addr = ".$DB->quote( $this->IP )."
872
AND hit_uri = '".$DB->escape( substr( $ReqURI, 0, 250 ) )."'
873
AND hit_agnt_signature = ".$DB->quote( $this->get_user_agent() )."
868
876
if( $DB->get_var( $sql, 0, 0, 'Hit: Check for reload' ) )
870
$this->_is_new_view = false; // We don't want to log this hit again
878
$this->_is_new_view = false; // We don't want to log this hit again
917
$known_search_params = array(
919
'as_q', // Google Advanced Search Query
920
'as_epq', // Google Advanced Search Query
927
'searchfor', // mysearch.myway.com
929
'rdata', // search.ke.voila.fr
931
'su', // suche.web.de
932
'Gw', // scroogle.org
926
$known_search_params = array(
928
'as_q', // Google Advanced Search Query
929
'as_epq', // Google Advanced Search Query
936
'searchfor', // mysearch.myway.com
938
'rdata', // search.ke.voila.fr
940
'su', // suche.web.de
941
'Gw', // scroogle.org
936
945
$ref_params = explode( '&', app_substr( $ref, $pos_question+1 ) );
937
946
foreach( $ref_params as $ref_param )
973
984
* @return boolean
975
986
function is_lynx()
977
988
if( ! isset( $this->is_lynx ) )
978
990
$this->detect_useragent();
979
992
return $this->is_lynx;
983
996
* Is this Firefox?
984
998
* @return boolean
986
1000
function is_firefox()
988
1002
if( ! isset( $this->is_firefox ) )
989
1004
$this->detect_useragent();
990
1006
return $this->is_firefox;
994
1010
* Is this Gecko?
995
1012
* @return boolean
997
1014
function is_gecko()
999
1016
if( ! isset( $this->is_gecko ) )
1000
1018
$this->detect_useragent();
1001
1020
return $this->is_gecko;
1005
1024
* Is this WinIE?
1006
1026
* @return boolean
1008
1028
function is_winIE()
1010
1030
if( ! isset( $this->is_winIE ) )
1011
1032
$this->detect_useragent();
1012
1034
return $this->is_winIE;
1016
1038
* Is this MacIE?
1017
1040
* @return boolean
1019
1042
function is_macIE()
1021
1044
if( ! isset( $this->is_macIE ) )
1022
1046
$this->detect_useragent();
1023
1048
return $this->is_macIE;
1027
1052
* Is this Safari?
1028
1054
* @return boolean
1030
1056
function is_safari()
1032
1058
if( ! isset( $this->is_safari ) )
1033
1060
$this->detect_useragent();
1034
1062
return $this->is_safari;
1038
1066
* Is this Opera?
1039
1068
* @return boolean
1041
1070
function is_opera()
1043
1072
if( ! isset( $this->is_opera ) )
1044
1074
$this->detect_useragent();
1045
1076
return $this->is_opera;
1049
1080
* Is this Netscape4?
1050
1082
* @return boolean
1052
1084
function is_NS4()
1054
1086
if( ! isset( $this->NS4 ) )
1055
1088
$this->detect_useragent();
1056
1090
return $this->is_NS4;