3
Scheduled Update subsystem: update.config parsing, update scheduling, and the per-URL update state machine
5
@section license License
7
Licensed to the Apache Software Foundation (ASF) under one
8
or more contributor license agreements. See the NOTICE file
9
distributed with this work for additional information
10
regarding copyright ownership. The ASF licenses this file
11
to you under the Apache License, Version 2.0 (the
12
"License"); you may not use this file except in compliance
13
with the License. You may obtain a copy of the License at
15
http://www.apache.org/licenses/LICENSE-2.0
17
Unless required by applicable law or agreed to in writing, software
18
distributed under the License is distributed on an "AS IS" BASIS,
19
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20
See the License for the specific language governing permissions and
21
limitations under the License.
28
#include "ProxyConfig.h"
29
#include "StatSystem.h"
30
#include "HttpUpdateSM.h"
31
#include "HttpDebugNames.h"
34
//#include "MixtAPIInternal.h"
36
// Raw stat block backing the update statistics. It is allocated in
// UpdateConfigManager::init() and referenced by every UPDATE_*_DYN_STAT macro.
RecRawStatBlock *update_rsb;
38
// Forwards to REC_EstablishStaticConfigInteger(_ix, _n).
// NOTE(review): the last visible body line still ends with a continuation
// backslash, so the expansion depends on the line that follows -- confirm.
#define UpdateEstablishStaticConfigInteger(_ix,_n) \
39
REC_EstablishStaticConfigInteger(_ix,_n); \
41
// Adds 1 to raw stat x in update_rsb. The expansion names mutex->thread_holding,
// so an identifier called mutex must be in scope wherever this is used.
// NOTE(review): the body already ends in a semicolon, so a call written with
// its own semicolon expands to two statements -- a hazard in an unbraced if/else.
#define UPDATE_INCREMENT_DYN_STAT(x) \
42
RecIncrRawStat(update_rsb, mutex->thread_holding, (int) x, 1);
43
// Adds -1 to raw stat x; same scope requirement as the increment macro.
#define UPDATE_DECREMENT_DYN_STAT(x) \
44
RecIncrRawStat(update_rsb, mutex->thread_holding, (int) x, -1);
45
// Reads the count of raw stat x into C and its sum into S.
#define UPDATE_READ_DYN_STAT(x, C, S) \
46
RecGetRawStatCount(update_rsb, (int) x, &C); \
47
RecGetRawStatSum(update_rsb, (int) x, &S);
49
// Sets both the sum and the count of raw stat x to 0.
// NOTE(review): the last visible body line ends with a continuation backslash;
// the lines that open and close the body are not visible in this excerpt.
#define UPDATE_CLEAR_DYN_STAT(x) \
51
RecSetRawStatSum(update_rsb, x, 0); \
52
RecSetRawStatCount(update_rsb, x, 0); \
55
// Local aliases for the REC_* configuration helpers.
#define UPDATE_ConfigReadInteger REC_ConfigReadInteger
56
#define UPDATE_ConfigReadString REC_ConfigReadString
57
#define UPDATE_RegisterConfigUpdateFunc REC_RegisterConfigUpdateFunc
61
// Fundamental constants
63
// Text fragments from which UpdateEntry::BuildHttpRequest() assembles the
// request: method prefix, protocol version suffix and the line terminators.
static const char *const GET_METHOD = "GET ";
64
static const char *const HTTP_VERSION = " HTTP/1.0";
65
static const char *const REQUEST_TERMINATOR = "\r\n\r\n";
66
static const char *const TERMINATOR = "\r\n";
67
// HTML comment delimiters; no use of them appears in the visible part of this file.
static const char *const HTML_COMMENT_TAG = "!--";
68
static const char *const HTML_COMMENT_END = "-->";
69
// Size in bytes of the buffers used for one request (BuildHttpRequest) and for
// one update.config line (ParseConfigFile).
static const int MAX_LINE_LENGTH = (32 * 1024);
71
// Fundamental constants initialized by UpdateManager::start()
73
// Each holds strlen() of the matching literal above and stays 0 until
// UpdateManager::start() has run.
static int len_GET_METHOD = 0;
74
static int len_HTTP_VERSION = 0;
75
static int len_REQUEST_TERMINATOR = 0;
76
static int len_TERMINATOR = 0;
78
// Table of { tag, attribute } name pairs; most entries are not visible in this
// excerpt.
// NOTE(review): presumably the HTML attributes that may carry URLs to follow
// during recursive updates -- confirm against the code that reads this table.
struct html_tag update_allowable_html_tags[] = {
82
{"body", "background"},
92
{"base", "href"}, // special handling
93
{"meta", "content"}, // special handling
97
// Descriptor for one URL scheme. The initialization loops further down read
// its tag member and store strlen(tag) in its tag_len member; the remaining
// members are not visible in this excerpt.
struct schemes_descriptor
103
// Scheme table; iterated until an entry with a null tag is reached.
struct schemes_descriptor proto_schemes[] = {
136
// Second scheme table, iterated the same way by init_supported_proto_schemes().
struct schemes_descriptor supported_proto_schemes[] = {
141
// Counter from which UpdateEntry::Init() draws entry ids using
// ink_atomic_increment().
static int global_id = 1;
147
// Cache strlen(tag) in tag_len for every proto_schemes entry; the loop stops at
// the first entry whose tag is null. The signature of the enclosing function is
// not visible in this excerpt.
for (n = 0; proto_schemes[n].tag; ++n) {
148
proto_schemes[n].tag_len = strlen(proto_schemes[n].tag);
153
// Same caching pass, applied to supported_proto_schemes.
init_supported_proto_schemes()
156
for (n = 0; supported_proto_schemes[n].tag; ++n) {
157
supported_proto_schemes[n].tag_len = strlen(supported_proto_schemes[n].tag);
161
///////////////////////////////////////////////////////////////////////////////
162
// Class UpdateConfigParams
163
// Global subsystem configuration parameters
164
///////////////////////////////////////////////////////////////////////////////
166
// Default constructor: every configuration value starts at 0.
UpdateConfigParams::UpdateConfigParams():
167
_enabled(0), _immediate_update(0), _retry_count(0),
168
_retry_interval(0), _concurrent_updates(0), _max_update_state_machines(0), _memory_use_in_mb(0)
172
// Copy constructor: copies the seven configuration values member by member.
// NOTE(review): the parameter is a non-const reference, so const objects and
// temporaries cannot be used as the source -- confirm whether that is intended.
UpdateConfigParams::UpdateConfigParams(UpdateConfigParams & p)
174
_enabled = p._enabled;
175
_immediate_update = p._immediate_update;
176
_retry_count = p._retry_count;
177
_retry_interval = p._retry_interval;
178
_concurrent_updates = p._concurrent_updates;
179
_max_update_state_machines = p._max_update_state_machines;
180
_memory_use_in_mb = p._memory_use_in_mb;
183
// Destructor; its body is not visible in this excerpt.
UpdateConfigParams::~UpdateConfigParams()
187
// Assignment: copies the same seven configuration values as the copy
// constructor. The return statement is not visible in this excerpt.
// NOTE(review): the parameter is a non-const reference, which prevents
// assigning from a const object.
UpdateConfigParams & UpdateConfigParams::operator=(UpdateConfigParams & p)
189
_enabled = p._enabled;
190
_immediate_update = p._immediate_update;
191
_retry_count = p._retry_count;
192
_retry_interval = p._retry_interval;
193
_concurrent_updates = p._concurrent_updates;
194
_max_update_state_machines = p._max_update_state_machines;
195
_memory_use_in_mb = p._memory_use_in_mb;
200
// Equality test: checks each of the seven configuration values against p in
// turn. The statements executed when a pair differs, and the final result, are
// not visible in this excerpt.
UpdateConfigParams::operator==(UpdateConfigParams & p)
202
if (_enabled != p._enabled)
204
if (_immediate_update != p._immediate_update)
206
if (_retry_count != p._retry_count)
208
if (_retry_interval != p._retry_interval)
210
if (_concurrent_updates != p._concurrent_updates)
212
if (_max_update_state_machines != p._max_update_state_machines)
214
if (_memory_use_in_mb != p._memory_use_in_mb)
219
/////////////////////////////////////////////////////////////////////////////
221
// Per update object descriptor
222
/////////////////////////////////////////////////////////////////////////////
224
// Construct an empty entry: links, pointers, counters and schedule fields are
// zeroed, _scheme_index starts at -1, and the embedded HTTP parser state is
// initialized. One line of the initializer list is not visible in this excerpt.
UpdateEntry::UpdateEntry():_group_link(0), _hash_link(0), _id(0), _url(0),
225
_URLhandle(), _terminal_url(0),
226
_request_headers(0), _num_request_headers(0),
228
_offset_hour(0), _interval(0), _max_depth(0), _start_time(0), _expired(0), _scheme_index(-1), _update_event_status(0)
230
http_parser_init(&_http_parser);
233
// Release what the entry holds: the parsed URL handle (if valid), the
// duplicated request header text, the HTTP header object (if present and
// valid), and the reference to the indirect URL list.
UpdateEntry::~UpdateEntry()
239
if (_URLhandle.valid()) {
240
_URLhandle.destroy();
242
if (_request_headers) {
243
// Allocated with xstrdup() in ValidHeaders().
xfree(_request_headers);
244
_request_headers = NULL;
246
// INKqa12891: _http_hdr can be NULL
247
if (_http_hdr && _http_hdr->valid()) {
248
_http_hdr->destroy();
252
_indirect_list = NULL;
256
// Finish setting up a validated entry: draw its id from the global counter and
// compute its schedule time. The use of derived_url is not visible in this
// excerpt.
UpdateEntry::Init(int derived_url)
258
_id = ink_atomic_increment(&global_id, 1);
262
ComputeScheduleTime();
265
const char *scheme = _URLhandle.scheme_get(&scheme_len);
266
// NOTE(review): this compares pointers, not characters. Presumably
// scheme_get() returns the URL_SCHEME_HTTP pointer itself for http URLs --
// confirm before changing.
if (scheme != URL_SCHEME_HTTP) {
267
// Depth is only valid for scheme "http"
274
// Parse the URL field of a config line into _URLhandle.
// s: start of the field text; e: end of the field (its use is not visible here).
// Returns 0 when the URL is valid and 1 when it is not; on the invalid path
// the URL handle is destroyed again.
UpdateEntry::ValidURL(char *s, char *e)
276
// Note: string 's' is null terminated.
278
const char *url_start = s;
282
_URLhandle.create(NULL);
283
err = _URLhandle.parse(&url_start, url_end);
286
return 0; // Valid URL
288
_URLhandle.destroy();
289
return 1; // Invalid URL
295
// Validate the request-headers field of a config line with a three-state
// scanner (find header name start, scan header name, scan to end of value),
// counting complete headers in _num_request_headers.
// Returns 0 when the field is acceptable (including no headers at all) and 1
// when a header is malformed. On success with at least one header the field
// text is duplicated into _request_headers.
// Many short lines of this function are not visible in this excerpt.
UpdateEntry::ValidHeaders(char *s, char *e)
298
// Note: string 's' is null terminated.
302
// Scanner states.
FIND_START_OF_HEADER_NAME = 1,
303
SCAN_FOR_HEADER_NAME,
304
SCAN_FOR_END_OF_HEADER_VALUE
310
int end_of_headers = 0;
311
int scan_state = FIND_START_OF_HEADER_NAME;
314
switch (scan_state) {
315
case FIND_START_OF_HEADER_NAME:
317
if (!ValidHeaderNameChar(*p)) {
321
scan_state = SCAN_FOR_HEADER_NAME;
325
case SCAN_FOR_HEADER_NAME:
327
if (!ValidHeaderNameChar(*p)) {
329
scan_state = SCAN_FOR_END_OF_HEADER_VALUE;
340
case SCAN_FOR_END_OF_HEADER_VALUE:
344
// A line feed right after t completes one header; t is set on a line that
// is not visible here.
if (*(t + 1) == '\n') {
346
++_num_request_headers;
347
scan_state = FIND_START_OF_HEADER_NAME;
356
++_num_request_headers;
367
if (_num_request_headers) {
368
return 1; // Fail; Bad header with > 1 valid headers
371
return 0; // OK; user specified no headers
373
return 1; // Fail; first header is invalid
377
if (end_of_headers) {
385
// At least 1 valid header exists
387
// The copy is released with xfree() in the destructor.
_request_headers = xstrdup(s);
388
return 0; // OK; > 1 valid headers
392
// Format the GET request for _url (plus the optional user headers) into a
// local buffer and parse it into a newly allocated HTTPHdr stored in _http_hdr.
// ParseConfigFile() treats a nonzero return as a header processing error; the
// return statements themselves are not visible in this excerpt.
UpdateEntry::BuildHttpRequest()
394
// Given the HTTP request and associated headers,
395
// transform the data into a HTTPHdr object.
397
char request[MAX_LINE_LENGTH];
400
// Bytes needed for the formatted request, including the terminating NUL
// (the trailing + 1). Relies on the len_* globals set by UpdateManager::start().
request_size = len_GET_METHOD + strlen(_url) +
401
len_HTTP_VERSION + (_request_headers ? len_TERMINATOR + strlen(_request_headers) : 0) + len_REQUEST_TERMINATOR + 1;
402
// Guard for a request that would not fit in the buffer; how it is handled is
// not visible here.
if (request_size > MAX_LINE_LENGTH) {
405
if (_request_headers) {
406
snprintf(request, sizeof(request), "%s%s%s%s%s%s", GET_METHOD, _url,
407
HTTP_VERSION, TERMINATOR, _request_headers, REQUEST_TERMINATOR);
409
snprintf(request, sizeof(request), "%s%s%s%s", GET_METHOD, _url, HTTP_VERSION, REQUEST_TERMINATOR);
411
_http_hdr = NEW(new HTTPHdr);
412
http_parser_init(&_http_parser);
413
_http_hdr->create(HTTP_TYPE_REQUEST);
415
const char *start = request;
416
// request_size counts the NUL, so end points just past the last text byte.
const char *end = start + request_size - 1;
418
// Feed the text to the parser until it stops asking for more input.
while (start < end) {
419
err = _http_hdr->parse_req(&_http_parser, &start, end, false);
420
if (err != PARSE_CONT) {
423
end = start + strlen(start);
425
http_parser_clear(&_http_parser);
430
// Classify c for use in a header name: characters with codes 32..126 are then
// checked with ValidSeparatorChar(). The returned values are not visible in
// this excerpt.
UpdateEntry::ValidHeaderNameChar(char c)
432
if ((c > 31) && (c < 127)) {
433
if (ValidSeparatorChar(c)) {
444
// Report whether c is a separator character; returns 1 for a separator. The
// set of characters tested is not visible in this excerpt.
UpdateEntry::ValidSeparatorChar(char c)
466
return 1; // Valid separator char
473
// Parse the offset-hour field into _offset_hour.
// Returns 0 when the value lies in [MIN_OFFSET_HOUR, MAX_OFFSET_HOUR], else 1.
// The e parameter is not used on any visible line.
UpdateEntry::ValidHour(char *s, char *e)
476
// Note: string 's' is null terminated.
478
// NOTE(review): atoi() reports no conversion errors, so non-numeric text reads
// as 0; whether that passes depends on MIN_OFFSET_HOUR.
_offset_hour = atoi(s);
479
if ((_offset_hour >= MIN_OFFSET_HOUR) && (_offset_hour <= MAX_OFFSET_HOUR)) {
480
return 0; // Valid data
482
return 1; // Invalid data
487
// Validate the interval field: returns 0 when _interval lies in
// [MIN_INTERVAL, MAX_INTERVAL], else 1. The line that parses s into _interval
// is not visible in this excerpt.
UpdateEntry::ValidInterval(char *s, char *e)
490
// Note: string 's' is null terminated.
493
if ((_interval >= MIN_INTERVAL) && (_interval <= MAX_INTERVAL)) {
494
return 0; // Valid data
496
return 1; // Invalid data
502
// Parse the recursion-depth field into _max_depth.
// Returns 0 when the value lies in [MIN_DEPTH, MAX_DEPTH], else 1.
// The e parameter is not used on any visible line.
UpdateEntry::ValidDepth(char *s, char *e)
505
// Note: string 's' is null terminated.
507
// NOTE(review): atoi() reports no conversion errors, so non-numeric text reads
// as 0; whether that passes depends on MIN_DEPTH.
_max_depth = atoi(s);
508
if ((_max_depth >= MIN_DEPTH) && (_max_depth <= MAX_DEPTH)) {
509
return 0; // Valid data
511
return 1; // Invalid data
517
// Store term_url as the terminal-URL flag of this entry.
UpdateEntry::SetTerminalStatus(int term_url)
519
_terminal_url = term_url;
523
// Return the flag stored by SetTerminalStatus().
UpdateEntry::TerminalURL()
525
return _terminal_url;
530
// Compute _start_time, the time in seconds at which this entry becomes due.
// One visible path derives it from the local wall-clock hour and _offset_hour;
// another simply advances it by _interval. The conditions that choose between
// the paths are not visible in this excerpt.
UpdateEntry::ComputeScheduleTime()
534
time_t start_time_delta;
544
// Current time in seconds, broken down into local time.
ht = ink_get_based_hrtime();
545
cur_time = ht / HRTIME_SECOND;
546
ink_localtime_r(&cur_time, &cur_tm);
550
// Already inside the offset hour: the delta is a full day.
if (cur_tm.tm_hour == _offset_hour) {
551
start_time_delta = 24 * SECONDS_PER_HOUR;
553
// Offset hour is later today.
} else if (cur_tm.tm_hour < _offset_hour) {
554
start_time_delta = (_offset_hour - cur_tm.tm_hour) * SECONDS_PER_HOUR;
557
// Remaining hours of today plus the offset hour of the next day.
start_time_delta = ((24 - cur_tm.tm_hour) + _offset_hour) * SECONDS_PER_HOUR;
559
// Remove the minutes and seconds already elapsed in the current hour so the
// result lands on an hour boundary.
start_time_delta -= ((cur_tm.tm_min * SECONDS_PER_MIN) + cur_tm.tm_sec);
560
_start_time = cur_time + start_time_delta;
563
// Compute next start time
564
_start_time += _interval;
569
// Test whether the entry is due: compares cur_time against _start_time. The
// statements that run when it is due, and the return values, are not visible
// in this excerpt.
UpdateEntry::ScheduleNow(time_t cur_time)
571
if (cur_time >= _start_time) {
579
/////////////////////////////////////////////////////////////////////////////
580
// Class UpdateConfigList
581
// Container for UpdateEntry objects
582
/////////////////////////////////////////////////////////////////////////////
583
// Constructor: both queue element counters start at 0 and no hash table is
// allocated yet (HashAdd() allocates it on first use).
UpdateConfigList::UpdateConfigList():_entry_q_elements(0), _pending_q_elements(0), _hash_table(0)
587
// Destructor; its body is not visible in this excerpt.
UpdateConfigList::~UpdateConfigList()
596
// Add entry e to the list; the body is not visible in this excerpt.
UpdateConfigList::Add(UpdateEntry * e)
603
// Add e to the list only if no entry with the same URL MD5 is present.
// Returns 1 when a duplicate is detected and 0 when the entry was added.
UpdateConfigList::HashAdd(UpdateEntry * e)
605
// Bucket index comes from the folded 64-bit MD5 of the URL.
uint64_t folded64 = e->_url_md5.fold();
606
ink_assert(folded64);
607
int32_t index = folded64 % HASH_TABLE_SIZE;
610
// One time initialization
612
_hash_table = NEW(new UpdateEntry *[HASH_TABLE_SIZE]);
613
memset((char *) _hash_table, 0, (sizeof(UpdateEntry *) * HASH_TABLE_SIZE));
615
// Add to hash table only if unique
617
// Walk the bucket chain; last_link tracks the link slot to write into.
UpdateEntry *he = _hash_table[index];
618
UpdateEntry **last_link = &_hash_table[index];
621
if (e->_url_md5 == he->_url_md5) {
622
return 1; // duplicate detected
624
last_link = &he->_hash_link;
629
// Entry is unique, add to hash list
631
e->_hash_link = *last_link;
634
// Add to entry queue
638
return 0; // Entry added
642
// Take the next entry off the entry queue; the remaining statements are not
// visible in this excerpt.
UpdateConfigList::Remove()
644
UpdateEntry *e = _entry_q.dequeue();
652
// Append e to the pending queue and count it.
UpdateConfigList::AddPending(UpdateEntry * e)
654
_pending_q_elements++;
655
_pending_q.enqueue(e);
659
// Take the next entry off the pending queue and decrement the pending count.
// Whether the decrement is conditional on the dequeue result is not visible in
// this excerpt.
UpdateConfigList::RemovePending()
661
UpdateEntry *e = _pending_q.dequeue();
663
_pending_q_elements--;
668
/////////////////////////////////////////////////////////////////////////////
669
// Class UpdateManager
670
// External interface to Update subsystem
671
/////////////////////////////////////////////////////////////////////////////
673
// Constructor: the configuration manager and scheduler pointers start out null
// and are created later by start().
UpdateManager::UpdateManager():_CM(0), _SCH(0)
677
// Destructor; its body is not visible in this excerpt.
UpdateManager::~UpdateManager()
682
// Bring up the update subsystem: cache the lengths of the request literals,
// initialize the scheme tables, then create the configuration manager and a
// scheduler bound to it. Lines between the two allocations are not visible in
// this excerpt.
UpdateManager::start()
684
// Initialize fundamental constants
686
len_GET_METHOD = strlen(GET_METHOD);
687
len_HTTP_VERSION = strlen(HTTP_VERSION);
688
len_REQUEST_TERMINATOR = strlen(REQUEST_TERMINATOR);
689
len_TERMINATOR = strlen(TERMINATOR);
690
init_proto_schemes();
691
init_supported_proto_schemes();
693
_CM = NEW(new UpdateConfigManager);
696
_SCH = NEW(new UpdateScheduler(_CM));
702
// The global UpdateManager instance.
UpdateManager updateManager;
704
// Member-function pointer type used when installing the UpdateConfigManager
// continuation handler with SET_HANDLER.
typedef int (UpdateConfigManager::*UpdateConfigManagerContHandler) (int, void *);
705
/////////////////////////////////////////////////////////////////////////////
706
// Class UpdateConfigManager
707
// Handle Update subsystem global configuration and URL list updates
708
/////////////////////////////////////////////////////////////////////////////
709
// Constructor: creates the continuation with a new mutex, clears the periodic
// event and file name, and installs ProcessUpdate() as the event handler.
UpdateConfigManager::UpdateConfigManager()
710
:Continuation(new_ProxyMutex()), _periodic_event(0), _filename(0)
712
SET_HANDLER((UpdateConfigManagerContHandler)
713
& UpdateConfigManager::ProcessUpdate);
716
// Destructor; its body is not visible in this excerpt.
UpdateConfigManager::~UpdateConfigManager()
720
// Bound to proxy.config.http.server_port by init() below.
static RecInt local_http_server_port = 0;
723
// One-time setup of the configuration manager: allocate the stat block, bind
// the global configuration values, register and clear the update statistics,
// snapshot the parameters, register the update.config callback, load the URL
// list once, and start a 10 second periodic check for parameter changes.
UpdateConfigManager::init()
725
update_rsb = RecAllocateRawStatBlock((int) update_stat_count);
727
UpdateEstablishStaticConfigInteger(local_http_server_port, "proxy.config.http.server_port");
729
// _CP_actual holds the values bound to the records below.
_CP_actual = NEW(new UpdateConfigParams);
731
// Setup update handlers for each global configuration parameter
733
UpdateEstablishStaticConfigInteger(_CP_actual->_enabled, "proxy.config.update.enabled");
735
UpdateEstablishStaticConfigInteger(_CP_actual->_immediate_update, "proxy.config.update.force");
737
UpdateEstablishStaticConfigInteger(_CP_actual->_retry_count, "proxy.config.update.retry_count");
739
UpdateEstablishStaticConfigInteger(_CP_actual->_retry_interval, "proxy.config.update.retry_interval");
741
UpdateEstablishStaticConfigInteger(_CP_actual->_concurrent_updates, "proxy.config.update.concurrent_updates");
743
UpdateEstablishStaticConfigInteger(_CP_actual->_max_update_state_machines,
744
"proxy.config.update.max_update_state_machines");
746
UpdateEstablishStaticConfigInteger(_CP_actual->_memory_use_in_mb, "proxy.config.update.memory_use_mb");
748
// Register Scheduled Update stats
750
// Each stat is registered as a non-persistent process integer and then cleared.
RecRegisterRawStat(update_rsb, RECT_PROCESS,
751
"proxy.process.update.successes",
752
RECD_INT, RECP_NON_PERSISTENT, (int) update_successes_stat, RecRawStatSyncCount);
753
UPDATE_CLEAR_DYN_STAT(update_successes_stat);
755
RecRegisterRawStat(update_rsb, RECT_PROCESS,
756
"proxy.process.update.no_actions",
757
RECD_INT, RECP_NON_PERSISTENT, (int) update_no_actions_stat, RecRawStatSyncCount);
758
UPDATE_CLEAR_DYN_STAT(update_no_actions_stat);
760
RecRegisterRawStat(update_rsb, RECT_PROCESS,
761
"proxy.process.update.fails",
762
RECD_INT, RECP_NON_PERSISTENT, (int) update_fails_stat, RecRawStatSyncCount);
763
UPDATE_CLEAR_DYN_STAT(update_fails_stat);
765
RecRegisterRawStat(update_rsb, RECT_PROCESS,
766
"proxy.process.update.unknown_status",
767
RECD_INT, RECP_NON_PERSISTENT, (int) update_unknown_status_stat, RecRawStatSyncCount);
768
UPDATE_CLEAR_DYN_STAT(update_unknown_status_stat);
770
RecRegisterRawStat(update_rsb, RECT_PROCESS,
771
"proxy.process.update.state_machines",
772
RECD_INT, RECP_NON_PERSISTENT, (int) update_state_machines_stat, RecRawStatSyncCount);
773
UPDATE_CLEAR_DYN_STAT(update_state_machines_stat);
776
// Log the initial parameter values; the opening of this call and the rest of
// its format string are not visible in this excerpt.
"Update params: enable %d force %d rcnt %d rint %d updates %d "
778
_CP_actual->_enabled, _CP_actual->_immediate_update,
779
_CP_actual->_retry_count, _CP_actual->_retry_interval,
780
_CP_actual->_concurrent_updates, _CP_actual->_max_update_state_machines, _CP_actual->_memory_use_in_mb);
782
// Make working and actual global config copies equal
784
_CP = NEW(new UpdateConfigParams(*_CP_actual));
786
// Setup "update.config" update handler
788
SetFileName((char *) "update.config");
789
REC_RegisterConfigUpdateFunc("proxy.config.update.update_configuration", URL_list_update_callout, (void *) this);
791
// Simulate configuration update to sync working and current databases
793
handleEvent(EVENT_IMMEDIATE, (Event *) NULL);
795
// Setup periodic to detect global config updates
797
_periodic_event = eventProcessor.schedule_every(this, HRTIME_SECONDS(10));
803
// Hand the current configuration parameters to the caller through P without
// blocking: the manager mutex is only try-locked, and 0 is returned when the
// lock is missed. The success path is not visible in this excerpt.
UpdateConfigManager::GetConfigParams(Ptr<UpdateConfigParams> *P)
805
MUTEX_TRY_LOCK(lock, mutex, this_ethread());
807
return 0; // Try again later
815
// Hand the current URL list to the caller through L without blocking: the
// manager mutex is only try-locked, and 0 is returned when the lock is missed.
// The success path is not visible in this excerpt.
UpdateConfigManager::GetConfigList(Ptr<UpdateConfigList> *L)
817
MUTEX_TRY_LOCK(lock, mutex, this_ethread());
819
return 0; // Try again later
827
// Records callback registered in init() for the update configuration record.
// cookie is the UpdateConfigManager passed at registration; data carries the
// new file name as a string. The manager is rescheduled rather than processed
// inline.
UpdateConfigManager::URL_list_update_callout(const char *name, RecDataT data_type, RecData data, void *cookie)
830
NOWARN_UNUSED(data_type);
831
UpdateConfigManager *cm = (UpdateConfigManager *) cookie;
832
cm->SetFileName((char *) data.rec_string);
835
// URL update may block in file i/o.
836
// Reschedule on ET_CACHE thread.
838
eventProcessor.schedule_imm(cm, ET_CACHE);
844
// Continuation handler of the configuration manager.
// EVENT_IMMEDIATE rebuilds the URL list from the config file; EVENT_INTERVAL
// takes a fresh copy of the bound parameters to check for changes; any other
// event is only logged. Return statements and the list/parameter replacement
// steps are not visible in this excerpt.
UpdateConfigManager::ProcessUpdate(int event, Event * e)
846
if (event == EVENT_IMMEDIATE) {
847
////////////////////////////////////////////////////////////////////
848
// EVENT_IMMEDIATE -- URL list update
849
////////////////////////////////////////////////////////////////////
851
UpdateConfigList *l = NULL;
853
l = BuildUpdateList();
860
if (event == EVENT_INTERVAL) {
861
////////////////////////////////////////////////////////////////////
862
// EVENT_INTERVAL -- Global configuration update check
863
////////////////////////////////////////////////////////////////////
865
UpdateConfigParams *p = NEW(new UpdateConfigParams(*_CP_actual));
869
// NOTE(review): the format string has six conversions but seven arguments
// follow, so the value printed after the mem label is
// _max_update_state_machines and _memory_use_in_mb is never printed.
Debug("update", "enable %d force %d rcnt %d rint %d updates %d mem %d",
870
p->_enabled, p->_immediate_update, p->_retry_count,
871
p->_retry_interval, p->_concurrent_updates, p->_max_update_state_machines, p->_memory_use_in_mb);
877
// Unknown event, ignore it.
879
// NOTE(review): e is a pointer but is printed with an unsigned-int hex
// conversion; a pointer conversion would be the matching specifier.
Debug("update", "ProcessUpdate: Unknown event %d 0x%x", event, e);
884
// Open the update configuration file under system_config_directory and parse
// it. Returns the list produced by ParseConfigFile(), or NULL when the file
// cannot be opened.
UpdateConfigManager::BuildUpdateList()
886
// Build pathname to "update.config" and open file
888
char ConfigFilePath[PATH_NAME_MAX];
890
// Each append is bounded by the space left in the buffer.
ink_strncpy(ConfigFilePath, system_config_directory, sizeof(ConfigFilePath));
891
strncat(ConfigFilePath, "/", sizeof(ConfigFilePath) - strlen(ConfigFilePath) - 1);
892
strncat(ConfigFilePath, _filename, sizeof(ConfigFilePath) - strlen(ConfigFilePath) - 1);
894
return (UpdateConfigList *) NULL;
898
// NOTE(review): fd is declared twice below; presumably the two declarations
// sit in alternative preprocessor branches that are not visible here.
// O_BINARY to avoid translation of CR-LF
899
int fd = open(ConfigFilePath, O_RDONLY | O_BINARY);
901
int fd = open(ConfigFilePath, O_RDONLY);
904
Warning("read update.config, open failed");
905
SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, open failed");
906
return (UpdateConfigList *) NULL;
908
// NOTE(review): no close of fd is visible in this function; presumably
// ParseConfigFile() takes ownership -- confirm.
return ParseConfigFile(fd);
912
// Read one logical config line from fd into buf, reading further physical
// lines until the expected number of field delimiters has been seen.
// fd: open file descriptor; bufsize: capacity of buf;
// field_delimiters: number of delimiter characters a complete line contains;
// delimiter: the delimiter character.
// ParseConfigFile() keeps reading while the return value is greater than 0;
// the return statements themselves are not visible in this excerpt.
UpdateConfigManager::GetDataLine(int fd, int bufsize, char *buf, int field_delimiters, int delimiter)
915
int linesize = bufsize;
919
while ((rlen = ink_file_fd_readline(fd, linesize, line)) > 0) {
920
////////////////////////////////////////////////////////////////////
922
// 1) ink_file_fd_readline() null terminates returned buffer
923
// 2) Input processing guarantees that the item delimiter '\'
924
// does not exist in any data field.
925
////////////////////////////////////////////////////////////////////
927
// Just return data if we have a comment line
929
if (!bytes_read && *line == '#') {
934
// Determine if we have a complete line.
937
int delimiters_found = 0;
940
if (*p == delimiter) {
945
if (delimiters_found == field_delimiters) {
946
// We have a complete line.
949
} else if ((delimiters_found == (field_delimiters - 1))
950
&& (*(p - 1) == '\n')) {
951
// End of line not delimited.
952
// Fix it and consider it a complete line.
965
// Parse the update configuration file open on descriptor f into a new
// UpdateConfigList. Each data line is split into five backslash-terminated
// fields (URL, request headers, offset hour, interval, recursion depth); every
// field is validated and the first invalid one aborts processing with a
// warning. Returns NULL on the abort path; the success return is not visible in
// this excerpt.
UpdateConfigManager::ParseConfigFile(int f)
968
"update.config" line syntax:
969
<URL>\<Request Headers>\<Offset Hour>\<Interval>\<Recursion depth>\
973
{ F_URL, F_HEADERS, F_HOUR, F_INTERVAL, F_DEPTH, F_ITEMS };
974
// Start and end pointer of each field within the current line.
char *p_start[F_ITEMS];
975
char *p_end[F_ITEMS];
977
char line[MAX_LINE_LENGTH];
983
UpdateEntry *e = NULL;
984
UpdateConfigList *ul = NEW(new UpdateConfigList);
986
while (GetDataLine(f, sizeof(line) - 1, line, F_ITEMS, '\\') > 0) {
996
for (i = 0; i < F_ITEMS; ++i) {
998
// NOTE(review): the strchr() result is written through on the very next line
// with no null check in between, so this relies on GetDataLine() only
// returning lines that contain F_ITEMS delimiters -- confirm.
p_end[i] = strchr(p, '\\');
999
*p_end[i] = 0; // Null terminate string
1004
Warning("read update.config, invalid syntax, line %d", ln);
1005
SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid syntax");
1011
goto abort_processing;
1013
// Validate data fields
1015
e = NEW(new UpdateEntry);
1017
////////////////////////////////////
1019
////////////////////////////////////
1020
// Each Valid*() call returns nonzero for bad input.
if (e->ValidURL(p_start[F_URL], p_end[F_URL])) {
1021
Warning("read update.config, invalid URL field, line %d", ln);
1022
SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid URL field");
1023
goto abort_processing;
1025
////////////////////////////////////
1027
////////////////////////////////////
1028
if (e->ValidHeaders(p_start[F_HEADERS], p_end[F_HEADERS])) {
1029
Warning("read update.config, invalid headers field, line %d", ln);
1030
SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid headers field");
1031
goto abort_processing;
1033
/////////////////////////////////////////////////////////////
1034
// Convert request (URL+Headers) into HTTPHdr format.
1035
/////////////////////////////////////////////////////////////
1036
if (e->BuildHttpRequest()) {
1037
Warning("read update.config, header processing error, line %d", ln);
1038
SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, header processing error");
1039
goto abort_processing;
1041
////////////////////////////////////
1043
////////////////////////////////////
1044
if (e->ValidHour(p_start[F_HOUR], p_end[F_HOUR])) {
1045
Warning("read update.config, invalid hour field, line %d", ln);
1046
SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid hour field");
1047
goto abort_processing;
1049
////////////////////////////////////
1050
// Validate interval
1051
////////////////////////////////////
1052
if (e->ValidInterval(p_start[F_INTERVAL], p_end[F_INTERVAL])) {
1053
Warning("read update.config, invalid interval field, line %d", ln);
1054
SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid interval field");
1055
goto abort_processing;
1057
////////////////////////////////////
1058
// Validate recursion depth
1059
////////////////////////////////////
1060
if (e->ValidDepth(p_start[F_DEPTH], p_end[F_DEPTH])) {
1061
Warning("read update.config, invalid depth field, line %d", ln);
1062
SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid depth field");
1063
goto abort_processing;
1065
// Valid entry, add to list
1069
"[%d] [%s] [%s] nhdrs %d hour %d interval %d depth %d",
1070
e->_id, e->_url, e->_request_headers, e->_num_request_headers, e->_offset_hour, e->_interval, e->_max_depth);
1075
// All file entries are valid.
1088
return (UpdateConfigList *) NULL;
1091
/////////////////////////////////////////////////////////////////////////////
1092
// Class UpdateScheduler
1093
// Handle scheduling of UpdateEntry objects
1094
/////////////////////////////////////////////////////////////////////////////
1095
// Constructor: creates the continuation with a new mutex, remembers the
// configuration manager c, zeroes the counters and parent/base pointers, and
// installs ScheduleEvent() as the event handler.
UpdateScheduler::UpdateScheduler(UpdateConfigManager * c)
1096
:Continuation(new_ProxyMutex()), _periodic_event(0),
1097
_recursive_update(0), _CM(c), _schedule_event_callbacks(0), _update_state_machines(0), _base_EN(0), _parent_US(0)
1099
SET_HANDLER((UpdateSchedulerContHandler)
1100
& UpdateScheduler::ScheduleEvent);
1103
// Destructor; its body is not visible in this excerpt.
UpdateScheduler::~UpdateScheduler()
1108
// Start a top-level (non-recursive) scheduler: it is called back every 10
// seconds.
UpdateScheduler::Init()
1110
_recursive_update = 0;
1111
_periodic_event = eventProcessor.schedule_every(this, HRTIME_SECONDS(10));
1116
// Start a child scheduler for a recursive update. It adopts the indirect URL
// list of ue as its own list, drains that list (per the comment below, onto
// the pending queue) and then runs on a 10 second periodic event.
// us: the parent scheduler; ue: the base entry whose indirect list is
// processed; p: configuration parameters. How us and p are stored is not
// visible in this excerpt.
UpdateScheduler::Init(UpdateScheduler * us, UpdateEntry * ue, Ptr<UpdateConfigParams> p)
1118
// The caller must supply a non-empty indirect list.
ink_assert(ue->_indirect_list->Entries());
1120
_recursive_update = 1;
1122
_CL = ue->_indirect_list;
1126
// Schedule entries for update by moving entries to pending queue.
1129
while ((e = _CL->Remove())) {
1132
_periodic_event = eventProcessor.schedule_every(this, HRTIME_SECONDS(10));
1137
// Continuation handler of the scheduler.
// EVENT_IMMEDIATE is the completion callback from an update state machine, with
// e pointing at the finished UpdateEntry: statistics are updated, a child
// scheduler may be started for a recursive update, and further pending work is
// started. EVENT_INTERVAL is the periodic tick that fetches the current
// parameters and list and starts updates for entries that are due.
// Many short lines (braces, breaks, returns) are not visible in this excerpt.
UpdateScheduler::ScheduleEvent(int event, void *e)
1139
UpdateEntry *ue = NULL;
1140
// Cleared when a child scheduler takes over a recursive update.
int update_complete = 1;
1142
if (event == EVENT_IMMEDIATE) {
1143
//////////////////////////////////////////////////////////////////////
1144
// Callback on update completion from Update State Machine
1145
//////////////////////////////////////////////////////////////////////
1146
ue = (UpdateEntry *) e;
1148
switch (ue->_update_event_status) {
1149
case UPDATE_EVENT_SUCCESS:
1151
Debug("update", "%s update complete, UPDATE_EVENT_SUCCESS id: %d", (_recursive_update ? "(R)" : ""), ue->_id);
1152
UPDATE_INCREMENT_DYN_STAT(update_successes_stat);
1154
if ((ue->_max_depth > 0) && ue->_indirect_list) {
1155
if (ue->_indirect_list->Entries()) {
1156
//////////////////////////////////////////////////////////
1157
// Recursive update case.
1158
// At this point, we have a list of URLs which was
1159
// recursively derived from the base URL.
1160
// Instantiate UpdateScheduler to process this URL list.
1161
//////////////////////////////////////////////////////////
1162
Debug("update", "Starting UpdateScheduler for id: %d [%s]", ue->_id, ue->_url);
1163
// NOTE(review): no arguments are passed here although the constructor defined
// in this file takes an UpdateConfigManager pointer; presumably the
// declaration supplies a default -- confirm.
UpdateScheduler *us = NEW(new UpdateScheduler());
1164
us->Init(this, ue, _CP);
1165
update_complete = 0;
1168
ue->_indirect_list = NULL;
1173
case UPDATE_EVENT_SUCCESS_NOACTION:
1176
"%s update complete, UPDATE_EVENT_SUCCESS_NOACTION id: %d", (_recursive_update ? "(R)" : ""), ue->_id);
1177
UPDATE_INCREMENT_DYN_STAT(update_no_actions_stat);
1180
case UPDATE_EVENT_FAILED:
1182
Debug("update", "%s update complete, UPDATE_EVENT_FAILED id: %d", (_recursive_update ? "(R)" : ""), ue->_id);
1183
UPDATE_INCREMENT_DYN_STAT(update_fails_stat);
1189
"%s update complete, unknown status %d, id: %d",
1190
(_recursive_update ? "(R)" : ""), ue->_update_event_status, ue->_id);
1191
UPDATE_INCREMENT_DYN_STAT(update_unknown_status_stat);
1196
if (update_complete) {
1197
if (!_recursive_update) {
1198
/////////////////////////////////////////////////////////
1199
// Recompute expire time and place entry back on list
1200
/////////////////////////////////////////////////////////
1202
ue->ComputeScheduleTime();
1203
_CL->Add(ue); // Place back on list
1208
// One fewer active state machine, both locally and in the shared stat.
--_update_state_machines;
1209
UPDATE_DECREMENT_DYN_STAT(update_state_machines_stat);
1211
////////////////////////////////////////////////////////////////
1212
// Start another update SM if scheduling is allowed
1213
// and an entry exists on the pending list.
1214
////////////////////////////////////////////////////////////////
1216
if (Schedule() < 0) {
1217
// Scheduling allowed, but nothing to schedule
1218
if (_update_state_machines == 0) {
1219
//////////////////////////////////////////////////////////////
1220
// No more active updates, deallocate config/entry structures
1221
//////////////////////////////////////////////////////////////
1226
if (_recursive_update) {
1228
// Recursive list update is now complete.
1229
// Callback parent UpdateScheduler.
1231
_periodic_event->cancel();
1232
_base_EN->_indirect_list = NULL;
1233
_base_EN->_update_event_status = UPDATE_EVENT_SUCCESS;
1235
// From here on events go to ChildExitEventHandler(), which notifies the parent.
SET_HANDLER((UpdateSchedulerContHandler)
1236
& UpdateScheduler::ChildExitEventHandler);
1237
handleEvent(EVENT_IMMEDIATE, 0);
1243
//////////////////////////////////////
1244
// Periodic event callback
1245
//////////////////////////////////////
1246
if (event == EVENT_INTERVAL) {
1247
++_schedule_event_callbacks;
1249
// Unknown event, ignore it.
1250
Debug("update", "UpdateScheduler::ScheduleEvent unknown event %d", event);
1255
// No updates pending, attempt to schedule any expired updates
1257
if (!_CM->GetConfigParams(&_CP)) {
1258
return EVENT_CONT; // Missed lock, try at next event
1260
if (!_CM->GetConfigList(&_CL)) {
1262
return EVENT_CONT; // Missed lock, try at next event
1264
// Cannot do anything unless we have valid params and list
1269
return EVENT_CONT; // try at next event
1271
// Determine if the subsystem is enabled
1273
if (!_CP->IsEnabled()) {
1276
return EVENT_CONT; // try at next event
1280
///////////////////////////////////////////////////////////////////
1281
// Updates pending from last schedule event, attempt to restart
1282
// additional update SM(s).
1283
///////////////////////////////////////////////////////////////////
1288
ink_release_assert(!_update_state_machines);
1290
///////////////////////////////////////////////////////
1291
// Scan entry list and schedule expired updates
1292
///////////////////////////////////////////////////////
1294
ink_hrtime ht = ink_get_based_hrtime();
1295
time_t cur_time = ht / HRTIME_SECOND;
1296
// Entries that are not due are parked here and put back after the scan.
Queue<UpdateEntry> no_action_q;
1299
while ((ue = _CL->Remove())) {
1300
time_expired = ue->ScheduleNow(cur_time);
1301
// An entry is started when it is due or when immediate update is forced.
if (time_expired || _CP->ImmediateUpdate()) {
1302
if (Schedule(ue) > 0) {
1303
Debug("update", "%s and started id: %d", time_expired ? "expired" : "force expire", ue->_id);
1305
Debug("update", "%s with deferred start id: %d", time_expired ? "expired" : "force expire", ue->_id);
1309
no_action_q.enqueue(ue);
1313
// Place no_action_q elements back on list
1315
while ((ue = no_action_q.dequeue())) {
1319
if (!_update_state_machines && !_CL->_pending_q.head) {
1320
// Nothing active or pending.
1321
// Drop references to config/param structures.
1330
// Handler used by a child scheduler once its recursive list is finished: it
// reports the base entry back to the parent scheduler. The parent mutex is
// only try-locked; on a miss the handler reschedules itself 10 ms later. Any
// event other than EVENT_IMMEDIATE or EVENT_INTERVAL is a fatal assertion.
UpdateScheduler::ChildExitEventHandler(int event, Event * e)
1334
case EVENT_IMMEDIATE:
1335
case EVENT_INTERVAL:
1337
MUTEX_TRY_LOCK(lock, _parent_US->mutex, this_ethread());
1339
Debug("update", "Child UpdateScheduler exit id: %d", _base_EN->_id);
1340
// Delivered to the parent as an update-completion callback for _base_EN.
_parent_US->handleEvent(EVENT_IMMEDIATE, _base_EN);
1344
// Lock miss, try again later.
1345
eventProcessor.schedule_in(this, HRTIME_MSECONDS(10));
1351
ink_release_assert(!"UpdateScheduler::ChildExitEventHandler invalid event");
1359
// Try to start an update state machine.
// e: the entry to start, or null to take the next entry from the pending queue.
// When scheduling is not allowed, the entry is placed on the pending queue
// instead (the guard around that step is not visible in this excerpt).
UpdateScheduler::Schedule(UpdateEntry * e)
1361
// Return > 0, UpdateEntry scheduled
1362
// Return == 0, Scheduling not allowed
1363
// Return < 0, Scheduling allowed, but nothing to schedule
1366
UpdateEntry *ue = e;
1369
int max_concurrent_updates;
1371
// The active state machine total is read from the shared stat block rather
// than from this scheduler's own _update_state_machines counter.
UPDATE_READ_DYN_STAT(update_state_machines_stat, count, sum);
1372
// The effective limit is the smaller of the two configured limits.
if (_CP->ConcurrentUpdates() < _CP->MaxUpdateSM()) {
1373
max_concurrent_updates = _CP->ConcurrentUpdates();
1375
max_concurrent_updates = _CP->MaxUpdateSM();
1377
allow_schedule = (sum < max_concurrent_updates);
1379
if (allow_schedule) {
1380
ue = ue ? ue : _CL->RemovePending();
1382
++_update_state_machines;
1383
UPDATE_INCREMENT_DYN_STAT(update_state_machines_stat);
1384
usm = NEW(new UpdateSM(this, _CP, ue));
1387
Debug("update", "%s %s start update id: %d [%s]",
1388
(_recursive_update ? "(R)" : ""), (e ? "directed" : "speculative"), ue->_id, ue->_url);
1390
return 1; // UpdateEntry scheduled
1392
return -1; // Scheduling allowed but nothing to schedule
1397
_CL->AddPending(ue);
1399
return 0; // Scheduling not allowed
1403
/////////////////////////////////////////////////////////////////////////////
1405
// State machine which handles object update action
1406
/////////////////////////////////////////////////////////////////////////////
1407
// Constructor: creates the continuation with a new mutex, starts in state
// USM_INIT with no return status and no retries, and installs HandleSMEvent()
// as the event handler. How us, p and e are stored is not visible in this
// excerpt.
UpdateSM::UpdateSM(UpdateScheduler * us, Ptr<UpdateConfigParams> p, UpdateEntry * e)
1408
:Continuation(new_ProxyMutex()), _state(USM_INIT), _return_status(0), _retries(0)
1410
SET_HANDLER((UpdateSMContHandler) & UpdateSM::HandleSMEvent);
1416
// Destructor: releases the reference to the configuration parameters.
UpdateSM::~UpdateSM()
1418
_CP = NULL; // drop reference
1424
// NOTE(review): several short lines are missing before this statement; it
// presumably belongs to a separate definition that schedules the state machine
// to run on an ET_CACHE thread -- confirm.
eventProcessor.schedule_imm(this, ET_CACHE);
1428
// Event handler driving one update through its states: the initial cluster
// ownership decision, dispatch to the scheme handler (USM_PROCESS_URL),
// completion handling (USM_PROCESS_URL_COMPLETION), then retry or report back
// to the scheduler. Many short lines (case labels, braces, breaks, returns) are
// not visible in this excerpt.
UpdateSM::HandleSMEvent(int event, Event * e)
1435
////////////////////////////////////////////////////////////////////
1436
// Cluster considerations.
1437
// For non-recursive URL(s), only process it if the cluster
1438
// hash returns this node. Recursive URL(s) are processed by
1439
// all nodes in the cluster.
1440
////////////////////////////////////////////////////////////////////
1441
if (_EN->_max_depth > 0) {
1442
// Recursive URL(s) are processed by all nodes.
1443
_state = USM_PROCESS_URL;
1448
// NOTE(review): two generate_key calls appear back to back; presumably they
// sit in alternative preprocessor branches that are not visible here.
Cache::generate_key(&url_md5, &_EN->_URLhandle, (_EN->_num_request_headers ? _EN->_http_hdr : NULL));
1449
Cache::generate_key(&url_md5, &_EN->_URLhandle, _EN->_http_hdr);
1450
ClusterMachine *m = cluster_machine_at_depth(cache_hash(url_md5));
1452
// URL hashed to remote node, do nothing.
1454
_EN->_update_event_status = UPDATE_EVENT_SUCCESS_NOACTION;
1457
// URL hashed to local node, start processing.
1458
_state = USM_PROCESS_URL;
1462
case USM_PROCESS_URL:
1464
///////////////////////////////////
1465
// Dispatch to target handler
1466
///////////////////////////////////
1470
_state = USM_PROCESS_URL_COMPLETION;
1471
scheme = _EN->_URLhandle.scheme_get(&scheme_len);
1472
for (n = 0; n < N_SCHEMES; ++n) {
1473
// Pointer comparison against the scheme constant stored in the table.
if (scheme == *scheme_dispatch_table[n].scheme) {
1474
_EN->_scheme_index = n;
1475
// A nonzero result from the scheme handler means initiation failed.
if ((*scheme_dispatch_table[n].func) (this)) {
1476
break; // Error in initiation
1481
// Error in initiation or bad scheme.
1484
_EN->_update_event_status = UPDATE_EVENT_FAILED;
1487
case USM_PROCESS_URL_COMPLETION:
1489
///////////////////////////////////
1490
// Await URL update completion
1491
///////////////////////////////////
1493
// The received event code is recorded as the update status before the
// post-processing handler for the chosen scheme runs.
_EN->_update_event_status = event;
1494
(*scheme_post_dispatch_table[_EN->_scheme_index].func) (this);
1499
/////////////////////////////////////////////
1500
// Operation complete
1501
/////////////////////////////////////////////
1502
// Failed updates are retried after RetryInterval() seconds until
// RetryCount() is reached.
if ((_return_status == UPDATE_EVENT_FAILED)
1503
&& (_retries < _CP->RetryCount())) {
1508
_state = USM_PROCESS_URL;
1509
eventProcessor.schedule_in(this, HRTIME_SECONDS(_CP->RetryInterval()), ET_CACHE);
1513
// Report completion to the scheduler under its mutex (try-lock only).
MUTEX_TRY_LOCK(lock, _US->mutex, this_ethread());
1515
_US->handleEvent(EVENT_IMMEDIATE, (void *) _EN);
1520
// Missed lock, try again later
1521
eventProcessor.schedule_in(this, HRTIME_MSECONDS(10), ET_CACHE);
1532
// Table of (URL scheme, start handler) pairs with N_SCHEMES rows.
// UpdateSM::HandleSMEvent scans it to find the handler for an entry's scheme.
struct dispatch_entry scheme_dispatch_table[UpdateSM::N_SCHEMES] = {
1533
{&URL_SCHEME_HTTP, UpdateSM::http_scheme},
1534
{&URL_SCHEME_RTSP, UpdateSM::rtsp_scheme}
1537
// Table of (URL scheme, completion handler) pairs, in the same row order as
// scheme_dispatch_table. UpdateSM::HandleSMEvent indexes it with the
// _scheme_index recorded when the update was started.
struct dispatch_entry scheme_post_dispatch_table[UpdateSM::N_SCHEMES] = {
1538
{&URL_SCHEME_HTTP, UpdateSM::http_scheme_postproc},
1539
{&URL_SCHEME_RTSP, UpdateSM::rtsp_scheme_postproc}
1543
// Start handler for HTTP entries. When the entry has a positive _max_depth a
// RecursiveHttpGet is created to walk the document; otherwise a single
// HttpUpdateSM scheduled update is started. In both cases `sm` is passed as
// the continuation that receives the result.
// NOTE(review): the return statements are not visible in this excerpt; the
// caller treats a non-zero return as an initiation error.
UpdateSM::http_scheme(UpdateSM * sm)
1545
if (sm->_EN->_max_depth > 0) {
1546
////////////////////////////////////
1548
////////////////////////////////////
1549
Debug("update", "Start recursive HTTP GET id: %d [%s]", sm->_EN->_id, sm->_EN->_url);
1550
// Fresh list handed to the recursive GET together with the entry's URL,
// headers and depth limit.
sm->_EN->_indirect_list = NEW(new UpdateConfigList);
1551
RecursiveHttpGet *RHttpGet = NEW(new RecursiveHttpGet);
1553
RHttpGet->Init(sm, sm->_EN->_url, sm->_EN->_request_headers,
1554
&sm->_EN->_URLhandle, sm->_EN->_http_hdr,
1555
sm->_EN->_max_depth, sm->_EN->_indirect_list, &update_allowable_html_tags[0]);
1557
////////////////////////////////////
1559
////////////////////////////////////
1560
Debug("update", "Start HTTP GET id: %d [%s]", sm->_EN->_id, sm->_EN->_url);
1561
HttpUpdateSM *current_reader;
1563
current_reader = HttpUpdateSM::allocate();
1564
current_reader->init();
1565
// TODO: Do anything with the returned Action* ?
1566
current_reader->start_scheduled_update(sm, sm->_EN->_http_hdr);
1572
// Completion handler for HTTP entries: translates the event stored in
// _update_event_status into UPDATE_EVENT_SUCCESS / UPDATE_EVENT_FAILED and
// records it in both the entry and the state machine's _return_status.
// NOTE(review): break statements and any default case are not visible in
// this excerpt.
UpdateSM::http_scheme_postproc(UpdateSM * sm)
1574
// Map HttpUpdateSM return event code to internal status code
1576
switch (sm->_EN->_update_event_status) {
1577
case UPDATE_EVENT_SUCCESS:
1578
case UPDATE_EVENT_FAILED:
1579
// Returned only by RecursiveHttpGet
1580
// Already an internal status code; pass it through unchanged.
sm->_return_status = sm->_EN->_update_event_status;
1583
// Every non-error HttpUpdateSM outcome counts as success.
case HTTP_SCH_UPDATE_EVENT_WRITTEN:
1584
case HTTP_SCH_UPDATE_EVENT_UPDATED:
1585
case HTTP_SCH_UPDATE_EVENT_DELETED:
1586
case HTTP_SCH_UPDATE_EVENT_NOT_CACHED:
1587
case HTTP_SCH_UPDATE_EVENT_NO_ACTION:
1588
sm->_EN->_update_event_status = UPDATE_EVENT_SUCCESS;
1589
sm->_return_status = UPDATE_EVENT_SUCCESS;
1592
case HTTP_SCH_UPDATE_EVENT_ERROR:
1594
sm->_EN->_update_event_status = UPDATE_EVENT_FAILED;
1595
sm->_return_status = UPDATE_EVENT_FAILED;
1601
// Not used anywhere.
1604
// Start handler for RTSP entries: issues an INKMCOPreload call with `sm` and
// a progress continuation, then logs the start.
// NOTE(review): most of the INKMCOPreload argument list and the handling of
// `ret` are not visible in this excerpt -- confirm against the full source.
UpdateSM::rtsp_scheme(UpdateSM * sm)
1607
int ret = INKMCOPreload(sm, INKContCreate(rtsp_progress_cont, NULL),
1609
(5 * 30720) /* MAX_PRELOAD_BANDWIDTH */ ,
1612
Debug("update", "Start RTSP GET id: %d", sm->_EN->_id);
1622
// Completion handler for RTSP entries: VC_EVENT_READ_COMPLETE is mapped to
// UPDATE_EVENT_SUCCESS; the remaining assignments set UPDATE_EVENT_FAILED
// (presumably the else branch, whose keyword is not visible here).
UpdateSM::rtsp_scheme_postproc(UpdateSM * sm)
1624
// Map MCO return event code to internal status code
1626
if (sm->_EN->_update_event_status == VC_EVENT_READ_COMPLETE) {
1627
sm->_EN->_update_event_status = UPDATE_EVENT_SUCCESS;
1628
sm->_return_status = UPDATE_EVENT_SUCCESS;
1630
sm->_EN->_update_event_status = UPDATE_EVENT_FAILED;
1631
sm->_return_status = UPDATE_EVENT_FAILED;
1636
/////////////////////////////////////////////////////////////////////////////
1637
// Class RecursiveHttpGet
1638
// Generate URL list by recursively traversing non-terminal URL(s)
1639
// up to the specified depth.
1640
/////////////////////////////////////////////////////////////////////////////
1642
// NUL character whose address is passed as the default element when the
// parser allocates its DynArray<char> buffers (see MakeURL / PrependString).
HtmlParser::default_zero_char = '\0';
1644
// Default constructor: creates the continuation with its own ProxyMutex,
// zero-initializes the listed members and installs RecursiveHttpGetEvent as
// the event handler. Real setup happens in Init().
RecursiveHttpGet::RecursiveHttpGet()
1645
:Continuation(new_ProxyMutex()), _id(0), _caller_cont(0),
1646
_request_headers(0), _http_hdr(0), _recursion_depth(0), _OL(0), _group_link_head(0), _active_child_state_machines(0)
1648
SET_HANDLER((RecursiveHttpGetContHandler)
1649
& RecursiveHttpGet::RecursiveHttpGetEvent);
1652
// Destructor. NOTE(review): the body is not visible in this excerpt.
RecursiveHttpGet::~RecursiveHttpGet()
1658
// Start a recursive fetch of `url`.
//   cont              - continuation notified when this object finishes
//   url               - NUL-terminated URL to load (strlen is taken of it)
//   request_headers   - optional extra request headers (may be NULL)
//   url_data/http_hdr - parsed URL and header objects kept by pointer
//   recursion_depth   - remaining depth for following non-terminal links
//   L                 - shared UpdateConfigList
//   allowed_html_tags - tag/attribute table handed to the HTML parser
// An ObjectReloadCont is allocated to perform the actual load, with this
// object as its callback target, and the HTML parser is primed with `url`.
// NOTE(review): the use of parameter L is not visible in this excerpt.
RecursiveHttpGet::Init(Continuation * cont, char *url, char *request_headers,
1659
URL * url_data, HTTPHdr * http_hdr, int recursion_depth,
1660
Ptr<UpdateConfigList> L, struct html_tag *allowed_html_tags)
1662
/////////////////////////////////////////////////////////////////////////
1663
// Note: URL and request header data pointers are assumed to be
1664
// valid during the life of this class.
1665
/////////////////////////////////////////////////////////////////////////
1666
_id = ink_atomic_increment(&global_id, 1);
1667
_caller_cont = cont;
1668
_request_headers = request_headers;
1669
_url_data = url_data;
1670
_http_hdr = http_hdr;
1671
_recursion_depth = recursion_depth;
1673
_OL = ObjectReloadContAllocator.alloc();
1674
// Header length is 0 when no request headers were supplied.
_OL->Init(this, url, strlen(url), _request_headers, (_request_headers ? strlen(_request_headers) : 0), 1, 1);
1676
html_parser.Init(url, allowed_html_tags);
1678
Debug("update", "Start recursive read rid: %d [%s]", _id, html_parser._url);
1682
// Main event handler. On read events it feeds the received data to the HTML
// parser and turns each extracted URL into an UpdateEntry; on
// UPDATE_EVENT_SUCCESS / UPDATE_EVENT_FAILED it accounts for a finished
// child. Afterwards it starts child RecursiveHttpGet objects for collected
// non-terminal URLs and, once everything has completed, switches to
// ExitEventHandler.
// NOTE(review): many short lines (breaks, else branches, deletes, returns)
// are not visible in this excerpt -- confirm control flow against the full
// source.
RecursiveHttpGet::RecursiveHttpGetEvent(int event, Event * d)
1684
char *url, *url_end;
1687
// For read events the event data is used as an IOBufferReader.
IOBufferReader *r = (IOBufferReader *) d;
1690
case NET_EVENT_OPEN_FAILED:
1692
Debug("update", "RecursiveHttpGetEvent connect failed id: %d [%s]", _id, html_parser._url);
1695
case VC_EVENT_ERROR:
1697
Debug("update", "RecursiveHttpGetEvent connect event error id: %d [%s]", _id, html_parser._url);
1700
case VC_EVENT_READ_READY:
1701
case VC_EVENT_READ_COMPLETE:
1704
// ParseHtml returns 0 when the reader is drained; a non-zero status carries
// the terminal (< 0) / non-terminal (> 0) classification of the URL.
while ((status = html_parser.ParseHtml(r, &url, &url_end))) {
1705
// Validate given URL.
1707
ue = NEW(new UpdateEntry);
1708
if (ue->ValidURL(url, url_end + 1 /* Point to null */ )) {
1713
// Complete remaining UpdateEntry initializations
1715
ue->_request_headers = xstrdup(_request_headers);
1716
ue->BuildHttpRequest();
1717
ue->Init(1); // Derived URL
1719
// Discard remote URL(s)
1721
const char *ue_host = ue->_URLhandle.host_get(&ue_host_len);
1723
const char *url_host = _url_data->host_get(&url_host_len);
1725
// A missing host on either side, or a non-zero case-insensitive compare
// result, selects this branch.
if (ue_host == NULL || url_host == NULL || ptr_len_casecmp(ue_host, ue_host_len, url_host, url_host_len)) {
1730
// I think we're generating the cache key just to get
1731
// a hash of the URL. Used to use Cache::generate_key
1732
// that no longer works with vary_on_user_agent
1734
// Cache::generate_key(&ue->_url_md5, &ue->_URLhandle, _http_hdr);
1735
ue->_URLhandle.MD5_get(&ue->_url_md5);
1737
if (_CL->HashAdd(ue)) {
1738
// Entry already exists
1744
// Entry is unique and has been added to hash table.
1745
// Set terminal URL status and add to current
1746
// recursion level list.
1748
// Negative parser status marks the URL as terminal.
ue->SetTerminalStatus(((status < 0) ? 1 : 0));
1749
Debug("update", "Recursive find rid: %d id: %d %s\n [%s]",
1750
_id, ue->_id, (ue->TerminalURL()? "T " : ""), ue->_url);
1752
// Push the entry onto the front of the singly linked group list.
if (_group_link_head) {
1753
ue->_group_link = _group_link_head;
1754
_group_link_head = ue;
1756
_group_link_head = ue;
1757
ue->_group_link = NULL;
1762
// The parser is expected to have consumed everything available.
ink_release_assert(r->read_avail() == 0);
1764
if ((event == VC_EVENT_READ_COMPLETE)
1765
|| (event == VC_EVENT_EOS)) {
1772
case UPDATE_EVENT_SUCCESS:
1773
case UPDATE_EVENT_FAILED:
1775
// Child state machine completed.
1777
ink_release_assert(_active_child_state_machines > 0);
1778
_active_child_state_machines--;
1783
ink_release_assert(!"RecursiveHttpGetEvent invalid event");
1789
if (_group_link_head) {
1790
// At this point, we have a list of valid terminal
1791
// and non-terminal URL(s).
1792
// Sequentially initiate the read on the non-terminal URL(s).
1794
while (_group_link_head) {
1795
// Pop the head of the group list.
ue = _group_link_head;
1796
_group_link_head = ue->_group_link;
1798
if (!ue->TerminalURL()) {
1799
// NOTE(review): the body of this depth check is not visible; presumably
// recursion stops once the remaining depth reaches 1 -- confirm.
if (_recursion_depth <= 1) {
1803
Debug("update", "(R) start non-terminal HTTP GET rid: %d id: %d [%s]", _id, ue->_id, ue->_url);
1805
// Each child reports back to this object and is counted until it does.
_active_child_state_machines++;
1806
RecursiveHttpGet *RHttpGet = NEW(new RecursiveHttpGet());
1807
RHttpGet->Init(this, ue->_url, _request_headers,
1808
_url_data, _http_hdr, (_recursion_depth - 1), _CL, &update_allowable_html_tags[0]);
1814
// All child state machines have completed, tell our parent
1815
// and delete ourself.
1817
SET_HANDLER((RecursiveHttpGetContHandler)
1818
& RecursiveHttpGet::ExitEventHandler);
1819
handleEvent(EVENT_IMMEDIATE, 0);
1824
// Final handler: on EVENT_IMMEDIATE / EVENT_INTERVAL it tries to take the
// caller continuation's mutex and, when held, delivers UPDATE_EVENT_SUCCESS
// to the caller. If the lock is missed it reschedules itself 10 ms later.
// Any other event trips a release assert.
// NOTE(review): the lock test and the self-deletion are not visible in this
// excerpt.
RecursiveHttpGet::ExitEventHandler(int event, Event * e)
1828
case EVENT_IMMEDIATE:
1829
case EVENT_INTERVAL:
1831
MUTEX_TRY_LOCK(lock, _caller_cont->mutex, this_ethread());
1833
Debug("update", "Exiting recursive read rid: %d [%s]", _id, html_parser._url);
1834
_caller_cont->handleEvent(UPDATE_EVENT_SUCCESS, 0);
1838
// Lock miss, try again later.
1839
eventProcessor.schedule_in(this, HRTIME_MSECONDS(10));
1845
ink_release_assert(!"RecursiveHttpGet::ExitEventHandler invalid event");
1853
// Extract the next URL from the data available in `r`. A candidate found by
// ScanHtmlForURL is passed through ConstructURL, whose result becomes the
// status; 0 is returned when the reader has no more bytes. On success *url
// and *url_end delimit the URL text.
// NOTE(review): the enclosing loop and the non-zero return are not visible
// in this excerpt.
HtmlParser::ParseHtml(IOBufferReader * r, char **url, char **url_end)
1857
if ((status = ScanHtmlForURL(r, url, url_end))) {
1858
status = ConstructURL(url, url_end);
1862
return 0; // No more bytes
1868
// Incremental HTML scanner. It reads one byte at a time from `r` and walks
// a state machine kept in _scan_state, so scanning can resume where it
// stopped when more data arrives. It accumulates the current tag name,
// attribute name and attribute value in _tag / _attr / _attr_value. When a
// complete tag/attribute/value triple passes AllowTagAttrValue() and
// ExtractURL(), it returns 1 with *url / *url_end set; it returns 0 when the
// reader runs out of data.
// NOTE(review): several case labels (e.g. for the initial, copy-tag and
// copy-attribute states), the character tests guarding many branches, and
// the break statements are not visible in this excerpt -- confirm against
// the full source.
HtmlParser::ScanHtmlForURL(IOBufferReader * r, char **url, char **url_end)
1874
switch (_scan_state) {
1880
// Reset the per-tag scratch state before looking for the next tag.
_attr_value.clear();
1881
_attr_value_hash_char_index = -1;
1882
_attr_value_quoted = 0;
1883
_attr_matched = false;
1885
_scan_state = SCAN_START;
1891
while ((n = r->read((char *) &c, 1))) {
1893
_scan_state = FIND_TAG_START;
1899
case FIND_TAG_START:
1901
while ((n = r->read((char *) &c, 1))) {
1904
////////////////////////////////////////////////////
1905
// '< >' with >= 0 embedded spaces, ignore it.
1906
////////////////////////////////////////////////////
1907
_scan_state = SCAN_INIT;
1911
// First character of the tag name.
_tag(_tag.length()) = c;
1912
_scan_state = COPY_TAG;
1921
while ((n = r->read((char *) &c, 1))) {
1924
/////////////////////////////
1926
/////////////////////////////
1927
_scan_state = SCAN_INIT;
1930
} else if (c == '=') {
1931
///////////////////////////////
1932
// <tag=something>, ignore it
1933
///////////////////////////////
1934
_scan_state = SCAN_INIT;
1938
// Tag names are capped at MAX_TAG_NAME_LENGTH characters.
if (_tag.length() < MAX_TAG_NAME_LENGTH) {
1939
_tag(_tag.length()) = c;
1942
///////////////////////////////////
1943
// Tag name too long, ignore it
1944
///////////////////////////////////
1945
_scan_state = SCAN_INIT;
1951
// NUL-terminate the tag name so it can be compared as a C string.
_tag(_tag.length()) = 0;
1952
if (strcmp(_tag, HTML_COMMENT_TAG) == 0) {
1953
_scan_state = IGNORE_COMMENT_START;
1955
_scan_state = FIND_ATTR_START;
1962
case IGNORE_COMMENT_START:
1964
// Start matching the comment terminator from its first character.
_comment_end_ptr = (char *) HTML_COMMENT_END;
1965
_scan_state = IGNORE_COMMENT;
1968
case IGNORE_COMMENT:
1970
while ((n = r->read((char *) &c, 1))) {
1972
if (c == *_comment_end_ptr) {
1974
// Reaching the terminator's NUL means the whole terminator was seen.
if (!*_comment_end_ptr) {
1975
_scan_state = SCAN_INIT;
1979
// Mismatch: restart terminator matching from the beginning.
_comment_end_ptr = (char *) HTML_COMMENT_END;
1985
case FIND_ATTR_START:
1987
while ((n = r->read((char *) &c, 1))) {
1990
////////////////////////////////////////////////
1991
// <tag > with >=1 embedded spaces, ignore it
1992
////////////////////////////////////////////////
1993
_scan_state = SCAN_INIT;
1996
} else if (c == '=') {
1997
//////////////////////////////////////////////////////////
1998
// <tag =something> with >=1 embedded spaces, ignore it
1999
//////////////////////////////////////////////////////////
2000
_scan_state = SCAN_INIT;
2004
// First character of the attribute name.
_attr(_attr.length()) = c;
2005
_scan_state = COPY_ATTR;
2014
while ((n = r->read((char *) &c, 1))) {
2017
/////////////////////////////
2018
// <tag attr>, ignore it
2019
/////////////////////////////
2020
_scan_state = SCAN_INIT;
2023
} else if (c == '=') {
2024
///////////////////////////////
2025
// <tag attr=something>
2026
///////////////////////////////
2027
_attr(_attr.length()) = 0;
2028
_scan_state = FIND_ATTR_VALUE_START;
2032
// Attribute names are capped at MAX_ATTR_NAME_LENGTH characters.
if (_attr.length() < MAX_ATTR_NAME_LENGTH) {
2033
_attr(_attr.length()) = c;
2036
///////////////////////////////////
2037
// Attr name too long, ignore it
2038
///////////////////////////////////
2039
_scan_state = SCAN_INIT;
2045
_attr(_attr.length()) = 0;
2046
_scan_state = FIND_ATTR_VALUE_DELIMITER;
2052
case FIND_ATTR_VALUE_DELIMITER:
2054
while ((n = r->read((char *) &c, 1))) {
2055
if (isspace(c) || (c == '=')) {
2057
_scan_state = FIND_ATTR_VALUE_START;
2061
_scan_state = SCAN_INIT;
2067
case FIND_ATTR_VALUE_START:
2069
while ((n = r->read((char *) &c, 1))) {
2072
/////////////////////////////
2073
// <tag attr= >, ignore
2074
/////////////////////////////
2075
_scan_state = SCAN_INIT;
2078
} else if ((c == '\'') || (c == '\"')) {
2079
// Remember which quote character opened the value so the matching one
// closes it.
_attr_value_quoted = c;
2080
_scan_state = COPY_ATTR_VALUE;
2084
// Unquoted value: the current character is already part of it.
_attr_value_quoted = 0;
2085
_attr_value(_attr_value.length()) = c;
2086
_scan_state = COPY_ATTR_VALUE;
2093
case COPY_ATTR_VALUE:
2095
while ((n = r->read((char *) &c, 1))) {
2096
if (_attr_value_quoted) {
2097
if (c == _attr_value_quoted) {
2098
///////////////////////////////////////////
2099
// We have a complete <tag attr='value'
2100
///////////////////////////////////////////
2101
_attr_value(_attr_value.length()) = 0;
2102
_scan_state = VALIDATE_ENTRY;
2105
} else if (c == '\n') {
2106
// A newline inside a quoted value abandons the copy.
_scan_state = TERMINATE_COPY_ATTR_VALUE;
2109
_attr_value(_attr_value.length()) = c;
2111
// NOTE(review): presumably guarded by a test for '#', which is not visible
// here; records the index of the character just appended.
_attr_value_hash_char_index = _attr_value.length() - 1;
2117
///////////////////////////////////////////
2118
// We have a complete <tag attr=value
2119
///////////////////////////////////////////
2120
_attr_value(_attr_value.length()) = 0;
2121
_scan_state = VALIDATE_ENTRY;
2124
} else if (c == '>') {
2125
/////////////////////////////////////////
2126
// We have a complete <tag attr=value>
2127
/////////////////////////////////////////
2128
_attr_value(_attr_value.length()) = 0;
2129
_scan_state = VALIDATE_ENTRY_RESTART;
2133
_attr_value(_attr_value.length()) = c;
2135
_attr_value_hash_char_index = _attr_value.length() - 1;
2142
case VALIDATE_ENTRY:
2143
case VALIDATE_ENTRY_RESTART:
2145
// Choose the follow-up state first: VALIDATE_ENTRY continues with further
// attributes of the same tag, the other assignment restarts from SCAN_INIT.
if (_scan_state == VALIDATE_ENTRY) {
2146
_scan_state = RESUME_ATTR_VALUE_SCAN;
2148
_scan_state = SCAN_INIT;
2150
if (AllowTagAttrValue()) {
2151
if (ExtractURL(url, url_end)) {
2152
return 1; // valid URL
2155
break; // resume scan
2157
case RESUME_ATTR_VALUE_SCAN:
2160
// Clear the value scratch state and look for the next attribute.
_attr_value.clear();
2161
_attr_value_hash_char_index = -1;
2162
_attr_value_quoted = 0;
2164
_scan_state = FIND_ATTR_START;
2168
case TERMINATE_COPY_ATTR_VALUE:
2170
// Skip input until the closing quote of the abandoned value is seen.
while ((n = r->read((char *) &c, 1))) {
2171
if (c == _attr_value_quoted) {
2172
_scan_state = RESUME_ATTR_VALUE_SCAN;
2180
ink_release_assert(!"HtmlParser::ScanHtmlForURL bad state");
2185
return 0; // No more data
2192
// Decide whether the current tag/attribute pair (_tag, _attr) is one the
// parser should extract URLs from, by scanning the allowable_html_tags
// table case-insensitively. An optional allowable_html_attrs table is also
// consulted: when the current attribute name and value match an entry
// there, _attr_matched is set.
// NOTE(review): the return statements and loop increments are not visible
// in this excerpt, so the exact accept/reject result for each branch must be
// confirmed against the full source.
HtmlParser::AllowTagAttrValue()
2194
struct html_tag *p_tag = allowable_html_tags;
2195
struct html_tag *p_attr = allowable_html_attrs;
2197
if (!_tag || !_attr)
2200
// The tag table is terminated by an entry with a NULL tag or attr.
while (p_tag->tag && p_tag->attr) {
2201
if (!strcasecmp(_tag, p_tag->tag)
2202
&& !strcasecmp(_attr, p_tag->attr)) {
2203
// No attribute-value table configured.
if (p_attr == NULL || p_attr->tag == NULL)
2205
else if (_attr_matched) {
2208
// attributes don't match
2212
if (p_attr && p_attr->tag && p_attr->attr && _attr_value.length() > 0) {
2213
if (!strcasecmp(_attr, p_attr->tag)
2214
&& !strcasecmp(_attr_value, p_attr->attr)) {
2215
_attr_matched = true;
2227
// Check whether the string at `p` begins (case-insensitively) with one of
// the prefixes in the NULL-terminated proto_schemes table.
// NOTE(review): the return statements are not visible in this excerpt;
// callers use the result as a boolean.
HtmlParser::ValidProtoScheme(char *p)
2230
for (n = 0; proto_schemes[n].tag; ++n) {
2231
if (!strncasecmp(p, proto_schemes[n].tag, proto_schemes[n].tag_len)) {
2239
// Check whether the string at `p` begins (case-insensitively) with one of
// the prefixes in the NULL-terminated supported_proto_schemes table.
// NOTE(review): the return statements are not visible in this excerpt;
// callers use the result as a boolean.
HtmlParser::ValidSupportedProtoScheme(char *p)
2242
for (n = 0; supported_proto_schemes[n].tag; ++n) {
2243
if (!strncasecmp(p, supported_proto_schemes[n].tag, supported_proto_schemes[n].tag_len)) {
2251
// Turn the collected attribute value into a URL range. Three cases are
// visible:
//   - <base href=...>: the value is copied into _html_doc_base;
//   - <meta content="N; URL=...">: the URL part after the digits, ';',
//     whitespace and "URL=" is selected;
//   - anything else: the whole attribute value is the URL.
// *url points at the first URL character and *url_end at the last one
// (_attr_value's length includes its terminating NUL, hence the "- 2").
// NOTE(review): return statements and a few short statements (loop breaks,
// index advances) are not visible in this excerpt.
HtmlParser::ExtractURL(char **url, char **url_end)
2255
// '#' considerations
2256
if (_attr_value_hash_char_index >= 0) {
2257
// Index 0 means the value starts with '#'.
if (!_attr_value_hash_char_index) {
2261
// '#' terminates _attr_value
2262
// Truncate the value at the '#' and NUL-terminate it there.
_attr_value.set_length(_attr_value_hash_char_index + 1);
2263
_attr_value[_attr_value_hash_char_index] = 0;
2267
if (!strcasecmp(_tag, "base") && !strcasecmp(_attr, "href")) {
2268
// Replace any previously recorded document base.
if (_html_doc_base) {
2269
_html_doc_base.clear();
2271
for (n = 0; n < _attr_value.length(); ++n) {
2272
_html_doc_base(_html_doc_base.length()) = _attr_value[n];
2274
_html_doc_base(_html_doc_base.length()) = 0;
2277
} else if (!strcasecmp(_tag, "meta") && !strcasecmp(_attr, "content")) {
2278
/////////////////////////////////////////////////////////////////
2280
// <META HTTP-EQUIV=Refresh CONTENT="0; URL=index.html">
2281
/////////////////////////////////////////////////////////////////
2282
if (_attr_value.length()) {
2283
// Locate start of URL
2284
// Skip the leading run of digits (the refresh delay).
for (n = 0; n < _attr_value.length(); ++n) {
2285
if (!ParseRules::is_digit((unsigned char) _attr_value[n])) {
2289
if ((n < _attr_value.length()) && (((unsigned char) _attr_value[n]) == ';')) {
2291
// Skip whitespace before the "URL=" token.
for (; n < _attr_value.length(); ++n) {
2292
if (!isspace((unsigned char) _attr_value[n])) {
2296
if ((n < _attr_value.length()) && (!strncasecmp(&_attr_value[n], "URL=", 4))) {
2298
if ((n < _attr_value.length())
2299
&& ((_attr_value.length() - n) > 1)) {
2300
*url = &_attr_value[n];
2301
*url_end = &_attr_value[_attr_value.length() - 2];
2313
// Default case: a value longer than its NUL terminator is a URL candidate.
if (_attr_value.length() > 1) {
2314
*url = &_attr_value[(intptr_t)0];
2315
*url_end = &_attr_value[_attr_value.length() - 2];
2324
// Normalize the URL delimited by *url / *url_end (url_end points at the last
// character). Leading whitespace is skipped, relative references are
// resolved against the document base (from <base href> when present,
// otherwise the page URL in _url), and unsupported schemes are rejected.
// Returns 0 for an invalid URL, > 0 for a non-terminal URL (ends in "/",
// ".htm" or ".html") and < 0 for a terminal URL. On success *url / *url_end
// are updated; resolved URLs live in the member buffer _result.
// NOTE(review): several short statements (pointer advances, flag
// assignments, else keywords) are not visible in this excerpt.
HtmlParser::ConstructURL(char **url, char **url_end)
2326
unsigned char *p_url = (unsigned char *) *url;
2327
unsigned char *p_url_end = (unsigned char *) *url_end;
2329
/////////////////////////////////////////////////////////////////////
2330
// Handle the <a href="[spaces]URI"> case by skipping over spaces
2331
/////////////////////////////////////////////////////////////////////
2332
while (p_url < p_url_end) {
2333
if (isspace(*p_url)) {
2340
////////////////////////////////////////////////////
2341
// Determine if we have a relative or absolute URI
2342
////////////////////////////////////////////////////
2343
int relative_URL = 0;
2344
int http_needed = 0;
2345
if (ValidProtoScheme((char *) p_url)) {
2346
// "http:" not followed by "//" is treated as a malformed relative reference.
if (!strncasecmp((char *) p_url, "http:", 5)
2347
&& (strncasecmp((char *) p_url, "http://", 7) != 0)) {
2349
//////////////////////////////////////////////////////////
2350
// Bad relative URI references of the form http:URL.
2351
// Skip over the "http:" part.
2352
//////////////////////////////////////////////////////////
2353
p_url += strlen("http:");
2354
if (p_url > p_url_end) {
2355
return 0; // Invalid URL
2361
// problem found with www.slashdot.com
2362
// Scheme-less "//host/path" reference.
// NOTE(review): presumably this sets http_needed so "http:" is prepended
// below -- confirm.
if (strncasecmp((char *) p_url, "//", 2) == 0)
2366
//////////////////////////////////////////////
2367
// Only handle supported protocol schemes
2368
//////////////////////////////////////////////
2369
if (!relative_URL && !ValidSupportedProtoScheme((char *) p_url)) {
2370
return 0; // Invalid URL
2374
////////////////////////////////////
2375
// Compute document base path
2376
////////////////////////////////////
2377
DynArray<char>*base = 0;
2378
DynArray<char>*absolute_url = 0;
2381
// The "+ 2" sizes cover the last character plus the terminating NUL that
// follows p_url_end.
absolute_url = PrependString("http:", 5, (char *) p_url, (p_url_end - p_url + 2));
2382
} else if (_html_doc_base.length()) {
2383
///////////////////////////////////////////////////////////////
2384
// Document base specified via <base href="...">
2385
///////////////////////////////////////////////////////////////
2386
// The base itself is resolved against the page URL when it carries no
// scheme of its own.
base = MakeURL(_url, _html_doc_base, _html_doc_base.length(), !ValidProtoScheme(_html_doc_base));
2387
absolute_url = MakeURL(*base, (char *) p_url, (p_url_end - p_url + 2), 1);
2389
absolute_url = MakeURL(_url, (char *) p_url, (p_url_end - p_url + 2), 1);
2392
// Take over the resolved buffer in _result, then detach it from the
// temporary before the temporary is deleted.
_result = *absolute_url;
2393
absolute_url->detach();
2395
// fix INKqa07208; need to reclaim memory
2396
delete absolute_url;
2400
*url = &_result[(intptr_t)0];
2401
*url_end = &_result[_result.length() - 3]; // -1 (real len)
2405
*url = (char *) p_url;
2406
*url_end = (char *) p_url_end;
2409
//////////////////////////////////////////////////////////////////
2410
// Determine if we have a terminal or non-terminal URL.
2411
// URL ending with '/', .htm or .html is considered non-terminal.
2412
// Return < 0 ==> Terminal URL
2413
// Return > 0 ==> Non terminal URL
2414
//////////////////////////////////////////////////////////////////
2415
// NOTE(review): these suffix checks look back up to 4 bytes before
// p_url_end; no length check is visible here, so for very short URLs the
// comparison may start before the beginning of the URL -- confirm.
// NOTE(review): this uses the local p_url_end, not the *url_end set just
// above -- confirm it is still correct for resolved URLs.
if (!strncasecmp((char *) (p_url_end - 4), ".html", 5)
2416
|| !strncasecmp((char *) (p_url_end - 3), ".htm", 4)
2417
|| !strncasecmp((char *) (p_url_end), "/", 1)) {
2418
return 1; // Non-terminal URL
2420
return -1; // Terminal URL
2425
// Build a new heap-allocated DynArray<char> holding a URL formed from the
// base `url` and the `subsize` bytes at `sub`. Three assembly paths are
// visible: one keeps the base up to its last '/', one works from the first
// '/' after the "<scheme>://" part, and one copies `sub` alone. Each path
// appends `sub` followed by a '\0'.
// NOTE(review): the conditions selecting between the paths (relative_url
// and the first character of `sub`, presumably) and the return statement
// are not visible in this excerpt -- confirm. The caller owns the result.
HtmlParser::MakeURL(char *url, char *sub, int subsize, int relative_url)
2428
int skip_slashslash;
2430
DynArray<char>*result = NEW(new DynArray<char>(&default_zero_char, 128));
2435
int url_len = strlen(url);
2437
// Locate last '/' in url
2438
for (i = url_len; i && url[i] != '/'; i--);
2440
// The '/' found is the second slash of "//", i.e. the URL has no path.
if (i && (url[i] == url[i - 1])) {
2441
// http://hostname case with no terminating '/'
2443
for (n = 0; n < url_len; ++n) {
2444
(*result) (result->length()) = url[n];
2446
(*result) (result->length()) = '/';
2449
// Keep the base URL up to and including its last '/'.
for (n = 0; n < (i + 1); ++n) {
2450
(*result) (result->length()) = url[n];
2454
for (n = 0; n < subsize; ++n) {
2455
(*result) (result->length()) = sub[n];
2457
(*result) (result->length()) = '\0';
2462
// Locate leading '/'
2463
for (; url[i] && url[i] != '/'; i++);
2468
// Skip over '<scheme>://'
2469
skip_slashslash = ((url[i] == url[i + 1]) && (url[i + 1] == '/'));
2471
if (skip_slashslash) {
2474
} while (skip_slashslash);
2476
// Copy the scheme-and-host prefix of the base URL.
for (n = 0; n < (i - 1); ++n) {
2477
(*result) (result->length()) = url[n];
2480
// Avoid emitting a duplicate '/' at the join point.
if (url[n] != '/') {
2481
(*result) (result->length()) = url[n];
2484
for (n = 0; n < subsize; ++n) {
2485
(*result) (result->length()) = sub[n];
2487
(*result) (result->length()) = '\0';
2491
// Third path: `sub` is copied unchanged.
for (n = 0; n < subsize; ++n) {
2492
(*result) (result->length()) = sub[n];
2494
(*result) (result->length()) = '\0';
2500
// Build a new heap-allocated DynArray<char> containing the first `presize`
// bytes of `pre`, then the first `subsize` bytes of `sub`, then a '\0'.
// NOTE(review): the return statement is not visible in this excerpt; the
// caller (ConstructURL) uses and then deletes the result.
HtmlParser::PrependString(const char *pre, int presize, char *sub, int subsize)
2504
DynArray<char>*result = NEW(new DynArray<char>(&default_zero_char, 128));
2506
for (n = 0; n < presize; ++n) {
2507
(*result) (result->length()) = pre[n];
2509
for (n = 0; n < subsize; ++n) {
2510
(*result) (result->length()) = sub[n];
2512
(*result) (result->length()) = '\0';
2517
///////////////////////////////////////////////////////////////////
2518
// Class ObjectReloadCont
2519
// Background load URL into local cache
2520
///////////////////////////////////////////////////////////////////
2521
// Allocator used for every ObjectReloadCont: instances come from alloc()
// and are returned with free(this) when the reload finishes or fails.
ClassAllocator<ObjectReloadCont> ObjectReloadContAllocator("ObjectReloadCont");
2523
// Default constructor: creates the continuation without a mutex (Init()
// assigns one), zero-initializes the members, starts in the START state and
// installs ObjectReloadEvent as the event handler.
ObjectReloadCont::ObjectReloadCont():Continuation(0),
2524
_caller_cont(0), _request_id(0), _send_data(0),
2525
_receive_data(0), _start_event(0),
2526
_state(START), _cur_action(0), _netvc(0), _write_vio(0), _read_vio(0), _read_event_callback(0)
2528
SET_HANDLER((ObjectReloadContHandler) & ObjectReloadCont::ObjectReloadEvent);
2531
// Destructor. NOTE(review): the body is not visible in this excerpt.
ObjectReloadCont::~ObjectReloadCont()
2536
// Prepare and start a background load of `url`.
//   cont                - continuation called back with the outcome
//   url, url_len        - URL text and its length
//   headers, headers_len - optional extra request header text and length
//   http_case           - selects the HTTP request path
//   read_event_callback - when non-zero, read events are forwarded to `cont`
// The request "GET <url> HTTP/1.0" is assembled in _send_data, with the
// extra headers when supplied, followed by the request terminator. The
// state machine is then kicked with EVENT_IMMEDIATE.
// NOTE(review): the conditionals that choose between the with-headers and
// without-headers variants, and the http_case test, are not visible in this
// excerpt -- confirm against the full source.
ObjectReloadCont::Init(Continuation * cont, char *url, int url_len,
2537
char *headers, int headers_len, int http_case, int read_event_callback)
2541
mutex = new_ProxyMutex();
2542
_caller_cont = cont;
2543
_request_id = ink_atomic_increment(&global_id, 1);
2544
_read_event_callback = read_event_callback;
2546
// Setup send data buffer by prepending the HTTP GET method to the
2547
// given NULL terminated URL and terminating with HTTP version
2551
// Request size with extra headers ...
total_len = len_GET_METHOD + url_len + len_HTTP_VERSION + len_TERMINATOR + headers_len + len_REQUEST_TERMINATOR;
2553
// ... and without them.
total_len = len_GET_METHOD + url_len + len_HTTP_VERSION + len_REQUEST_TERMINATOR;
2555
_send_data = new_MIOBuffer(buffer_size_to_index(total_len + 1)); // allow for NULL
2557
// The pieces are written directly at the buffer's end pointer; fill()
// below then accounts for total_len bytes.
memcpy(_send_data->end(), GET_METHOD, len_GET_METHOD);
2558
memcpy(&(_send_data->end())[len_GET_METHOD], url, url_len);
2559
memcpy(&(_send_data->end())[len_GET_METHOD + url_len], HTTP_VERSION, len_HTTP_VERSION);
2562
// With-headers variant: line terminator, header text, request terminator.
memcpy(&(_send_data->end())[len_GET_METHOD + url_len + len_HTTP_VERSION], TERMINATOR, len_TERMINATOR);
2563
memcpy(&(_send_data->end())[len_GET_METHOD + url_len + len_HTTP_VERSION + len_TERMINATOR], headers, headers_len);
2564
memcpy(&(_send_data->end())[len_GET_METHOD + url_len +
2565
len_HTTP_VERSION + len_TERMINATOR +
2566
headers_len], REQUEST_TERMINATOR, len_REQUEST_TERMINATOR);
2568
// Add NULL for Debug URL output
2569
(_send_data->end())[len_GET_METHOD + url_len +
2570
len_HTTP_VERSION + len_TERMINATOR + headers_len + len_REQUEST_TERMINATOR] = 0;
2572
// Without-headers variant: request terminator directly after the version.
memcpy(&(_send_data->end())[len_GET_METHOD + url_len +
2573
len_HTTP_VERSION], REQUEST_TERMINATOR, len_REQUEST_TERMINATOR);
2575
// Add NULL for Debug URL output
2576
(_send_data->end())[len_GET_METHOD + url_len + len_HTTP_VERSION + len_REQUEST_TERMINATOR] = 0;
2578
_send_data->fill(total_len);
2581
// Unhandled case... TODO: Do we need to actually handle this?
2582
ink_debug_assert(false);
2584
handleEvent(EVENT_IMMEDIATE, (void *) NULL);
2588
// Release the I/O buffers owned by this object: the send buffer, and the
// receive buffer when one was allocated.
// NOTE(review): any guard around the _send_data release and the resetting
// of members are not visible in this excerpt.
ObjectReloadCont::free()
2592
free_MIOBuffer(_send_data);
2595
if (_receive_data) {
2596
free_MIOBuffer(_receive_data);
2602
// State machine that loads one object through the local proxy port:
// connect to 127.0.0.1:local_http_server_port, write the prepared request,
// then read the response until completion. Errors at any stage are reported
// to the caller through CallBackUser(), after which the object returns
// itself to ObjectReloadContAllocator.
// NOTE(review): the switch header, several case labels (start, writing and
// reading states), break/return statements and the calls to free() are not
// visible in this excerpt -- confirm against the full source.
ObjectReloadCont::ObjectReloadEvent(int event, void *d)
2607
// Schedule connect to localhost:<proxy port>
2608
Debug("update-reload", "Connect start id=%d", _request_id);
2609
_state = ObjectReloadCont::ATTEMPT_CONNECT;
2610
// The object's own mutex must be obtainable here; failure is fatal.
MUTEX_TRY_LOCK(lock, this->mutex, this_ethread());
2611
ink_release_assert(lock);
2612
_cur_action = netProcessor.connect_re(this, inet_addr("127.0.0.1"), local_http_server_port);
2615
case ATTEMPT_CONNECT:
2617
if (event != NET_EVENT_OPEN) {
2618
// Connect error, terminate processing
2619
Debug("update-reload", "Connect fail id=%d", _request_id);
2620
CallBackUser(event, 0);
2622
ObjectReloadContAllocator.free(this);
2625
// On NET_EVENT_OPEN the event data is the new connection.
_netvc = (class NetVConnection *) d;
2628
Debug("update-reload", "Write start id=%d [%s]", _request_id, _send_data->start());
2629
_state = ObjectReloadCont::WRITING_URL;
2630
IOBufferReader *r = _send_data->alloc_reader();
2631
// Write everything currently in the send buffer.
_write_vio = _netvc->do_io_write(this, r->read_avail(), r);
2636
ink_release_assert(_write_vio == (VIO *) d);
2637
if (event == VC_EVENT_WRITE_READY) {
2638
_write_vio->reenable();
2640
} else if (event == VC_EVENT_WRITE_COMPLETE) {
2641
// Write successful, start read
2642
Debug("update-reload", "Read start id=%d", _request_id);
2643
_state = ObjectReloadCont::READING_DATA;
2644
_receive_data = new_MIOBuffer(max_iobuffer_size);
2645
_receive_data_reader = _receive_data->alloc_reader();
2646
// Read with an effectively unbounded byte count.
_read_vio = _netvc->do_io_read(this, INT64_MAX, _receive_data);
2649
// Write error, terminate processing
2650
Debug("update-reload", "Write fail id=%d", _request_id);
2651
_netvc->do_io(VIO::CLOSE);
2652
CallBackUser(event, 0);
2654
ObjectReloadContAllocator.free(this);
2660
ink_release_assert(_read_vio == (VIO *) d);
2662
case VC_EVENT_READ_READY:
2664
// When read callbacks were requested, hand the received data to the caller
// via the reader.
if (_read_event_callback) {
2665
_caller_cont->handleEvent(event, _receive_data_reader);
2668
// Drop whatever is available in the reader and ask for more data.
int64_t read_bytes = _receive_data_reader->read_avail();
2669
_receive_data_reader->consume(read_bytes);
2670
_read_vio->reenable();
2674
case VC_EVENT_READ_COMPLETE:
2677
if (_read_event_callback) {
2678
_caller_cont->handleEvent(event, _receive_data_reader);
2680
// Object injected into local cache
2681
Debug("update-reload", "Fill success id=%d", _request_id);
2686
Debug("update-reload", "Fill read fail id=%d", _request_id);
2687
CallBackUser(event, 0);
2692
_netvc->do_io(VIO::CLOSE);
2694
ObjectReloadContAllocator.free(this);
2699
ink_release_assert(!"ObjectReloadEvent invalid state");
2707
// Forward `event` and its data pointer `d` to the caller continuation that
// was registered in Init().
ObjectReloadCont::CallBackUser(int event, void *d)
2709
_caller_cont->handleEvent(event, d);