450
* Attempt to parse the request line.
452
* This will set the values in hmsg that it determines. One may end up
453
* with a partially-parsed buffer; the return value tells you whether
454
* the values are valid or not.
456
* \retval 1 if parsed correctly
457
* \retval 0 if more is needed
458
* \retval -1 if error
461
* * have it indicate "error" and "not enough" as two separate conditions!
462
* * audit this code as off-by-one errors are probably everywhere!
454
HttpParser::parseRequestFirstLine()
456
int second_word = -1; // track the suspected URI start
457
int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
458
int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
460
debugs(74, 5, HERE << "parsing possible request: " << buf);
462
// Single-pass parse (provided we have the whole line anyway)
465
if (Config.onoff.relaxed_header_parser) {
466
if (Config.onoff.relaxed_header_parser < 0 && buf[req_start] == ' ')
467
debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
468
"Whitespace bytes received ahead of method. " <<
469
"Ignored due to relaxed_header_parser.");
470
// Be tolerant of prefix spaces (other bytes are valid method values)
471
for (; req_start < bufsiz && buf[req_start] == ' '; req_start++);
474
for (int i = 0; i < bufsiz; i++) {
475
// track first and last whitespace (SP only)
478
if (first_whitespace < req_start)
479
first_whitespace = i;
482
// track next non-SP/non-HT byte after first_whitespace
483
if (second_word < first_whitespace && buf[i] != ' ' && buf[i] != '\t') {
487
// locate line terminator
488
if (buf[i] == '\n') {
493
if (i < bufsiz - 1 && buf[i] == '\r') {
494
if (Config.onoff.relaxed_header_parser) {
495
if (Config.onoff.relaxed_header_parser < 0 && buf[i + 1] == '\r')
496
debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
497
"Series of carriage-return bytes received prior to line terminator. " <<
498
"Ignored due to relaxed_header_parser.");
500
// Be tolerant of invalid multiple \r prior to terminal \n
501
if (buf[i + 1] == '\n' || buf[i + 1] == '\r')
503
while (i < bufsiz - 1 && buf[i + 1] == '\r')
506
if (buf[i + 1] == '\n') {
511
if (buf[i + 1] == '\n') {
518
// RFC 2616 section 5.1
519
// "No CR or LF is allowed except in the final CRLF sequence"
524
debugs(74, 5, "Parser: retval 0: from " << req_start <<
525
"->" << req_end << ": needs more data to complete first line.");
529
// NP: we have now seen EOL, more-data (0) cannot occur.
530
// From here on any failure is -1, success is 1
535
// Process what we now know about the line structure into field offsets
536
// generating HTTP status for any aborts as we go.
538
// First non-whitespace = beginning of method
539
if (req_start > line_end) {
544
// First whitespace = end of method
545
if (first_whitespace > line_end || first_whitespace < req_start) {
548
m_end = first_whitespace - 1;
549
if (m_end < m_start) {
553
// First non-whitespace after first SP = beginning of URL+Version
554
if (second_word > line_end || second_word < req_start) {
557
u_start = second_word;
559
// RFC 1945: SP and version following URI are optional, marking version 0.9
560
// we identify this by the last whitespace being earlier than URI start
561
if (last_whitespace < second_word && last_whitespace >= req_start) {
567
// otherwise last whitespace is somewhere after end of URI.
568
u_end = last_whitespace;
569
// crop any trailing whitespace in the area we think of as URI
570
for (; u_end >= u_start && xisspace(buf[u_end]); u_end--);
572
if (u_end < u_start) {
576
// Last whitespace SP = before start of protocol/version
577
if (last_whitespace >= line_end) {
580
v_start = last_whitespace + 1;
583
// We only accept HTTP protocol requests right now.
584
// TODO: accept other protocols; RFC 2326 (RTSP protocol) etc
585
if ((v_end - v_start +1) < 5 || strncasecmp(&buf[v_start], "HTTP/", 5) != 0) {
586
#if USE_HTTP_VIOLATIONS
587
// being lax; old parser accepted strange versions
588
// there are a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here.
598
int i = v_start + sizeof("HTTP/") -1;
600
/* next should be 1 or more digits */
601
if (!isdigit(buf[i])) {
605
for (; i <= line_end && (isdigit(buf[i])) && maj < 65536; i++) {
607
maj = maj + (buf[i]) - '0';
609
// catch too-big values or missing remainders
610
if (maj >= 65536 || i > line_end) {
615
/* next should be .; we -have- to have this as we have a whole line.. */
619
// catch missing minor part
620
if (++i > line_end) {
624
/* next should be one or more digits */
625
if (!isdigit(buf[i])) {
629
for (; i <= line_end && (isdigit(buf[i])) && min < 65536; i++) {
631
min = min + (buf[i]) - '0';
633
// catch too-big values or trailing garbage
634
if (min >= 65536 || i < line_end) {
640
* Rightio - we have all the schtuff. Return true; we've got enough.
465
646
HttpParserParseReqLine(HttpParser *hmsg)
469
unsigned int maj = 0, min = 0;
470
int last_whitespace = -1, line_end = -1;
472
debugs(74, 5, "httpParserParseReqLine: parsing " << hmsg->buf);
474
648
PROF_start(HttpParserParseReqLine);
475
/* Find \r\n - end of URL+Version (and the request) */
477
for (i = 0; i < hmsg->bufsiz; i++) {
478
if (hmsg->buf[i] == '\n') {
482
if (i < hmsg->bufsiz - 1 && hmsg->buf[i] == '\r' && hmsg->buf[i + 1] == '\n') {
483
hmsg->req_end = i + 1;
487
if (hmsg->req_end == -1) {
491
assert(hmsg->buf[hmsg->req_end] == '\n');
492
/* Start at the beginning again */
495
/* Find first non-whitespace - beginning of method */
496
for (; i < hmsg->req_end && (xisspace(hmsg->buf[i])); i++);
497
if (i >= hmsg->req_end) {
504
/* Find first whitespace - end of method */
505
for (; i < hmsg->req_end && (! xisspace(hmsg->buf[i])); i++);
506
if (i >= hmsg->req_end) {
512
/* Find first non-whitespace - beginning of URL+Version */
513
for (; i < hmsg->req_end && (xisspace(hmsg->buf[i])); i++);
514
if (i >= hmsg->req_end) {
520
/* Find \r\n or \n - that's the end of the line. Keep track of the last whitespace! */
521
for (; i <= hmsg->req_end; i++) {
522
/* If \n - it's the end of the line */
523
if (hmsg->buf[i] == '\n') {
527
/* XXX could be off-by-one wrong! */
528
if (hmsg->buf[i] == '\r' && (i + 1) <= hmsg->req_end && hmsg->buf[i+1] == '\n') {
532
/* If it's whitespace, note it as it'll delimit our version */
533
if (hmsg->buf[i] == ' ' || hmsg->buf[i] == '\t') {
537
if (i > hmsg->req_end) {
542
/* At this point we don't need the 'i' value; so we'll recycle it for version parsing */
545
* At this point: line_end points to the first eol char (\r or \n);
546
* last_whitespace points to the last whitespace char in the URL.
547
* We know we have a full buffer here!
549
if (last_whitespace == -1) {
552
hmsg->u_end = line_end - 1;
553
assert(hmsg->u_end >= hmsg->u_start);
555
/* Find the first non-whitespace after last_whitespace */
556
/* XXX why <= vs < ? I do need to really re-audit all of this ..*/
557
for (i = last_whitespace; i <= hmsg->req_end && xisspace(hmsg->buf[i]); i++);
558
if (i > hmsg->req_end) {
563
/* is it http/ ? if so, we try parsing. If not, the URL is the whole line; version is 0.9 */
564
if (i + 5 >= hmsg->req_end || (strncasecmp(&hmsg->buf[i], "HTTP/", 5) != 0)) {
567
hmsg->u_end = line_end - 1;
568
assert(hmsg->u_end >= hmsg->u_start);
570
/* Ok, let's try parsing! Yes, this needs refactoring! */
574
/* next should be 1 or more digits */
576
for (; i < hmsg->req_end && (isdigit(hmsg->buf[i])) && maj < 65536; i++) {
578
maj = maj + (hmsg->buf[i]) - '0';
584
if (i >= hmsg->req_end) {
589
/* next should be .; we -have- to have this as we have a whole line.. */
590
if (hmsg->buf[i] != '.') {
594
if (i + 1 >= hmsg->req_end) {
599
/* next should be one or more digits */
602
for (; i < hmsg->req_end && (isdigit(hmsg->buf[i])) && min < 65536; i++) {
604
min = min + (hmsg->buf[i]) - '0';
612
/* Find whitespace, end of version */
614
hmsg->u_end = last_whitespace - 1;
619
* Rightio - we have all the schtuff. Return true; we've got enough.
626
PROF_stop(HttpParserParseReqLine);
649
int retcode = hmsg->parseRequestFirstLine();
627
650
debugs(74, 5, "Parser: retval " << retcode << ": from " << hmsg->req_start <<
628
651
"->" << hmsg->req_end << ": method " << hmsg->m_start << "->" <<
629
652
hmsg->m_end << "; url " << hmsg->u_start << "->" << hmsg->u_end <<
630
"; version " << hmsg->v_start << "->" << hmsg->v_end << " (" << maj <<
653
"; version " << hmsg->v_start << "->" << hmsg->v_end << " (" << hmsg->v_maj <<
654
"/" << hmsg->v_min << ")");
655
PROF_stop(HttpParserParseReqLine);