Skip to content

Commit

Permalink
http_parser upgrade (#272)
Browse files Browse the repository at this point in the history
  • Loading branch information
saiHemak authored and ianpartridge committed Jul 4, 2018
1 parent bee43a0 commit f09bc9b
Show file tree
Hide file tree
Showing 2 changed files with 160 additions and 34 deletions.
118 changes: 85 additions & 33 deletions Sources/CHTTPParser/http_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#include <assert.h>
#include <stddef.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

Expand Down Expand Up @@ -53,13 +52,15 @@

/* Record the error code `e` in parser->http_errno.
 *
 * The macro also copies a variable named `nread` into parser->nread, so both
 * `parser` and `nread` must be in scope wherever it is expanded.
 */
#define SET_ERRNO(e) \
do { \
parser->nread = nread; \
parser->http_errno = (e); \
} while(0)

/* The working parser state is kept in a variable named `p_state`.
 * CURRENT_STATE() reads it; UPDATE_STATE(V) assigns V to it after casting to
 * `enum state`. Note that UPDATE_STATE's expansion already ends with ';'.
 */
#define CURRENT_STATE() p_state
#define UPDATE_STATE(V) p_state = (enum state) (V);
/* Leave the enclosing function with value V, first writing `nread` and the
 * current state back into the parser structure.
 *
 * NOTE(review): the expansion ends in `while (0);`, so `RETURN(x);` yields an
 * extra empty statement. That is harmless in a braced block but would break
 * an un-braced `if (...) RETURN(x); else ...`.
 */
#define RETURN(V) \
do { \
parser->nread = nread; \
parser->state = CURRENT_STATE(); \
return (V); \
} while (0);
Expand Down Expand Up @@ -153,8 +154,8 @@ do { \
*/
#define COUNT_HEADER_SIZE(V) \
do { \
parser->nread += (V); \
if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) { \
nread += (V); \
if (UNLIKELY(nread > (HTTP_MAX_HEADER_SIZE))) { \
SET_ERRNO(HPE_HEADER_OVERFLOW); \
goto error; \
} \
Expand Down Expand Up @@ -196,7 +197,7 @@ static const char tokens[256] = {
/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
0, 0, 0, 0, 0, 0, 0, 0,
/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
0, '!', 0, '#', '$', '%', '&', '\'',
' ', '!', 0, '#', '$', '%', '&', '\'',
/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
0, 0, '*', '+', 0, '-', '.', 0,
/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
Expand Down Expand Up @@ -374,6 +375,8 @@ enum header_states

, h_connection
, h_content_length
, h_content_length_num
, h_content_length_ws
, h_transfer_encoding
, h_upgrade

Expand Down Expand Up @@ -421,14 +424,14 @@ enum http_host_state
(c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
(c) == '$' || (c) == ',')

#define STRICT_TOKEN(c) (tokens[(unsigned char)c])
#define STRICT_TOKEN(c) ((c == ' ') ? 0 : tokens[(unsigned char)c])

#if HTTP_PARSER_STRICT
#define TOKEN(c) (tokens[(unsigned char)c])
#define TOKEN(c) STRICT_TOKEN(c)
#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
#define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
#define TOKEN(c) tokens[(unsigned char)c]
#define IS_URL_CHAR(c) \
(BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
#define IS_HOST_CHAR(c) \
Expand Down Expand Up @@ -542,7 +545,7 @@ parse_url_char(enum state s, const char ch)
return s_dead;
}

/* FALLTHROUGH */
/* fall through */
case s_req_server_start:
case s_req_server:
if (ch == '/') {
Expand Down Expand Up @@ -646,6 +649,7 @@ size_t http_parser_execute (http_parser *parser,
const char *status_mark = 0;
enum state p_state = (enum state) parser->state;
const unsigned int lenient = parser->lenient_http_headers;
uint32_t nread = parser->nread;

/* We're in an error state. Don't bother doing anything. */
if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
Expand Down Expand Up @@ -980,7 +984,7 @@ size_t http_parser_execute (http_parser *parser,
/* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
break;
case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
case 'T': parser->method = HTTP_TRACE; break;
case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
default:
Expand Down Expand Up @@ -1023,6 +1027,7 @@ size_t http_parser_execute (http_parser *parser,
/* Method disambiguation entries. XX is defined outside this excerpt;
 * presumably each entry reads (method guessed so far, character index,
 * character seen, method to switch to) -- confirm against the XX definition.
 */
XX(MKCOL, 2, 'A', MKACTIVITY)
XX(MKCOL, 3, 'A', MKCALENDAR)
XX(SUBSCRIBE, 1, 'E', SEARCH)
/* The second character of "SOURCE" is the letter 'O', not the digit '0'. */
XX(SUBSCRIBE, 1, 'O', SOURCE)
XX(REPORT, 2, 'B', REBIND)
XX(POST, 1, 'R', PROPFIND)
XX(PROPFIND, 4, 'P', PROPPATCH)
Expand Down Expand Up @@ -1306,8 +1311,14 @@ size_t http_parser_execute (http_parser *parser,
break;

switch (parser->header_state) {
case h_general:
case h_general: {
size_t limit = data + len - p;
limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
while (p+1 < data + limit && TOKEN(p[1])) {
p++;
}
break;
}

case h_C:
parser->index++;
Expand Down Expand Up @@ -1406,14 +1417,14 @@ size_t http_parser_execute (http_parser *parser,
break;
}
}

COUNT_HEADER_SIZE(p - start);

if (p == data + len) {
--p;
COUNT_HEADER_SIZE(p - start);
break;
}

COUNT_HEADER_SIZE(p - start);

if (ch == ':') {
UPDATE_STATE(s_header_value_discard_ws);
CALLBACK_DATA(header_field);
Expand All @@ -1437,7 +1448,7 @@ size_t http_parser_execute (http_parser *parser,
break;
}

/* FALLTHROUGH */
/* fall through */

case s_header_value_start:
{
Expand Down Expand Up @@ -1473,9 +1484,9 @@ size_t http_parser_execute (http_parser *parser,
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
goto error;
}

parser->flags |= F_CONTENTLENGTH;
parser->content_length = ch - '0';
parser->header_state = h_content_length_num;
break;

case h_connection:
Expand Down Expand Up @@ -1563,10 +1574,18 @@ size_t http_parser_execute (http_parser *parser,
break;

case h_content_length:
if (ch == ' ') break;
h_state = h_content_length_num;
/* fall through */

case h_content_length_num:
{
uint64_t t;

if (ch == ' ') break;
if (ch == ' ') {
h_state = h_content_length_ws;
break;
}

if (UNLIKELY(!IS_NUM(ch))) {
SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
Expand All @@ -1588,6 +1607,11 @@ size_t http_parser_execute (http_parser *parser,
parser->content_length = t;
break;
}
case h_content_length_ws:
if (ch == ' ') break;
SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
parser->header_state = h_state;
goto error;

/* Transfer-Encoding: chunked */
case h_matching_transfer_encoding_chunked:
Expand Down Expand Up @@ -1687,11 +1711,10 @@ size_t http_parser_execute (http_parser *parser,
}
parser->header_state = h_state;

COUNT_HEADER_SIZE(p - start);

if (p == data + len)
--p;
break;
COUNT_HEADER_SIZE(p - start);
break;
}

case s_header_almost_done:
Expand Down Expand Up @@ -1839,6 +1862,7 @@ size_t http_parser_execute (http_parser *parser,
STRICT_CHECK(ch != LF);

parser->nread = 0;
nread = 0;

hasBody = parser->flags & F_CHUNKED ||
(parser->content_length > 0 && parser->content_length != ULLONG_MAX);
Expand Down Expand Up @@ -1933,7 +1957,7 @@ size_t http_parser_execute (http_parser *parser,

case s_chunk_size_start:
{
assert(parser->nread == 1);
assert(nread == 1);
assert(parser->flags & F_CHUNKED);

unhex_val = unhex[(unsigned char)ch];
Expand All @@ -1952,7 +1976,6 @@ size_t http_parser_execute (http_parser *parser,
uint64_t t;

assert(parser->flags & F_CHUNKED);

if (ch == CR) {
UPDATE_STATE(s_chunk_size_almost_done);
break;
Expand Down Expand Up @@ -1999,8 +2022,8 @@ size_t http_parser_execute (http_parser *parser,
{
assert(parser->flags & F_CHUNKED);
STRICT_CHECK(ch != LF);

parser->nread = 0;
nread = 0;

if (parser->content_length == 0) {
parser->flags |= F_TRAILING;
Expand Down Expand Up @@ -2047,6 +2070,7 @@ size_t http_parser_execute (http_parser *parser,
assert(parser->flags & F_CHUNKED);
STRICT_CHECK(ch != LF);
parser->nread = 0;
nread = 0;
UPDATE_STATE(s_chunk_size_start);
CALLBACK_NOTIFY(chunk_complete);
break;
Expand Down Expand Up @@ -2140,6 +2164,16 @@ http_method_str (enum http_method m)
return ELEM_AT(method_strings, m, "<unknown>");
}

/* Return a string for the given HTTP status value.
 *
 * HTTP_STATUS_MAP(XX) expands XX once per status entry; each expansion
 * becomes a `case HTTP_STATUS_<name>` that selects the stringified third
 * argument of the entry. Values with no matching case yield "<unknown>".
 */
const char *
http_status_str (enum http_status s)
{
  const char *text = "<unknown>";

#define XX(num, name, string) \
  case HTTP_STATUS_##name:    \
    text = #string;           \
    break;

  switch (s) {
    HTTP_STATUS_MAP(XX)
    default:
      break;
  }
#undef XX

  return text;
}

void
http_parser_init (http_parser *parser, enum http_parser_type t)
Expand Down Expand Up @@ -2200,7 +2234,7 @@ http_parse_host_char(enum http_host_state s, const char ch) {
return s_http_host;
}

/* FALLTHROUGH */
/* fall through */
case s_http_host_v6_end:
if (ch == ':') {
return s_http_host_port_start;
Expand All @@ -2213,7 +2247,7 @@ http_parse_host_char(enum http_host_state s, const char ch) {
return s_http_host_v6_end;
}

/* FALLTHROUGH */
/* fall through */
case s_http_host_v6_start:
if (IS_HEX(ch) || ch == ':' || ch == '.') {
return s_http_host_v6;
Expand All @@ -2229,7 +2263,7 @@ http_parse_host_char(enum http_host_state s, const char ch) {
return s_http_host_v6_end;
}

/* FALLTHROUGH */
/* fall through */
case s_http_host_v6_zone_start:
/* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
Expand Down Expand Up @@ -2347,6 +2381,9 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
const char *p;
enum http_parser_url_fields uf, old_uf;
int found_at = 0;
if (buflen == 0) {
return 1;
}

u->port = u->field_set = 0;
s = is_connect ? s_req_server_start : s_req_spaces_before_url;
Expand Down Expand Up @@ -2375,7 +2412,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
case s_req_server_with_at:
found_at = 1;

/* FALLTROUGH */
/* fall through */
case s_req_server:
uf = UF_HOST;
break;
Expand Down Expand Up @@ -2429,14 +2466,28 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
}

if (u->field_set & (1 << UF_PORT)) {
/* Don't bother with endp; we've already validated the string */
unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);

/* Ports have a max value of 2^16 */
if (v > 0xffff) {
return 1;
uint16_t off;
uint16_t len;
const char* p;
const char* end;
unsigned long v;

off = u->field_data[UF_PORT].off;
len = u->field_data[UF_PORT].len;
end = buf + off + len;

/* NOTE: The characters are already validated and are in the [0-9] range */
assert(off + len <= buflen && "Port number overflow");
v = 0;
for (p = buf + off; p < end; p++) {
v *= 10;
v += *p - '0';

/* Ports have a max value of 2^16 */
if (v > 0xffff) {
return 1;
}
}

u->port = (uint16_t) v;
}

Expand All @@ -2451,6 +2502,7 @@ http_parser_pause(http_parser *parser, int paused) {
*/
if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
} else {
assert(0 && "Attempting to pause parser in error state");
Expand Down
Loading

0 comments on commit f09bc9b

Please sign in to comment.