diff options
author | Anas Nashif <anas.nashif@intel.com> | 2012-12-07 02:53:31 -0800 |
---|---|---|
committer | Anas Nashif <anas.nashif@intel.com> | 2012-12-07 02:53:31 -0800 |
commit | cbb6286cb92020dd7ae88798ed831ed76fd2130e (patch) | |
tree | 782a01c00d5e064aa67ea3f9241a8ef1de1060c6 /http.c | |
download | links-cbb6286cb92020dd7ae88798ed831ed76fd2130e.tar.gz links-cbb6286cb92020dd7ae88798ed831ed76fd2130e.tar.bz2 links-cbb6286cb92020dd7ae88798ed831ed76fd2130e.zip |
Imported Upstream version 2.6upstream/2.6upstream
Diffstat (limited to 'http.c')
-rw-r--r-- | http.c | 1042 |
1 files changed, 1042 insertions, 0 deletions
@@ -0,0 +1,1042 @@ +/* http.c + * HTTP protocol client implementation + * (c) 2002 Mikulas Patocka + * This file is a part of the Links program, released under GPL. + */ + +#include "links.h" + +struct http_connection_info { + int bl_flags; + int http10; + int close; + off_t length; + int version; + int chunk_remaining; +}; + +/* prototypes */ +static void http_send_header(struct connection *); +static void http_get_header(struct connection *); +static void add_user_agent(unsigned char **hdr, int *l); +static void add_referer(unsigned char **hdr, int *l, unsigned char *url, unsigned char *prev_url); +static void add_accept(unsigned char **hdr, int *l); +static void add_accept_encoding(unsigned char **hdr, int *l, unsigned char *url, struct connection *c); +static void add_accept_charset(unsigned char **hdr, int *l, struct http_connection_info *info); +static void add_accept_language(unsigned char **hdr, int *l, struct http_connection_info *info); +static void add_connection(unsigned char **hdr, int *l, int http10, int proxy, int post); +static void add_if_modified(unsigned char **hdr, int *l, struct connection *c); +static void add_range(unsigned char **hdr, int *l, unsigned char *url, struct connection *c); +static void add_pragma_no_cache(unsigned char **hdr, int *l, int no_cache); +static void add_auth_string(unsigned char **hdr, int *l, unsigned char *url); +static void add_post_header(unsigned char **hdr, int *l, unsigned char **post); +static void add_extra_options(unsigned char **hdr, int *l); + + +/* Returns a string pointer with value of the item. + * The string must be destroyed after usage with mem_free. + */ +unsigned char *parse_http_header(unsigned char *head, unsigned char *item, unsigned char **ptr) +{ + unsigned char *i, *f, *g, *h; + if (!head) return NULL; + for (f = head; *f; f++) { + if (*f != 10) continue; + f++; + for (i = item; *i && *f; i++, f++) + if (upcase(*i) != upcase(*f)) goto cont; + if (!*f) break; + if (f[0] == ':') { + while (f[1] == ' ') f++; + for (g = ++f; *g >= ' '; g++) + ; + while (g > f && g[-1] == ' ') g--; + h = mem_alloc(g - f + 1); + memcpy(h, f, g - f); + h[g - f] = 0; + if (ptr) { + *ptr = f; + } + return h; + } + cont:; + f--; + } + return NULL; +} + +unsigned char *parse_header_param(unsigned char *x, unsigned char *e, int all) +{ + unsigned char u; + size_t le = strlen(e); + int lp; + unsigned char *y = x; + if (!all) { + a: + if (!(y = strchr(y, ';'))) return NULL; + } + while (*y && (*y == ';' || *y <= ' ')) y++; + if (strlen(y) < le) return NULL; + if (casecmp(y, e, le)) goto a; + y += le; + while (*y && (*y <= ' ' || *y == '=')) y++; + u = ';'; + if (*y == '\'' || *y == '"') u = *y++; + lp = 0; + while (y[lp] >= ' ' && y[lp] != u) { + lp++; + if (lp == MAXINT) overalloc(); + } + return memacpy(y, lp); +} + +int get_http_code(unsigned char *head, int *code, int *version) +{ + if (!head) return -1; + while (head[0] == ' ') head++; + if (upcase(head[0]) != 'H' || upcase(head[1]) != 'T' || upcase(head[2]) != 'T' || + upcase(head[3]) != 'P') return -1; + if (head[4] == '/' && head[5] >= '0' && head[5] <= '9' + && head[6] == '.' && head[7] >= '0' && head[7] <= '9' && head[8] <= ' ') { + if (version) *version = (head[5] - '0') * 10 + head[7] - '0'; + } else if (version) *version = 0; + for (head += 4; *head > ' '; head++) + ; + if (*head++ != ' ') return -1; + if (head[0] < '1' || head [0] > '9' || head[1] < '0' || head[1] > '9' || + head[2] < '0' || head [2] > '9') { + if (code) *code = 200; + return 0; + } + if (code) *code = (head[0]-'0')*100 + (head[1]-'0')*10 + head[2]-'0'; + return 0; +} + +static struct { + unsigned char *name; + int bugs; +} buggy_servers[] = { + { "mod_czech/3.1.0", BL_HTTP10 }, + { "Purveyor", BL_HTTP10 }, + { "Netscape-Enterprise", BL_HTTP10 | BL_NO_ACCEPT_LANGUAGE }, + { "Apache Coyote", BL_HTTP10 }, + { "lighttpd", BL_HTTP10 }, + { "FORPSI", BL_NO_RANGE }, + { "Sausalito", BL_HTTP10 }, + { NULL, 0 } +}; + +static int check_http_server_bugs(unsigned char *url, struct http_connection_info *info, unsigned char *head) +{ + unsigned char *server; + int i, bugs; + if (!http_options.allow_blacklist || info->http10) return 0; + if (!(server = parse_http_header(head, "Server", NULL))) return 0; + bugs = 0; + for (i = 0; buggy_servers[i].name; i++) if (strstr(server, buggy_servers[i].name)) bugs |= buggy_servers[i].bugs; + mem_free(server); + if (bugs && (server = get_host_name(url))) { + add_blacklist_entry(server, bugs); + mem_free(server); + return bugs & ~BL_NO_RANGE; + } + return 0; +} + +static void http_end_request(struct connection *c, int notrunc, int nokeepalive, int state) +{ + if (state == S__OK) { + if (c->cache) { + if (!notrunc) truncate_entry(c->cache, c->from, 1); + c->cache->incomplete = 0; + } + } + setcstate(c, state); + if (c->info && !((struct http_connection_info *)c->info)->close +#ifdef HAVE_SSL + && !c->ssl /* We won't keep alive ssl connections */ +#endif + && !nokeepalive + && (!http_options.bug_post_no_keepalive || !strchr(c->url, POST_CHAR))) { + add_keepalive_socket(c, HTTP_KEEPALIVE_TIMEOUT); + } else { + abort_connection(c); + } +} + +void http_func(struct connection *c) +{ + /*setcstate(c, S_CONN);*/ + /*set_timeout(c);*/ + if (get_keepalive_socket(c)) { + int p; + if ((p = get_port(c->url)) == -1) { + setcstate(c, S_INTERNAL); + abort_connection(c); + return; + } + make_connection(c, p, &c->sock1, http_send_header); + } else http_send_header(c); +} + +void proxy_func(struct connection *c) +{ + http_func(c); +} + +static void add_url_to_str(unsigned char **str, int *l, unsigned char *url) +{ + unsigned char *sp; + for (sp = url; *sp && *sp != POST_CHAR; sp++) { + if (*sp <= ' ') { + unsigned char esc[4]; + sprintf(esc, "%%%02X", (int)*sp); + add_to_str(str, l, esc); + } else { + add_chr_to_str(str, l, *sp); + } + } +} + +static void http_send_header(struct connection *c) +{ + struct http_connection_info *info; + int http10 = http_options.http10; + unsigned char *hdr; + unsigned char *h, *u; + unsigned char *u2; + int l = 0; + unsigned char *post; + unsigned char *host; + + if (!c->cache) { + if (!find_in_cache(c->url, &c->cache)) + c->cache->refcount--; + } + + host = upcase(c->url[0]) != 'P' ? c->url : get_url_data(c->url); + set_timeout(c); + info = mem_calloc(sizeof(struct http_connection_info)); + c->info = info; + if ((h = get_host_name(host))) { + info->bl_flags = get_blacklist_flags(h); + mem_free(h); + } + if (info->bl_flags & BL_HTTP10) http10 = 1; + info->http10 = http10; + post = strchr(host, POST_CHAR); + if (post) post++; + hdr = init_str(); + if (!post) add_to_str(&hdr, &l, "GET "); + else { + add_to_str(&hdr, &l, "POST "); + c->unrestartable = 2; + } + if (upcase(c->url[0]) != 'P') add_to_str(&hdr, &l, "/"); + if (!(u = get_url_data(c->url))) { + http_bad_url: + mem_free(hdr); + http_end_request(c, 0, 1, S_BAD_URL); + return; + } + if (post && post < u) { + goto http_bad_url; + } + u2 = u; + if (upcase(c->url[0]) == 'P' && !*c->socks_proxy && *proxies.dns_append) { + unsigned char *u_host; + int u_host_len; + int u2_len = 0; + if (parse_url(u, NULL, NULL, NULL, NULL, NULL, &u_host, &u_host_len, NULL, NULL, NULL, NULL, NULL)) goto http_bad_url; + u2 = init_str(); + add_bytes_to_str(&u2, &u2_len, u, u_host + u_host_len - u); + add_to_str(&u2, &u2_len, proxies.dns_append); + add_to_str(&u2, &u2_len, u_host + u_host_len); + } + add_url_to_str(&hdr, &l, u2); + if (u2 != u) mem_free(u2); + if (!http10) add_to_str(&hdr, &l, " HTTP/1.1\r\n"); + else add_to_str(&hdr, &l, " HTTP/1.0\r\n"); + if ((h = get_host_name(host))) { + add_to_str(&hdr, &l, "Host: "); + add_to_str(&hdr, &l, h); + mem_free(h); + if ((h = get_port_str(host))) { + add_to_str(&hdr, &l, ":"); + add_to_str(&hdr, &l, h); + mem_free(h); + } + add_to_str(&hdr, &l, "\r\n"); + } + add_user_agent(&hdr, &l); + add_referer(&hdr, &l, host, c->prev_url); + add_accept(&hdr, &l); + add_accept_encoding(&hdr, &l, host, c); + add_accept_charset(&hdr, &l, info); + add_accept_language(&hdr, &l, info); + add_connection(&hdr, &l, http10, upcase(c->url[0]) == 'P', !!post); + add_if_modified(&hdr, &l, c); + add_range(&hdr, &l, host, c); + add_pragma_no_cache(&hdr, &l, c->no_cache); + add_auth_string(&hdr, &l, c->url); + add_post_header(&hdr, &l, &post); + add_cookies(&hdr, &l, host); + add_extra_options(&hdr, &l); + add_to_str(&hdr, &l, "\r\n"); + if (post) { + while (post[0] && post[1]) { + int h1, h2; + h1 = post[0] <= '9' ? (unsigned)post[0] - '0' : post[0] >= 'A' ? upcase(post[0]) - 'A' + 10 : 0; + if (h1 < 0 || h1 >= 16) h1 = 0; + h2 = post[1] <= '9' ? (unsigned)post[1] - '0' : post[1] >= 'A' ? upcase(post[1]) - 'A' + 10 : 0; + if (h2 < 0 || h2 >= 16) h2 = 0; + add_chr_to_str(&hdr, &l, h1 * 16 + h2); + post += 2; + } + } + write_to_socket(c, c->sock1, hdr, l, http_get_header); + mem_free(hdr); + setcstate(c, S_SENT); +} + +static void add_user_agent(unsigned char **hdr, int *l) +{ + add_to_str(hdr, l, "User-Agent: "); + if (!(*http_options.header.fake_useragent)) { + add_to_str(hdr, l, "Links (" VERSION_STRING "; "); + add_to_str(hdr, l, system_name); + add_to_str(hdr, l, "; "); + add_to_str(hdr, l, compiler_name); + add_to_str(hdr, l, "; "); + if (!F && !list_empty(terminals)) { + struct terminal *term; + unsigned char *t = "text"; + foreach(term, terminals) if (term->spec->braille) t = "braille"; + add_to_str(hdr, l, t); + } +#ifdef G + else if (F && drv) { + add_to_str(hdr, l, drv->name); + } +#endif + else { + add_to_str(hdr, l, "dump"); + } + add_to_str(hdr, l, ")\r\n"); + } else { + add_to_str(hdr, l, http_options.header.fake_useragent); + add_to_str(hdr, l, "\r\n"); + } +} + +static void add_referer(unsigned char **hdr, int *l, unsigned char *url, unsigned char *prev_url) +{ + switch (http_options.header.referer) + { + case REFERER_FAKE: + add_to_str(hdr, l, "Referer: "); + add_to_str(hdr, l, http_options.header.fake_referer); + add_to_str(hdr, l, "\r\n"); + break; + + case REFERER_SAME_URL: + add_to_str(hdr, l, "Referer: "); + add_url_to_str(hdr, l, url); + add_to_str(hdr, l, "\r\n"); + break; + + case REFERER_REAL_SAME_SERVER: + { + unsigned char *h, *j; + int brk = 1; + if ((h = get_host_name(url))) { + if ((j = get_host_name(prev_url))) { + if (!strcasecmp(h, j)) brk = 0; + mem_free(j); + } + mem_free(h); + } + if (brk) break; + /* fall through */ + } + case REFERER_REAL: + { + unsigned char *ref; + unsigned char *user, *ins; + int ulen; + if (!prev_url) break; /* no referrer */ + + ref = stracpy(prev_url); + if (!parse_url(ref, NULL, &user, &ulen, NULL, NULL, &ins, NULL, NULL, NULL, NULL, NULL, NULL) && ulen && ins) { + memmove(user, ins, strlen(ins) + 1); + } + add_to_str(hdr, l, "Referer: "); + add_url_to_str(hdr, l, ref); + add_to_str(hdr, l, "\r\n"); + mem_free(ref); + } + break; + } +} + +static void add_accept(unsigned char **hdr, int *l) +{ + add_to_str(hdr, l, "Accept: */*\r\n"); +} + +#ifdef HAVE_ANY_COMPRESSION +static int advertise_compression(unsigned char *url, struct connection *c) +{ + struct http_connection_info *info = c->info; + unsigned char *extd; + if (c->no_compress || http_options.no_compression || info->bl_flags & BL_NO_COMPRESSION) + return 0; + extd = strrchr(url, '.'); + if (extd && get_compress_by_extension(extd + 1, strchr(extd + 1, 0))) + return 0; + return 1; +} +#endif + +static void add_accept_encoding(unsigned char **hdr, int *l, unsigned char *url, struct connection *c) +{ +#if defined(HAVE_ZLIB) || defined(HAVE_BZIP2) || defined(HAVE_LZMA) +#define info ((struct http_connection_info *)c->info) + if (advertise_compression(url, c)) { + int orig_l = *l; + int l1; + add_to_str(hdr, l, "Accept-Encoding: "); + l1 = *l; +#if defined(HAVE_ZLIB) + if (*l != l1) add_chr_to_str(hdr, l, ','); + add_to_str(hdr, l, "gzip,deflate"); +#endif +#if defined(HAVE_BZIP2) + if (!(info->bl_flags & BL_NO_BZIP2)) { + if (*l != l1) add_chr_to_str(hdr, l, ','); + add_to_str(hdr, l, "bzip2"); + } +#endif +#if defined(HAVE_LZMA) + if (!(info->bl_flags & BL_NO_BZIP2)) { + if (*l != l1) add_chr_to_str(hdr, l, ','); + add_to_str(hdr, l, "lzma,lzma2"); + } +#endif + if (*l != l1) add_to_str(hdr, l, "\r\n"); + else *l = orig_l; + } +#undef info +#endif +} + +static void add_accept_charset(unsigned char **hdr, int *l, struct http_connection_info *info) +{ + static unsigned char *accept_charset = NULL; + if (!accept_charset) { + int i; + unsigned char *cs, *ac; + int aclen = 0; + ac = init_str(); + for (i = 0; (cs = get_cp_mime_name(i)); i++) { + if (aclen) add_to_str(&ac, &aclen, ","); + else add_to_str(&ac, &aclen, "Accept-Charset: "); + add_to_str(&ac, &aclen, cs); + } + if (aclen) add_to_str(&ac, &aclen, "\r\n"); + retry: + if (!(accept_charset = malloc(strlen(ac) + 1))) { + if (out_of_memory(NULL, 0)) + goto retry; + mem_free(ac); + return; + } + strcpy(accept_charset, ac); + mem_free(ac); + } + if (!(info->bl_flags & BL_NO_CHARSET) && !http_options.no_accept_charset) add_to_str(hdr, l, accept_charset); +} + +static void add_accept_language(unsigned char **hdr, int *l, struct http_connection_info *info) +{ + if (!(info->bl_flags & BL_NO_ACCEPT_LANGUAGE)) { + int la; + add_to_str(hdr, l, "Accept-Language: "); + la = *l; + add_to_str(hdr, l, _(TEXT_(T__ACCEPT_LANGUAGE), NULL)); + add_to_str(hdr, l, ","); + if (!strstr(*hdr + la, "en,") && !strstr(*hdr + la, "en;")) add_to_str(hdr, l, "en;q=0.2,"); + add_to_str(hdr, l, "*;q=0.1\r\n"); + } +} + +static void add_connection(unsigned char **hdr, int *l, int http10, int proxy, int post) +{ + if (!http10) { + if (!proxy) add_to_str(hdr, l, "Connection: "); + else add_to_str(hdr, l, "Proxy-Connection: "); + if (!post || !http_options.bug_post_no_keepalive) add_to_str(hdr, l, "keep-alive\r\n"); + else add_to_str(hdr, l, "close\r\n"); + } +} + +static void add_if_modified(unsigned char **hdr, int *l, struct connection *c) +{ + struct cache_entry *e; + if ((e = c->cache)) { + int code = 0; /* against warning */ + if (get_http_code(e->head, &code, NULL) || code >= 400) goto skip_ifmod; + if (!e->incomplete && e->head && c->no_cache <= NC_IF_MOD) { + unsigned char *m; + if (e->last_modified) m = stracpy(e->last_modified); + else if ((m = parse_http_header(e->head, "Date", NULL))) + ; + else if ((m = parse_http_header(e->head, "Expires", NULL))) + ; + else goto skip_ifmod; + add_to_str(hdr, l, "If-Modified-Since: "); + add_to_str(hdr, l, m); + add_to_str(hdr, l, "\r\n"); + mem_free(m); + } + skip_ifmod:; + } +} + +static void add_range(unsigned char **hdr, int *l, unsigned char *url, struct connection *c) +{ + struct cache_entry *e; + struct http_connection_info *info = c->info; + if ((e = c->cache)) { + int code = 0; /* against warning */ + if (!get_http_code(e->head, &code, NULL) && code >= 300) + return; + } + if (c->from /*&& (c->est_length == -1 || c->from < c->est_length)*/ && c->no_cache < NC_IF_MOD && !(info->bl_flags & BL_NO_RANGE)) { +/* If the cached entity is compressed and we turned off compression, + request the whole file */ +#ifdef HAVE_ANY_COMPRESSION + if (!advertise_compression(url, c) && e) { + unsigned char *d; + if ((d = parse_http_header(e->head, "Transfer-Encoding", NULL))) { + mem_free(d); + return; + } + } +#endif + add_to_str(hdr, l, "Range: bytes="); + add_num_to_str(hdr, l, c->from); + add_to_str(hdr, l, "-\r\n"); + } +} + +static void add_pragma_no_cache(unsigned char **hdr, int *l, int no_cache) +{ + if (no_cache >= NC_PR_NO_CACHE) add_to_str(hdr, l, "Pragma: no-cache\r\nCache-Control: no-cache\r\n"); +} + +static void add_auth_string(unsigned char **hdr, int *l, unsigned char *url) +{ + unsigned char *h; + if ((h = get_auth_string(url))) { + add_to_str(hdr, l, h); + mem_free(h); + } +} + +static void add_post_header(unsigned char **hdr, int *l, unsigned char **post) +{ + if (*post) { + unsigned char *pd = strchr(*post, '\n'); + if (pd) { + add_to_str(hdr, l, "Content-Type: "); + add_bytes_to_str(hdr, l, *post, pd - *post); + add_to_str(hdr, l, "\r\n"); + *post = pd + 1; + } + add_to_str(hdr, l, "Content-Length: "); + add_num_to_str(hdr, l, strlen(*post) / 2); + add_to_str(hdr, l, "\r\n"); + } +} + +static void add_extra_options(unsigned char **hdr, int *l) +{ + unsigned char *p = http_options.header.extra_header; + while (1) { + unsigned char *q = p + strcspn(p, "\\"); + if (p != q) { + unsigned char *c; + unsigned char *s = memacpy(p, q - p); + c = strchr(s, ':'); + if (c && casecmp(s, "Cookie:", 7)) { + unsigned char *v = NULL; /* against warning */ + unsigned char *cc = memacpy(s, c - s); + unsigned char *x = parse_http_header(*hdr, cc, &v); + mem_free(cc); + if (x) { + unsigned char *new_hdr; + int new_l; + mem_free(x); + new_hdr = init_str(); + new_l = 0; + add_bytes_to_str(&new_hdr, &new_l, *hdr, v - *hdr); + while (*++c == ' ') + ; + add_to_str(&new_hdr, &new_l, c); + add_to_str(&new_hdr, &new_l, v + strcspn(v, "\r\n")); + mem_free(*hdr); + *hdr = new_hdr; + *l = new_l; + goto already_added; + } + } + add_to_str(hdr, l, s); + add_to_str(hdr, l, "\r\n"); + already_added: + mem_free(s); + } + if (!*q) break; + p = q + 1; + } +} + +static int is_line_in_buffer(struct read_buffer *rb) +{ + int l; + for (l = 0; l < rb->len; l++) { + if (rb->data[l] == 10) return l + 1; + if (l < rb->len - 1 && rb->data[l] == 13 && rb->data[l + 1] == 10) return l + 2; + if (l == rb->len - 1 && rb->data[l] == 13) return 0; + if (rb->data[l] < ' ') return -1; + } + return 0; +} + +static void read_http_data(struct connection *c, struct read_buffer *rb) +{ + struct http_connection_info *info = c->info; + int a; + set_timeout(c); + if (rb->close == 2) { + http_end_request(c, 0, 0, S__OK); + return; + } + if (info->length != -2) { + int l = rb->len; + if (info->length >= 0 && info->length < l) l = info->length; + if ((off_t)(0UL + c->from + l) < 0) { + setcstate(c, S_LARGE_FILE); + abort_connection(c); + return; + } + c->received += l; + a = add_fragment(c->cache, c->from, rb->data, l); + if (a < 0) { + setcstate(c, a); + abort_connection(c); + return; + } + if (a == 1) c->tries = 0; + if (info->length >= 0) info->length -= l; + c->from += l; + kill_buffer_data(rb, l); + if (!info->length && !rb->close) { + http_end_request(c, 0, 0, S__OK); + return; + } + } else { + next_chunk: + if (info->chunk_remaining == -2) { + int l; + if ((l = is_line_in_buffer(rb))) { + if (l == -1) { + setcstate(c, S_HTTP_ERROR); + abort_connection(c); + return; + } + kill_buffer_data(rb, l); + if (l <= 2) { + http_end_request(c, 0, 0, S__OK); + return; + } + goto next_chunk; + } + } else if (info->chunk_remaining == -1) { + int l; + if ((l = is_line_in_buffer(rb))) { + unsigned char *de; + long n = 0; /* warning, go away */ + if (l != -1) n = strtol(rb->data, (char **)(void *)&de, 16); + if (l == -1 || n < 0 || n >= MAXINT || de == rb->data) { + setcstate(c, S_HTTP_ERROR); + abort_connection(c); + return; + } + kill_buffer_data(rb, l); + if (!(info->chunk_remaining = n)) info->chunk_remaining = -2; + goto next_chunk; + } + } else { + int l = info->chunk_remaining; + if (l > rb->len) l = rb->len; + if ((off_t)(0UL + c->from + l) < 0) { + setcstate(c, S_LARGE_FILE); + abort_connection(c); + return; + } + c->received += l; + a = add_fragment(c->cache, c->from, rb->data, l); + if (a < 0) { + setcstate(c, a); + abort_connection(c); + return; + } + if (a == 1) c->tries = 0; + info->chunk_remaining -= l; + c->from += l; + kill_buffer_data(rb, l); + if (!info->chunk_remaining && rb->len >= 1) { + if (rb->data[0] == 10) kill_buffer_data(rb, 1); + else { + if (rb->data[0] != 13 || (rb->len >= 2 && rb->data[1] != 10)) { + setcstate(c, S_HTTP_ERROR); + abort_connection(c); + return; + } + if (rb->len < 2) goto read_more; + kill_buffer_data(rb, 2); + } + info->chunk_remaining = -1; + goto next_chunk; + } + } + + } + read_more: + read_from_socket(c, c->sock1, rb, read_http_data); + setcstate(c, S_TRANS); +} + +static int get_header(struct read_buffer *rb) +{ + int i; + if (rb->len <= 0) return 0; + if (rb->data[0] != 'H') return -2; + if (rb->len <= 1) return 0; + if (rb->data[1] != 'T') return -2; + if (rb->len <= 2) return 0; + if (rb->data[2] != 'T') return -2; + if (rb->len <= 3) return 0; + if (rb->data[3] != 'P') return -2; + for (i = 0; i < rb->len; i++) { + unsigned char a = rb->data[i]; + if (/*a < ' ' && a != 10 && a != 13*/!a) return -1; + if (i < rb->len - 1 && a == 10 && rb->data[i + 1] == 10) return i + 2; + if (i < rb->len - 3 && a == 13) { + if (rb->data[i + 1] != 10) return -1; + if (rb->data[i + 2] == 13) { + if (rb->data[i + 3] != 10) return -1; + return i + 4; + } + } + } + return 0; +} + +static void http_got_header(struct connection *c, struct read_buffer *rb) +{ + off_t cf; + int state = c->state != S_PROC ? S_GETH : S_PROC; + unsigned char *head; + unsigned char *cookie, *ch; + int a, h = 0, version = 0; /* against warning */ + unsigned char *d; + struct cache_entry *e; + int previous_http_code; + struct http_connection_info *info; + unsigned char *host = upcase(c->url[0]) != 'P' ? c->url : get_url_data(c->url); + set_timeout(c); + info = c->info; + if (rb->close == 2) { + unsigned char *h; + if (!c->tries && (h = get_host_name(host))) { + if (info->bl_flags & BL_NO_CHARSET) { + del_blacklist_entry(h, BL_NO_CHARSET); + } else { + add_blacklist_entry(h, BL_NO_CHARSET); + c->tries = -1; + } + mem_free(h); + } + setcstate(c, S_CANT_READ); + retry_connection(c); + return; + } + rb->close = 0; + again: + if ((a = get_header(rb)) == -1) { + setcstate(c, S_HTTP_ERROR); + abort_connection(c); + return; + } + if (!a) { + read_from_socket(c, c->sock1, rb, http_got_header); + setcstate(c, state); + return; + } + if (a != -2) { + head = mem_alloc(a + 1); + memcpy(head, rb->data, a); head[a] = 0; + kill_buffer_data(rb, a); + } else { + head = stracpy("HTTP/0.9 200 OK\r\nContent-Type: text/html\r\n\r\n"); + } + if (get_http_code(head, &h, &version) || h == 101) { + mem_free(head); + setcstate(c, S_HTTP_ERROR); + abort_connection(c); + return; + } + if (check_http_server_bugs(host, c->info, head) && is_connection_restartable(c)) { + mem_free(head); + setcstate(c, S_RESTART); + retry_connection(c); + return; + } + ch = head; + while ((cookie = parse_http_header(ch, "Set-Cookie", &ch))) { + unsigned char *host = upcase(c->url[0]) != 'P' ? c->url : get_url_data(c->url); + set_cookie(NULL, host, cookie); + mem_free(cookie); + } + if (h == 100) { + mem_free(head); + state = S_PROC; + goto again; + } + if (h < 200) { + mem_free(head); + setcstate(c, S_HTTP_ERROR); + abort_connection(c); + return; + } + if (h == 204) { + mem_free(head); + http_end_request(c, 0, 0, S_HTTP_204); + return; + } + if (h == 304) { + mem_free(head); + http_end_request(c, 1, 0, S__OK); + return; + } + if (h == 416 && c->from) { + mem_free(head); + http_end_request(c, 0, 1, S__OK); + return; + } + if ((h == 500 || h == 502 || h == 503 || h == 504) && http_options.retry_internal_errors && is_connection_restartable(c)) { + /* !!! FIXME: wait some time ... */ + if (is_last_try(c)) { + unsigned char *h; + if ((h = get_host_name(host))) { + add_blacklist_entry(h, BL_NO_BZIP2); + mem_free(h); + } + } + mem_free(head); + setcstate(c, S_RESTART); + retry_connection(c); + return; + } + if (!c->cache) { + if (get_cache_entry(c->url, &c->cache)) { + mem_free(head); + setcstate(c, S_OUT_OF_MEM); + abort_connection(c); + return; + } + c->cache->refcount--; + } + e = c->cache; + previous_http_code = e->http_code; + e->http_code = h; + if (e->head) mem_free(e->head); + e->head = head; + if ((d = parse_http_header(head, "Expires", NULL))) { + time_t t = parse_http_date(d); + if (t && e->expire_time != 1) e->expire_time = t; + mem_free(d); + } + if ((d = parse_http_header(head, "Pragma", NULL))) { + if (!casecmp(d, "no-cache", 8)) e->expire_time = 1; + mem_free(d); + } + if ((d = parse_http_header(head, "Cache-Control", NULL))) { + unsigned char *f = d; + while (1) { + while (*f && (*f == ' ' || *f == ',')) f++; + if (!*f) break; + if (!casecmp(f, "no-cache", 8) || !casecmp(f, "must-revalidate", 15)) { + e->expire_time = 1; + } + if (!casecmp(f, "max-age=", 8)) { + if (e->expire_time != 1) e->expire_time = time(NULL) + atoi(f + 8); + } + while (*f && *f != ',') f++; + } + mem_free(d); + } +#ifdef HAVE_SSL + if (c->ssl) { + int l = 0; + if (e->ssl_info) mem_free(e->ssl_info); + e->ssl_info = init_str(); + add_num_to_str(&e->ssl_info, &l, SSL_get_cipher_bits(c->ssl, NULL)); + add_to_str(&e->ssl_info, &l, "-bit "); + add_to_str(&e->ssl_info, &l, SSL_get_cipher_version(c->ssl)); + add_to_str(&e->ssl_info, &l, " "); + add_to_str(&e->ssl_info, &l, (unsigned char *)SSL_get_cipher_name(c->ssl)); + } +#endif + if (e->redirect) mem_free(e->redirect), e->redirect = NULL; + if (h == 301 || h == 302 || h == 303 || h == 307) { + if ((h == 302 || h == 303 || h == 307) && !e->expire_time) e->expire_time = 1; + if ((d = parse_http_header(e->head, "Location", NULL))) { + unsigned char *user, *ins; + unsigned char *newuser, *newpassword; + if (!parse_url(d, NULL, &user, NULL, NULL, NULL, &ins, NULL, NULL, NULL, NULL, NULL, NULL) && !user && ins && (newuser = get_user_name(host))) { + if (*newuser) { + int ins_off = ins - d; + newpassword = get_pass(host); + if (!newpassword) newpassword = stracpy(""); + add_to_strn(&newuser, ":"); + add_to_strn(&newuser, newpassword); + add_to_strn(&newuser, "@"); + extend_str(&d, strlen(newuser)); + ins = d + ins_off; + memmove(ins + strlen(newuser), ins, strlen(ins) + 1); + memcpy(ins, newuser, strlen(newuser)); + mem_free(newpassword); + } + mem_free(newuser); + } + if (e->redirect) mem_free(e->redirect); + e->redirect = d; + e->redirect_get = h == 303; + } + } + if (!e->expire_time && strchr(c->url, POST_CHAR)) e->expire_time = 1; + info->close = 0; + info->length = -1; + info->version = version; + if ((d = parse_http_header(e->head, "Connection", NULL)) || (d = parse_http_header(e->head, "Proxy-Connection", NULL))) { + if (!strcasecmp(d, "close")) info->close = 1; + mem_free(d); + } else if (version < 11) info->close = 1; + cf = c->from; + c->from = 0; + if ((d = parse_http_header(e->head, "Content-Range", NULL))) { + if (strlen(d) > 6) { + d[5] = 0; + if (!(strcasecmp(d, "bytes")) && d[6] >= '0' && d[6] <= '9') { +#if defined(HAVE_STRTOLL) + long long f = strtoll(d + 6, NULL, 10); + if (f == MAXLLONG) f = -1; +#elif defined(HAVE_STRTOQ) + longlong f = strtoq(d + 6, NULL, 10); +#else + long f = strtol(d + 6, NULL, 10); + if (f == MAXLONG) f = -1; +#endif + if (f >= 0 && (off_t)f >= 0 && (off_t)f == f) c->from = f; + } + } + mem_free(d); + } else if (h == 206) { +/* Hmm ... some servers send 206 partial but don't sent Content-Range */ + c->from = cf; + } + if (cf && !c->from && !c->unrestartable) c->unrestartable = 1; + if (c->from > cf || c->from < 0) { + setcstate(c, S_HTTP_ERROR); + abort_connection(c); + return; + } + if ((d = parse_http_header(e->head, "Content-Length", NULL))) { + unsigned char *ep; +#if defined(HAVE_STRTOLL) + long long l = strtoll(d, (char **)(void *)&ep, 10); + if (l == MAXLLONG) l = -1; +#elif defined(HAVE_STRTOQ) + longlong l = strtoq(d, (char **)(void *)&ep, 10); +#else + long l = strtol(d, (char **)(void *)&ep, 10); + if (l == MAXLONG) l = -1; +#endif + if (!*ep && l >= 0 && (off_t)l >= 0 && (off_t)l == l) { + if (!info->close || version >= 11) info->length = l; + if (c->from + l >= 0) c->est_length = c->from + l; + } + mem_free(d); + } + if ((d = parse_http_header(e->head, "Accept-Ranges", NULL))) { + if (!strcasecmp(d, "none") && !c->unrestartable) c->unrestartable = 1; + mem_free(d); + } else { + if (!c->unrestartable && !c->from) c->unrestartable = 1; + } + if (info->bl_flags & BL_NO_RANGE && !c->unrestartable) c->unrestartable = 1; + if ((d = parse_http_header(e->head, "Transfer-Encoding", NULL))) { + if (!strcasecmp(d, "chunked")) { + info->length = -2; + info->chunk_remaining = -1; + } + mem_free(d); + } + if (!info->close && info->length == -1) info->close = 1; + if ((d = parse_http_header(e->head, "Last-Modified", NULL))) { + if (e->last_modified && strcasecmp(e->last_modified, d)) { + delete_entry_content(e); + if (c->from) { + c->from = 0; + mem_free(d); + setcstate(c, S_MODIFIED); + retry_connection(c); + return; + } + } + if (!e->last_modified) e->last_modified = d; + else mem_free(d); + } + if (!e->last_modified && (d = parse_http_header(e->head, "Date", NULL))) + e->last_modified = d; + if (info->length == -1 || (version < 11 && info->close)) rb->close = 1; + + + /* + * Truncate entry if: + * - it is compressed (the mix of an old and new document + * would likely produce decompression error). + * - it was http authentication (the user doesn't need to see the + * authentication message). + */ + if ((d = parse_http_header(e->head, "Content-Encoding", NULL))) { + mem_free(d); + truncate_entry(e, c->from, 0); + } else if (previous_http_code == 401 || previous_http_code == 407) { + truncate_entry(e, c->from, 0); + } + + read_http_data(c, rb); +} + +static void http_get_header(struct connection *c) +{ + struct read_buffer *rb; + set_timeout(c); + if (!(rb = alloc_read_buffer(c))) return; + rb->close = 1; + read_from_socket(c, c->sock1, rb, http_got_header); +} |