0、简介

1、定义

RFC定义 https://tools.ietf.org/html/rfc2616#section-3.6.1

Chunked-Body    = *chunk
                  last-chunk
                  trailer
                  CRLF

chunk           = chunk-size [ chunk-extension ] CRLF
                  chunk-data CRLF
chunk-size      = 1*HEX
last-chunk      = 1*("0") [ chunk-extension ] CRLF

chunk-extension = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
chunk-ext-name  = token
chunk-ext-val   = token | quoted-string
chunk-data      = chunk-size(OCTET)
trailer         = *(entity-header CRLF)

1.1、Entity Header Fields

https://tools.ietf.org/html/rfc2616#section-7.1

2、解析

解码伪代码 https://tools.ietf.org/html/rfc2616#section-19.4.6

length := 0  //body总长度初始化为0
read chunk-size, chunk-extension (if any) and CRLF //读取第一行 获取 第一块 chunked 数据的大小(chunk扩展项)
while (chunk-size > 0) {
read chunk-data and CRLF //读取chunk-data, chunk-data 的长度为 chunk-size, 后面跟 \r\n 表示结束, chunk-size不包含\r\n
append chunk-data to entity-body //将chunk-data 追加到 实体body 中(解码后)
length := length + chunk-size //body总长度更新
read chunk-size and CRLF //读取下一个 chunk头 获取chunk-size
}
//退出循环说明 chunk-size 为0, 即last-chunk, last-chunk后面可能会跟有trailer
read entity-header //读取 entity-header
while (entity-header not empty) { //读到空行,即整行内容只是\r\n这两个字节
append entity-header to existing header fields
read entity-header
}
Content-Length := length
Remove "chunked" from Transfer-Encoding

运行方式 ./a.out -u http://www.httpwatch.com/httpgallery/chunked/chunkedimage.aspx -t target.jpg

#define _GNU_SOURCE /* expose strdup(), getopt() and getaddrinfo() under strict -std= modes */
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

/*
 * Minimal HTTP/1.1 client that downloads one resource and decodes a
 * "Transfer-Encoding: chunked" response body into a local file.
 *
 * chunked-encoding example URL
 * GET http://www.httpwatch.com/httpgallery/chunked/chunkedimage.aspx
 */

/* Default TCP service used when the URL carries no explicit port. */
#define DEFAULT_SERVICE "80"
/* Default output file name used when the URL path has no final component. */
#define DEFAULT_TARGET "index.html"

void parse_paramters(int argc, char* argv[], char** url, char** target);
void parse_req_url(char* url, char** host, char** service, char** uri, char** target);

/*
 * host can be domain-name or ip-address
 * service can be well-known service name("http"/"ftp") or port number
 */
int connect_to_server(char* host, char* service);
void send_req_to_server(int fd, char* uri, char* host, char* service);
void recv_res_from_server(int fd, char* store_path);

/* strdup() that terminates the program instead of returning NULL. */
static char* xstrdup(const char* s)
{
    char* copy = strdup(s);

    if (NULL == copy) {
        perror("strdup() failed");
        exit(EXIT_FAILURE);
    }
    return copy;
}

/*
 * Program entry: parse the command line, split the URL, connect, send the
 * GET request and store the response body in the target file.
 */
int main(int argc, char* argv[])
{
    int sfd; /* socket file descriptor */
    char *url = NULL, *host = NULL, *service = NULL, *uri = NULL, *target = NULL;

    /* parse request */
    parse_paramters(argc, argv, &url, &target);
    /* only derive a target name from the URL when -t was not given */
    parse_req_url(url, &host, &service, &uri, target ? NULL : &target);
    printf("Host : [%s]\n", host);
    printf("Port : [%s]\n", service);
    printf("Uri : [%s]\n", uri);
    printf("Target : [%s]\n", target);

    /* create the connection to server */
    sfd = connect_to_server(host, service);

    /* send http req to server */
    send_req_to_server(sfd, uri, host, service);
    free(uri);
    free(host);
    free(service);

    /* get response from server */
    recv_res_from_server(sfd, target);
    free(target);

    /* cleanup */
    shutdown(sfd, SHUT_RDWR);
    close(sfd);

    exit(EXIT_SUCCESS);
}

/*
 * Parse "-u url" and "-t store_path" from the command line.
 *
 * *url receives a pointer into argv (not owned by the caller); *target
 * receives a heap copy of the -t argument and is left untouched when -t is
 * absent. Prints the usage text and exits when the arguments are unusable
 * or no URL was supplied.
 */
void parse_paramters(int argc, char* argv[], char** url, char** target)
{
    int opt;
    const char* prog = (argv && argc > 0 && argv[0]) ? argv[0] : "a.out";

    if (!(url && target && (argc > 1) && argv)) {
        fprintf(stderr, "Usage: %s [-u url] [-t store_path]\n", prog);
        exit(EXIT_FAILURE);
    }

    *url = NULL;
    while ((opt = getopt(argc, argv, "u:t:")) != -1) {
        switch (opt) {
        case 'u':
            *url = optarg;
            break;
        case 't':
            free(*target); /* tolerate a repeated -t without leaking */
            *target = xstrdup(optarg);
            break;
        default: /* '?' */
            fprintf(stderr, "Usage: %s [-u url] [-t store_path]\n", prog);
            exit(EXIT_FAILURE);
        }
    }

    /* the URL is mandatory: everything downstream dereferences it */
    if (NULL == *url) {
        fprintf(stderr, "Usage: %s [-u url] [-t store_path]\n", prog);
        exit(EXIT_FAILURE);
    }
}

/*
 * Split "scheme://host[:port][/path]" into its parts.
 *
 * url is modified in place (separators are overwritten with '\0').
 * *host, *service and *uri receive heap copies the caller must free.
 * When target is non-NULL, *target receives a heap copy of the last path
 * component, or "index.html" when there is none.
 * Bracketed IPv6 literals ("[::1]:80") are not supported.
 */
void parse_req_url(char* url, char** host, char** service, char** uri, char** target)
{
    char* token;

    /* skip scheme */
    token = strstr(url, "://");
    if (token) {
        url = token + sizeof("://") - 1;
    }

    /* find uri */
    token = strchr(url, '/');
    if (NULL == token) {
        *uri = xstrdup("/");
        if (target) {
            *target = xstrdup(DEFAULT_TARGET);
        }
    }
    else {
        *uri = xstrdup(token);
        *token = '\0'; /* terminate "host[:port]" */
        if (target) {
            token = strrchr(*uri, '/');
            /* a path ending in '/' has no file name: fall back to the default */
            if (token && token[1] != '\0') {
                *target = xstrdup(token + 1);
            }
            else {
                *target = xstrdup(DEFAULT_TARGET);
            }
        }
    }

    /* find port */
    token = strchr(url, ':');
    if (token) {
        *token = '\0'; /* terminate the host name */
        *service = xstrdup(token[1] != '\0' ? token + 1 : DEFAULT_SERVICE);
    }
    else {
        *service = xstrdup(DEFAULT_SERVICE);
    }
    *host = xstrdup(url);
}

/*
 * Resolve host/service and return a connected TCP socket.
 * Exits the program when resolution fails or no address can be connected.
 */
int connect_to_server(char* host, char* service)
{
    int ret;
    int sfd = -1;
    struct addrinfo hints;
    struct addrinfo *result, *rp;

    memset(&hints, 0, sizeof(struct addrinfo));
    hints.ai_family = AF_UNSPEC;     /* Allow IPv4 or IPv6 */
    hints.ai_socktype = SOCK_STREAM; /* stream socket */
    hints.ai_flags = AI_ADDRCONFIG;  /* return the addr type same with the local system addr type */
    hints.ai_protocol = IPPROTO_TCP; /* TCP protocol */
    ret = getaddrinfo(host, service, &hints, &result);
    if (ret != 0) {
        fprintf(stderr, "getaddrinfo() failed: %s\n", gai_strerror(ret));
        exit(EXIT_FAILURE);
    }

    /* getaddrinfo() returns a list of address structures.
       Try each address until we successfully connect(2).
       If socket(2) (or connect(2)) fails, we (close the socket and) try the next address. */
    for (rp = result; rp != NULL; rp = rp->ai_next) {
        sfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
        if (sfd == -1) {
            continue;
        }
        if (connect(sfd, rp->ai_addr, rp->ai_addrlen) != -1) {
            break; /* Success */
        }
        close(sfd);
    }

    if (rp == NULL) { /* No address succeeded */
        fprintf(stderr, "Could not connect to server %s:%s\n", host, service);
        exit(EXIT_FAILURE);
    }

    /* No longer needed */
    freeaddrinfo(result);

    return sfd;
}

/*
 * Send "GET uri HTTP/1.1" with Host and "Connection: close" headers on fd,
 * then echo the request to stdout. Exits the program on failure.
 * service is currently not used when building the request.
 */
void send_req_to_server(int fd, char* uri, char* host, char* service)
{
    int sn_len;
    char* send_buf;
    size_t buf_len;
    size_t sent = 0;
    static const char req_header[] = "GET %s HTTP/1.1\r\n"
                                     "Host: %s\r\n"
                                     "Connection: close\r\n\r\n";

    (void)service;

    /* the template (including its "%s" pairs) plus both arguments always fits */
    buf_len = sizeof(req_header) + strlen(uri) + strlen(host) + 1;
    send_buf = calloc(buf_len, 1);
    if (NULL == send_buf) {
        perror("calloc() failed");
        exit(EXIT_FAILURE);
    }
    sn_len = snprintf(send_buf, buf_len, req_header, uri, host);
    if (sn_len < 0 || (size_t)sn_len >= buf_len) {
        fprintf(stderr, "snprintf() failed\n");
        exit(EXIT_FAILURE);
    }

    /* send() may accept only part of the buffer: loop until all is written */
    while (sent < (size_t)sn_len) {
        ssize_t n = send(fd, send_buf + sent, (size_t)sn_len - sent, 0);
        if (n == -1) {
            perror("send() failed");
            exit(EXIT_FAILURE);
        }
        sent += (size_t)n;
    }

    /* print req header */
    printf("\n%s", send_buf);

    free(send_buf);
}

/*
 * Callback that consumes `len` freshly received bytes located at
 * ((srv_res_t*)data)->buf_ptr.
 * Returns PROC_DONE when the whole response has been received and
 * PROC_MORE when further recv() calls are required.
 */
typedef int (*DATA_PROC_CB)(void* data, ssize_t len);

enum {
    PROC_DONE = 0, /* the complete response has been processed */
    PROC_MORE = 1  /* more data is needed */
};

/* States of the response-header parser; must start at 0 (struct is zeroed). */
typedef enum _srv_header_state {
    HEADER_STATE_LINE_START = 0,
    HEADER_STATE_LINE_DATA,
    HEADER_STATE_LINE_END,  /* meet '\r' */
    HEADER_STATE_LINE_DONE, /* meet '\n' */
    HEADER_STATE_HEAD_END,  /* meet '\r' of the empty line */
    HEADER_STATE_HEAD_DONE  /* meet '\n' of the empty line */
} srv_header_state_t;

/* States of the chunked-body parser; must start at 0 (struct is zeroed). */
typedef enum _srv_body_state {
    BODY_STATE_CHUNK_LINE_START = 0,
    BODY_STATE_CHUNK_LINE_DATA, /* the first chunk-body line chunk-size [ chunk-extension ] CRLF */
    BODY_STATE_CHUNK_LINE_END,  /* not used by the parser below */
    BODY_STATE_CHUNK_LINE_DONE, /* meet '\r', waiting for '\n' */
    BODY_STATE_CHUNK_DATA_START,
    BODY_STATE_CHUNK_DATA_END,  /* chunk-data consumed, waiting for '\r' */
    BODY_STATE_CHUNK_DATA_DONE  /* meet '\r', waiting for '\n' */
} srv_body_state_t;

/* Receive buffer plus parser state for one HTTP response. */
typedef struct _srv_res {
    /* recv buf */
    unsigned char* buf_ptr;   /* where the next recv() stores data */
    unsigned char* buf_start; /* start of the allocation */
    size_t buf_len;           /* total length */
    size_t buf_remain;        /* unused length starting at buf_ptr */

    /* buf proc */
    DATA_PROC_CB data_proc; /* currently active parser */
    DATA_PROC_CB res_header_proc;
    srv_header_state_t header_state;
    unsigned char* header_line_start; /* first byte of the header line being read */

    DATA_PROC_CB res_body_proc;
    srv_body_state_t body_state;
    unsigned char* body_chunk_start; /* first byte of the chunk-size line being read */

    FILE* store_file;
    /* body */
    int is_chunked_encoding;
    unsigned long chunked_size;   /* bytes of the current chunk still to be stored */
    unsigned long content_length; /* Content-Length value, or running total when chunked */
} srv_res_t;

#define CHUNKED_ENCODING "Transfer-Encoding: chunked"
#define CONTETN_LENGTH "Content-Length: "

/*
 * Return 1 when the first strlen(prefix) bytes of line equal prefix,
 * ignoring ASCII case (header field names are case-insensitive), else 0.
 */
static int line_has_prefix(const unsigned char* line, size_t line_len, const char* prefix)
{
    size_t k;
    size_t prefix_len = strlen(prefix);

    if (line_len < prefix_len) {
        return 0;
    }
    for (k = 0; k < prefix_len; k++) {
        unsigned char a = line[k];
        unsigned char b = (unsigned char)prefix[k];

        if (a >= 'A' && a <= 'Z') {
            a = (unsigned char)(a - 'A' + 'a');
        }
        if (b >= 'A' && b <= 'Z') {
            b = (unsigned char)(b - 'A' + 'a');
        }
        if (a != b) {
            return 0;
        }
    }
    return 1;
}

/* Write count bytes to the output file; exit the program on a short write. */
static void store_bytes(srv_res_t* res, const unsigned char* bytes, size_t count)
{
    if (count > 0 && fwrite(bytes, 1, count, res->store_file) != count) {
        perror("fwrite() failed");
        exit(EXIT_FAILURE);
    }
}

/* Make the whole receive buffer available to the next recv(). */
static void rewind_recv_buf(srv_res_t* res)
{
    res->buf_ptr = res->buf_start;
    res->buf_remain = res->buf_len;
}

/*
 * Parse response header bytes. Records whether the body is chunked and the
 * Content-Length value, echoes each header line to stdout, and hands any
 * body bytes that follow the header in the same buffer to the body parser.
 * The whole header has to fit into the receive buffer.
 */
int proc_res_header(void* data, ssize_t len)
{
    ssize_t i;
    size_t field_len;
    unsigned char ch;
    srv_res_t* res = (srv_res_t*)data;

    for (i = 0; i < len && res->header_state != HEADER_STATE_HEAD_DONE; i++) {
        ch = res->buf_ptr[i];
        switch (res->header_state) {
        case HEADER_STATE_LINE_START:
            res->header_state = HEADER_STATE_LINE_DATA;
            res->header_line_start = res->buf_ptr + i;
            break;
        case HEADER_STATE_LINE_DATA:
            if (ch == '\r') {
                res->header_state = HEADER_STATE_LINE_END;
            }
            break;
        case HEADER_STATE_LINE_END:
            if (ch != '\n') {
                fprintf(stderr, "invalid header found\n");
                exit(EXIT_FAILURE);
            }
            res->header_state = HEADER_STATE_LINE_DONE;
            /* i is at '\n'; drop the preceding '\r' from the line length */
            field_len = (size_t)(res->buf_ptr + i - res->header_line_start) - 1;
            /* search Transfer-Encoding */
            if (!res->is_chunked_encoding && (field_len == sizeof(CHUNKED_ENCODING) - 1) &&
                line_has_prefix(res->header_line_start, field_len, CHUNKED_ENCODING)) {
                res->is_chunked_encoding = 1;
            }
            /* search Content-Length; the digits are terminated by the '\r' already seen */
            if (!res->content_length &&
                line_has_prefix(res->header_line_start, field_len, CONTETN_LENGTH)) {
                res->content_length = strtoul(
                    (const char*)res->header_line_start + sizeof(CONTETN_LENGTH) - 1, NULL, 10);
            }
            /* print header line */
            fprintf(stdout, "\033[45m"); /* color start */
            fwrite(res->header_line_start, 1, field_len, stdout);
            fprintf(stdout, "\033[0m\n"); /* color end */
            break;
        case HEADER_STATE_LINE_DONE:
            if (ch == '\r') {
                /* empty line: end of the header section */
                res->header_state = HEADER_STATE_HEAD_END;
            }
            else {
                res->header_state = HEADER_STATE_LINE_DATA;
                res->header_line_start = res->buf_ptr + i;
            }
            break;
        case HEADER_STATE_HEAD_END:
            if (ch != '\n') {
                fprintf(stderr, "invalid header found\n");
                exit(EXIT_FAILURE);
            }
            res->header_state = HEADER_STATE_HEAD_DONE;
            fprintf(stdout, "\n\033[31m=== parse header done, chunked[%d] content-length[%lu] === \033[0m\n\n",
                    res->is_chunked_encoding, res->content_length);
            break;
        default:
            break;
        }
    }

    if (res->header_state == HEADER_STATE_HEAD_DONE) {
        res->data_proc = res->res_body_proc;
        if (i < len) { /* found body data behind the header */
            res->buf_ptr += i;
            res->buf_remain -= (size_t)i;
            return res->data_proc(res, len - i);
        }
        /* header ended exactly at the end of this read */
        rewind_recv_buf(res);
        return PROC_MORE;
    }

    /* header not finished: keep what we have and append the next read */
    res->buf_remain -= (size_t)len;
    if (res->buf_remain == 0) {
        fprintf(stderr, "large header found\n");
        exit(EXIT_FAILURE);
    }
    res->buf_ptr += len;

    return PROC_MORE;
}

/*
 * Store response body bytes in res->store_file.
 *
 * Identity bodies are written as they arrive; when a Content-Length was
 * seen, PROC_DONE is returned once that many bytes have been stored.
 * Chunked bodies are decoded chunk by chunk; chunk extensions and the
 * trailer are ignored and PROC_DONE is returned at the last-chunk.
 */
int proc_res_body(void* data, ssize_t len)
{
    ssize_t i;
    unsigned char ch;
    srv_res_t* res = (srv_res_t*)data;

    /* not chunked encoding */
    if (!res->is_chunked_encoding) {
        size_t count = (size_t)len;
        int length_known = (res->content_length > 0);

        /* never store more than the announced body length */
        if (length_known && count > res->content_length) {
            count = (size_t)res->content_length;
        }
        store_bytes(res, res->buf_ptr, count);
        rewind_recv_buf(res);
        if (length_known) {
            res->content_length -= count;
            if (res->content_length == 0) { /* get all body data */
                return PROC_DONE;
            }
        }
        /* without Content-Length the body ends when the server closes */
        return PROC_MORE;
    }

    /* parse chunked-encoding */
    for (i = 0; i < len; i++) {
        ch = res->buf_ptr[i];
        switch (res->body_state) {
        case BODY_STATE_CHUNK_LINE_START:
            res->body_chunk_start = res->buf_ptr + i;
            res->body_state = BODY_STATE_CHUNK_LINE_DATA;
            break;
        case BODY_STATE_CHUNK_LINE_DATA:
            if (ch == '\r') {
                res->body_state = BODY_STATE_CHUNK_LINE_DONE;
            }
            break;
        case BODY_STATE_CHUNK_LINE_DONE:
            if (ch == '\n') { /* ignore chunk-extension */
                char* end = NULL;

                res->body_state = BODY_STATE_CHUNK_DATA_START;
                /* chunk-size is hexadecimal; parsing stops at ';' or '\r' */
                res->chunked_size = strtoul((const char*)res->body_chunk_start, &end, 16);
                if (end == (char*)res->body_chunk_start) {
                    /* no hex digits at all: not a valid chunk-size line */
                    fprintf(stderr, "invalid chunk-body line found\n");
                    exit(EXIT_FAILURE);
                }
                if (0 == res->chunked_size) { /* last chunk */
                    fprintf(stdout, "=== last-chunk found(total body size = %lu) ===\n", res->content_length);
                    /* ignore trailer */
                    return PROC_DONE;
                }
                res->content_length += res->chunked_size;
                fprintf(stdout, "=== chunk-size %lu ===\n", res->chunked_size);
            }
            else {
                fprintf(stderr, "invalid chunk-body line found\n");
                exit(EXIT_FAILURE);
            }
            break;
        case BODY_STATE_CHUNK_DATA_START: {
            size_t data_left = (size_t)(len - i);

            if (data_left < res->chunked_size) {
                /* chunk continues in the next read: store what we have */
                store_bytes(res, res->buf_ptr + i, data_left);
                res->chunked_size -= data_left;
                i = len; /* end loop */
            }
            else {
                store_bytes(res, res->buf_ptr + i, (size_t)res->chunked_size);
                /* leave i on the last data byte; the loop increment steps past it */
                i += (ssize_t)res->chunked_size - 1;
                res->chunked_size = 0;
                res->body_state = BODY_STATE_CHUNK_DATA_END;
            }
            break;
        }
        case BODY_STATE_CHUNK_DATA_END:
            if (ch == '\r') {
                res->body_state = BODY_STATE_CHUNK_DATA_DONE;
            }
            else {
                fprintf(stderr, "invalid chunk-body data found\n");
                exit(EXIT_FAILURE);
            }
            break;
        case BODY_STATE_CHUNK_DATA_DONE:
            if (ch == '\n') {
                res->body_state = BODY_STATE_CHUNK_LINE_START;
            }
            else {
                fprintf(stderr, "invalid chunk-body data found\n");
                exit(EXIT_FAILURE);
            }
            break;
        default:
            break;
        }
    }

    if ((res->body_state == BODY_STATE_CHUNK_LINE_DATA) ||
        (res->body_state == BODY_STATE_CHUNK_LINE_DONE)) {
        /* dont break the size line: move the part received so far to the
           front of the buffer so the next read is appended right behind it */
        size_t partial = (size_t)((res->buf_ptr + len) - res->body_chunk_start);

        if (partial >= res->buf_len) {
            fprintf(stderr, "invalid chunk-body line found\n");
            exit(EXIT_FAILURE);
        }
        memmove(res->buf_start, res->body_chunk_start, partial);
        res->body_chunk_start = res->buf_start;
        res->buf_ptr = res->buf_start + partial;
        res->buf_remain = res->buf_len - partial;
    }
    else {
        /* everything in the buffer has been consumed */
        rewind_recv_buf(res);
    }

    return PROC_MORE;
}

/*
 * Receive the HTTP response from fd, print its header and store the decoded
 * body in the file store_path. Returns when the response is complete or the
 * server closes the connection; exits the program on errors.
 */
void recv_res_from_server(int fd, char* store_path)
{
    ssize_t ret = 1;
    srv_res_t response;

    /* zeroing also selects the *_LINE_START parser states */
    memset(&response, 0x00, sizeof(response));

    if (NULL == store_path) {
        fprintf(stderr, "no store path given\n");
        exit(EXIT_FAILURE);
    }
    response.store_file = fopen(store_path, "wb");
    if (NULL == response.store_file) {
        perror("fopen() failed");
        exit(EXIT_FAILURE);
    }

    response.buf_len = 0x1000; /* 4k */
    response.buf_start = calloc(1, response.buf_len); /* alloc 4k memory */
    if (NULL == response.buf_start) {
        perror("calloc() failed");
        exit(EXIT_FAILURE);
    }
    response.buf_ptr = response.buf_start;
    response.buf_remain = response.buf_len;
    response.res_header_proc = proc_res_header;
    response.res_body_proc = proc_res_body;
    response.data_proc = response.res_header_proc;

    while (ret > 0) {
        ret = recv(fd, response.buf_ptr, response.buf_remain, 0);
        if (ret > 0) {
            if (response.data_proc(&response, ret) == PROC_DONE) { /* get all response */
                break;
            }
        }
    }

    if (ret == 0) {
        printf("server shutdown the connection\n");
    }
    else if (ret < 0) {
        perror("recv() failed");
        exit(EXIT_FAILURE);
    }

    /* cleanup */
    if (fclose(response.store_file) != 0) {
        perror("fclose() failed");
    }
    free(response.buf_start);
}
05-11 02:36