From 90bd4f226e2ba03634d31c73df06bf0a90fa0231 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Sun, 11 Feb 2024 17:17:09 -0700
Subject: [PATCH] std.http: remove the ability to heap-allocate headers

The buffer for HTTP headers is now always provided via a static buffer.
As a consequence, OutOfMemory is no longer a member of the read() error
set, and the API and implementation of Client and Server are simplified.

error.HttpHeadersExceededSizeLimit is renamed to
error.HttpHeadersOversize.
---
 lib/std/http/Client.zig   | 132 ++++++++++++++++++++--------------
 lib/std/http/Server.zig   |  77 +++++++++-----------
 lib/std/http/protocol.zig | 146 ++++++++++++++++++--------------------
 src/Package/Fetch.zig     |  21 ++++--
 src/Package/Fetch/git.zig |  15 +++-
 5 files changed, 211 insertions(+), 180 deletions(-)

diff --git a/lib/std/http/Client.zig b/lib/std/http/Client.zig
index a50e814fd4..dae83ea2d2 100644
--- a/lib/std/http/Client.zig
+++ b/lib/std/http/Client.zig
@@ -20,9 +20,7 @@ const proto = @import("protocol.zig");
 
 pub const disable_tls = std.options.http_disable_tls;
 
-/// Allocator used for all allocations made by the client.
-///
-/// This allocator must be thread-safe.
+/// Used for all client allocations. Must be thread-safe.
 allocator: Allocator,
 
 ca_bundle: if (disable_tls) void else std.crypto.Certificate.Bundle = if (disable_tls) {} else .{},
@@ -35,10 +33,12 @@ next_https_rescan_certs: bool = true,
 /// The pool of connections that can be reused (and currently in use).
 connection_pool: ConnectionPool = .{},
 
-/// This is the proxy that will handle http:// connections. It *must not* be modified when the client has any active connections.
+/// This is the proxy that will handle http:// connections. It *must not* be
+/// modified when the client has any active connections.
 http_proxy: ?Proxy = null,
 
-/// This is the proxy that will handle https:// connections. It *must not* be modified when the client has any active connections.
+/// This is the proxy that will handle https:// connections. It *must not* be
+/// modified when the client has any active connections.
 https_proxy: ?Proxy = null,
 
 /// A set of linked lists of connections that can be reused.
@@ -609,10 +609,6 @@ pub const Request = struct {
         req.headers.deinit();
         req.response.headers.deinit();
 
-        if (req.response.parser.header_bytes_owned) {
-            req.response.parser.header_bytes.deinit(req.client.allocator);
-        }
-
        if (req.connection) |connection| {
            if (!req.response.parser.done) {
                // If the response wasn't fully read, then we need to close the connection.
@@ -810,27 +806,38 @@ pub const Request = struct {
         return index;
     }
 
-    pub const WaitError = RequestError || SendError || TransferReadError || proto.HeadersParser.CheckCompleteHeadError || Response.ParseError || Uri.ParseError || error{ TooManyHttpRedirects, RedirectRequiresResend, HttpRedirectMissingLocation, CompressionInitializationFailed, CompressionNotSupported };
+    pub const WaitError = RequestError || SendError || TransferReadError ||
+        proto.HeadersParser.CheckCompleteHeadError || Response.ParseError || Uri.ParseError ||
+        error{ // TODO: file zig fmt issue for this bad indentation
+        TooManyHttpRedirects,
+        RedirectRequiresResend,
+        HttpRedirectMissingLocation,
+        CompressionInitializationFailed,
+        CompressionNotSupported,
+    };
 
     /// Waits for a response from the server and parses any headers that are sent.
     /// This function will block until the final response is received.
     ///
-    /// If `handle_redirects` is true and the request has no payload, then this function will automatically follow
-    /// redirects. If a request payload is present, then this function will error with error.RedirectRequiresResend.
+    /// If `handle_redirects` is true and the request has no payload, then this
+    /// function will automatically follow redirects. If a request payload is
+    /// present, then this function will error with
+    /// error.RedirectRequiresResend.
     ///
-    /// Must be called after `send` and, if any data was written to the request body, then also after `finish`.
+    /// Must be called after `send` and, if any data was written to the request
+    /// body, then also after `finish`.
     pub fn wait(req: *Request) WaitError!void {
         while (true) { // handle redirects
             while (true) { // read headers
                 try req.connection.?.fill();
 
-                const nchecked = try req.response.parser.checkCompleteHead(req.client.allocator, req.connection.?.peek());
+                const nchecked = try req.response.parser.checkCompleteHead(req.connection.?.peek());
                 req.connection.?.drop(@intCast(nchecked));
 
                 if (req.response.parser.state.isContent()) break;
             }
 
-            try req.response.parse(req.response.parser.header_bytes.items, false);
+            try req.response.parse(req.response.parser.get(), false);
 
             if (req.response.status == .@"continue") {
                 req.response.parser.done = true; // we're done parsing the continue response, reset to prepare for the real response
@@ -891,7 +898,8 @@ pub const Request = struct {
             if (req.response.status.class() == .redirect and req.handle_redirects) {
                 req.response.skip = true;
 
-                // skip the body of the redirect response, this will at least leave the connection in a known good state.
+                // skip the body of the redirect response, this will at least
+                // leave the connection in a known good state.
                 const empty = @as([*]u8, undefined)[0..0];
                 assert(try req.transferRead(empty) == 0); // we're skipping, no buffer is necessary
@@ -908,7 +916,10 @@ pub const Request = struct {
                 const resolved_url = try req.uri.resolve(new_url, false, arena);
 
                 // is the redirect location on the same domain, or a subdomain of the original request?
-                const is_same_domain_or_subdomain = std.ascii.endsWithIgnoreCase(resolved_url.host.?, req.uri.host.?) and (resolved_url.host.?.len == req.uri.host.?.len or resolved_url.host.?[resolved_url.host.?.len - req.uri.host.?.len - 1] == '.');
+                const is_same_domain_or_subdomain =
+                    std.ascii.endsWithIgnoreCase(resolved_url.host.?, req.uri.host.?) and
+                    (resolved_url.host.?.len == req.uri.host.?.len or
+                    resolved_url.host.?[resolved_url.host.?.len - req.uri.host.?.len - 1] == '.');
 
                 if (resolved_url.host == null or !is_same_domain_or_subdomain or !std.ascii.eqlIgnoreCase(resolved_url.scheme, req.uri.scheme)) {
                     // we're redirecting to a different domain, strip privileged headers like cookies
@@ -957,7 +968,8 @@ pub const Request = struct {
         }
     }
 
-    pub const ReadError = TransferReadError || proto.HeadersParser.CheckCompleteHeadError || error{ DecompressionFailure, InvalidTrailers };
+    pub const ReadError = TransferReadError || proto.HeadersParser.CheckCompleteHeadError ||
+        error{ DecompressionFailure, InvalidTrailers };
 
     pub const Reader = std.io.Reader(*Request, ReadError, read);
@@ -980,14 +992,16 @@ pub const Request = struct {
            while (!req.response.parser.state.isContent()) { // read trailing headers
                try req.connection.?.fill();
 
-                const nchecked = try req.response.parser.checkCompleteHead(req.client.allocator, req.connection.?.peek());
+                const nchecked = try req.response.parser.checkCompleteHead(req.connection.?.peek());
                req.connection.?.drop(@intCast(nchecked));
            }
 
            if (has_trail) {
-                // The response headers before the trailers are already guaranteed to be valid, so they will always be parsed again and cannot return an error.
+                // The response headers before the trailers are already
+                // guaranteed to be valid, so they will always be parsed again
+                // and cannot return an error.
                // This will *only* fail for a malformed trailer.
-                req.response.parse(req.response.parser.header_bytes.items, true) catch return error.InvalidTrailers;
+                req.response.parse(req.response.parser.get(), true) catch return error.InvalidTrailers;
            }
        }
@@ -1362,13 +1376,11 @@ pub fn connectTunnel(
         .fragment = null,
     };
 
-    // we can use a small buffer here because a CONNECT response should be very small
     var buffer: [8096]u8 = undefined;
-
     var req = client.open(.CONNECT, uri, proxy.headers, .{
         .handle_redirects = false,
         .connection = conn,
-        .header_strategy = .{ .static = &buffer },
+        .server_header_buffer = &buffer,
     }) catch |err| {
         std.log.debug("err {}", .{err});
         break :tunnel err;
@@ -1445,7 +1457,9 @@ pub fn connect(client: *Client, host: []const u8, port: u16, protocol: Connectio
     return client.connectTcp(host, port, protocol);
 }
 
-pub const RequestError = ConnectTcpError || ConnectErrorPartial || Request.SendError || std.fmt.ParseIntError || Connection.WriteError || error{
+pub const RequestError = ConnectTcpError || ConnectErrorPartial || Request.SendError ||
+    std.fmt.ParseIntError || Connection.WriteError ||
+    error{ // TODO: file a zig fmt issue for this bad indentation
     UnsupportedUrlScheme,
     UriMissingHost,
 
@@ -1456,36 +1470,29 @@ pub const RequestError = ConnectTcpError || ConnectErrorPartial || Request.SendE
 pub const RequestOptions = struct {
     version: http.Version = .@"HTTP/1.1",
 
-    /// Automatically ignore 100 Continue responses. This assumes you don't care, and will have sent the body before you
-    /// wait for the response.
+    /// Automatically ignore 100 Continue responses. This assumes you don't
+    /// care, and will have sent the body before you wait for the response.
     ///
-    /// If this is not the case AND you know the server will send a 100 Continue, set this to false and wait for a
-    /// response before sending the body. If you wait AND the server does not send a 100 Continue before you finish the
-    /// request, then the request *will* deadlock.
+    /// If this is not the case AND you know the server will send a 100
+    /// Continue, set this to false and wait for a response before sending the
+    /// body. If you wait AND the server does not send a 100 Continue before
+    /// you finish the request, then the request *will* deadlock.
     handle_continue: bool = true,
 
-    /// Automatically follow redirects. This will only follow redirects for repeatable requests (ie. with no payload or the server has acknowledged the payload)
+    /// Automatically follow redirects. This will only follow redirects for
+    /// repeatable requests (i.e. with no payload, or the server has
+    /// acknowledged the payload).
     handle_redirects: bool = true,
 
     /// How many redirects to follow before returning an error.
     max_redirects: u32 = 3,
-    header_strategy: StorageStrategy = .{ .dynamic = 16 * 1024 },
+
+    /// Externally-owned memory used to store the server's entire HTTP header.
+    /// `error.HttpHeadersOversize` is returned from read() when the server
+    /// sends too many bytes of HTTP headers.
+    server_header_buffer: []u8,
 
     /// Must be an already acquired connection.
     connection: ?*Connection = null,
-
-    pub const StorageStrategy = union(enum) {
-        /// In this case, the client's Allocator will be used to store the
-        /// entire HTTP header. This value is the maximum total size of
-        /// HTTP headers allowed, otherwise
-        /// error.HttpHeadersExceededSizeLimit is returned from read().
-        dynamic: usize,
-        /// This is used to store the entire HTTP header. If the HTTP
-        /// header is too big to fit, `error.HttpHeadersExceededSizeLimit`
-        /// is returned from read(). When this is used, `error.OutOfMemory`
-        /// cannot be returned from `read()`.
-        static: []u8,
-    };
 };
 
 pub const protocol_map = std.ComptimeStringMap(Connection.Protocol, .{
@@ -1502,7 +1509,13 @@ pub const protocol_map = std.ComptimeStringMap(Connection.Protocol, .{
 ///
 /// The caller is responsible for calling `deinit()` on the `Request`.
 /// This function is threadsafe.
-pub fn open(client: *Client, method: http.Method, uri: Uri, headers: http.Headers, options: RequestOptions) RequestError!Request {
+pub fn open(
+    client: *Client,
+    method: http.Method,
+    uri: Uri,
+    headers: http.Headers,
+    options: RequestOptions,
+) RequestError!Request {
     const protocol = protocol_map.get(uri.scheme) orelse return error.UnsupportedUrlScheme;
 
     const port: u16 = uri.port orelse switch (protocol) {
@@ -1541,10 +1554,7 @@ pub fn open(client: *Client, method: http.Method, uri: Uri, headers: http.Header
             .reason = undefined,
             .version = undefined,
             .headers = http.Headers{ .allocator = client.allocator, .owned = false },
-            .parser = switch (options.header_strategy) {
-                .dynamic => |max| proto.HeadersParser.initDynamic(max),
-                .static => |buf| proto.HeadersParser.initStatic(buf),
-            },
+            .parser = proto.HeadersParser.init(options.server_header_buffer),
         },
         .arena = undefined,
     };
@@ -1568,17 +1578,30 @@ pub const FetchOptions = struct {
     };
 
     pub const ResponseStrategy = union(enum) {
-        storage: RequestOptions.StorageStrategy,
+        storage: StorageStrategy,
         file: std.fs.File,
         none,
     };
 
-    header_strategy: RequestOptions.StorageStrategy = .{ .dynamic = 16 * 1024 },
+    pub const StorageStrategy = union(enum) {
+        /// In this case, the client's Allocator will be used to store the
+        /// entire HTTP header. This value is the maximum total size of
+        /// HTTP headers allowed, otherwise
+        /// error.HttpHeadersOversize is returned from read().
+        dynamic: usize,
+        /// This is used to store the entire HTTP header. If the HTTP
+        /// header is too big to fit, `error.HttpHeadersOversize`
+        /// is returned from read(). When this is used, `error.OutOfMemory`
+        /// cannot be returned from `read()`.
+        static: []u8,
+    };
+
+    server_header_buffer: ?[]u8 = null,
     response_strategy: ResponseStrategy = .{ .storage = .{ .dynamic = 16 * 1024 * 1024 } },
 
     location: Location,
     method: http.Method = .GET,
-    headers: http.Headers = http.Headers{ .allocator = std.heap.page_allocator, .owned = false },
+    headers: http.Headers = .{ .allocator = std.heap.page_allocator, .owned = false },
     payload: Payload = .none,
     raw_uri: bool = false,
 };
@@ -1613,9 +1636,10 @@ pub fn fetch(client: *Client, allocator: Allocator, options: FetchOptions) !Fetc
         .url => |u| try Uri.parse(u),
         .uri => |u| u,
     };
+    var server_header_buffer: [16 * 1024]u8 = undefined;
 
     var req = try open(client, options.method, uri, options.headers, .{
-        .header_strategy = options.header_strategy,
+        .server_header_buffer = options.server_header_buffer orelse &server_header_buffer,
         .handle_redirects = options.payload == .none,
     });
     defer req.deinit();
diff --git a/lib/std/http/Server.zig b/lib/std/http/Server.zig
index e139ea5979..8447c4e03e 100644
--- a/lib/std/http/Server.zig
+++ b/lib/std/http/Server.zig
@@ -1,6 +1,7 @@
 //! HTTP Server implementation.
 //!
-//! This server assumes *all* clients are well behaved and standard compliant; it can and will deadlock if a client holds a connection open without sending a request.
+//! This server assumes clients are well behaved and standard compliant; it
+//! deadlocks if a client holds a connection open without sending a request.
 //!
 //! Example usage:
 //!
@@ -17,7 +18,7 @@
 //!     while (res.reset() != .closing) {
 //!         res.wait() catch |err| switch (err) {
 //!             error.HttpHeadersInvalid => break,
-//!             error.HttpHeadersExceededSizeLimit => {
+//!             error.HttpHeadersOversize => {
 //!                 res.status = .request_header_fields_too_large;
 //!                 res.send() catch break;
 //!                 break;
@@ -39,6 +40,7 @@
 //!     }
 //! ```
 
+const builtin = @import("builtin");
 const std = @import("../std.zig");
 const testing = std.testing;
 const http = std.http;
@@ -86,7 +88,7 @@ pub const Connection = struct {
         const nread = try conn.rawReadAtLeast(conn.read_buf[0..], 1);
         if (nread == 0) return error.EndOfStream;
         conn.read_start = 0;
-        conn.read_end = @as(u16, @intCast(nread));
+        conn.read_end = @intCast(nread);
     }
 
     pub fn peek(conn: *Connection) []const u8 {
@@ -382,10 +384,6 @@ pub const Response = struct {
 
         res.headers.deinit();
         res.request.headers.deinit();
-
-        if (res.request.parser.header_bytes_owned) {
-            res.request.parser.header_bytes.deinit(res.allocator);
-        }
     }
 
     pub const ResetState = enum { reset, closing };
@@ -548,17 +546,24 @@ pub const Response = struct {
         return index;
     }
 
-    pub const WaitError = Connection.ReadError || proto.HeadersParser.CheckCompleteHeadError || Request.ParseError || error{ CompressionInitializationFailed, CompressionNotSupported };
+    pub const WaitError = Connection.ReadError ||
+        proto.HeadersParser.CheckCompleteHeadError || Request.ParseError ||
+        error{ CompressionInitializationFailed, CompressionNotSupported };
 
     /// Wait for the client to send a complete request head.
     ///
     /// For correct behavior, the following rules must be followed:
     ///
-    /// * If this returns any error in `Connection.ReadError`, you MUST immediately close the connection by calling `deinit`.
-    /// * If this returns `error.HttpHeadersInvalid`, you MAY immediately close the connection by calling `deinit`.
-    /// * If this returns `error.HttpHeadersExceededSizeLimit`, you MUST respond with a 431 status code and then call `deinit`.
-    /// * If this returns any error in `Request.ParseError`, you MUST respond with a 400 status code and then call `deinit`.
-    /// * If this returns any other error, you MUST respond with a 400 status code and then call `deinit`.
+    /// * If this returns any error in `Connection.ReadError`, you MUST
+    ///   immediately close the connection by calling `deinit`.
+    /// * If this returns `error.HttpHeadersInvalid`, you MAY immediately close
+    ///   the connection by calling `deinit`.
+    /// * If this returns `error.HttpHeadersOversize`, you MUST
+    ///   respond with a 431 status code and then call `deinit`.
+    /// * If this returns any error in `Request.ParseError`, you MUST respond
+    ///   with a 400 status code and then call `deinit`.
+    /// * If this returns any other error, you MUST respond with a 400 status
+    ///   code and then call `deinit`.
     /// * If the request has an Expect header containing 100-continue, you MUST either:
     ///     * Respond with a 100 status code, then call `wait` again.
     ///     * Respond with a 417 status code.
@@ -571,14 +576,14 @@ pub const Response = struct {
         while (true) {
             try res.connection.fill();
 
-            const nchecked = try res.request.parser.checkCompleteHead(res.allocator, res.connection.peek());
-            res.connection.drop(@as(u16, @intCast(nchecked)));
+            const nchecked = try res.request.parser.checkCompleteHead(res.connection.peek());
+            res.connection.drop(@intCast(nchecked));
 
             if (res.request.parser.state.isContent()) break;
         }
 
         res.request.headers = .{ .allocator = res.allocator, .owned = true };
-        try res.request.parse(res.request.parser.header_bytes.items);
+        try res.request.parse(res.request.parser.get());
 
         if (res.request.transfer_encoding != .none) {
             switch (res.request.transfer_encoding) {
@@ -641,16 +646,18 @@ pub const Response = struct {
            while (!res.request.parser.state.isContent()) { // read trailing headers
                try res.connection.fill();
 
-                const nchecked = try res.request.parser.checkCompleteHead(res.allocator, res.connection.peek());
-                res.connection.drop(@as(u16, @intCast(nchecked)));
+                const nchecked = try res.request.parser.checkCompleteHead(res.connection.peek());
+                res.connection.drop(@intCast(nchecked));
            }
 
            if (has_trail) {
                res.request.headers = http.Headers{ .allocator = res.allocator, .owned = false };
 
-                // The response headers before the trailers are already guaranteed to be valid, so they will always be parsed again and cannot return an error.
+                // The response headers before the trailers are already
+                // guaranteed to be valid, so they will always be parsed again
+                // and cannot return an error.
                // This will *only* fail for a malformed trailer.
-                res.request.parse(res.request.parser.header_bytes.items) catch return error.InvalidTrailers;
+                res.request.parse(res.request.parser.get()) catch return error.InvalidTrailers;
            }
        }
@@ -751,29 +758,19 @@ pub fn listen(server: *Server, address: net.Address) ListenError!void {
 
 pub const AcceptError = net.StreamServer.AcceptError || Allocator.Error;
 
-pub const HeaderStrategy = union(enum) {
-    /// In this case, the client's Allocator will be used to store the
-    /// entire HTTP header. This value is the maximum total size of
-    /// HTTP headers allowed, otherwise
-    /// error.HttpHeadersExceededSizeLimit is returned from read().
-    dynamic: usize,
-    /// This is used to store the entire HTTP header. If the HTTP
-    /// header is too big to fit, `error.HttpHeadersExceededSizeLimit`
-    /// is returned from read(). When this is used, `error.OutOfMemory`
-    /// cannot be returned from `read()`.
-    static: []u8,
-};
-
 pub const AcceptOptions = struct {
     allocator: Allocator,
-    header_strategy: HeaderStrategy = .{ .dynamic = 8192 },
+
+    /// Externally-owned memory used to store the client's entire HTTP header.
+    /// `error.HttpHeadersOversize` is returned from read() when a
+    /// client sends too many bytes of HTTP headers.
+    client_header_buffer: []u8,
 };
 
 /// Accept a new connection.
 pub fn accept(server: *Server, options: AcceptOptions) AcceptError!Response {
     const in = try server.socket.accept();
 
-    return Response{
+    return .{
         .allocator = options.allocator,
         .address = in.address,
         .connection = .{
@@ -786,17 +783,12 @@ pub fn accept(server: *Server, options: AcceptOptions) AcceptError!Response {
             .method = undefined,
             .target = undefined,
             .headers = .{ .allocator = options.allocator, .owned = false },
-            .parser = switch (options.header_strategy) {
-                .dynamic => |max| proto.HeadersParser.initDynamic(max),
-                .static => |buf| proto.HeadersParser.initStatic(buf),
-            },
+            .parser = proto.HeadersParser.init(options.client_header_buffer),
         },
     };
 }
 
 test "HTTP server handles a chunked transfer coding request" {
-    const builtin = @import("builtin");
-
     // This test requires spawning threads.
     if (builtin.single_threaded) {
         return error.SkipZigTest;
@@ -823,9 +815,10 @@ test "HTTP server handles a chunked transfer coding request" {
 
     const server_thread = try std.Thread.spawn(.{}, (struct {
         fn apply(s: *std.http.Server) !void {
+            var header_buffer: [max_header_size]u8 = undefined;
             var res = try s.accept(.{
                 .allocator = allocator,
-                .header_strategy = .{ .dynamic = max_header_size },
+                .client_header_buffer = &header_buffer,
             });
             defer res.deinit();
             defer _ = res.reset();
diff --git a/lib/std/http/protocol.zig b/lib/std/http/protocol.zig
index 0ccafd2ee5..0caa4211cd 100644
--- a/lib/std/http/protocol.zig
+++ b/lib/std/http/protocol.zig
@@ -34,54 +34,49 @@ pub const State = enum {
 pub const HeadersParser = struct {
     state: State = .start,
 
-    /// Whether or not `header_bytes` is allocated or was provided as a fixed buffer.
-    header_bytes_owned: bool,
-    /// Either a fixed buffer of len `max_header_bytes` or a dynamic buffer that can grow up to `max_header_bytes`.
+    /// A caller-provided fixed buffer in which header bytes are stored.
     /// Pointers into this buffer are not stable until after a message is complete.
-    header_bytes: std.ArrayListUnmanaged(u8),
-    /// The maximum allowed size of `header_bytes`.
-    max_header_bytes: usize,
-    next_chunk_length: u64 = 0,
+    header_bytes_buffer: []u8,
+    header_bytes_len: u32,
+    next_chunk_length: u64,
 
     /// Whether this parser is done parsing a complete message.
     /// A message is only done when the entire payload has been read.
-    done: bool = false,
-
-    /// Initializes the parser with a dynamically growing header buffer of up to `max` bytes.
-    pub fn initDynamic(max: usize) HeadersParser {
-        return .{
-            .header_bytes = .{},
-            .max_header_bytes = max,
-            .header_bytes_owned = true,
-        };
-    }
+    done: bool,
 
     /// Initializes the parser with a provided buffer `buf`.
-    pub fn initStatic(buf: []u8) HeadersParser {
+    pub fn init(buf: []u8) HeadersParser {
         return .{
-            .header_bytes = .{ .items = buf[0..0], .capacity = buf.len },
-            .max_header_bytes = buf.len,
-            .header_bytes_owned = false,
+            .header_bytes_buffer = buf,
+            .header_bytes_len = 0,
+            .done = false,
+            .next_chunk_length = 0,
         };
     }
 
-    /// Completely resets the parser to it's initial state.
-    /// This must be called after a message is complete.
-    pub fn reset(r: *HeadersParser) void {
-        assert(r.done); // The message must be completely read before reset, otherwise the parser is in an invalid state.
-
-        r.header_bytes.clearRetainingCapacity();
-
-        r.* = .{
-            .header_bytes = r.header_bytes,
-            .max_header_bytes = r.max_header_bytes,
-            .header_bytes_owned = r.header_bytes_owned,
+    /// Reinitialize the parser.
+    /// Asserts the parser is in the "done" state.
+    pub fn reset(hp: *HeadersParser) void {
+        assert(hp.done);
+        hp.* = .{
+            .state = .start,
+            .header_bytes_buffer = hp.header_bytes_buffer,
+            .header_bytes_len = 0,
+            .done = false,
+            .next_chunk_length = 0,
         };
     }
 
-    /// Returns the number of bytes consumed by headers. This is always less than or equal to `bytes.len`.
-    /// You should check `r.state.isContent()` after this to check if the headers are done.
+    pub fn get(hp: HeadersParser) []u8 {
+        return hp.header_bytes_buffer[0..hp.header_bytes_len];
+    }
+
+    /// Returns the number of bytes consumed by headers. This is always less
+    /// than or equal to `bytes.len`.
+    /// You should check `r.state.isContent()` after this to check if the
+    /// headers are done.
     ///
-    /// If the amount returned is less than `bytes.len`, you may assume that the parser is in a content state and the
+    /// If the amount returned is less than `bytes.len`, you may assume that
+    /// the parser is in a content state and the
     /// first byte of content is located at `bytes[result]`.
     pub fn findHeadersEnd(r: *HeadersParser, bytes: []const u8) u32 {
         const vector_len: comptime_int = @max(std.simd.suggestVectorLength(u8) orelse 1, 8);
@@ -410,11 +405,14 @@ pub const HeadersParser = struct {
         }
     }
 
-    /// Returns the number of bytes consumed by the chunk size. This is always less than or equal to `bytes.len`.
-    /// You should check `r.state == .chunk_data` after this to check if the chunk size has been fully parsed.
+    /// Returns the number of bytes consumed by the chunk size. This is always
+    /// less than or equal to `bytes.len`.
+    /// You should check `r.state == .chunk_data` after this to check if the
+    /// chunk size has been fully parsed.
     ///
-    /// If the amount returned is less than `bytes.len`, you may assume that the parser is in the `chunk_data` state
-    /// and that the first byte of the chunk is at `bytes[result]`.
+    /// If the amount returned is less than `bytes.len`, you may assume that
+    /// the parser is in the `chunk_data` state and that the first byte of the
+    /// chunk is at `bytes[result]`.
     pub fn findChunkedLen(r: *HeadersParser, bytes: []const u8) u32 {
         const len = @as(u32, @intCast(bytes.len));
@@ -488,30 +486,27 @@ pub const HeadersParser = struct {
         return len;
     }
 
-    /// Returns whether or not the parser has finished parsing a complete message. A message is only complete after the
-    /// entire body has been read and any trailing headers have been parsed.
+    /// Returns whether or not the parser has finished parsing a complete
+    /// message. A message is only complete after the entire body has been read
+    /// and any trailing headers have been parsed.
     pub fn isComplete(r: *HeadersParser) bool {
         return r.done and r.state == .finished;
     }
 
-    pub const CheckCompleteHeadError = mem.Allocator.Error || error{HttpHeadersExceededSizeLimit};
+    pub const CheckCompleteHeadError = error{HttpHeadersOversize};
 
-    /// Pushes `in` into the parser. Returns the number of bytes consumed by the header. Any header bytes are appended
-    /// to the `header_bytes` buffer.
-    ///
-    /// This function only uses `allocator` if `r.header_bytes_owned` is true, and may be undefined otherwise.
-    pub fn checkCompleteHead(r: *HeadersParser, allocator: std.mem.Allocator, in: []const u8) CheckCompleteHeadError!u32 {
-        if (r.state.isContent()) return 0;
+    /// Pushes `in` into the parser. Returns the number of bytes consumed by
+    /// the header. Any header bytes are appended to `header_bytes_buffer`.
+    pub fn checkCompleteHead(hp: *HeadersParser, in: []const u8) CheckCompleteHeadError!u32 {
+        if (hp.state.isContent()) return 0;
 
-        const i = r.findHeadersEnd(in);
+        const i = hp.findHeadersEnd(in);
         const data = in[0..i];
-        if (r.header_bytes.items.len + data.len > r.max_header_bytes) {
-            return error.HttpHeadersExceededSizeLimit;
-        } else {
-            if (r.header_bytes_owned) try r.header_bytes.ensureUnusedCapacity(allocator, data.len);
+        if (hp.header_bytes_len + data.len > hp.header_bytes_buffer.len)
+            return error.HttpHeadersOversize;
 
-            r.header_bytes.appendSliceAssumeCapacity(data);
-        }
+        @memcpy(hp.header_bytes_buffer[hp.header_bytes_len..][0..data.len], data);
+        hp.header_bytes_len += @intCast(data.len);
 
         return i;
     }
@@ -520,7 +515,8 @@ pub const HeadersParser = struct {
         HttpChunkInvalid,
     };
 
-    /// Reads the body of the message into `buffer`. Returns the number of bytes placed in the buffer.
+    /// Reads the body of the message into `buffer`. Returns the number of
+    /// bytes placed in the buffer.
     ///
     /// If `skip` is true, the buffer will be unused and the body will be skipped.
     ///
@@ -718,7 +714,7 @@ test "HeadersParser.findHeadersEnd" {
     const data = "GET / HTTP/1.1\r\nHost: localhost\r\n\r\nHello";
 
     for (0..36) |i| {
-        r = HeadersParser.initDynamic(0);
+        r = HeadersParser.init(&.{});
         try std.testing.expectEqual(@as(u32, @intCast(i)), r.findHeadersEnd(data[0..i]));
         try std.testing.expectEqual(@as(u32, @intCast(35 - i)), r.findHeadersEnd(data[i..]));
     }
@@ -728,7 +724,7 @@ test "HeadersParser.findChunkedLen" {
     var r: HeadersParser = undefined;
     const data = "Ff\r\nf0f000 ; ext\n0\r\nffffffffffffffffffffffffffffffffffffffff\r\n";
 
-    r = HeadersParser.initDynamic(0);
+    r = HeadersParser.init(&.{});
     r.state = .chunk_head_size;
     r.next_chunk_length = 0;
@@ -761,9 +757,9 @@ test "HeadersParser.findChunkedLen" {
 
 test "HeadersParser.read length" {
     // mock BufferedConnection for read
+    var headers_buf: [256]u8 = undefined;
 
-    var r = HeadersParser.initDynamic(256);
-    defer r.header_bytes.deinit(std.testing.allocator);
+    var r = HeadersParser.init(&headers_buf);
 
     const data = "GET / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5\r\n\r\nHello";
     var conn: MockBufferedConnection = .{
@@ -773,8 +769,8 @@ test "HeadersParser.read length" {
     while (true) { // read headers
         try conn.fill();
 
-        const nchecked = try r.checkCompleteHead(std.testing.allocator, conn.peek());
-        conn.drop(@as(u16, @intCast(nchecked)));
+        const nchecked = try r.checkCompleteHead(conn.peek());
+        conn.drop(@intCast(nchecked));
 
         if (r.state.isContent()) break;
     }
@@ -786,14 +782,14 @@ test "HeadersParser.read length" {
     try std.testing.expectEqual(@as(usize, 5), len);
     try std.testing.expectEqualStrings("Hello", buf[0..len]);
 
-    try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5\r\n\r\n", r.header_bytes.items);
+    try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5\r\n\r\n", r.get());
 }
 
 test "HeadersParser.read chunked" {
     // mock BufferedConnection for read
 
-    var r = HeadersParser.initDynamic(256);
-    defer r.header_bytes.deinit(std.testing.allocator);
+    var headers_buf: [256]u8 = undefined;
+    var r = HeadersParser.init(&headers_buf);
 
     const data = "GET / HTTP/1.1\r\nHost: localhost\r\n\r\n2\r\nHe\r\n2\r\nll\r\n1\r\no\r\n0\r\n\r\n";
     var conn: MockBufferedConnection = .{
@@ -803,8 +799,8 @@ test "HeadersParser.read chunked" {
     while (true) { // read headers
         try conn.fill();
 
-        const nchecked = try r.checkCompleteHead(std.testing.allocator, conn.peek());
-        conn.drop(@as(u16, @intCast(nchecked)));
+        const nchecked = try r.checkCompleteHead(conn.peek());
+        conn.drop(@intCast(nchecked));
 
         if (r.state.isContent()) break;
     }
@@ -815,14 +811,14 @@ test "HeadersParser.read chunked" {
     try std.testing.expectEqual(@as(usize, 5), len);
     try std.testing.expectEqualStrings("Hello", buf[0..len]);
 
-    try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\n\r\n", r.header_bytes.items);
+    try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\n\r\n", r.get());
 }
 
 test "HeadersParser.read chunked trailer" {
     // mock BufferedConnection for read
 
-    var r = HeadersParser.initDynamic(256);
-    defer r.header_bytes.deinit(std.testing.allocator);
+    var headers_buf: [256]u8 = undefined;
+    var r = HeadersParser.init(&headers_buf);
 
     const data = "GET / HTTP/1.1\r\nHost: localhost\r\n\r\n2\r\nHe\r\n2\r\nll\r\n1\r\no\r\n0\r\nContent-Type: text/plain\r\n\r\n";
     var conn: MockBufferedConnection = .{
@@ -832,8 +828,8 @@ test "HeadersParser.read chunked trailer" {
     while (true) { // read headers
         try conn.fill();
 
-        const nchecked = try r.checkCompleteHead(std.testing.allocator, conn.peek());
-        conn.drop(@as(u16, @intCast(nchecked)));
+        const nchecked = try r.checkCompleteHead(conn.peek());
+        conn.drop(@intCast(nchecked));
 
         if (r.state.isContent()) break;
     }
@@ -847,11 +843,11 @@ test "HeadersParser.read chunked trailer" {
     while (true) { // read headers
         try conn.fill();
 
-        const nchecked = try r.checkCompleteHead(std.testing.allocator, conn.peek());
-        conn.drop(@as(u16, @intCast(nchecked)));
+        const nchecked = try r.checkCompleteHead(conn.peek());
+        conn.drop(@intCast(nchecked));
 
         if (r.state.isContent()) break;
     }
 
-    try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\n\r\nContent-Type: text/plain\r\n\r\n", r.header_bytes.items);
+    try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\n\r\nContent-Type: text/plain\r\n\r\n", r.get());
 }
diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig
index ed3c6b099f..ae2b834a72 100644
--- a/src/Package/Fetch.zig
+++ b/src/Package/Fetch.zig
@@ -354,7 +354,8 @@ pub fn run(f: *Fetch) RunError!void {
                     .{ path_or_url, @errorName(file_err), @errorName(uri_err) },
                 ));
             };
-            var resource = try f.initResource(uri);
+            var server_header_buffer: [header_buffer_size]u8 = undefined;
+            var resource = try f.initResource(uri, &server_header_buffer);
             return runResource(f, uri.path, &resource, null);
         }
     },
@@ -415,7 +416,8 @@ pub fn run(f: *Fetch) RunError!void {
         f.location_tok,
         try eb.printString("invalid URI: {s}", .{@errorName(err)}),
     );
-    var resource = try f.initResource(uri);
+    var server_header_buffer: [header_buffer_size]u8 = undefined;
+    var resource = try f.initResource(uri, &server_header_buffer);
 
     return runResource(f, uri.path, &resource, remote.hash);
 }
@@ -876,7 +878,9 @@ const FileType = enum {
     }
 };
 
-fn initResource(f: *Fetch, uri: std.Uri) RunError!Resource {
+const header_buffer_size = 16 * 1024;
+
+fn initResource(f: *Fetch, uri: std.Uri, server_header_buffer: []u8) RunError!Resource {
     const gpa = f.arena.child_allocator;
     const arena = f.arena.allocator();
     const eb = &f.error_bundle;
@@ -894,10 +898,12 @@ fn initResource(f: *Fetch, uri: std.Uri) RunError!Resource {
 
     if (ascii.eqlIgnoreCase(uri.scheme, "http") or
         ascii.eqlIgnoreCase(uri.scheme, "https"))
     {
-        var h = std.http.Headers{ .allocator = gpa };
+        var h: std.http.Headers = .{ .allocator = gpa };
         defer h.deinit();
 
-        var req = http_client.open(.GET, uri, h, .{}) catch |err| {
+        var req = http_client.open(.GET, uri, h, .{
+            .server_header_buffer = server_header_buffer,
+        }) catch |err| {
             return f.fail(f.location_tok, try eb.printString(
                 "unable to connect to server: {s}",
                 .{@errorName(err)},
@@ -935,7 +941,7 @@ fn initResource(f: *Fetch, uri: std.Uri) RunError!Resource {
         transport_uri.scheme = uri.scheme["git+".len..];
         var redirect_uri: []u8 = undefined;
         var session: git.Session = .{ .transport = http_client, .uri = transport_uri };
-        session.discoverCapabilities(gpa, &redirect_uri) catch |err| switch (err) {
+        session.discoverCapabilities(gpa, &redirect_uri, server_header_buffer) catch |err| switch (err) {
             error.Redirected => {
                 defer gpa.free(redirect_uri);
                 return f.fail(f.location_tok, try eb.printString(
@@ -961,6 +967,7 @@ fn initResource(f: *Fetch, uri: std.Uri) RunError!Resource {
         var ref_iterator = session.listRefs(gpa, .{
             .ref_prefixes = &.{ want_ref, want_ref_head, want_ref_tag },
             .include_peeled = true,
+            .server_header_buffer = server_header_buffer,
         }) catch |err| {
             return f.fail(f.location_tok, try eb.printString(
                 "unable to list refs: {s}",
@@ -1003,7 +1010,7 @@ fn initResource(f: *Fetch, uri: std.Uri) RunError!Resource {
         _ = std.fmt.bufPrint(&want_oid_buf, "{}", .{
             std.fmt.fmtSliceHexLower(&want_oid),
         }) catch unreachable;
-        var fetch_stream = session.fetch(gpa, &.{&want_oid_buf}) catch |err| {
+        var fetch_stream = session.fetch(gpa, &.{&want_oid_buf}, server_header_buffer) catch |err| {
             return f.fail(f.location_tok, try eb.printString(
                 "unable to create fetch stream: {s}",
                 .{@errorName(err)},
diff --git a/src/Package/Fetch/git.zig b/src/Package/Fetch/git.zig
index ee8f1ba543..b6c47eabc7 100644
--- a/src/Package/Fetch/git.zig
+++ b/src/Package/Fetch/git.zig
@@ -494,8 +494,9 @@ pub const Session = struct {
         session: *Session,
         allocator: Allocator,
         redirect_uri: *[]u8,
+        http_headers_buffer: []u8,
     ) !void {
-        var capability_iterator = try session.getCapabilities(allocator, redirect_uri);
+        var capability_iterator = try session.getCapabilities(allocator, redirect_uri, http_headers_buffer);
         defer capability_iterator.deinit();
         while (try capability_iterator.next()) |capability| {
             if (mem.eql(u8, capability.key, "agent")) {
@@ -521,6 +522,7 @@ pub const Session = struct {
         session: Session,
         allocator: Allocator,
         redirect_uri: *[]u8,
+        http_headers_buffer: []u8,
     ) !CapabilityIterator {
         var info_refs_uri = session.uri;
         info_refs_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "info/refs" });
@@ -534,6 +536,7 @@ pub const Session = struct {
 
         var request = try session.transport.open(.GET, info_refs_uri, headers, .{
             .max_redirects = 3,
+            .server_header_buffer = http_headers_buffer,
         });
         errdefer request.deinit();
         try request.send(.{});
@@ -620,6 +623,7 @@ pub const Session = struct {
         include_symrefs: bool = false,
         /// Whether to include the peeled object ID for returned tag refs.
         include_peeled: bool = false,
+        server_header_buffer: []u8,
     };
 
     /// Returns an iterator over refs known to the server.
@@ -658,6 +662,7 @@ pub const Session = struct {
 
         var request = try session.transport.open(.POST, upload_pack_uri, headers, .{
             .handle_redirects = false,
+            .server_header_buffer = options.server_header_buffer,
         });
         errdefer request.deinit();
         request.transfer_encoding = .{ .content_length = body.items.len };
@@ -721,7 +726,12 @@ pub const Session = struct {
 
     /// Fetches the given refs from the server. A shallow fetch (depth 1) is
     /// performed if the server supports it.
-    pub fn fetch(session: Session, allocator: Allocator, wants: []const []const u8) !FetchStream {
+    pub fn fetch(
+        session: Session,
+        allocator: Allocator,
+        wants: []const []const u8,
+        http_headers_buffer: []u8,
+    ) !FetchStream {
         var upload_pack_uri = session.uri;
         upload_pack_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "git-upload-pack" });
         defer allocator.free(upload_pack_uri.path);
@@ -758,6 +768,7 @@ pub const Session = struct {
 
         var request = try session.transport.open(.POST, upload_pack_uri, headers, .{
             .handle_redirects = false,
+            .server_header_buffer = http_headers_buffer,
         });
         errdefer request.deinit();
         request.transfer_encoding = .{ .content_length = body.items.len };
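
Usage note for reviewers: a minimal client-side sketch of the new API. The `server_header_buffer` field, the `open`/`send`/`wait` flow, and `error.HttpHeadersOversize` come from this patch; the URL, buffer size, and allocator choice are arbitrary placeholders, and the rest of the surrounding std.http API is assumed to be as of this branch.

```zig
const std = @import("std");

pub fn main() !void {
    var gpa_state = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa_state.deinit();
    const gpa = gpa_state.allocator();

    var client = std.http.Client{ .allocator = gpa };
    defer client.deinit();

    var headers = std.http.Headers{ .allocator = gpa };
    defer headers.deinit();

    // The caller now owns the response-header storage; the client no longer
    // heap-allocates anything for headers.
    var server_header_buffer: [16 * 1024]u8 = undefined;

    var req = try client.open(.GET, try std.Uri.parse("http://example.com/"), headers, .{
        .server_header_buffer = &server_header_buffer,
    });
    defer req.deinit();

    try req.send(.{});
    // Fails with error.HttpHeadersOversize (never error.OutOfMemory) if the
    // response head does not fit in the buffer above.
    try req.wait();
}
```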
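On the server side, `AcceptOptions.client_header_buffer` replaces `header_strategy` the same way. A sketch of one accept/wait cycle, adapted from the module doc-comment example and the test in this patch (the buffer size is a placeholder, and the caller is assumed to hold an already-listening `std.http.Server`):

```zig
const std = @import("std");

fn serveOne(server: *std.http.Server, gpa: std.mem.Allocator) !void {
    // Caller-owned storage for the client's request head.
    var header_buffer: [8192]u8 = undefined;
    var res = try server.accept(.{
        .allocator = gpa,
        .client_header_buffer = &header_buffer,
    });
    defer res.deinit();

    res.wait() catch |err| switch (err) {
        // Oversized request heads now surface as error.HttpHeadersOversize
        // (renamed from error.HttpHeadersExceededSizeLimit); per the wait()
        // doc comment, respond with 431 and tear down the connection.
        error.HttpHeadersOversize => {
            res.status = .request_header_fields_too_large;
            res.send() catch {};
            return;
        },
        else => |e| return e,
    };

    // ... read the body and reply as in the module-level example ...
}
```

Since the parser never allocates, a connection that serves multiple requests can keep reusing the same buffer: `HeadersParser.reset` only rewinds `header_bytes_len` and the state machine.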