diff --git a/lib/std/Uri.zig b/lib/std/Uri.zig index cbd3d42741..2f28402b0a 100644 --- a/lib/std/Uri.zig +++ b/lib/std/Uri.zig @@ -1,156 +1,157 @@ //! Uniform Resource Identifier (URI) parsing roughly adhering to <https://tools.ietf.org/html/rfc3986>. //! Does not do perfect grammar and character class checking, but should be robust against URIs in the wild. -const Uri = @This(); -const std = @import("std.zig"); -const testing = std.testing; -const Allocator = std.mem.Allocator; - scheme: []const u8, -user: ?[]const u8 = null, -password: ?[]const u8 = null, -host: ?[]const u8 = null, +user: ?Component = null, +password: ?Component = null, +host: ?Component = null, port: ?u16 = null, -path: []const u8, -query: ?[]const u8 = null, -fragment: ?[]const u8 = null, +path: Component = Component.empty, +query: ?Component = null, +fragment: ?Component = null, -/// Applies URI encoding and replaces all reserved characters with their respective %XX code. -pub fn escapeString(allocator: Allocator, input: []const u8) error{OutOfMemory}![]u8 { - return escapeStringWithFn(allocator, input, isUnreserved); -} +pub const Component = union(enum) { + /// Invalid characters in this component must be percent encoded + /// before being printed as part of a URI. + raw: []const u8, + /// This component is already percent-encoded, it can be printed + /// directly as part of a URI. 
+ percent_encoded: []const u8, -pub fn escapePath(allocator: Allocator, input: []const u8) error{OutOfMemory}![]u8 { - return escapeStringWithFn(allocator, input, isPathChar); -} + pub const empty: Component = .{ .percent_encoded = "" }; -pub fn escapeQuery(allocator: Allocator, input: []const u8) error{OutOfMemory}![]u8 { - return escapeStringWithFn(allocator, input, isQueryChar); -} - -pub fn writeEscapedString(writer: anytype, input: []const u8) !void { - return writeEscapedStringWithFn(writer, input, isUnreserved); -} - -pub fn writeEscapedPath(writer: anytype, input: []const u8) !void { - return writeEscapedStringWithFn(writer, input, isPathChar); -} - -pub fn writeEscapedQuery(writer: anytype, input: []const u8) !void { - return writeEscapedStringWithFn(writer, input, isQueryChar); -} - -pub fn escapeStringWithFn(allocator: Allocator, input: []const u8, comptime keepUnescaped: fn (c: u8) bool) Allocator.Error![]u8 { - var outsize: usize = 0; - for (input) |c| { - outsize += if (keepUnescaped(c)) @as(usize, 1) else 3; + pub fn isEmpty(component: Component) bool { + return switch (component) { + .raw, .percent_encoded => |string| string.len == 0, + }; } - var output = try allocator.alloc(u8, outsize); - var outptr: usize = 0; - for (input) |c| { - if (keepUnescaped(c)) { - output[outptr] = c; - outptr += 1; - } else { - var buf: [2]u8 = undefined; - _ = std.fmt.bufPrint(&buf, "{X:0>2}", .{c}) catch unreachable; + /// Allocates the result with `arena` only if needed, so the result should not be freed. 
+ pub fn toRawMaybeAlloc( + component: Component, + arena: std.mem.Allocator, + ) std.mem.Allocator.Error![]const u8 { + return switch (component) { + .raw => |raw| raw, + .percent_encoded => |percent_encoded| if (std.mem.indexOfScalar(u8, percent_encoded, '%')) |_| + try std.fmt.allocPrint(arena, "{raw}", .{component}) + else + percent_encoded, + }; + } - output[outptr + 0] = '%'; - output[outptr + 1] = buf[0]; - output[outptr + 2] = buf[1]; - outptr += 3; + pub fn format( + component: Component, + comptime fmt_str: []const u8, + _: std.fmt.FormatOptions, + writer: anytype, + ) @TypeOf(writer).Error!void { + if (fmt_str.len == 0) { + try writer.print("std.Uri.Component{{ .{s} = \"{}\" }}", .{ + @tagName(component), + std.zig.fmtEscapes(switch (component) { + .raw, .percent_encoded => |string| string, + }), + }); + } else if (comptime std.mem.eql(u8, fmt_str, "raw")) switch (component) { + .raw => |raw| try writer.writeAll(raw), + .percent_encoded => |percent_encoded| { + var start: usize = 0; + var index: usize = 0; + while (std.mem.indexOfScalarPos(u8, percent_encoded, index, '%')) |percent| { + index = percent + 1; + if (percent_encoded.len - index < 2) continue; + const percent_encoded_char = + std.fmt.parseInt(u8, percent_encoded[index..][0..2], 16) catch continue; + try writer.print("{s}{c}", .{ + percent_encoded[start..percent], + percent_encoded_char, + }); + start = percent + 3; + index = percent + 3; + } + try writer.writeAll(percent_encoded[start..]); + }, + } else if (comptime std.mem.eql(u8, fmt_str, "%")) switch (component) { + .raw => |raw| try percentEncode(writer, raw, isUnreserved), + .percent_encoded => |percent_encoded| try writer.writeAll(percent_encoded), + } else if (comptime std.mem.eql(u8, fmt_str, "user")) switch (component) { + .raw => |raw| try percentEncode(writer, raw, isUserChar), + .percent_encoded => |percent_encoded| try writer.writeAll(percent_encoded), + } else if (comptime std.mem.eql(u8, fmt_str, "password")) switch (component) 
{ + .raw => |raw| try percentEncode(writer, raw, isPasswordChar), + .percent_encoded => |percent_encoded| try writer.writeAll(percent_encoded), + } else if (comptime std.mem.eql(u8, fmt_str, "host")) switch (component) { + .raw => |raw| try percentEncode(writer, raw, isHostChar), + .percent_encoded => |percent_encoded| try writer.writeAll(percent_encoded), + } else if (comptime std.mem.eql(u8, fmt_str, "path")) switch (component) { + .raw => |raw| try percentEncode(writer, raw, isPathChar), + .percent_encoded => |percent_encoded| try writer.writeAll(percent_encoded), + } else if (comptime std.mem.eql(u8, fmt_str, "query")) switch (component) { + .raw => |raw| try percentEncode(writer, raw, isQueryChar), + .percent_encoded => |percent_encoded| try writer.writeAll(percent_encoded), + } else if (comptime std.mem.eql(u8, fmt_str, "fragment")) switch (component) { + .raw => |raw| try percentEncode(writer, raw, isFragmentChar), + .percent_encoded => |percent_encoded| try writer.writeAll(percent_encoded), + } else @compileError("invalid format string '" ++ fmt_str ++ "'"); + } + + pub fn percentEncode( + writer: anytype, + raw: []const u8, + comptime isValidChar: fn (u8) bool, + ) @TypeOf(writer).Error!void { + var start: usize = 0; + for (raw, 0..) |char, index| { + if (isValidChar(char)) continue; + try writer.print("{s}%{X:0>2}", .{ raw[start..index], char }); + start = index + 1; } + try writer.writeAll(raw[start..]); } - return output; -} +}; -pub fn writeEscapedStringWithFn(writer: anytype, input: []const u8, comptime keepUnescaped: fn (c: u8) bool) @TypeOf(writer).Error!void { - for (input) |c| { - if (keepUnescaped(c)) { - try writer.writeByte(c); - } else { - try writer.print("%{X:0>2}", .{c}); - } - } -} - -/// Parses a URI string and unescapes all %XX where XX is a valid hex number. Otherwise, verbatim copies -/// them to the output. 
-pub fn unescapeString(allocator: Allocator, input: []const u8) error{OutOfMemory}![]u8 { - var outsize: usize = 0; - var inptr: usize = 0; - while (inptr < input.len) { - if (input[inptr] == '%') { - inptr += 1; - if (inptr + 2 <= input.len) { - _ = std.fmt.parseInt(u8, input[inptr..][0..2], 16) catch { - outsize += 3; - inptr += 2; +/// Percent decodes all %XX where XX is a valid hex number. +/// `output` may alias `input` if `output.ptr <= input.ptr`. +/// Mutates and returns a subslice of `output`. +pub fn percentDecodeBackwards(output: []u8, input: []const u8) []u8 { + var input_index = input.len; + var output_index = output.len; + while (input_index > 0) { + if (input_index >= 3) { + const maybe_percent_encoded = input[input_index - 3 ..][0..3]; + if (maybe_percent_encoded[0] == '%') { + if (std.fmt.parseInt(u8, maybe_percent_encoded[1..], 16)) |percent_encoded_char| { + input_index -= maybe_percent_encoded.len; + output_index -= 1; + output[output_index] = percent_encoded_char; continue; - }; - inptr += 2; - outsize += 1; - } else { - outsize += 1; + } else |_| {} } - } else { - inptr += 1; - outsize += 1; } + input_index -= 1; + output_index -= 1; + output[output_index] = input[input_index]; } + return output[output_index..]; +} - var output = try allocator.alloc(u8, outsize); - var outptr: usize = 0; - inptr = 0; - while (inptr < input.len) { - if (input[inptr] == '%') { - inptr += 1; - if (inptr + 2 <= input.len) { - const value = std.fmt.parseInt(u8, input[inptr..][0..2], 16) catch { - output[outptr + 0] = input[inptr + 0]; - output[outptr + 1] = input[inptr + 1]; - inptr += 2; - outptr += 2; - continue; - }; - - output[outptr] = value; - - inptr += 2; - outptr += 1; - } else { - output[outptr] = input[inptr - 1]; - outptr += 1; - } - } else { - output[outptr] = input[inptr]; - inptr += 1; - outptr += 1; - } - } - return output; +/// Percent decodes all %XX where XX is a valid hex number. +/// Mutates and returns a subslice of `buffer`. 
+pub fn percentDecodeInPlace(buffer: []u8) []u8 { + return percentDecodeBackwards(buffer, buffer); } pub const ParseError = error{ UnexpectedCharacter, InvalidFormat, InvalidPort }; /// Parses the URI or returns an error. This function is not compliant, but is required to parse /// some forms of URIs in the wild, such as HTTP Location headers. -/// The return value will contain unescaped strings pointing into the -/// original `text`. Each component that is provided, will be non-`null`. -pub fn parseWithoutScheme(text: []const u8) ParseError!Uri { +/// The return value will contain strings pointing into the original `text`. +/// Each component that is provided, will be non-`null`. +pub fn parseAfterScheme(scheme: []const u8, text: []const u8) ParseError!Uri { var reader = SliceReader{ .slice = text }; - var uri = Uri{ - .scheme = "", - .user = null, - .password = null, - .host = null, - .port = null, - .path = "", // path is always set, but empty by default. - .query = null, - .fragment = null, - }; + var uri: Uri = .{ .scheme = scheme, .path = undefined }; if (reader.peekPrefix("//")) a: { // authority part std.debug.assert(reader.get().? == '/'); @@ -167,12 +168,12 @@ pub fn parseWithoutScheme(text: []const u8) ParseError!Uri { const user_info = authority[0..index]; if (std.mem.indexOf(u8, user_info, ":")) |idx| { - uri.user = user_info[0..idx]; + uri.user = .{ .percent_encoded = user_info[0..idx] }; if (idx < user_info.len - 1) { // empty password is also "no password" - uri.password = user_info[idx + 1 ..]; + uri.password = .{ .percent_encoded = user_info[idx + 1 ..] 
}; } } else { - uri.user = user_info; + uri.user = .{ .percent_encoded = user_info }; uri.password = null; } } @@ -205,19 +206,19 @@ pub fn parseWithoutScheme(text: []const u8) ParseError!Uri { } if (start_of_host >= end_of_host) return error.InvalidFormat; - uri.host = authority[start_of_host..end_of_host]; + uri.host = .{ .percent_encoded = authority[start_of_host..end_of_host] }; } - uri.path = reader.readUntil(isPathSeparator); + uri.path = .{ .percent_encoded = reader.readUntil(isPathSeparator) }; if ((reader.peek() orelse 0) == '?') { // query part std.debug.assert(reader.get().? == '?'); - uri.query = reader.readUntil(isQuerySeparator); + uri.query = .{ .percent_encoded = reader.readUntil(isQuerySeparator) }; } if ((reader.peek() orelse 0) == '#') { // fragment part std.debug.assert(reader.get().? == '#'); - uri.fragment = reader.readUntilEof(); + uri.fragment = .{ .percent_encoded = reader.readUntilEof() }; } return uri; @@ -241,9 +242,6 @@ pub const WriteToStreamOptions = struct { /// When true, include the fragment part of the URI. Ignored when `path` is false. fragment: bool = false, - - /// When true, do not escape any part of the URI. 
- raw: bool = false, }; pub fn writeToStream( @@ -252,80 +250,51 @@ pub fn writeToStream( writer: anytype, ) @TypeOf(writer).Error!void { if (options.scheme) { - try writer.writeAll(uri.scheme); - try writer.writeAll(":"); - + try writer.print("{s}:", .{uri.scheme}); if (options.authority and uri.host != null) { try writer.writeAll("//"); } } - if (options.authority) { if (options.authentication and uri.host != null) { if (uri.user) |user| { - try writer.writeAll(user); + try writer.print("{user}", .{user}); if (uri.password) |password| { - try writer.writeAll(":"); - try writer.writeAll(password); + try writer.print(":{password}", .{password}); } - try writer.writeAll("@"); + try writer.writeByte('@'); } } - if (uri.host) |host| { - try writer.writeAll(host); - - if (uri.port) |port| { - try writer.writeAll(":"); - try std.fmt.formatInt(port, 10, .lower, .{}, writer); - } + try writer.print("{host}", .{host}); + if (uri.port) |port| try writer.print(":{d}", .{port}); } } - if (options.path) { - if (uri.path.len == 0) { - try writer.writeAll("/"); - } else if (options.raw) { - try writer.writeAll(uri.path); - } else { - try writeEscapedPath(writer, uri.path); + try writer.print("{path}", .{ + if (uri.path.isEmpty()) Uri.Component{ .percent_encoded = "/" } else uri.path, + }); + if (options.query) { + if (uri.query) |query| try writer.print("?{query}", .{query}); + } + if (options.fragment) { + if (uri.fragment) |fragment| try writer.print("#{fragment}", .{fragment}); } - - if (options.query) if (uri.query) |q| { - try writer.writeAll("?"); - if (options.raw) { - try writer.writeAll(q); - } else { - try writeEscapedQuery(writer, q); - } - }; - - if (options.fragment) if (uri.fragment) |f| { - try writer.writeAll("#"); - if (options.raw) { - try writer.writeAll(f); - } else { - try writeEscapedQuery(writer, f); - } - }; } } pub fn format( uri: Uri, - comptime fmt: []const u8, - options: std.fmt.FormatOptions, + comptime fmt_str: []const u8, + _: 
std.fmt.FormatOptions, writer: anytype, ) @TypeOf(writer).Error!void { - _ = options; - - const scheme = comptime std.mem.indexOf(u8, fmt, ";") != null or fmt.len == 0; - const authentication = comptime std.mem.indexOf(u8, fmt, "@") != null or fmt.len == 0; - const authority = comptime std.mem.indexOf(u8, fmt, "+") != null or fmt.len == 0; - const path = comptime std.mem.indexOf(u8, fmt, "/") != null or fmt.len == 0; - const query = comptime std.mem.indexOf(u8, fmt, "?") != null or fmt.len == 0; - const fragment = comptime std.mem.indexOf(u8, fmt, "#") != null or fmt.len == 0; - const raw = comptime std.mem.indexOf(u8, fmt, "r") != null or fmt.len == 0; + const scheme = comptime std.mem.indexOfScalar(u8, fmt_str, ';') != null or fmt_str.len == 0; + const authentication = comptime std.mem.indexOfScalar(u8, fmt_str, '@') != null or fmt_str.len == 0; + const authority = comptime std.mem.indexOfScalar(u8, fmt_str, '+') != null or fmt_str.len == 0; + const path = comptime std.mem.indexOfScalar(u8, fmt_str, '/') != null or fmt_str.len == 0; + const query = comptime std.mem.indexOfScalar(u8, fmt_str, '?') != null or fmt_str.len == 0; + const fragment = comptime std.mem.indexOfScalar(u8, fmt_str, '#') != null or fmt_str.len == 0; return writeToStream(uri, .{ .scheme = scheme, @@ -334,12 +303,11 @@ pub fn format( .path = path, .query = query, .fragment = fragment, - .raw = raw, }, writer); } /// Parses the URI or returns an error. -/// The return value will contain unescaped strings pointing into the +/// The return value will contain strings pointing into the /// original `text`. Each component that is provided, will be non-`null`. 
pub fn parse(text: []const u8) ParseError!Uri { var reader: SliceReader = .{ .slice = text }; @@ -353,42 +321,32 @@ pub fn parse(text: []const u8) ParseError!Uri { return error.InvalidFormat; } - var uri = try parseWithoutScheme(reader.readUntilEof()); - uri.scheme = scheme; - - return uri; + return parseAfterScheme(scheme, reader.readUntilEof()); } -pub const ResolveInplaceError = ParseError || error{OutOfMemory}; +pub const ResolveInPlaceError = ParseError || error{NoSpaceLeft}; /// Resolves a URI against a base URI, conforming to RFC 3986, Section 5. -/// Copies `new` to the beginning of `aux_buf`, allowing the slices to overlap, +/// Copies `new` to the beginning of `aux_buf.*`, allowing the slices to overlap, /// then parses `new` as a URI, and then resolves the path in place. /// If a merge needs to take place, the newly constructed path will be stored -/// in `aux_buf` just after the copied `new`. -pub fn resolve_inplace(base: Uri, new: []const u8, aux_buf: []u8) ResolveInplaceError!Uri { - std.mem.copyForwards(u8, aux_buf, new); +/// in `aux_buf.*` just after the copied `new`, and `aux_buf.*` will be modified +/// to only contain the remaining unused space. +pub fn resolve_inplace(base: Uri, new: []const u8, aux_buf: *[]u8) ResolveInPlaceError!Uri { + std.mem.copyForwards(u8, aux_buf.*, new); // At this point, new is an invalid pointer. - const new_mut = aux_buf[0..new.len]; - - const new_parsed, const has_scheme = p: { - break :p .{ - parse(new_mut) catch |first_err| { - break :p .{ - parseWithoutScheme(new_mut) catch return first_err, - false, - }; - }, - true, - }; - }; + const new_mut = aux_buf.*[0..new.len]; + aux_buf.* = aux_buf.*[new.len..]; + const new_parsed = parse(new_mut) catch |err| + (parseAfterScheme("", new_mut) catch return err); // As you can see above, `new_mut` is not a const pointer. 
- const new_path: []u8 = @constCast(new_parsed.path); + const new_path: []u8 = @constCast(new_parsed.path.percent_encoded); - if (has_scheme) return .{ + if (new_parsed.scheme.len > 0) return .{ .scheme = new_parsed.scheme, .user = new_parsed.user, + .password = new_parsed.password, .host = new_parsed.host, .port = new_parsed.port, .path = remove_dot_segments(new_path), @@ -399,6 +357,7 @@ pub fn resolve_inplace(base: Uri, new: []const u8, aux_buf: []u8) ResolveInplace if (new_parsed.host) |host| return .{ .scheme = base.scheme, .user = new_parsed.user, + .password = new_parsed.password, .host = host, .port = new_parsed.port, .path = remove_dot_segments(new_path), @@ -406,28 +365,21 @@ pub fn resolve_inplace(base: Uri, new: []const u8, aux_buf: []u8) ResolveInplace .fragment = new_parsed.fragment, }; - const path, const query = b: { - if (new_path.len == 0) - break :b .{ - base.path, - new_parsed.query orelse base.query, - }; - - if (new_path[0] == '/') - break :b .{ - remove_dot_segments(new_path), - new_parsed.query, - }; - - break :b .{ - try merge_paths(base.path, new_path, aux_buf[new_mut.len..]), - new_parsed.query, - }; + const path, const query = if (new_path.len == 0) .{ + base.path, + new_parsed.query orelse base.query, + } else if (new_path[0] == '/') .{ + remove_dot_segments(new_path), + new_parsed.query, + } else .{ + try merge_paths(base.path, new_path, aux_buf), + new_parsed.query, }; return .{ .scheme = base.scheme, .user = base.user, + .password = base.password, .host = base.host, .port = base.port, .path = path, @@ -437,7 +389,7 @@ pub fn resolve_inplace(base: Uri, new: []const u8, aux_buf: []u8) ResolveInplace } /// In-place implementation of RFC 3986, Section 5.2.4. 
-fn remove_dot_segments(path: []u8) []u8 { +fn remove_dot_segments(path: []u8) Component { var in_i: usize = 0; var out_i: usize = 0; while (in_i < path.len) { @@ -476,28 +428,28 @@ fn remove_dot_segments(path: []u8) []u8 { } } } - return path[0..out_i]; + return .{ .percent_encoded = path[0..out_i] }; } test remove_dot_segments { { var buffer = "/a/b/c/./../../g".*; - try std.testing.expectEqualStrings("/a/g", remove_dot_segments(&buffer)); + try std.testing.expectEqualStrings("/a/g", remove_dot_segments(&buffer).percent_encoded); } } /// 5.2.3. Merge Paths -fn merge_paths(base: []const u8, new: []u8, aux: []u8) error{OutOfMemory}![]u8 { - if (aux.len < base.len + 1 + new.len) return error.OutOfMemory; - if (base.len == 0) { - aux[0] = '/'; - @memcpy(aux[1..][0..new.len], new); - return remove_dot_segments(aux[0 .. new.len + 1]); +fn merge_paths(base: Component, new: []u8, aux_buf: *[]u8) error{NoSpaceLeft}!Component { + var aux = std.io.fixedBufferStream(aux_buf.*); + if (!base.isEmpty()) { + try aux.writer().print("{path}", .{base}); + aux.pos = std.mem.lastIndexOfScalar(u8, aux.getWritten(), '/') orelse + return remove_dot_segments(new); } - const pos = std.mem.lastIndexOfScalar(u8, base, '/') orelse return remove_dot_segments(new); - @memcpy(aux[0 .. pos + 1], base[0 .. pos + 1]); - @memcpy(aux[pos + 1 ..][0..new.len], new); - return remove_dot_segments(aux[0 .. 
pos + 1 + new.len]); + try aux.writer().print("/{s}", .{new}); + const merged_path = remove_dot_segments(aux.getWritten()); + aux_buf.* = aux_buf.*[merged_path.percent_encoded.len..]; + return merged_path; } const SliceReader = struct { @@ -561,13 +513,6 @@ fn isSchemeChar(c: u8) bool { }; } -fn isAuthoritySeparator(c: u8) bool { - return switch (c) { - '/', '?', '#' => true, - else => false, - }; -} - /// reserved = gen-delims / sub-delims fn isReserved(c: u8) bool { return isGenLimit(c) or isSubLimit(c); @@ -598,6 +543,35 @@ fn isUnreserved(c: u8) bool { }; } +fn isUserChar(c: u8) bool { + return isUnreserved(c) or isSubLimit(c); +} + +fn isPasswordChar(c: u8) bool { + return isUserChar(c) or c == ':'; +} + +fn isHostChar(c: u8) bool { + return isPasswordChar(c) or c == '[' or c == ']'; +} + +fn isPathChar(c: u8) bool { + return isUserChar(c) or c == '/' or c == ':' or c == '@'; +} + +fn isQueryChar(c: u8) bool { + return isPathChar(c) or c == '?'; +} + +const isFragmentChar = isQueryChar; + +fn isAuthoritySeparator(c: u8) bool { + return switch (c) { + '/', '?', '#' => true, + else => false, + }; +} + fn isPathSeparator(c: u8) bool { return switch (c) { '?', '#' => true, @@ -605,14 +579,6 @@ fn isPathSeparator(c: u8) bool { }; } -fn isPathChar(c: u8) bool { - return isUnreserved(c) or isSubLimit(c) or c == '/' or c == ':' or c == '@'; -} - -fn isQueryChar(c: u8) bool { - return isPathChar(c) or c == '?' 
or c == '%'; -} - fn isQuerySeparator(c: u8) bool { return switch (c) { '#' => true, @@ -623,92 +589,92 @@ fn isQuerySeparator(c: u8) bool { test "basic" { const parsed = try parse("https://ziglang.org/download"); try testing.expectEqualStrings("https", parsed.scheme); - try testing.expectEqualStrings("ziglang.org", parsed.host orelse return error.UnexpectedNull); - try testing.expectEqualStrings("/download", parsed.path); + try testing.expectEqualStrings("ziglang.org", parsed.host.?.percent_encoded); + try testing.expectEqualStrings("/download", parsed.path.percent_encoded); try testing.expectEqual(@as(?u16, null), parsed.port); } test "with port" { const parsed = try parse("http://example:1337/"); try testing.expectEqualStrings("http", parsed.scheme); - try testing.expectEqualStrings("example", parsed.host orelse return error.UnexpectedNull); - try testing.expectEqualStrings("/", parsed.path); + try testing.expectEqualStrings("example", parsed.host.?.percent_encoded); + try testing.expectEqualStrings("/", parsed.path.percent_encoded); try testing.expectEqual(@as(?u16, 1337), parsed.port); } test "should fail gracefully" { - try std.testing.expectEqual(@as(ParseError!Uri, error.InvalidFormat), parse("foobar://")); + try std.testing.expectError(error.InvalidFormat, parse("foobar://")); } test "file" { const parsed = try parse("file:///"); - try std.testing.expectEqualSlices(u8, "file", parsed.scheme); - try std.testing.expectEqual(@as(?[]const u8, null), parsed.host); - try std.testing.expectEqualSlices(u8, "/", parsed.path); + try std.testing.expectEqualStrings("file", parsed.scheme); + try std.testing.expectEqual(@as(?Component, null), parsed.host); + try std.testing.expectEqualStrings("/", parsed.path.percent_encoded); const parsed2 = try parse("file:///an/absolute/path/to/something"); - try std.testing.expectEqualSlices(u8, "file", parsed2.scheme); - try std.testing.expectEqual(@as(?[]const u8, null), parsed2.host); - try std.testing.expectEqualSlices(u8, 
"/an/absolute/path/to/something", parsed2.path); + try std.testing.expectEqualStrings("file", parsed2.scheme); + try std.testing.expectEqual(@as(?Component, null), parsed2.host); + try std.testing.expectEqualStrings("/an/absolute/path/to/something", parsed2.path.percent_encoded); const parsed3 = try parse("file://localhost/an/absolute/path/to/another/thing/"); - try std.testing.expectEqualSlices(u8, "file", parsed3.scheme); - try std.testing.expectEqualSlices(u8, "localhost", parsed3.host.?); - try std.testing.expectEqualSlices(u8, "/an/absolute/path/to/another/thing/", parsed3.path); + try std.testing.expectEqualStrings("file", parsed3.scheme); + try std.testing.expectEqualStrings("localhost", parsed3.host.?.percent_encoded); + try std.testing.expectEqualStrings("/an/absolute/path/to/another/thing/", parsed3.path.percent_encoded); } test "scheme" { - try std.testing.expectEqualSlices(u8, "http", (try parse("http:_")).scheme); - try std.testing.expectEqualSlices(u8, "scheme-mee", (try parse("scheme-mee:_")).scheme); - try std.testing.expectEqualSlices(u8, "a.b.c", (try parse("a.b.c:_")).scheme); - try std.testing.expectEqualSlices(u8, "ab+", (try parse("ab+:_")).scheme); - try std.testing.expectEqualSlices(u8, "X+++", (try parse("X+++:_")).scheme); - try std.testing.expectEqualSlices(u8, "Y+-.", (try parse("Y+-.:_")).scheme); + try std.testing.expectEqualStrings("http", (try parse("http:_")).scheme); + try std.testing.expectEqualStrings("scheme-mee", (try parse("scheme-mee:_")).scheme); + try std.testing.expectEqualStrings("a.b.c", (try parse("a.b.c:_")).scheme); + try std.testing.expectEqualStrings("ab+", (try parse("ab+:_")).scheme); + try std.testing.expectEqualStrings("X+++", (try parse("X+++:_")).scheme); + try std.testing.expectEqualStrings("Y+-.", (try parse("Y+-.:_")).scheme); } test "authority" { - try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://hostname")).host.?); + try std.testing.expectEqualStrings("hostname", (try 
parse("scheme://hostname")).host.?.percent_encoded); - try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://userinfo@hostname")).host.?); - try std.testing.expectEqualSlices(u8, "userinfo", (try parse("scheme://userinfo@hostname")).user.?); - try std.testing.expectEqual(@as(?[]const u8, null), (try parse("scheme://userinfo@hostname")).password); - try std.testing.expectEqual(@as(?[]const u8, null), (try parse("scheme://userinfo@")).host); + try std.testing.expectEqualStrings("hostname", (try parse("scheme://userinfo@hostname")).host.?.percent_encoded); + try std.testing.expectEqualStrings("userinfo", (try parse("scheme://userinfo@hostname")).user.?.percent_encoded); + try std.testing.expectEqual(@as(?Component, null), (try parse("scheme://userinfo@hostname")).password); + try std.testing.expectEqual(@as(?Component, null), (try parse("scheme://userinfo@")).host); - try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://user:password@hostname")).host.?); - try std.testing.expectEqualSlices(u8, "user", (try parse("scheme://user:password@hostname")).user.?); - try std.testing.expectEqualSlices(u8, "password", (try parse("scheme://user:password@hostname")).password.?); + try std.testing.expectEqualStrings("hostname", (try parse("scheme://user:password@hostname")).host.?.percent_encoded); + try std.testing.expectEqualStrings("user", (try parse("scheme://user:password@hostname")).user.?.percent_encoded); + try std.testing.expectEqualStrings("password", (try parse("scheme://user:password@hostname")).password.?.percent_encoded); - try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://hostname:0")).host.?); + try std.testing.expectEqualStrings("hostname", (try parse("scheme://hostname:0")).host.?.percent_encoded); try std.testing.expectEqual(@as(u16, 1234), (try parse("scheme://hostname:1234")).port.?); - try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://userinfo@hostname:1234")).host.?); + try 
std.testing.expectEqualStrings("hostname", (try parse("scheme://userinfo@hostname:1234")).host.?.percent_encoded); try std.testing.expectEqual(@as(u16, 1234), (try parse("scheme://userinfo@hostname:1234")).port.?); - try std.testing.expectEqualSlices(u8, "userinfo", (try parse("scheme://userinfo@hostname:1234")).user.?); - try std.testing.expectEqual(@as(?[]const u8, null), (try parse("scheme://userinfo@hostname:1234")).password); + try std.testing.expectEqualStrings("userinfo", (try parse("scheme://userinfo@hostname:1234")).user.?.percent_encoded); + try std.testing.expectEqual(@as(?Component, null), (try parse("scheme://userinfo@hostname:1234")).password); - try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://user:password@hostname:1234")).host.?); + try std.testing.expectEqualStrings("hostname", (try parse("scheme://user:password@hostname:1234")).host.?.percent_encoded); try std.testing.expectEqual(@as(u16, 1234), (try parse("scheme://user:password@hostname:1234")).port.?); - try std.testing.expectEqualSlices(u8, "user", (try parse("scheme://user:password@hostname:1234")).user.?); - try std.testing.expectEqualSlices(u8, "password", (try parse("scheme://user:password@hostname:1234")).password.?); + try std.testing.expectEqualStrings("user", (try parse("scheme://user:password@hostname:1234")).user.?.percent_encoded); + try std.testing.expectEqualStrings("password", (try parse("scheme://user:password@hostname:1234")).password.?.percent_encoded); } test "authority.password" { - try std.testing.expectEqualSlices(u8, "username", (try parse("scheme://username@a")).user.?); - try std.testing.expectEqual(@as(?[]const u8, null), (try parse("scheme://username@a")).password); + try std.testing.expectEqualStrings("username", (try parse("scheme://username@a")).user.?.percent_encoded); + try std.testing.expectEqual(@as(?Component, null), (try parse("scheme://username@a")).password); - try std.testing.expectEqualSlices(u8, "username", (try 
parse("scheme://username:@a")).user.?); - try std.testing.expectEqual(@as(?[]const u8, null), (try parse("scheme://username:@a")).password); + try std.testing.expectEqualStrings("username", (try parse("scheme://username:@a")).user.?.percent_encoded); + try std.testing.expectEqual(@as(?Component, null), (try parse("scheme://username:@a")).password); - try std.testing.expectEqualSlices(u8, "username", (try parse("scheme://username:password@a")).user.?); - try std.testing.expectEqualSlices(u8, "password", (try parse("scheme://username:password@a")).password.?); + try std.testing.expectEqualStrings("username", (try parse("scheme://username:password@a")).user.?.percent_encoded); + try std.testing.expectEqualStrings("password", (try parse("scheme://username:password@a")).password.?.percent_encoded); - try std.testing.expectEqualSlices(u8, "username", (try parse("scheme://username::@a")).user.?); - try std.testing.expectEqualSlices(u8, ":", (try parse("scheme://username::@a")).password.?); + try std.testing.expectEqualStrings("username", (try parse("scheme://username::@a")).user.?.percent_encoded); + try std.testing.expectEqualStrings(":", (try parse("scheme://username::@a")).password.?.percent_encoded); } fn testAuthorityHost(comptime hostlist: anytype) !void { inline for (hostlist) |hostname| { - try std.testing.expectEqualSlices(u8, hostname, (try parse("scheme://" ++ hostname)).host.?); + try std.testing.expectEqualStrings(hostname, (try parse("scheme://" ++ hostname)).host.?.percent_encoded); } } @@ -761,11 +727,11 @@ test "RFC example 1" { .scheme = uri[0..3], .user = null, .password = null, - .host = uri[6..17], + .host = .{ .percent_encoded = uri[6..17] }, .port = 8042, - .path = uri[22..33], - .query = uri[34..45], - .fragment = uri[46..50], + .path = .{ .percent_encoded = uri[22..33] }, + .query = .{ .percent_encoded = uri[34..45] }, + .fragment = .{ .percent_encoded = uri[46..50] }, }, try parse(uri)); } @@ -777,7 +743,7 @@ test "RFC example 2" { .password = 
null, .host = null, .port = null, - .path = uri[4..], + .path = .{ .percent_encoded = uri[4..] }, .query = null, .fragment = null, }, try parse(uri)); @@ -838,55 +804,60 @@ test "Special test" { _ = try parse("https://www.youtube.com/watch?v=dQw4w9WgXcQ&feature=youtu.be&t=0"); } -test "URI escaping" { - const input = "\\ö/ äöß ~~.adas-https://canvas:123/#ads&&sad"; - const expected = "%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad"; - - const actual = try escapeString(std.testing.allocator, input); - defer std.testing.allocator.free(actual); - - try std.testing.expectEqualSlices(u8, expected, actual); +test "URI percent encoding" { + try std.testing.expectFmt( + "%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad", + "{%}", + .{Component{ .raw = "\\ö/ äöß ~~.adas-https://canvas:123/#ads&&sad" }}, + ); } -test "URI unescaping" { - const input = "%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad"; - const expected = "\\ö/ äöß ~~.adas-https://canvas:123/#ads&&sad"; +test "URI percent decoding" { + { + const expected = "\\ö/ äöß ~~.adas-https://canvas:123/#ads&&sad"; + var input = "%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad".*; - const actual = try unescapeString(std.testing.allocator, input); - defer std.testing.allocator.free(actual); + try std.testing.expectFmt(expected, "{raw}", .{Component{ .percent_encoded = &input }}); - try std.testing.expectEqualSlices(u8, expected, actual); + var output: [expected.len]u8 = undefined; + try std.testing.expectEqualStrings(percentDecodeBackwards(&output, &input), expected); - const decoded = try unescapeString(std.testing.allocator, "/abc%"); - defer std.testing.allocator.free(decoded); - try std.testing.expectEqualStrings("/abc%", decoded); + try std.testing.expectEqualStrings(expected, percentDecodeInPlace(&input)); + } + + { + const expected = "/abc%"; + var input 
= expected.*; + + try std.testing.expectFmt(expected, "{raw}", .{Component{ .percent_encoded = &input }}); + + var output: [expected.len]u8 = undefined; + try std.testing.expectEqualStrings(percentDecodeBackwards(&output, &input), expected); + + try std.testing.expectEqualStrings(expected, percentDecodeInPlace(&input)); + } } -test "URI query escaping" { +test "URI query encoding" { const address = "https://objects.githubusercontent.com/?response-content-type=application%2Foctet-stream"; const parsed = try Uri.parse(address); - // format the URI to escape it - const formatted_uri = try std.fmt.allocPrint(std.testing.allocator, "{/?}", .{parsed}); - defer std.testing.allocator.free(formatted_uri); - try std.testing.expectEqualStrings("/?response-content-type=application%2Foctet-stream", formatted_uri); + // format the URI to percent encode it + try std.testing.expectFmt("/?response-content-type=application%2Foctet-stream", "{/?}", .{parsed}); } test "format" { - const uri = Uri{ + const uri: Uri = .{ .scheme = "file", .user = null, .password = null, .host = null, .port = null, - .path = "/foo/bar/baz", + .path = .{ .raw = "/foo/bar/baz" }, .query = null, .fragment = null, }; - var buf = std.ArrayList(u8).init(std.testing.allocator); - defer buf.deinit(); - try buf.writer().print("{;/?#}", .{uri}); - try std.testing.expectEqualSlices(u8, "file:/foo/bar/baz", buf.items); + try std.testing.expectFmt("file:/foo/bar/baz", "{;/?#}", .{uri}); } test "URI malformed input" { @@ -894,3 +865,7 @@ test "URI malformed input" { try std.testing.expectError(error.InvalidFormat, std.Uri.parse("http://]@[")); try std.testing.expectError(error.InvalidFormat, std.Uri.parse("http://lo]s\x85hc@[/8\x10?0Q")); } + +const std = @import("std.zig"); +const testing = std.testing; +const Uri = @This(); diff --git a/lib/std/http/Client.zig b/lib/std/http/Client.zig index 2cec73f281..20d9956cef 100644 --- a/lib/std/http/Client.zig +++ b/lib/std/http/Client.zig @@ -771,17 +771,41 @@ pub const 
Request = struct { req.client.connection_pool.release(req.client.allocator, req.connection.?); req.connection = null; - const protocol = protocol_map.get(uri.scheme) orelse return error.UnsupportedUrlScheme; + var server_header = std.heap.FixedBufferAllocator.init(req.response.parser.header_bytes_buffer); + defer req.response.parser.header_bytes_buffer = server_header.buffer[server_header.end_index..]; + const protocol, const valid_uri = try validateUri(uri, server_header.allocator()); - const port: u16 = uri.port orelse switch (protocol) { - .plain => 80, - .tls => 443, - }; + const new_host = valid_uri.host.?.raw; + const prev_host = req.uri.host.?.raw; + const keep_privileged_headers = + std.ascii.eqlIgnoreCase(valid_uri.scheme, req.uri.scheme) and + std.ascii.endsWithIgnoreCase(new_host, prev_host) and + (new_host.len == prev_host.len or new_host[new_host.len - prev_host.len - 1] == '.'); + if (!keep_privileged_headers) { + // When redirecting to a different domain, strip privileged headers. + req.privileged_headers = &.{}; + } - const host = uri.host orelse return error.UriMissingHost; + if (switch (req.response.status) { + .see_other => true, + .moved_permanently, .found => req.method == .POST, + else => false, + }) { + // A redirect to a GET must change the method and remove the body. + req.method = .GET; + req.transfer_encoding = .none; + req.headers.content_type = .omit; + } - req.uri = uri; - req.connection = try req.client.connect(host, port, protocol); + if (req.transfer_encoding != .none) { + // The request body has already been sent. The request is + // still in a valid state, but the redirect must be handled + // manually. 
+ return error.RedirectRequiresResend; + } + + req.uri = valid_uri; + req.connection = try req.client.connect(new_host, valid_uri.port.?, protocol); req.redirect_behavior.subtractOne(); req.response.parser.reset(); @@ -796,13 +820,8 @@ pub const Request = struct { pub const SendError = Connection.WriteError || error{ InvalidContentLength, UnsupportedTransferEncoding }; - pub const SendOptions = struct { - /// Specifies that the uri is already escaped. - raw_uri: bool = false, - }; - /// Send the HTTP request headers to the server. - pub fn send(req: *Request, options: SendOptions) SendError!void { + pub fn send(req: *Request) SendError!void { if (!req.method.requestHasBody() and req.transfer_encoding != .none) return error.UnsupportedTransferEncoding; @@ -821,7 +840,6 @@ pub const Request = struct { .authority = connection.proxied, .path = true, .query = true, - .raw = options.raw_uri, }, w); } try w.writeByte(' '); @@ -1038,55 +1056,19 @@ pub const Request = struct { const location = req.response.location orelse return error.HttpRedirectLocationMissing; - // This mutates the beginning of header_buffer and uses that - // for the backing memory of the returned new_uri. - const header_buffer = req.response.parser.header_bytes_buffer; - const new_uri = req.uri.resolve_inplace(location, header_buffer) catch - return error.HttpRedirectLocationInvalid; - - // The new URI references the beginning of header_bytes_buffer memory. - // That memory will be kept, but everything after it will be - // reused by the subsequent request. In other words, - // header_bytes_buffer must be large enough to store all - // redirect locations as well as the final request header. 
- const path_end = new_uri.path.ptr + new_uri.path.len; - // https://github.com/ziglang/zig/issues/1738 - const path_offset = @intFromPtr(path_end) - @intFromPtr(header_buffer.ptr); - const end_offset = @max(path_offset, location.len); - req.response.parser.header_bytes_buffer = header_buffer[end_offset..]; - - const is_same_domain_or_subdomain = - std.ascii.endsWithIgnoreCase(new_uri.host.?, req.uri.host.?) and - (new_uri.host.?.len == req.uri.host.?.len or - new_uri.host.?[new_uri.host.?.len - req.uri.host.?.len - 1] == '.'); - - if (new_uri.host == null or !is_same_domain_or_subdomain or - !std.ascii.eqlIgnoreCase(new_uri.scheme, req.uri.scheme)) - { - // When redirecting to a different domain, strip privileged headers. - req.privileged_headers = &.{}; - } - - if (switch (req.response.status) { - .see_other => true, - .moved_permanently, .found => req.method == .POST, - else => false, - }) { - // A redirect to a GET must change the method and remove the body. - req.method = .GET; - req.transfer_encoding = .none; - req.headers.content_type = .omit; - } - - if (req.transfer_encoding != .none) { - // The request body has already been sent. The request is - // still in a valid state, but the redirect must be handled - // manually. - return error.RedirectRequiresResend; - } - - try req.redirect(new_uri); - try req.send(.{}); + // This mutates the beginning of header_bytes_buffer and uses that + // for the backing memory of the returned Uri. + try req.redirect(req.uri.resolve_inplace( + location, + &req.response.parser.header_bytes_buffer, + ) catch |err| switch (err) { + error.UnexpectedCharacter, + error.InvalidFormat, + error.InvalidPort, + => return error.HttpRedirectLocationInvalid, + error.NoSpaceLeft => return error.HttpHeadersOversize, + }); + try req.send(); } else { req.response.skip = false; if (!req.response.parser.done) { @@ -1264,30 +1246,25 @@ fn createProxyFromEnvVar(arena: Allocator, env_var_names: []const []const u8) !? 
}; } else return null; - const uri = Uri.parse(content) catch try Uri.parseWithoutScheme(content); + const uri = Uri.parse(content) catch try Uri.parseAfterScheme("http", content); + const protocol, const valid_uri = validateUri(uri, arena) catch |err| switch (err) { + error.UnsupportedUriScheme => return null, + error.UriMissingHost => return error.HttpProxyMissingHost, + error.OutOfMemory => |e| return e, + }; - const protocol = if (uri.scheme.len == 0) - .plain // No scheme, assume http:// - else - protocol_map.get(uri.scheme) orelse return null; // Unknown scheme, ignore - - const host = uri.host orelse return error.HttpProxyMissingHost; - - const authorization: ?[]const u8 = if (uri.user != null or uri.password != null) a: { - const authorization = try arena.alloc(u8, basic_authorization.valueLengthFromUri(uri)); - assert(basic_authorization.value(uri, authorization).len == authorization.len); + const authorization: ?[]const u8 = if (valid_uri.user != null or valid_uri.password != null) a: { + const authorization = try arena.alloc(u8, basic_authorization.valueLengthFromUri(valid_uri)); + assert(basic_authorization.value(valid_uri, authorization).len == authorization.len); break :a authorization; } else null; const proxy = try arena.create(Proxy); proxy.* = .{ .protocol = protocol, - .host = host, + .host = valid_uri.host.?.raw, .authorization = authorization, - .port = uri.port orelse switch (protocol) { - .plain => 80, - .tls => 443, - }, + .port = valid_uri.port.?, .supports_connect = true, }; return proxy; @@ -1305,24 +1282,26 @@ pub const basic_authorization = struct { } pub fn valueLengthFromUri(uri: Uri) usize { - return valueLength( - if (uri.user) |user| user.len else 0, - if (uri.password) |password| password.len else 0, - ); + var stream = std.io.countingWriter(std.io.null_writer); + try stream.writer().print("{user}", .{uri.user orelse Uri.Component.empty}); + const user_len = stream.bytes_written; + stream.bytes_written = 0; + try 
stream.writer().print("{password}", .{uri.password orelse Uri.Component.empty}); + const password_len = stream.bytes_written; + return valueLength(@intCast(user_len), @intCast(password_len)); } pub fn value(uri: Uri, out: []u8) []u8 { - assert(uri.user == null or uri.user.?.len <= max_user_len); - assert(uri.password == null or uri.password.?.len <= max_password_len); + var buf: [max_user_len + ":".len + max_password_len]u8 = undefined; + var stream = std.io.fixedBufferStream(&buf); + stream.writer().print("{user}", .{uri.user orelse Uri.Component.empty}) catch + unreachable; + assert(stream.pos <= max_user_len); + stream.writer().print(":{password}", .{uri.password orelse Uri.Component.empty}) catch + unreachable; @memcpy(out[0..prefix.len], prefix); - - var buf: [max_user_len + ":".len + max_password_len]u8 = undefined; - const unencoded = std.fmt.bufPrint(&buf, "{s}:{s}", .{ - uri.user orelse "", uri.password orelse "", - }) catch unreachable; - const base64 = std.base64.standard.Encoder.encode(out[prefix.len..], unencoded); - + const base64 = std.base64.standard.Encoder.encode(out[prefix.len..], stream.getWritten()); return out[0 .. 
prefix.len + base64.len]; } }; @@ -1337,8 +1316,7 @@ pub fn connectTcp(client: *Client, host: []const u8, port: u16, protocol: Connec .host = host, .port = port, .protocol = protocol, - })) |node| - return node; + })) |node| return node; if (disable_tls and protocol == .tls) return error.TlsInitializationFailed; @@ -1449,19 +1427,12 @@ pub fn connectTunnel( client.connection_pool.release(client.allocator, conn); } - const uri: Uri = .{ - .scheme = "http", - .user = null, - .password = null, - .host = tunnel_host, - .port = tunnel_port, - .path = "", - .query = null, - .fragment = null, - }; - var buffer: [8096]u8 = undefined; - var req = client.open(.CONNECT, uri, .{ + var req = client.open(.CONNECT, .{ + .scheme = "http", + .host = .{ .raw = tunnel_host }, + .port = tunnel_port, + }, .{ .redirect_behavior = .unhandled, .connection = conn, .server_header_buffer = &buffer, @@ -1471,7 +1442,7 @@ pub fn connectTunnel( }; defer req.deinit(); - req.send(.{ .raw_uri = true }) catch |err| break :tunnel err; + req.send() catch |err| break :tunnel err; req.wait() catch |err| break :tunnel err; if (req.response.status.class() == .server_error) { @@ -1500,7 +1471,7 @@ pub fn connectTunnel( } // Prevents a dependency loop in open() -const ConnectErrorPartial = ConnectTcpError || error{ UnsupportedUrlScheme, ConnectionRefused }; +const ConnectErrorPartial = ConnectTcpError || error{ UnsupportedUriScheme, ConnectionRefused }; pub const ConnectError = ConnectErrorPartial || RequestError; /// Connect to `host:port` using the specified protocol. 
This will reuse a @@ -1548,7 +1519,7 @@ pub fn connect( pub const RequestError = ConnectTcpError || ConnectErrorPartial || Request.SendError || std.fmt.ParseIntError || Connection.WriteError || error{ // TODO: file a zig fmt issue for this bad indentation - UnsupportedUrlScheme, + UnsupportedUriScheme, UriMissingHost, CertificateBundleLoadFailure, @@ -1598,12 +1569,25 @@ pub const RequestOptions = struct { privileged_headers: []const http.Header = &.{}, }; -pub const protocol_map = std.ComptimeStringMap(Connection.Protocol, .{ - .{ "http", .plain }, - .{ "ws", .plain }, - .{ "https", .tls }, - .{ "wss", .tls }, -}); +fn validateUri(uri: Uri, arena: Allocator) !struct { Connection.Protocol, Uri } { + const protocol_map = std.ComptimeStringMap(Connection.Protocol, .{ + .{ "http", .plain }, + .{ "ws", .plain }, + .{ "https", .tls }, + .{ "wss", .tls }, + }); + const protocol = protocol_map.get(uri.scheme) orelse return error.UnsupportedUriScheme; + var valid_uri = uri; + // The host is always going to be needed as a raw string for hostname resolution anyway. + valid_uri.host = .{ + .raw = try (uri.host orelse return error.UriMissingHost).toRawMaybeAlloc(arena), + }; + valid_uri.port = uri.port orelse switch (protocol) { + .plain => 80, + .tls => 443, + }; + return .{ protocol, valid_uri }; +} /// Open a connection to the host specified by `uri` and prepare to send a HTTP request. 
/// @@ -1633,14 +1617,8 @@ pub fn open( } } - const protocol = protocol_map.get(uri.scheme) orelse return error.UnsupportedUrlScheme; - - const port: u16 = uri.port orelse switch (protocol) { - .plain => 80, - .tls => 443, - }; - - const host = uri.host orelse return error.UriMissingHost; + var server_header = std.heap.FixedBufferAllocator.init(options.server_header_buffer); + const protocol, const valid_uri = try validateUri(uri, server_header.allocator()); if (protocol == .tls and @atomicLoad(bool, &client.next_https_rescan_certs, .acquire)) { if (disable_tls) unreachable; @@ -1649,15 +1627,17 @@ pub fn open( defer client.ca_bundle_mutex.unlock(); if (client.next_https_rescan_certs) { - client.ca_bundle.rescan(client.allocator) catch return error.CertificateBundleLoadFailure; + client.ca_bundle.rescan(client.allocator) catch + return error.CertificateBundleLoadFailure; @atomicStore(bool, &client.next_https_rescan_certs, false, .release); } } - const conn = options.connection orelse try client.connect(host, port, protocol); + const conn = options.connection orelse + try client.connect(valid_uri.host.?.raw, valid_uri.port.?, protocol); var req: Request = .{ - .uri = uri, + .uri = valid_uri, .client = client, .connection = conn, .keep_alive = options.keep_alive, @@ -1671,7 +1651,7 @@ pub fn open( .status = undefined, .reason = undefined, .keep_alive = undefined, - .parser = proto.HeadersParser.init(options.server_header_buffer), + .parser = proto.HeadersParser.init(server_header.buffer[server_header.end_index..]), }, .headers = options.headers, .extra_headers = options.extra_headers, @@ -1751,7 +1731,7 @@ pub fn fetch(client: *Client, options: FetchOptions) !FetchResult { if (options.payload) |payload| req.transfer_encoding = .{ .content_length = payload.len }; - try req.send(.{ .raw_uri = options.raw_uri }); + try req.send(); if (options.payload) |payload| try req.writeAll(payload); diff --git a/lib/std/http/test.zig b/lib/std/http/test.zig index 
e2aa810d58..caeed0e1ea 100644 --- a/lib/std/http/test.zig +++ b/lib/std/http/test.zig @@ -64,7 +64,7 @@ test "trailers" { }); defer req.deinit(); - try req.send(.{}); + try req.send(); try req.wait(); const body = try req.reader().readAllAlloc(gpa, 8192); @@ -474,6 +474,15 @@ test "general client/server API coverage" { .{ .name = "location", .value = "/redirect/3" }, }, }); + } else if (mem.eql(u8, request.head.target, "/redirect/5")) { + try request.respond("Hello, Redirected!\n", .{ + .status = .found, + .extra_headers = &.{ + .{ .name = "location", .value = "/%2525" }, + }, + }); + } else if (mem.eql(u8, request.head.target, "/%2525")) { + try request.respond("Encoded redirect successful!\n", .{}); } else if (mem.eql(u8, request.head.target, "/redirect/invalid")) { const invalid_port = try getUnusedTcpPort(); const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}", .{invalid_port}); @@ -529,7 +538,7 @@ test "general client/server API coverage" { }); defer req.deinit(); - try req.send(.{}); + try req.send(); try req.wait(); const body = try req.reader().readAllAlloc(gpa, 8192); @@ -554,7 +563,7 @@ test "general client/server API coverage" { }); defer req.deinit(); - try req.send(.{}); + try req.send(); try req.wait(); const body = try req.reader().readAllAlloc(gpa, 8192 * 1024); @@ -578,7 +587,7 @@ test "general client/server API coverage" { }); defer req.deinit(); - try req.send(.{}); + try req.send(); try req.wait(); const body = try req.reader().readAllAlloc(gpa, 8192); @@ -604,7 +613,7 @@ test "general client/server API coverage" { }); defer req.deinit(); - try req.send(.{}); + try req.send(); try req.wait(); const body = try req.reader().readAllAlloc(gpa, 8192); @@ -629,7 +638,7 @@ test "general client/server API coverage" { }); defer req.deinit(); - try req.send(.{}); + try req.send(); try req.wait(); const body = try req.reader().readAllAlloc(gpa, 8192); @@ -656,7 +665,7 @@ test "general client/server API coverage" { }); defer req.deinit(); - 
try req.send(.{}); + try req.send(); try req.wait(); const body = try req.reader().readAllAlloc(gpa, 8192); @@ -684,7 +693,7 @@ test "general client/server API coverage" { }); defer req.deinit(); - try req.send(.{}); + try req.send(); try req.wait(); try std.testing.expectEqual(.ok, req.response.status); @@ -725,7 +734,7 @@ test "general client/server API coverage" { }); defer req.deinit(); - try req.send(.{}); + try req.send(); try req.wait(); const body = try req.reader().readAllAlloc(gpa, 8192); @@ -749,7 +758,7 @@ test "general client/server API coverage" { }); defer req.deinit(); - try req.send(.{}); + try req.send(); try req.wait(); const body = try req.reader().readAllAlloc(gpa, 8192); @@ -773,7 +782,7 @@ test "general client/server API coverage" { }); defer req.deinit(); - try req.send(.{}); + try req.send(); try req.wait(); const body = try req.reader().readAllAlloc(gpa, 8192); @@ -797,13 +806,34 @@ test "general client/server API coverage" { }); defer req.deinit(); - try req.send(.{}); + try req.send(); req.wait() catch |err| switch (err) { error.TooManyHttpRedirects => {}, else => return err, }; } + { // redirect to encoded url + const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/redirect/5", .{port}); + defer gpa.free(location); + const uri = try std.Uri.parse(location); + + log.info("{s}", .{location}); + var server_header_buffer: [1024]u8 = undefined; + var req = try client.open(.GET, uri, .{ + .server_header_buffer = &server_header_buffer, + }); + defer req.deinit(); + + try req.send(); + try req.wait(); + + const body = try req.reader().readAllAlloc(gpa, 8192); + defer gpa.free(body); + + try expectEqualStrings("Encoded redirect successful!\n", body); + } + // connection has been kept alive try expect(client.http_proxy != null or client.connection_pool.free_len == 1); @@ -819,7 +849,7 @@ test "general client/server API coverage" { }); defer req.deinit(); - try req.send(.{}); + try req.send(); const result = req.wait(); // a proxy 
without an upstream is likely to return a 5xx status. @@ -913,16 +943,16 @@ test "Server streams both reading and writing" { var server_header_buffer: [555]u8 = undefined; var req = try client.open(.POST, .{ .scheme = "http", - .host = "127.0.0.1", + .host = .{ .raw = "127.0.0.1" }, .port = test_server.port(), - .path = "/", + .path = .{ .percent_encoded = "/" }, }, .{ .server_header_buffer = &server_header_buffer, }); defer req.deinit(); req.transfer_encoding = .chunked; - try req.send(.{}); + try req.send(); try req.wait(); try req.writeAll("one "); @@ -956,7 +986,7 @@ fn echoTests(client: *http.Client, port: u16) !void { req.transfer_encoding = .{ .content_length = 14 }; - try req.send(.{}); + try req.send(); try req.writeAll("Hello, "); try req.writeAll("World!\n"); try req.finish(); @@ -990,7 +1020,7 @@ fn echoTests(client: *http.Client, port: u16) !void { req.transfer_encoding = .chunked; - try req.send(.{}); + try req.send(); try req.writeAll("Hello, "); try req.writeAll("World!\n"); try req.finish(); @@ -1044,7 +1074,7 @@ fn echoTests(client: *http.Client, port: u16) !void { req.transfer_encoding = .chunked; - try req.send(.{}); + try req.send(); try req.writeAll("Hello, "); try req.writeAll("World!\n"); try req.finish(); @@ -1075,7 +1105,7 @@ fn echoTests(client: *http.Client, port: u16) !void { req.transfer_encoding = .chunked; - try req.send(.{}); + try req.send(); try req.wait(); try expectEqual(.expectation_failed, req.response.status); } @@ -1180,7 +1210,7 @@ test "redirect to different connection" { }); defer req.deinit(); - try req.send(.{}); + try req.send(); try req.wait(); const body = try req.reader().readAllAlloc(gpa, 8192); diff --git a/lib/std/io.zig b/lib/std/io.zig index 9f0f444a83..ab89114000 100644 --- a/lib/std/io.zig +++ b/lib/std/io.zig @@ -413,7 +413,7 @@ pub const StreamSource = @import("io/stream_source.zig").StreamSource; pub const tty = @import("io/tty.zig"); /// A Writer that doesn't write to anything. 
-pub const null_writer = @as(NullWriter, .{ .context = {} }); +pub const null_writer: NullWriter = .{ .context = {} }; const NullWriter = Writer(void, error{}, dummyWrite); fn dummyWrite(context: void, data: []const u8) error{}!usize { diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 80a180f0d4..7a30637411 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -339,12 +339,12 @@ pub fn run(f: *Fetch) RunError!void { .path_or_url => |path_or_url| { if (fs.cwd().openDir(path_or_url, .{ .iterate = true })) |dir| { var resource: Resource = .{ .dir = dir }; - return runResource(f, path_or_url, &resource, null); + return f.runResource(path_or_url, &resource, null); } else |dir_err| { const file_err = if (dir_err == error.NotDir) e: { if (fs.cwd().openFile(path_or_url, .{})) |file| { var resource: Resource = .{ .file = file }; - return runResource(f, path_or_url, &resource, null); + return f.runResource(path_or_url, &resource, null); } else |err| break :e err; } else dir_err; @@ -356,7 +356,7 @@ pub fn run(f: *Fetch) RunError!void { }; var server_header_buffer: [header_buffer_size]u8 = undefined; var resource = try f.initResource(uri, &server_header_buffer); - return runResource(f, uri.path, &resource, null); + return f.runResource(try uri.path.toRawMaybeAlloc(arena), &resource, null); } }, }; @@ -418,7 +418,7 @@ pub fn run(f: *Fetch) RunError!void { ); var server_header_buffer: [header_buffer_size]u8 = undefined; var resource = try f.initResource(uri, &server_header_buffer); - return runResource(f, uri.path, &resource, remote.hash); + return f.runResource(try uri.path.toRawMaybeAlloc(arena), &resource, remote.hash); } pub fn deinit(f: *Fetch) void { @@ -897,13 +897,14 @@ fn initResource(f: *Fetch, uri: std.Uri, server_header_buffer: []u8) RunError!Re const arena = f.arena.allocator(); const eb = &f.error_bundle; - if (ascii.eqlIgnoreCase(uri.scheme, "file")) return .{ - .file = f.parent_package_root.openFile(uri.path, .{}) catch |err| { + if 
(ascii.eqlIgnoreCase(uri.scheme, "file")) { + const path = try uri.path.toRawMaybeAlloc(arena); + return .{ .file = f.parent_package_root.openFile(path, .{}) catch |err| { return f.fail(f.location_tok, try eb.printString("unable to open '{}{s}': {s}", .{ - f.parent_package_root, uri.path, @errorName(err), + f.parent_package_root, path, @errorName(err), })); - }, - }; + } }; + } const http_client = f.job_queue.http_client; @@ -920,7 +921,7 @@ fn initResource(f: *Fetch, uri: std.Uri, server_header_buffer: []u8) RunError!Re }; errdefer req.deinit(); // releases more than memory - req.send(.{}) catch |err| { + req.send() catch |err| { return f.fail(f.location_tok, try eb.printString( "HTTP request failed: {s}", .{@errorName(err)}, @@ -967,7 +968,8 @@ fn initResource(f: *Fetch, uri: std.Uri, server_header_buffer: []u8) RunError!Re }; const want_oid = want_oid: { - const want_ref = uri.fragment orelse "HEAD"; + const want_ref = + if (uri.fragment) |fragment| try fragment.toRawMaybeAlloc(arena) else "HEAD"; if (git.parseOid(want_ref)) |oid| break :want_oid oid else |_| {} const want_ref_head = try std.fmt.allocPrint(arena, "refs/heads/{s}", .{want_ref}); diff --git a/src/Package/Fetch/git.zig b/src/Package/Fetch/git.zig index d7cdd8483c..a8c106412e 100644 --- a/src/Package/Fetch/git.zig +++ b/src/Package/Fetch/git.zig @@ -540,9 +540,13 @@ pub const Session = struct { http_headers_buffer: []u8, ) !CapabilityIterator { var info_refs_uri = session.uri; - info_refs_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "info/refs" }); - defer allocator.free(info_refs_uri.path); - info_refs_uri.query = "service=git-upload-pack"; + { + const session_uri_path = try std.fmt.allocPrint(allocator, "{path}", .{session.uri.path}); + defer allocator.free(session_uri_path); + info_refs_uri.path = .{ .percent_encoded = try std.fs.path.resolvePosix(allocator, &.{ "/", session_uri_path, "info/refs" }) }; + } + defer 
allocator.free(info_refs_uri.path.percent_encoded); + info_refs_uri.query = .{ .percent_encoded = "service=git-upload-pack" }; info_refs_uri.fragment = null; const max_redirects = 3; @@ -554,16 +558,18 @@ pub const Session = struct { }, }); errdefer request.deinit(); - try request.send(.{}); + try request.send(); try request.finish(); try request.wait(); if (request.response.status != .ok) return error.ProtocolError; const any_redirects_occurred = request.redirect_behavior.remaining() < max_redirects; if (any_redirects_occurred) { - if (!mem.endsWith(u8, request.uri.path, "/info/refs")) return error.UnparseableRedirect; + const request_uri_path = try std.fmt.allocPrint(allocator, "{path}", .{request.uri.path}); + defer allocator.free(request_uri_path); + if (!mem.endsWith(u8, request_uri_path, "/info/refs")) return error.UnparseableRedirect; var new_uri = request.uri; - new_uri.path = new_uri.path[0 .. new_uri.path.len - "/info/refs".len]; + new_uri.path = .{ .percent_encoded = request_uri_path[0 .. request_uri_path.len - "/info/refs".len] }; new_uri.query = null; redirect_uri.* = try std.fmt.allocPrint(allocator, "{+/}", .{new_uri}); return error.Redirected; @@ -645,8 +651,12 @@ pub const Session = struct { /// Returns an iterator over refs known to the server. 
pub fn listRefs(session: Session, allocator: Allocator, options: ListRefsOptions) !RefIterator { var upload_pack_uri = session.uri; - upload_pack_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "git-upload-pack" }); - defer allocator.free(upload_pack_uri.path); + { + const session_uri_path = try std.fmt.allocPrint(allocator, "{path}", .{session.uri.path}); + defer allocator.free(session_uri_path); + upload_pack_uri.path = .{ .percent_encoded = try std.fs.path.resolvePosix(allocator, &.{ "/", session_uri_path, "git-upload-pack" }) }; + } + defer allocator.free(upload_pack_uri.path.percent_encoded); upload_pack_uri.query = null; upload_pack_uri.fragment = null; @@ -681,7 +691,7 @@ pub const Session = struct { }); errdefer request.deinit(); request.transfer_encoding = .{ .content_length = body.items.len }; - try request.send(.{}); + try request.send(); try request.writeAll(body.items); try request.finish(); @@ -748,8 +758,12 @@ pub const Session = struct { http_headers_buffer: []u8, ) !FetchStream { var upload_pack_uri = session.uri; - upload_pack_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "git-upload-pack" }); - defer allocator.free(upload_pack_uri.path); + { + const session_uri_path = try std.fmt.allocPrint(allocator, "{path}", .{session.uri.path}); + defer allocator.free(session_uri_path); + upload_pack_uri.path = .{ .percent_encoded = try std.fs.path.resolvePosix(allocator, &.{ "/", session_uri_path, "git-upload-pack" }) }; + } + defer allocator.free(upload_pack_uri.path.percent_encoded); upload_pack_uri.query = null; upload_pack_uri.fragment = null; @@ -786,7 +800,7 @@ pub const Session = struct { }); errdefer request.deinit(); request.transfer_encoding = .{ .content_length = body.items.len }; - try request.send(.{}); + try request.send(); try request.writeAll(body.items); try request.finish(); diff --git a/src/link/SpirV.zig b/src/link/SpirV.zig index 0a90a2fd67..27c905cc61 100644 --- 
a/src/link/SpirV.zig +++ b/src/link/SpirV.zig @@ -233,9 +233,18 @@ pub fn flushModule(self: *SpirV, arena: Allocator, prog_node: *std.Progress.Node // name if it contains no strange characters is nice for debugging. URI encoding fits the bill. // We're using : as separator, which is a reserved character. - const escaped_name = try std.Uri.escapeString(gpa, name.toSlice(&mod.intern_pool)); - defer gpa.free(escaped_name); - try error_info.writer().print(":{s}", .{escaped_name}); + try std.Uri.Component.percentEncode( + error_info.writer(), + name.toSlice(&mod.intern_pool), + struct { + fn isValidChar(c: u8) bool { + return switch (c) { + 0, '%', ':' => false, + else => true, + }; + } + }.isValidChar, + ); } try spv.sections.debug_strings.emit(gpa, .OpSourceExtension, .{ .extension = error_info.items,