mirror of
https://github.com/ziglang/zig.git
synced 2026-02-21 16:54:52 +00:00
Merge pull request #14207 from MasterQ32/zig-uri-upstream
Ports zig-uri to stdlib.
This commit is contained in:
commit
87b223428a
512
lib/std/Uri.zig
Normal file
512
lib/std/Uri.zig
Normal file
@ -0,0 +1,512 @@
|
||||
//! Implements URI parsing roughly adhering to <https://tools.ietf.org/html/rfc3986>.
|
||||
//! Does not do perfect grammar and character class checking, but should be robust against URIs in the wild.
|
||||
|
||||
const Uri = @This();
|
||||
const std = @import("std.zig");
|
||||
const testing = std.testing;
|
||||
|
||||
scheme: []const u8,
|
||||
user: ?[]const u8,
|
||||
password: ?[]const u8,
|
||||
host: ?[]const u8,
|
||||
port: ?u16,
|
||||
path: []const u8,
|
||||
query: ?[]const u8,
|
||||
fragment: ?[]const u8,
|
||||
|
||||
/// Percent-encodes `input` for use inside a URI.
/// Every byte that is not in the `unreserved` set (see `isUnreserved`) is
/// replaced by `%XX`, where `XX` is the byte value in upper-case hexadecimal.
/// Unreserved bytes are copied unchanged.
/// Caller owns the returned slice and frees it with `allocator`.
pub fn escapeString(allocator: std.mem.Allocator, input: []const u8) error{OutOfMemory}![]const u8 {
    const hex_digits = "0123456789ABCDEF";

    // First pass: determine the exact size of the encoded output.
    var encoded_len: usize = 0;
    for (input) |byte| {
        encoded_len += if (isUnreserved(byte)) @as(usize, 1) else 3;
    }

    const encoded = try allocator.alloc(u8, encoded_len);

    // Second pass: emit either the byte itself or its `%XX` triplet.
    var write_index: usize = 0;
    for (input) |byte| {
        if (isUnreserved(byte)) {
            encoded[write_index] = byte;
            write_index += 1;
            continue;
        }
        encoded[write_index + 0] = '%';
        encoded[write_index + 1] = hex_digits[byte >> 4];
        encoded[write_index + 2] = hex_digits[byte & 0x0F];
        write_index += 3;
    }
    return encoded;
}
|
||||
|
||||
/// Decodes percent-escapes in `input`.
/// Every `%XX` where both `X` are hexadecimal digits is replaced by the byte
/// with that value. Any other byte is copied verbatim, including a `%` that is
/// not followed by two hexadecimal digits (scanning then resumes at the byte
/// right after that `%`).
/// Caller owns the returned slice and frees it with `allocator`.
pub fn unescapeString(allocator: std.mem.Allocator, input: []const u8) error{OutOfMemory}![]const u8 {
    // First pass: count output bytes so the allocation is exact.
    var outsize: usize = 0;
    var inptr: usize = 0;
    while (inptr < input.len) {
        inptr += if (decodePercentTriplet(input, inptr) != null) @as(usize, 3) else 1;
        outsize += 1;
    }

    const output = try allocator.alloc(u8, outsize);

    // Second pass: same walk as above, this time writing the bytes.
    var outptr: usize = 0;
    inptr = 0;
    while (inptr < input.len) {
        if (decodePercentTriplet(input, inptr)) |value| {
            output[outptr] = value;
            inptr += 3;
        } else {
            output[outptr] = input[inptr];
            inptr += 1;
        }
        outptr += 1;
    }
    return output;
}

/// Returns the byte encoded by a `%XX` triplet starting at `input[index]`, or
/// `null` if there is no complete, valid triplet at that position.
/// `index` must be less than `input.len`.
fn decodePercentTriplet(input: []const u8, index: usize) ?u8 {
    if (input[index] != '%' or input.len - index < 3) return null;
    // Decode digit by digit: `parseInt` would also accept sign prefixes such as "+1".
    const hi = std.fmt.charToDigit(input[index + 1], 16) catch return null;
    const lo = std.fmt.charToDigit(input[index + 2], 16) catch return null;
    return hi * 16 + lo;
}
|
||||
|
||||
pub const ParseError = error{ UnexpectedCharacter, InvalidFormat, InvalidPort };
|
||||
|
||||
/// Parses `text` as a URI of the form
/// `scheme:[//[user[:password]@]host[:port]]path[?query][#fragment]`.
///
/// All string components of the result are slices pointing into `text`; they
/// are NOT percent-decoded and nothing is allocated. Each component that is
/// present in `text` is non-`null`; an empty password is reported as `null`.
///
/// Errors:
/// * `error.InvalidFormat`: `text` ends before the ':' that terminates the
///   scheme, the authority after "//" is empty, or an IPv6 host is missing
///   its closing ']'.
/// * `error.UnexpectedCharacter`: the scheme is followed by something other
///   than ':'.
/// * `error.InvalidPort`: the port is not a decimal number that fits in `u16`.
pub fn parse(text: []const u8) ParseError!Uri {
    var reader = SliceReader{ .slice = text };
    var uri = Uri{
        .scheme = reader.readWhile(isSchemeChar),
        .user = null,
        .password = null,
        .host = null,
        .port = null,
        .path = "", // path is always set, but empty by default.
        .query = null,
        .fragment = null,
    };

    // after the scheme, a ':' must appear
    if (reader.get()) |c| {
        if (c != ':')
            return error.UnexpectedCharacter;
    } else {
        return error.InvalidFormat;
    }

    if (reader.peekPrefix("//")) { // authority part
        // Consume the two slashes that `peekPrefix` just confirmed.
        _ = reader.get();
        _ = reader.get();

        const authority = reader.readUntil(isAuthoritySeparator);
        if (authority.len == 0)
            return error.InvalidFormat;

        var start_of_host: usize = 0;
        if (std.mem.indexOf(u8, authority, "@")) |index| {
            start_of_host = index + 1;
            const user_info = authority[0..index];

            if (std.mem.indexOf(u8, user_info, ":")) |idx| {
                uri.user = user_info[0..idx];
                if (idx < user_info.len - 1) { // empty password is also "no password"
                    uri.password = user_info[idx + 1 ..];
                }
            } else {
                uri.user = user_info;
            }
        }

        // May be empty when the authority ends with '@'; the host is then "".
        const host_and_port = authority[start_of_host..];
        var end_of_host: usize = authority.len;

        if (host_and_port.len > 0 and host_and_port[0] == '[') { // IPv6
            // Only look for ']' after the userinfo, so a ']' inside the
            // userinfo can never place the end of the host before its start.
            const closing = std.mem.lastIndexOf(u8, host_and_port, "]") orelse return error.InvalidFormat;
            end_of_host = start_of_host + closing + 1;

            if (std.mem.lastIndexOf(u8, authority, ":")) |index| {
                if (index >= end_of_host) { // if not part of the V6 address field
                    uri.port = std.fmt.parseInt(u16, authority[index + 1 ..], 10) catch return error.InvalidPort;
                }
            }
        } else if (std.mem.lastIndexOf(u8, authority, ":")) |index| {
            if (index >= start_of_host) { // if not part of the userinfo field
                end_of_host = index;
                uri.port = std.fmt.parseInt(u16, authority[index + 1 ..], 10) catch return error.InvalidPort;
            }
        }

        uri.host = authority[start_of_host..end_of_host];
    }

    uri.path = reader.readUntil(isPathSeparator);

    if ((reader.peek() orelse 0) == '?') { // query part
        _ = reader.get(); // skip '?'
        uri.query = reader.readUntil(isQuerySeparator);
    }

    if ((reader.peek() orelse 0) == '#') { // fragment part
        _ = reader.get(); // skip '#'
        uri.fragment = reader.readUntilEof();
    }

    return uri;
}
|
||||
|
||||
/// Forward-only cursor over a byte slice. Every slice it returns points into
/// `slice`; nothing is copied.
const SliceReader = struct {
    const Self = @This();

    /// The bytes being read.
    slice: []const u8,
    /// Index into `slice` of the next unread byte.
    offset: usize = 0,

    /// Returns the next byte and advances past it, or `null` at the end.
    fn get(self: *Self) ?u8 {
        const c = self.peek() orelse return null;
        self.offset += 1;
        return c;
    }

    /// Returns the next byte without advancing, or `null` at the end.
    fn peek(self: Self) ?u8 {
        return if (self.offset < self.slice.len) self.slice[self.offset] else null;
    }

    /// Consumes and returns the longest run of bytes for which `predicate` is true.
    fn readWhile(self: *Self, comptime predicate: fn (u8) bool) []const u8 {
        const start = self.offset;
        while (self.peek()) |c| {
            if (!predicate(c)) break;
            self.offset += 1;
        }
        return self.slice[start..self.offset];
    }

    /// Consumes and returns bytes up to, but not including, the first byte
    /// for which `predicate` is true (or up to the end if there is none).
    fn readUntil(self: *Self, comptime predicate: fn (u8) bool) []const u8 {
        const start = self.offset;
        while (self.peek()) |c| {
            if (predicate(c)) break;
            self.offset += 1;
        }
        return self.slice[start..self.offset];
    }

    /// Consumes and returns everything that is left.
    fn readUntilEof(self: *Self) []const u8 {
        const rest = self.slice[self.offset..];
        self.offset = self.slice.len;
        return rest;
    }

    /// Reports whether the unread bytes start with `prefix`; does not advance.
    fn peekPrefix(self: Self, prefix: []const u8) bool {
        const end = self.offset + prefix.len;
        return end <= self.slice.len and std.mem.eql(u8, self.slice[self.offset..end], prefix);
    }
};
|
||||
|
||||
/// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
/// Reports whether `c` is a letter, a digit, '+', '-' or '.'. The rule that a
/// scheme starts with a letter is not checked here.
fn isSchemeChar(c: u8) bool {
    if (c >= 'A' and c <= 'Z') return true;
    if (c >= 'a' and c <= 'z') return true;
    if (c >= '0' and c <= '9') return true;
    return c == '+' or c == '-' or c == '.';
}
|
||||
|
||||
/// Reports whether `c` ends the authority component: '/', '?' or '#'.
fn isAuthoritySeparator(c: u8) bool {
    return c == '/' or c == '?' or c == '#';
}
|
||||
|
||||
/// reserved = gen-delims / sub-delims
/// Reports whether `c` is accepted by `isGenLimit` or by `isSubLimit`.
fn isReserved(c: u8) bool {
    if (isGenLimit(c)) return true;
    return isSubLimit(c);
}
|
||||
|
||||
/// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
/// Reports whether `c` is one of the generic delimiters listed above.
fn isGenLimit(c: u8) bool {
    return switch (c) {
        // '/' is a gen-delim; ',' is not (it belongs to sub-delims).
        ':', '/', '?', '#', '[', ']', '@' => true,
        else => false,
    };
}
|
||||
|
||||
/// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
///            / "*" / "+" / "," / ";" / "="
/// Reports whether `c` is one of the sub-delimiters listed above.
fn isSubLimit(c: u8) bool {
    const sub_delims = "!$&'()*+,;=";
    return std.mem.indexOfScalar(u8, sub_delims, c) != null;
}
|
||||
|
||||
/// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
/// Reports whether `c` is a letter, a digit, '-', '.', '_' or '~'.
/// `escapeString` copies these bytes unchanged.
fn isUnreserved(c: u8) bool {
    const is_upper = c >= 'A' and c <= 'Z';
    const is_lower = c >= 'a' and c <= 'z';
    const is_digit = c >= '0' and c <= '9';
    const is_mark = c == '-' or c == '.' or c == '_' or c == '~';
    return is_upper or is_lower or is_digit or is_mark;
}
|
||||
|
||||
/// Reports whether `c` ends the path component: '?' or '#'.
fn isPathSeparator(c: u8) bool {
    return c == '?' or c == '#';
}
|
||||
|
||||
/// Reports whether `c` ends the query component: only '#'.
fn isQuerySeparator(c: u8) bool {
    return c == '#';
}
|
||||
|
||||
// An https URI with host and path but no explicit port.
test "basic" {
    const uri = try parse("https://ziglang.org/download");
    try testing.expectEqualStrings("https", uri.scheme);
    const host = uri.host orelse return error.UnexpectedNull;
    try testing.expectEqualStrings("ziglang.org", host);
    try testing.expectEqualStrings("/download", uri.path);
    const no_port: ?u16 = null;
    try testing.expectEqual(no_port, uri.port);
}
|
||||
|
||||
// An http URI with an explicit port and a root path.
test "with port" {
    const uri = try parse("http://example:1337/");
    try testing.expectEqualStrings("http", uri.scheme);
    const host = uri.host orelse return error.UnexpectedNull;
    try testing.expectEqualStrings("example", host);
    try testing.expectEqualStrings("/", uri.path);
    const expected_port: ?u16 = 1337;
    try testing.expectEqual(expected_port, uri.port);
}
|
||||
|
||||
// An empty authority after "//" must yield an error rather than a crash.
test "should fail gracefully" {
    try std.testing.expectError(error.InvalidFormat, parse("foobar://"));
}
|
||||
|
||||
// Each scheme below, followed by ":_", must be returned unchanged as `scheme`.
test "scheme" {
    inline for (.{ "http", "scheme-mee", "a.b.c", "ab+", "X+++", "Y+-." }) |scheme| {
        const uri = try parse(scheme ++ ":_");
        try std.testing.expectEqualSlices(u8, scheme, uri.scheme);
    }
}
|
||||
|
||||
// Checks how the authority is split into user, password, host and port for
// each combination of optional parts.
test "authority" {
    const no_password: ?[]const u8 = null;

    {
        const uri = try parse("scheme://hostname");
        try std.testing.expectEqualSlices(u8, "hostname", uri.host.?);
    }
    {
        const uri = try parse("scheme://userinfo@hostname");
        try std.testing.expectEqualSlices(u8, "hostname", uri.host.?);
        try std.testing.expectEqualSlices(u8, "userinfo", uri.user.?);
        try std.testing.expectEqual(no_password, uri.password);
    }
    {
        const uri = try parse("scheme://user:password@hostname");
        try std.testing.expectEqualSlices(u8, "hostname", uri.host.?);
        try std.testing.expectEqualSlices(u8, "user", uri.user.?);
        try std.testing.expectEqualSlices(u8, "password", uri.password.?);
    }
    {
        const uri = try parse("scheme://hostname:0");
        try std.testing.expectEqualSlices(u8, "hostname", uri.host.?);
    }
    {
        const uri = try parse("scheme://hostname:1234");
        try std.testing.expectEqual(@as(u16, 1234), uri.port.?);
    }
    {
        const uri = try parse("scheme://userinfo@hostname:1234");
        try std.testing.expectEqualSlices(u8, "hostname", uri.host.?);
        try std.testing.expectEqual(@as(u16, 1234), uri.port.?);
        try std.testing.expectEqualSlices(u8, "userinfo", uri.user.?);
        try std.testing.expectEqual(no_password, uri.password);
    }
    {
        const uri = try parse("scheme://user:password@hostname:1234");
        try std.testing.expectEqualSlices(u8, "hostname", uri.host.?);
        try std.testing.expectEqual(@as(u16, 1234), uri.port.?);
        try std.testing.expectEqualSlices(u8, "user", uri.user.?);
        try std.testing.expectEqualSlices(u8, "password", uri.password.?);
    }
}
|
||||
|
||||
// A missing or empty password is `null`; only the first ':' in the userinfo
// separates user from password.
test "authority.password" {
    const no_password: ?[]const u8 = null;

    {
        const uri = try parse("scheme://username@a");
        try std.testing.expectEqualSlices(u8, "username", uri.user.?);
        try std.testing.expectEqual(no_password, uri.password);
    }
    {
        const uri = try parse("scheme://username:@a");
        try std.testing.expectEqualSlices(u8, "username", uri.user.?);
        try std.testing.expectEqual(no_password, uri.password);
    }
    {
        const uri = try parse("scheme://username:password@a");
        try std.testing.expectEqualSlices(u8, "username", uri.user.?);
        try std.testing.expectEqualSlices(u8, "password", uri.password.?);
    }
    {
        const uri = try parse("scheme://username::@a");
        try std.testing.expectEqualSlices(u8, "username", uri.user.?);
        try std.testing.expectEqualSlices(u8, ":", uri.password.?);
    }
}
|
||||
|
||||
/// Test helper: for every comptime string in `hostlist`, parses
/// "scheme://" ++ host and expects `host` to come back unchanged.
fn testAuthorityHost(comptime hostlist: anytype) !void {
    inline for (hostlist) |hostname| {
        const uri = try parse("scheme://" ++ hostname);
        try std.testing.expectEqualSlices(u8, hostname, uri.host.?);
    }
}
|
||||
|
||||
// DNS-style host names must be returned verbatim as the host.
test "authority.dns-names" {
    const hosts = .{
        "a",
        "a.b",
        "example.com",
        "www.example.com",
        "example.org.",
        "www.example.org.",
        "xn--nw2a.xn--j6w193g", // internationalized URI: 見.香港
        "fe80--1ff-fe23-4567-890as3.ipv6-literal.net",
    };
    try testAuthorityHost(hosts);
}
|
||||
|
||||
// Dotted-quad IPv4 literals must be returned verbatim as the host.
test "authority.IPv4" {
    const hosts = .{
        "127.0.0.1",
        "255.255.255.255",
        "0.0.0.0",
        "8.8.8.8",
        "1.2.3.4",
        "192.168.0.1",
        "10.42.0.0",
    };
    try testAuthorityHost(hosts);
}
|
||||
|
||||
// Bracketed IPv6 literals (brackets included) must be returned verbatim as
// the host; the colons inside the brackets are not port separators.
test "authority.IPv6" {
    const hosts = .{
        "[2001:db8:0:0:0:0:2:1]",
        "[2001:db8::2:1]",
        "[2001:db8:0000:1:1:1:1:1]",
        "[2001:db8:0:1:1:1:1:1]",
        "[0:0:0:0:0:0:0:0]",
        "[0:0:0:0:0:0:0:1]",
        "[::1]",
        "[::]",
        "[2001:db8:85a3:8d3:1319:8a2e:370:7348]",
        "[fe80::1ff:fe23:4567:890a%25eth2]",
        "[fe80::1ff:fe23:4567:890a]",
        "[fe80::1ff:fe23:4567:890a%253]",
        "[fe80:3::1ff:fe23:4567:890a]",
    };
    try testAuthorityHost(hosts);
}
|
||||
|
||||
// The expected components are written as sub-slices of `uri` itself, so the
// comparison also checks that `parse` returns slices into its input.
test "RFC example 1" {
    const uri = "foo://example.com:8042/over/there?name=ferret#nose";
    const expected = Uri{
        .scheme = uri[0..3],
        .user = null,
        .password = null,
        .host = uri[6..17],
        .port = 8042,
        .path = uri[22..33],
        .query = uri[34..45],
        .fragment = uri[46..50],
    };
    const actual = try parse(uri);
    try std.testing.expectEqual(expected, actual);
}
|
||||
|
||||
// A URI without an authority: everything after "urn:" is the path.
// The expected components are written as sub-slices of `uri` itself.
test "RFC example 2" {
    const uri = "urn:example:animal:ferret:nose";
    try std.testing.expectEqual(Uri{
        .scheme = uri[0..3],
        .user = null,
        .password = null,
        .host = null,
        .port = null,
        .path = uri[4..],
        .query = null,
        .fragment = null,
    }, try parse(uri));
}
|
||||
|
||||
// source:
// https://en.wikipedia.org/wiki/Uniform_Resource_Identifier#Examples
// Smoke test: each example only has to parse without an error.
test "Examples from wikipedia" {
    const examples = [_][]const u8{
        "https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top",
        "ldap://[2001:db8::7]/c=GB?objectClass?one",
        "mailto:John.Doe@example.com",
        "news:comp.infosystems.www.servers.unix",
        "tel:+1-816-555-1212",
        "telnet://192.0.2.16:80/",
        "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
        "http://a/b/c/d;p?q",
    };
    for (examples) |example| {
        const parsed = try parse(example);
        _ = parsed;
    }
}
|
||||
|
||||
// source:
// https://tools.ietf.org/html/rfc3986#section-5.4.1
// Smoke test: each URI only has to parse without an error.
test "Examples from RFC3986" {
    const examples = [_][]const u8{
        "http://a/b/c/g",
        "http://a/b/c/g",
        "http://a/b/c/g/",
        "http://a/g",
        "http://g",
        "http://a/b/c/d;p?y",
        "http://a/b/c/g?y",
        "http://a/b/c/d;p?q#s",
        "http://a/b/c/g#s",
        "http://a/b/c/g?y#s",
        "http://a/b/c/;x",
        "http://a/b/c/g;x",
        "http://a/b/c/g;x?y#s",
        "http://a/b/c/d;p?q",
        "http://a/b/c/",
        "http://a/b/c/",
        "http://a/b/",
        "http://a/b/",
        "http://a/b/g",
        "http://a/",
        "http://a/",
        "http://a/g",
    };
    for (examples) |example| {
        const parsed = try parse(example);
        _ = parsed;
    }
}
|
||||
|
||||
// Smoke test: a URI with a multi-parameter query only has to parse.
test "Special test" {
    // This is for all of you code readers ♥
    const parsed = try parse("https://www.youtube.com/watch?v=dQw4w9WgXcQ&feature=youtu.be&t=0");
    _ = parsed;
}
|
||||
|
||||
// Escaping a string that mixes ASCII, reserved characters and multi-byte
// UTF-8 must produce the exact percent-encoded form below.
test "URI escaping" {
    const allocator = std.testing.allocator;
    const plain = "\\ö/ äöß ~~.adas-https://canvas:123/#ads&&sad";
    const escaped = "%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad";

    const result = try escapeString(allocator, plain);
    defer allocator.free(result);

    try std.testing.expectEqualSlices(u8, escaped, result);
}
|
||||
|
||||
// Unescaping the percent-encoded form below must give back the plain string.
test "URI unescaping" {
    const allocator = std.testing.allocator;
    const escaped = "%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad";
    const plain = "\\ö/ äöß ~~.adas-https://canvas:123/#ads&&sad";

    const result = try unescapeString(allocator, escaped);
    defer allocator.free(result);

    try std.testing.expectEqualSlices(u8, plain, result);
}
|
||||
@ -1,98 +0,0 @@
|
||||
scheme: []const u8,
|
||||
host: []const u8,
|
||||
path: []const u8,
|
||||
port: ?u16,
|
||||
|
||||
/// Splits `s` of the form `scheme://host[:port][/path]` into its parts.
/// All returned strings are slices of `s`. `path` starts at the first '/'
/// after the host (or port) and is empty when there is none. `port` is `null`
/// when absent or empty.
///
/// Returns `error.InvalidUrl` when the scheme is not followed by "://", and
/// the `std.fmt.parseInt` errors when the port is not a valid `u16`.
///
/// TODO: redo this implementation according to RFC 1738. This code is only a
/// placeholder for now.
pub fn parse(s: []const u8) !Url {
    var scheme_end: usize = 0;
    var host_start: usize = 0;
    var host_end: usize = 0;
    var path_start: usize = 0;
    var port_start: usize = 0;
    var port_end: usize = 0;
    var state: enum {
        scheme,
        scheme_slash1,
        scheme_slash2,
        host,
        port,
        path,
    } = .scheme;

    var i: usize = 0;
    while (i < s.len) : (i += 1) {
        const b = s[i];
        switch (state) {
            .scheme => switch (b) {
                ':' => {
                    state = .scheme_slash1;
                    scheme_end = i;
                },
                else => {},
            },
            .scheme_slash1 => switch (b) {
                '/' => {
                    state = .scheme_slash2;
                },
                else => return error.InvalidUrl,
            },
            .scheme_slash2 => switch (b) {
                '/' => {
                    state = .host;
                    host_start = i + 1;
                },
                else => return error.InvalidUrl,
            },
            .host => switch (b) {
                ':' => {
                    state = .port;
                    host_end = i;
                    port_start = i + 1;
                },
                '/' => {
                    state = .path;
                    host_end = i;
                    path_start = i;
                },
                else => {},
            },
            .port => switch (b) {
                '/' => {
                    port_end = i;
                    state = .path;
                    path_start = i;
                },
                else => {},
            },
            .path => {},
        }
    }

    // Close whichever component was still open when the input ended, so the
    // slices below never have an end before their start.
    switch (state) {
        // "://" was never completed: there is no host to report.
        .scheme, .scheme_slash1, .scheme_slash2 => return error.InvalidUrl,
        .host => {
            host_end = s.len;
            path_start = s.len;
        },
        .port => {
            port_end = s.len;
            path_start = s.len;
        },
        .path => {},
    }

    const port_slice = s[port_start..port_end];
    const port = if (port_slice.len == 0) null else try std.fmt.parseInt(u16, port_slice, 10);

    return .{
        .scheme = s[0..scheme_end],
        .host = s[host_start..host_end],
        .path = s[path_start..],
        .port = port,
    };
}
|
||||
|
||||
const Url = @This();
|
||||
const std = @import("std.zig");
|
||||
const testing = std.testing;
|
||||
|
||||
// An https URL with host and path but no explicit port.
test "basic" {
    const url = try parse("https://ziglang.org/download");
    const no_port: ?u16 = null;
    try testing.expectEqualStrings("https", url.scheme);
    try testing.expectEqualStrings("ziglang.org", url.host);
    try testing.expectEqualStrings("/download", url.path);
    try testing.expectEqual(no_port, url.port);
}
|
||||
|
||||
// An http URL with an explicit port and a root path.
test "with port" {
    const url = try parse("http://example:1337/");
    const expected_port: ?u16 = 1337;
    try testing.expectEqualStrings("http", url.scheme);
    try testing.expectEqualStrings("example", url.host);
    try testing.expectEqualStrings("/", url.path);
    try testing.expectEqual(expected_port, url.port);
}
|
||||
@ -1,7 +1,3 @@
|
||||
//! This API is a barely-touched, barely-functional http client, just the
|
||||
//! absolute minimum thing I needed in order to test `std.crypto.tls`. Bear
|
||||
//! with me and I promise the API will become useful and streamlined.
|
||||
//!
|
||||
//! TODO: send connection: keep-alive and LRU cache a configurable number of
|
||||
//! open connections to skip DNS and TLS handshake for subsequent requests.
|
||||
|
||||
@ -11,7 +7,7 @@ const assert = std.debug.assert;
|
||||
const http = std.http;
|
||||
const net = std.net;
|
||||
const Client = @This();
|
||||
const Url = std.Url;
|
||||
const Uri = std.Uri;
|
||||
const Allocator = std.mem.Allocator;
|
||||
const testing = std.testing;
|
||||
|
||||
@ -178,6 +174,8 @@ pub const Request = struct {
|
||||
seen_rnr,
|
||||
finished,
|
||||
/// Begin transfer-encoding: chunked parsing states.
|
||||
chunk_size_prefix_r,
|
||||
chunk_size_prefix_n,
|
||||
chunk_size,
|
||||
chunk_r,
|
||||
chunk_data,
|
||||
@ -382,6 +380,8 @@ pub const Request = struct {
|
||||
continue :state;
|
||||
},
|
||||
},
|
||||
.chunk_size_prefix_r => unreachable,
|
||||
.chunk_size_prefix_n => unreachable,
|
||||
.chunk_size => unreachable,
|
||||
.chunk_r => unreachable,
|
||||
.chunk_data => unreachable,
|
||||
@ -449,18 +449,6 @@ pub const Request = struct {
|
||||
try expectEqual(@as(u10, 999), parseInt3("999".*));
|
||||
}
|
||||
|
||||
/// Reinterprets the two bytes of `array` as a `u16` in native byte order.
inline fn int16(array: *const [2]u8) u16 {
    const bytes: [2]u8 = array.*;
    return @bitCast(u16, bytes);
}
|
||||
|
||||
/// Reinterprets the four bytes of `array` as a `u32` in native byte order.
inline fn int32(array: *const [4]u8) u32 {
    const bytes: [4]u8 = array.*;
    return @bitCast(u32, bytes);
}
|
||||
|
||||
/// Reinterprets the eight bytes of `array` as a `u64` in native byte order.
inline fn int64(array: *const [8]u8) u64 {
    const bytes: [8]u8 = array.*;
    return @bitCast(u64, bytes);
}
|
||||
|
||||
test "find headers end basic" {
|
||||
var buffer: [1]u8 = undefined;
|
||||
var r = Response.initStatic(&buffer);
|
||||
@ -480,6 +468,29 @@ pub const Request = struct {
|
||||
"\r\ncontent";
|
||||
try testing.expectEqual(@as(usize, 131), r.findHeadersEnd(example));
|
||||
}
|
||||
|
||||
// Regression test: for a real-world response head followed by body bytes,
// `findHeadersEnd` must return the index of the first byte after the blank
// line that ends the headers.
test "find headers end bug" {
    var buffer: [1]u8 = undefined;
    var r = Response.initStatic(&buffer);
    const trail = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
    const head =
        "HTTP/1.1 200 OK\r\n" ++
        "Access-Control-Allow-Origin: https://render.githubusercontent.com\r\n" ++
        "content-disposition: attachment; filename=zig-0.10.0.tar.gz\r\n" ++
        "Content-Security-Policy: default-src 'none'; style-src 'unsafe-inline'; sandbox\r\n" ++
        "Content-Type: application/x-gzip\r\n" ++
        "ETag: \"bfae0af6b01c7c0d89eb667cb5f0e65265968aeebda2689177e6b26acd3155ca\"\r\n" ++
        "Strict-Transport-Security: max-age=31536000\r\n" ++
        "Vary: Authorization,Accept-Encoding,Origin\r\n" ++
        "X-Content-Type-Options: nosniff\r\n" ++
        "X-Frame-Options: deny\r\n" ++
        "X-XSS-Protection: 1; mode=block\r\n" ++
        "Date: Fri, 06 Jan 2023 22:26:22 GMT\r\n" ++
        "Transfer-Encoding: chunked\r\n" ++
        "X-GitHub-Request-Id: 89C6:17E9:A7C9E:124B51:63B8A00E\r\n" ++
        "connection: close\r\n\r\n";
    const example = head ++ trail;
    const expected_end: usize = head.len;
    try testing.expectEqual(expected_end, r.findHeadersEnd(example));
}
|
||||
};
|
||||
|
||||
pub const Headers = struct {
|
||||
@ -536,8 +547,7 @@ pub const Request = struct {
|
||||
/// This one can return 0 without meaning EOF.
|
||||
/// TODO change to readvAdvanced
|
||||
pub fn readAdvanced(req: *Request, buffer: []u8) !usize {
|
||||
const amt = try req.connection.read(buffer);
|
||||
var in = buffer[0..amt];
|
||||
var in = buffer[0..try req.connection.read(buffer)];
|
||||
var out_index: usize = 0;
|
||||
while (true) {
|
||||
switch (req.response.state) {
|
||||
@ -559,7 +569,7 @@ pub const Request = struct {
|
||||
if (req.redirects_left == 0) return error.TooManyHttpRedirects;
|
||||
const location = req.response.headers.location orelse
|
||||
return error.HttpRedirectMissingLocation;
|
||||
const new_url = try std.Url.parse(location);
|
||||
const new_url = try std.Uri.parse(location);
|
||||
const new_req = try req.client.request(new_url, req.headers, .{
|
||||
.max_redirects = req.redirects_left - 1,
|
||||
.header_strategy = if (req.response.header_bytes_owned) .{
|
||||
@ -571,7 +581,8 @@ pub const Request = struct {
|
||||
req.deinit();
|
||||
req.* = new_req;
|
||||
assert(out_index == 0);
|
||||
return readAdvanced(req, buffer);
|
||||
in = buffer[0..try req.connection.read(buffer)];
|
||||
continue;
|
||||
}
|
||||
|
||||
if (req.response.headers.transfer_encoding) |transfer_encoding| {
|
||||
@ -598,8 +609,50 @@ pub const Request = struct {
|
||||
return 0;
|
||||
},
|
||||
.finished => {
|
||||
mem.copy(u8, buffer[out_index..], in);
|
||||
return out_index + in.len;
|
||||
if (in.ptr == buffer.ptr) {
|
||||
return in.len;
|
||||
} else {
|
||||
mem.copy(u8, buffer[out_index..], in);
|
||||
return out_index + in.len;
|
||||
}
|
||||
},
|
||||
.chunk_size_prefix_r => switch (in.len) {
|
||||
0 => return out_index,
|
||||
1 => switch (in[0]) {
|
||||
'\r' => {
|
||||
req.response.state = .chunk_size_prefix_n;
|
||||
return out_index;
|
||||
},
|
||||
else => {
|
||||
req.response.state = .invalid;
|
||||
return error.HttpHeadersInvalid;
|
||||
},
|
||||
},
|
||||
else => switch (int16(in[0..2])) {
|
||||
int16("\r\n") => {
|
||||
in = in[2..];
|
||||
req.response.state = .chunk_size;
|
||||
continue;
|
||||
},
|
||||
else => {
|
||||
req.response.state = .invalid;
|
||||
return error.HttpHeadersInvalid;
|
||||
},
|
||||
},
|
||||
},
|
||||
.chunk_size_prefix_n => switch (in.len) {
|
||||
0 => return out_index,
|
||||
else => switch (in[0]) {
|
||||
'\n' => {
|
||||
in = in[1..];
|
||||
req.response.state = .chunk_size;
|
||||
continue;
|
||||
},
|
||||
else => {
|
||||
req.response.state = .invalid;
|
||||
return error.HttpHeadersInvalid;
|
||||
},
|
||||
},
|
||||
},
|
||||
.chunk_size, .chunk_r => {
|
||||
const i = req.response.findChunkedLen(in);
|
||||
@ -619,20 +672,38 @@ pub const Request = struct {
|
||||
},
|
||||
.chunk_data => {
|
||||
const sub_amt = @min(req.response.next_chunk_length, in.len);
|
||||
req.response.next_chunk_length -= sub_amt;
|
||||
if (req.response.next_chunk_length > 0) {
|
||||
if (in.ptr == buffer.ptr) {
|
||||
return sub_amt;
|
||||
} else {
|
||||
mem.copy(u8, buffer[out_index..], in[0..sub_amt]);
|
||||
out_index += sub_amt;
|
||||
return out_index;
|
||||
}
|
||||
}
|
||||
mem.copy(u8, buffer[out_index..], in[0..sub_amt]);
|
||||
out_index += sub_amt;
|
||||
req.response.next_chunk_length -= sub_amt;
|
||||
if (req.response.next_chunk_length == 0) {
|
||||
req.response.state = .chunk_size;
|
||||
in = in[sub_amt..];
|
||||
continue;
|
||||
}
|
||||
return out_index;
|
||||
req.response.state = .chunk_size_prefix_r;
|
||||
in = in[sub_amt..];
|
||||
continue;
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Reinterprets the two bytes of `array` as a `u16` in native byte order.
inline fn int16(array: *const [2]u8) u16 {
    const bytes: [2]u8 = array.*;
    return @bitCast(u16, bytes);
}
|
||||
|
||||
/// Reinterprets the four bytes of `array` as a `u32` in native byte order.
inline fn int32(array: *const [4]u8) u32 {
    const bytes: [4]u8 = array.*;
    return @bitCast(u32, bytes);
}
|
||||
|
||||
/// Reinterprets the eight bytes of `array` as a `u64` in native byte order.
inline fn int64(array: *const [8]u8) u64 {
    const bytes: [8]u8 = array.*;
    return @bitCast(u64, bytes);
}
|
||||
|
||||
test {
    // Reference `Response` from a test block so that the tests declared
    // inside it are picked up when this file's tests run.
    _ = Response;
}
|
||||
@ -663,23 +734,25 @@ pub fn connect(client: *Client, host: []const u8, port: u16, protocol: Connectio
|
||||
return conn;
|
||||
}
|
||||
|
||||
pub fn request(client: *Client, url: Url, headers: Request.Headers, options: Request.Options) !Request {
|
||||
const protocol: Connection.Protocol = if (mem.eql(u8, url.scheme, "http"))
|
||||
pub fn request(client: *Client, uri: Uri, headers: Request.Headers, options: Request.Options) !Request {
|
||||
const protocol: Connection.Protocol = if (mem.eql(u8, uri.scheme, "http"))
|
||||
.plain
|
||||
else if (mem.eql(u8, url.scheme, "https"))
|
||||
else if (mem.eql(u8, uri.scheme, "https"))
|
||||
.tls
|
||||
else
|
||||
return error.UnsupportedUrlScheme;
|
||||
|
||||
const port: u16 = url.port orelse switch (protocol) {
|
||||
const port: u16 = uri.port orelse switch (protocol) {
|
||||
.plain => 80,
|
||||
.tls => 443,
|
||||
};
|
||||
|
||||
const host = uri.host orelse return error.UriMissingHost;
|
||||
|
||||
var req: Request = .{
|
||||
.client = client,
|
||||
.headers = headers,
|
||||
.connection = try client.connect(url.host, port, protocol),
|
||||
.connection = try client.connect(host, port, protocol),
|
||||
.redirects_left = options.max_redirects,
|
||||
.response = switch (options.header_strategy) {
|
||||
.dynamic => |max| Request.Response.initDynamic(max),
|
||||
@ -691,11 +764,11 @@ pub fn request(client: *Client, url: Url, headers: Request.Headers, options: Req
|
||||
var h = try std.BoundedArray(u8, 1000).init(0);
|
||||
try h.appendSlice(@tagName(headers.method));
|
||||
try h.appendSlice(" ");
|
||||
try h.appendSlice(url.path);
|
||||
try h.appendSlice(uri.path);
|
||||
try h.appendSlice(" ");
|
||||
try h.appendSlice(@tagName(headers.version));
|
||||
try h.appendSlice("\r\nHost: ");
|
||||
try h.appendSlice(url.host);
|
||||
try h.appendSlice(host);
|
||||
try h.appendSlice("\r\nConnection: close\r\n\r\n");
|
||||
|
||||
const header_bytes = h.slice();
|
||||
|
||||
@ -42,7 +42,7 @@ pub const Target = @import("target.zig").Target;
|
||||
pub const Thread = @import("Thread.zig");
|
||||
pub const Treap = @import("treap.zig").Treap;
|
||||
pub const Tz = tz.Tz;
|
||||
pub const Url = @import("Url.zig");
|
||||
pub const Uri = @import("Uri.zig");
|
||||
|
||||
pub const array_hash_map = @import("array_hash_map.zig");
|
||||
pub const atomic = @import("atomic.zig");
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user