From 50c52e013541550c7c89c30d336bc4991218f888 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 5 Sep 2020 11:22:12 +0200 Subject: [PATCH 01/12] Use boyer-moore-horspool algorithm for indexOfPos and lastIndexOf --- lib/std/mem.zig | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 71190069a8..36d83b3e40 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -850,29 +850,38 @@ pub fn indexOfAnyPos(comptime T: type, slice: []const T, start_index: usize, val pub fn indexOf(comptime T: type, haystack: []const T, needle: []const T) ?usize { return indexOfPos(T, haystack, 0, needle); } - /// Find the index in a slice of a sub-slice, searching from the end backwards. /// To start looking at a different index, slice the haystack first. -/// TODO is there even a better algorithm for this? +// Reverse boyer-moore-horspool algorithm pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?usize { - if (needle.len > haystack.len) return null; + if (needle.len > haystack.len or needle.len == 0) return null; - var i: usize = haystack.len - needle.len; - while (true) : (i -= 1) { - if (mem.eql(T, haystack[i .. i + needle.len], needle)) return i; - if (i == 0) return null; + var i: usize = needle.len - 1; + while (i < haystack.len) { + const reverseIndex = haystack.len - i - 1; + if (indexOfScalar(T, needle, haystack[reverseIndex])) |index| { + const haystackIndex = reverseIndex - index; + if (haystackIndex + needle.len <= haystack.len and mem.eql(T, haystack[haystackIndex .. 
haystackIndex + needle.len], needle)) return haystackIndex; + } + i += needle.len; } + + return null; } -// TODO boyer-moore algorithm +// Boyer-moore-horspool algorithm pub fn indexOfPos(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize { - if (needle.len > haystack.len) return null; + if (needle.len > haystack.len or needle.len == 0) return null; - var i: usize = start_index; - const end = haystack.len - needle.len; - while (i <= end) : (i += 1) { - if (eql(T, haystack[i .. i + needle.len], needle)) return i; + var i: usize = start_index + needle.len - 1; + while (i < haystack.len) { + if (lastIndexOfScalar(T, needle, haystack[i])) |index| { + const haystackIndex = i - index; + if (haystackIndex + needle.len <= haystack.len and mem.eql(T, haystack[haystackIndex .. haystackIndex + needle.len], needle)) return haystackIndex; + } + i += needle.len; } + return null; } From e297b4815ca6a30c6d20fc8dc8aa186faa1384b7 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 5 Sep 2020 11:24:05 +0200 Subject: [PATCH 02/12] Create skipping table Also fallback to naive implementation if haystack is small or if the needle is small or if sizeof type is not 1. --- lib/std/mem.zig | 72 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 17 deletions(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 36d83b3e40..681a7f9fc8 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -850,20 +850,56 @@ pub fn indexOfAnyPos(comptime T: type, slice: []const T, start_index: usize, val pub fn indexOf(comptime T: type, haystack: []const T, needle: []const T) ?usize { return indexOfPos(T, haystack, 0, needle); } + +/// Find the index in a slice of a sub-slice, searching from the end backwards. +/// To start looking at a different index, slice the haystack first. 
+fn lastIndexOfNaive(comptime T: type, haystack: []const T, needle: []const T) ?usize { + if (needle.len > haystack.len) return null; + + var i: usize = haystack.len - needle.len; + while (true) : (i -= 1) { + if (mem.eql(T, haystack[i .. i + needle.len], needle)) return i; + if (i == 0) return null; + } +} + +fn indexOfPosNaive(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize { + if (needle.len > haystack.len) return null; + + var i: usize = start_index; + const end = haystack.len - needle.len; + while (i <= end) : (i += 1) { + if (eql(T, haystack[i .. i + needle.len], needle)) return i; + } + return null; +} + +fn boyerMooreHorspoolPreprocess(pattern: []const u8, table: []usize) void { + for (table) |*c| { + c.* = pattern.len; + } + + var i: usize = 0; + while (i < pattern.len - 1) : (i += 1) { + table[pattern[i]] = pattern.len - 1 - i; + } +} /// Find the index in a slice of a sub-slice, searching from the end backwards. /// To start looking at a different index, slice the haystack first. // Reverse boyer-moore-horspool algorithm pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?usize { - if (needle.len > haystack.len or needle.len == 0) return null; + if (T != u8 or haystack.len < 32 or needle.len <= 2) + return lastIndexOfNaive(T, haystack, needle); - var i: usize = needle.len - 1; - while (i < haystack.len) { - const reverseIndex = haystack.len - i - 1; - if (indexOfScalar(T, needle, haystack[reverseIndex])) |index| { - const haystackIndex = reverseIndex - index; - if (haystackIndex + needle.len <= haystack.len and mem.eql(T, haystack[haystackIndex .. 
haystackIndex + needle.len], needle)) return haystackIndex; - } - i += needle.len; + if (needle.len > haystack.len or needle.len == 0) return null; + var table: [256]usize = undefined; + boyerMooreHorspoolPreprocess(needle, table[0..]); + + var i: usize = 0; + while (i <= haystack.len - needle.len) { + const reverseIndex = haystack.len - i - needle.len - 1; + if (mem.eql(T, haystack[reverseIndex .. reverseIndex + needle.len], needle)) return i; + i += table[haystack[reverseIndex + needle.len - 1]]; } return null; @@ -871,15 +907,17 @@ pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?us // Boyer-moore-horspool algorithm pub fn indexOfPos(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize { - if (needle.len > haystack.len or needle.len == 0) return null; + if (T != u8 or haystack.len < 32 or needle.len <= 2) + return indexOfPosNaive(T, haystack, start_index, needle); - var i: usize = start_index + needle.len - 1; - while (i < haystack.len) { - if (lastIndexOfScalar(T, needle, haystack[i])) |index| { - const haystackIndex = i - index; - if (haystackIndex + needle.len <= haystack.len and mem.eql(T, haystack[haystackIndex .. haystackIndex + needle.len], needle)) return haystackIndex; - } - i += needle.len; + if (needle.len > haystack.len or needle.len == 0) return null; + var table: [256]usize = undefined; + boyerMooreHorspoolPreprocess(needle, table[0..]); + + var i: usize = start_index; + while (i <= haystack.len - needle.len) { + if (mem.eql(T, haystack[i .. 
i + needle.len], needle)) return i; + i += table[haystack[i + needle.len - 1]]; } return null; From d012507a8fc911b662085a70bee2d580a404bb81 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 5 Sep 2020 11:46:41 +0200 Subject: [PATCH 03/12] Use boyer-moore-horspool with all types of T --- lib/std/mem.zig | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 681a7f9fc8..cc78de0563 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -888,18 +888,22 @@ fn boyerMooreHorspoolPreprocess(pattern: []const u8, table: []usize) void { /// To start looking at a different index, slice the haystack first. // Reverse boyer-moore-horspool algorithm pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?usize { - if (T != u8 or haystack.len < 32 or needle.len <= 2) + if (haystack.len < 32 or needle.len <= 2) return lastIndexOfNaive(T, haystack, needle); if (needle.len > haystack.len or needle.len == 0) return null; + + const haystackU8 = @bitCast([]const u8, haystack); + const needleU8 = @bitCast([]const u8, needle); + var table: [256]usize = undefined; - boyerMooreHorspoolPreprocess(needle, table[0..]); + boyerMooreHorspoolPreprocess(needleU8, table[0..]); var i: usize = 0; - while (i <= haystack.len - needle.len) { - const reverseIndex = haystack.len - i - needle.len - 1; - if (mem.eql(T, haystack[reverseIndex .. reverseIndex + needle.len], needle)) return i; - i += table[haystack[reverseIndex + needle.len - 1]]; + while (i <= haystackU8.len - needleU8.len) { + const reverseIndex = haystackU8.len - i - needleU8.len - 1; + if (mem.eql(u8, haystackU8[reverseIndex .. 
reverseIndex + needleU8.len], needleU8)) return i; + i += table[haystackU8[reverseIndex + needleU8.len - 1]]; } return null; @@ -907,17 +911,21 @@ pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?us // Boyer-moore-horspool algorithm pub fn indexOfPos(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize { - if (T != u8 or haystack.len < 32 or needle.len <= 2) + if (haystack.len < 32 or needle.len <= 2) return indexOfPosNaive(T, haystack, start_index, needle); if (needle.len > haystack.len or needle.len == 0) return null; + + const haystackU8 = @bitCast([]const u8, haystack); + const needleU8 = @bitCast([]const u8, needle); + var table: [256]usize = undefined; - boyerMooreHorspoolPreprocess(needle, table[0..]); + boyerMooreHorspoolPreprocess(needleU8, table[0..]); var i: usize = start_index; - while (i <= haystack.len - needle.len) { - if (mem.eql(T, haystack[i .. i + needle.len], needle)) return i; - i += table[haystack[i + needle.len - 1]]; + while (i <= haystackU8.len - needleU8.len) { + if (mem.eql(u8, haystackU8[i .. i + needleU8.len], needleU8)) return i; + i += table[haystackU8[i + needleU8.len - 1]]; } return null; From f65f3d24f8c6fcdc54da3a904999405508f0e706 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 5 Sep 2020 12:57:05 +0200 Subject: [PATCH 04/12] Only use boyer-moore-horspool for types whose size is a power of 2 --- lib/std/mem.zig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index cc78de0563..30e3496dea 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -888,13 +888,13 @@ fn boyerMooreHorspoolPreprocess(pattern: []const u8, table: []usize) void { /// To start looking at a different index, slice the haystack first. 
// Reverse boyer-moore-horspool algorithm pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?usize { - if (haystack.len < 32 or needle.len <= 2) + if (!isValidAlign(T.bit_count) or haystack.len < 32 or needle.len <= 2) return lastIndexOfNaive(T, haystack, needle); if (needle.len > haystack.len or needle.len == 0) return null; - const haystackU8 = @bitCast([]const u8, haystack); - const needleU8 = @bitCast([]const u8, needle); + const haystackU8 = sliceAsBytes(haystack); + const needleU8 = sliceAsBytes(needle); var table: [256]usize = undefined; boyerMooreHorspoolPreprocess(needleU8, table[0..]); @@ -911,13 +911,13 @@ pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?us // Boyer-moore-horspool algorithm pub fn indexOfPos(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize { - if (haystack.len < 32 or needle.len <= 2) + if (!isValidAlign(T.bit_count) or haystack.len < 32 or needle.len <= 2) return indexOfPosNaive(T, haystack, start_index, needle); if (needle.len > haystack.len or needle.len == 0) return null; - const haystackU8 = @bitCast([]const u8, haystack); - const needleU8 = @bitCast([]const u8, needle); + const haystackU8 = sliceAsBytes(haystack); + const needleU8 = sliceAsBytes(needle); var table: [256]usize = undefined; boyerMooreHorspoolPreprocess(needleU8, table[0..]); From a394a6300cb79ced98d445917c8c73937107e4d1 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 5 Sep 2020 14:17:38 +0200 Subject: [PATCH 05/12] Fix lastIndexOf and add tests that do not fallback to linear search Use sliceAsBytes to convert []const T to []const u8. Cleanup. 
--- lib/std/mem.zig | 69 +++++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 30e3496dea..dc06ed8b61 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -853,9 +853,7 @@ pub fn indexOf(comptime T: type, haystack: []const T, needle: []const T) ?usize /// Find the index in a slice of a sub-slice, searching from the end backwards. /// To start looking at a different index, slice the haystack first. -fn lastIndexOfNaive(comptime T: type, haystack: []const T, needle: []const T) ?usize { - if (needle.len > haystack.len) return null; - +fn lastIndexOfLinear(comptime T: type, haystack: []const T, needle: []const T) ?usize { var i: usize = haystack.len - needle.len; while (true) : (i -= 1) { if (mem.eql(T, haystack[i .. i + needle.len], needle)) return i; @@ -863,9 +861,7 @@ fn lastIndexOfNaive(comptime T: type, haystack: []const T, needle: []const T) ?u } } -fn indexOfPosNaive(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize { - if (needle.len > haystack.len) return null; - +fn indexOfPosLinear(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize { var i: usize = start_index; const end = haystack.len - needle.len; while (i <= end) : (i += 1) { @@ -874,6 +870,17 @@ fn indexOfPosNaive(comptime T: type, haystack: []const T, start_index: usize, ne return null; } +fn boyerMooreHorspoolPreprocessReverse(pattern: []const u8, table: []usize) void { + for (table) |*c| { + c.* = pattern.len; + } + + var i: usize = pattern.len - 1; + while (i > 0) : (i -= 1) { + table[pattern[i]] = i; + } +} + fn boyerMooreHorspoolPreprocess(pattern: []const u8, table: []usize) void { for (table) |*c| { c.* = pattern.len; @@ -888,22 +895,23 @@ fn boyerMooreHorspoolPreprocess(pattern: []const u8, table: []usize) void { /// To start looking at a different index, slice the haystack first. 
// Reverse boyer-moore-horspool algorithm pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?usize { - if (!isValidAlign(T.bit_count) or haystack.len < 32 or needle.len <= 2) - return lastIndexOfNaive(T, haystack, needle); - if (needle.len > haystack.len or needle.len == 0) return null; - const haystackU8 = sliceAsBytes(haystack); - const needleU8 = sliceAsBytes(needle); + if (!isValidAlign(T.bit_count) or haystack.len < 32 or needle.len <= 2) + return lastIndexOfLinear(T, haystack, needle); - var table: [256]usize = undefined; - boyerMooreHorspoolPreprocess(needleU8, table[0..]); + const haystack_bytes = sliceAsBytes(haystack); + const needle_bytes = sliceAsBytes(needle); - var i: usize = 0; - while (i <= haystackU8.len - needleU8.len) { - const reverseIndex = haystackU8.len - i - needleU8.len - 1; - if (mem.eql(u8, haystackU8[reverseIndex .. reverseIndex + needleU8.len], needleU8)) return i; - i += table[haystackU8[reverseIndex + needleU8.len - 1]]; + var skip_table: [256]usize = undefined; + boyerMooreHorspoolPreprocessReverse(needle_bytes, skip_table[0..]); + + var i: usize = haystack_bytes.len - needle_bytes.len; + while (true) { + if (mem.eql(u8, haystack_bytes[i .. 
i + needle_bytes.len], needle_bytes)) return i; + const skip = skip_table[haystack_bytes[i]]; + if (skip > i) break; + i -= skip; } return null; @@ -911,27 +919,32 @@ pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?us // Boyer-moore-horspool algorithm pub fn indexOfPos(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize { - if (!isValidAlign(T.bit_count) or haystack.len < 32 or needle.len <= 2) - return indexOfPosNaive(T, haystack, start_index, needle); - if (needle.len > haystack.len or needle.len == 0) return null; - const haystackU8 = sliceAsBytes(haystack); - const needleU8 = sliceAsBytes(needle); + if (!isValidAlign(T.bit_count) or haystack.len < 32 or needle.len <= 2) + return indexOfPosLinear(T, haystack, start_index, needle); - var table: [256]usize = undefined; - boyerMooreHorspoolPreprocess(needleU8, table[0..]); + const haystack_bytes = sliceAsBytes(haystack); + const needle_bytes = sliceAsBytes(needle); + + var skip_table: [256]usize = undefined; + boyerMooreHorspoolPreprocess(needle_bytes, skip_table[0..]); var i: usize = start_index; - while (i <= haystackU8.len - needleU8.len) { - if (mem.eql(u8, haystackU8[i .. i + needleU8.len], needleU8)) return i; - i += table[haystackU8[i + needleU8.len - 1]]; + while (i <= haystack_bytes.len - needle_bytes.len) { + if (mem.eql(u8, haystack_bytes[i .. i + needle_bytes.len], needle_bytes)) return i; + i += skip_table[haystack_bytes[i + needle_bytes.len - 1]]; } return null; } test "mem.indexOf" { + testing.expect(indexOf(u8, "one two three four five six seven eight nine ten", "three four").? == 8); + testing.expect(lastIndexOf(u8, "one two three four five six seven eight nine ten", "three four").? 
== 8); + testing.expect(indexOf(u8, "one two three four five six seven eight nine ten", "two two") == null); + testing.expect(lastIndexOf(u8, "one two three four five six seven eight nine ten", "two two") == null); + testing.expect(indexOf(u8, "one two three four", "four").? == 14); testing.expect(lastIndexOf(u8, "one two three two four", "two").? == 14); testing.expect(indexOf(u8, "one two three four", "gour") == null); From 997451da03f25de310fc9f19393b19fbb5ac0062 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 5 Sep 2020 14:25:44 +0200 Subject: [PATCH 06/12] Make preprocess function take comptime sized slice --- lib/std/mem.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index dc06ed8b61..5833bf9670 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -870,7 +870,7 @@ fn indexOfPosLinear(comptime T: type, haystack: []const T, start_index: usize, n return null; } -fn boyerMooreHorspoolPreprocessReverse(pattern: []const u8, table: []usize) void { +fn boyerMooreHorspoolPreprocessReverse(pattern: []const u8, table: *[256]usize) void { for (table) |*c| { c.* = pattern.len; } @@ -881,7 +881,7 @@ fn boyerMooreHorspoolPreprocessReverse(pattern: []const u8, table: []usize) void } } -fn boyerMooreHorspoolPreprocess(pattern: []const u8, table: []usize) void { +fn boyerMooreHorspoolPreprocess(pattern: []const u8, table: *[256]usize) void { for (table) |*c| { c.* = pattern.len; } From db51821a97bfa18ad897713087e3e59b14ae4b21 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 5 Sep 2020 14:52:16 +0200 Subject: [PATCH 07/12] Remove type size check, looks like it's not needed Add check if the type is float. Float byte comparison doesn't work because +0.0 and -0.0 are considered equal but their byte representations are not equal. 
--- lib/std/mem.zig | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 5833bf9670..cd1000d5e3 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -897,7 +897,9 @@ fn boyerMooreHorspoolPreprocess(pattern: []const u8, table: *[256]usize) void { pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?usize { if (needle.len > haystack.len or needle.len == 0) return null; - if (!isValidAlign(T.bit_count) or haystack.len < 32 or needle.len <= 2) + // byte comparison with float doesn't work because +0.0 and -0.0 and considered + // equal but their byte representations are not equal. + if (@typeInfo(T) == .Float or haystack.len < 32 or needle.len <= 2) return lastIndexOfLinear(T, haystack, needle); const haystack_bytes = sliceAsBytes(haystack); @@ -921,7 +923,9 @@ pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?us pub fn indexOfPos(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize { if (needle.len > haystack.len or needle.len == 0) return null; - if (!isValidAlign(T.bit_count) or haystack.len < 32 or needle.len <= 2) + // byte comparison with float doesn't work because +0.0 and -0.0 and considered + // equal but their byte representations are not equal. 
+ if (@typeInfo(T) == .Float or haystack.len < 32 or needle.len <= 2) return indexOfPosLinear(T, haystack, start_index, needle); const haystack_bytes = sliceAsBytes(haystack); From 3a6e6bd538858d2aa271b8975a6e1f742fec144f Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 5 Sep 2020 15:20:48 +0200 Subject: [PATCH 08/12] Check if the type has unique bit representation to see if it can be compared byte-wise --- lib/std/mem.zig | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index cd1000d5e3..0939bebe98 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -897,9 +897,7 @@ fn boyerMooreHorspoolPreprocess(pattern: []const u8, table: *[256]usize) void { pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?usize { if (needle.len > haystack.len or needle.len == 0) return null; - // byte comparison with float doesn't work because +0.0 and -0.0 and considered - // equal but their byte representations are not equal. - if (@typeInfo(T) == .Float or haystack.len < 32 or needle.len <= 2) + if (!meta.trait.hasUniqueRepresentation(T) or haystack.len < 32 or needle.len <= 2) return lastIndexOfLinear(T, haystack, needle); const haystack_bytes = sliceAsBytes(haystack); @@ -923,9 +921,7 @@ pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?us pub fn indexOfPos(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize { if (needle.len > haystack.len or needle.len == 0) return null; - // byte comparison with float doesn't work because +0.0 and -0.0 and considered - // equal but their byte representations are not equal. 
- if (@typeInfo(T) == .Float or haystack.len < 32 or needle.len <= 2) + if (!meta.trait.hasUniqueRepresentation(T) or haystack.len < 32 or needle.len <= 2) return indexOfPosLinear(T, haystack, start_index, needle); const haystack_bytes = sliceAsBytes(haystack); From f93498d2d87ff03ffac95c814cc46b6994416b55 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 5 Sep 2020 15:31:22 +0200 Subject: [PATCH 09/12] Fix start index if @sizeOf(T) > 1 --- lib/std/mem.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 0939bebe98..b6b50bc32d 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -930,7 +930,7 @@ pub fn indexOfPos(comptime T: type, haystack: []const T, start_index: usize, nee var skip_table: [256]usize = undefined; boyerMooreHorspoolPreprocess(needle_bytes, skip_table[0..]); - var i: usize = start_index; + var i: usize = start_index * @sizeOf(T); while (i <= haystack_bytes.len - needle_bytes.len) { if (mem.eql(u8, haystack_bytes[i .. i + needle_bytes.len], needle_bytes)) return i; i += skip_table[haystack_bytes[i + needle_bytes.len - 1]]; From 0a016e8fc29467e48cfc04b1ee829bbd254b2d6d Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 5 Sep 2020 16:55:32 +0200 Subject: [PATCH 10/12] Fix indexOf and lastIndexOf with empty needle --- lib/std/mem.zig | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index b6b50bc32d..846cca44b3 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -895,7 +895,8 @@ fn boyerMooreHorspoolPreprocess(pattern: []const u8, table: *[256]usize) void { /// To start looking at a different index, slice the haystack first. 
// Reverse boyer-moore-horspool algorithm pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?usize { - if (needle.len > haystack.len or needle.len == 0) return null; + if (needle.len > haystack.len) return null; + if (needle.len == 0) return haystack.len; if (!meta.trait.hasUniqueRepresentation(T) or haystack.len < 32 or needle.len <= 2) return lastIndexOfLinear(T, haystack, needle); @@ -919,7 +920,8 @@ pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?us // Boyer-moore-horspool algorithm pub fn indexOfPos(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize { - if (needle.len > haystack.len or needle.len == 0) return null; + if (needle.len > haystack.len) return null; + if (needle.len == 0) return 0; if (!meta.trait.hasUniqueRepresentation(T) or haystack.len < 32 or needle.len <= 2) return indexOfPosLinear(T, haystack, start_index, needle); @@ -945,6 +947,9 @@ test "mem.indexOf" { testing.expect(indexOf(u8, "one two three four five six seven eight nine ten", "two two") == null); testing.expect(lastIndexOf(u8, "one two three four five six seven eight nine ten", "two two") == null); + testing.expect(indexOf(u8, "one two three four five six seven eight nine ten", "").? == 0); + testing.expect(lastIndexOf(u8, "one two three four five six seven eight nine ten", "").? == 48); + testing.expect(indexOf(u8, "one two three four", "four").? == 14); testing.expect(lastIndexOf(u8, "one two three two four", "two").? 
== 14); testing.expect(indexOf(u8, "one two three four", "gour") == null); From 8af1f8ba1ad43a750e0040ac845b331241ead329 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 5 Sep 2020 21:15:44 +0200 Subject: [PATCH 11/12] Add comment to clarify why the first/last element in preprocess is skipped --- lib/std/mem.zig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 846cca44b3..33d021eded 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -876,6 +876,8 @@ fn boyerMooreHorspoolPreprocessReverse(pattern: []const u8, table: *[256]usize) } var i: usize = pattern.len - 1; + // The first item is intentionally ignored and the skip size will be pattern.len. + // This is the standard way boyer-moore-horspool is implemented. while (i > 0) : (i -= 1) { table[pattern[i]] = i; } @@ -887,6 +889,8 @@ fn boyerMooreHorspoolPreprocess(pattern: []const u8, table: *[256]usize) void { } var i: usize = 0; + // The last item is intentionally ignored and the skip size will be pattern.len. + // This is the standard way boyer-moore-horspool is implemented. 
while (i < pattern.len - 1) : (i += 1) { table[pattern[i]] = pattern.len - 1 - i; } From ff58f09b68de08a5fe33177f1874c677c762c1c0 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sun, 6 Sep 2020 16:23:18 +0200 Subject: [PATCH 12/12] Use better haystack size and needle size for cutoff between linear and BMH --- lib/std/mem.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 33d021eded..a1951943d6 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -927,7 +927,7 @@ pub fn indexOfPos(comptime T: type, haystack: []const T, start_index: usize, nee if (needle.len > haystack.len) return null; if (needle.len == 0) return 0; - if (!meta.trait.hasUniqueRepresentation(T) or haystack.len < 32 or needle.len <= 2) + if (!meta.trait.hasUniqueRepresentation(T) or haystack.len < 52 or needle.len <= 4) return indexOfPosLinear(T, haystack, start_index, needle); const haystack_bytes = sliceAsBytes(haystack);