提交 431facd2 编写于 作者: chai2010's avatar chai2010

bytes 包更多测试

上级 80bc37c9
// 版权 @2023 凹语言 作者。保留所有权利。
// bytealg_MaxLen is the maximum length of the string to be searched for (argument b) in bytealg_Index.
// If bytealg_MaxLen is not 0, make sure bytealg_MaxLen >= 4.
// Index only takes its bytealg_MaxLen branch for separators of length >= 2,
// so a value of 0 keeps that branch disabled.
global bytealg_MaxLen: int
// FIXME: the logic of HashStrBytes, HashStrRevBytes, IndexRabinKarpBytes and HashStr, HashStrRev,
// IndexRabinKarp are exactly the same, except that the types are different. Can we eliminate
// three of them without causing allocation?
// bytealg_PrimeRK is the prime base used in the Rabin-Karp algorithm.
const bytealg_PrimeRK = 16777619
// bytealg_MaxBruteForce is the haystack-length limit named by the brute-force
// shortcut in Index (`len(s) <= bytealg.MaxBruteForce`), which is currently
// commented out.
const bytealg_MaxBruteForce = 0
// bytealg_Index returns the index of the first instance of b in a, or -1 if b is not present in a.
// Requires 2 <= len(b) <= bytealg_MaxLen.
//
// NOTE: not implemented yet; every call panics with "unimplemented".
func bytealg_Index(a, b: []byte) => int {
panic("unimplemented")
}
// bytealg_IndexString returns the index of the first instance of b in a, or -1 if b is not present in a.
// Requires 2 <= len(b) <= bytealg_MaxLen.
//
// NOTE: not implemented yet; every call panics with "unimplemented".
func bytealg_IndexString(a, b: string) => int {
panic("unimplemented")
}
// bytealg_Cutover reports the number of failures of IndexByte we should tolerate
// before switching over to bytealg_Index.
// n is the number of bytes processed so far.
// See the bytes.Index implementation for details.
//
// NOTE: not implemented yet; every call panics with "unimplemented".
func bytealg_Cutover(n: int) => int {
panic("unimplemented")
}
\ No newline at end of file
// 版权 @2023 凹语言 作者。保留所有权利。
import (
"unicode/utf8"
)
// Equal reports whether a and b
// are the same length and contain the same bytes.
// A nil argument is equivalent to an empty slice.
......@@ -24,8 +28,96 @@ func EqualFold(s, t: []byte) => bool {
return true
}
func Index(d: []byte, x: []byte) => int {
return 0
// Index returns the index of the first instance of sep in s, or -1 if sep
// is not present in s.
//
// Special cases:
//   - an empty sep matches at index 0;
//   - a one-byte sep is delegated to IndexByte;
//   - a sep exactly as long as s reduces to a single Equal call;
//   - a sep longer than s can never match.
//
// In the general case (2 <= len(sep) < len(s)) the function looks for the
// first byte of sep with IndexByte and verifies every candidate position
// with Equal.
//
// The accelerated paths of Go's bytes.Index (bytealg.Index for short
// separators and the Rabin-Karp cutover after many false positives) are not
// ported yet. Because there is nothing to cut over to, the scan must always
// run to completion: giving up early would report -1 for a separator that is
// actually present.
func Index(s, sep: []byte) => int {
	n := len(sep)
	switch {
	case n == 0:
		return 0
	case n == 1:
		return IndexByte(s, sep[0])
	case n == len(s):
		if Equal(sep, s) {
			return 0
		}
		return -1
	case n > len(s):
		return -1
	}

	c0 := sep[0]
	c1 := sep[1]
	// t is one past the last position at which sep could still fit in s.
	t := len(s) - n + 1
	i := 0
	for i < t {
		if s[i] != c0 {
			// Jump straight to the next occurrence of the first byte.
			// The search window ends at t, so the new i stays below t.
			o := IndexByte(s[i+1:t], c0)
			if o < 0 {
				return -1
			}
			i += o + 1
		}
		// Cheap second-byte check before the full comparison.
		// i+1 is in range because n >= 2 and i < t.
		if s[i+1] == c1 && Equal(s[i:i+n], sep) {
			return i
		}
		i++
	}
	return -1
}
func IndexByte(b: []byte, c: byte) => int {
......@@ -132,3 +224,40 @@ func Compare(a, b: []byte) => int {
}
return 0
}
// LastIndexByte walks s backwards and returns the index of the last byte
// equal to c, or -1 when c does not occur in s (including when s is empty).
func LastIndexByte(s: []byte, c: byte) => int {
	pos := len(s)
	for pos > 0 {
		pos--
		if s[pos] == c {
			return pos
		}
	}
	return -1
}
// IndexRune treats s as UTF-8 text and returns the byte offset of the first
// occurrence of r, or -1 if r does not occur.
//
// Runes below utf8.RuneSelf are looked up as a single byte. For
// utf8.RuneError the result is the offset of the first position where
// decoding yields utf8.RuneError. Runes for which utf8.ValidRune is false
// never match. Every other rune is encoded to UTF-8 and searched for with
// Index.
func IndexRune(s: []byte, r: rune) => int {
	// Single-byte runes: a plain byte search is enough.
	if 0 <= r && r < utf8.RuneSelf {
		return IndexByte(s, byte(r))
	}

	// RuneError: decode rune by rune until one decodes to RuneError.
	if r == utf8.RuneError {
		pos := 0
		for pos < len(s) {
			decoded, width := utf8.DecodeRune(s[pos:])
			if decoded == utf8.RuneError {
				return pos
			}
			pos += width
		}
		return -1
	}

	// Values that are not valid runes cannot appear in s.
	if !utf8.ValidRune(r) {
		return -1
	}

	// Multi-byte rune: search for its UTF-8 encoding.
	enc: [utf8.UTFMax]byte
	size := utf8.EncodeRune(enc[:], r)
	return Index(s, enc[:size])
}
// 版权 @2023 凹语言 作者。保留所有权利。
import (
"unicode/utf8"
)
// BinOpTest is one table entry for tests of two-operand functions:
// the two string operands and the expected integer result.
type BinOpTest struct {
// a is the first operand; the index tests use it as the text searched in.
a: string
// b is the second operand; the index tests use it as the text searched for.
b: string
// i is the expected result.
i: int
}
func TestEqual {
for _, tt := range compareTests {
eql := Equal(tt.a, tt.b)
......@@ -10,6 +20,307 @@ func TestEqual {
}
}
// TestEqualExhaustive checks that Equal reports true, in both argument
// orders, for every pair of equal-content windows of every length taken at
// every pair of offsets inside two small buffers.
func TestEqualExhaustive {
	// The original size of 128 was only kept when a (disabled) short-mode
	// check was false; the reduced size is always used.
	size := 32

	a := make([]byte, size)
	b := make([]byte, size)
	bInit := make([]byte, size)

	// randomish but deterministic data
	for k := 0; k < size; k++ {
		a[k] = byte(17 * k)
		bInit[k] = byte(23*k + 100)
	}

	for n := 0; n <= size; n++ {
		for x := 0; x+n <= size; x++ {
			for y := 0; y+n <= size; y++ {
				// Restore b, then plant a copy of a's window at offset y.
				copy(b, bInit)
				copy(b[y:y+n], a[x:x+n])

				left := a[x : x+n]
				right := b[y : y+n]
				assert(Equal(left, right) && Equal(right, left))
			}
		}
	}
}
// TestNotEqual checks that Equal reports false, in both argument orders,
// for windows that differ in exactly one byte: both buffers are all zeros
// except for a single 1 placed at each position of a's window in turn.
func TestNotEqual {
	// The original size of 128 was only kept when a (disabled) short-mode
	// check was false; the reduced size is always used.
	size := 32

	a := make([]byte, size)
	b := make([]byte, size)

	for n := 0; n <= size; n++ {
		for x := 0; x+n <= size; x++ {
			for y := 0; y+n <= size; y++ {
				left := a[x : x+n]
				right := b[y : y+n]
				for d := 0; d < n; d++ {
					left[d] = 1
					assert(!Equal(left, right) && !Equal(right, left))
					left[d] = 0
				}
			}
		}
	}
}
// indexTests is the shared table for the index tests: a is the text searched
// in, b is the text searched for, and i is the expected index of the first
// occurrence of b in a (-1 when absent). TestIndexByte uses only the entries
// whose b is a single byte.
var indexTests = []BinOpTest{
{"", "", 0},
{"", "a", -1},
{"", "foo", -1},
{"fo", "foo", -1},
{"foo", "baz", -1},
{"foo", "foo", 0},
{"oofofoofooo", "f", 2},
{"oofofoofooo", "foo", 4},
{"barfoobarfoo", "foo", 3},
{"foo", "", 0},
{"foo", "o", 1},
{"abcABCabc", "A", 3},
// cases with one byte strings - test IndexByte and special case in Index()
{"", "a", -1},
{"x", "a", -1},
{"x", "x", 0},
{"abc", "a", 0},
{"abc", "b", 1},
{"abc", "c", 2},
{"abc", "x", -1},
{"barfoobarfooyyyzzzyyyzzzyyyzzzyyyxxxzzzyyy", "x", 33},
{"foofyfoobarfoobar", "y", 4},
{"oooooooooooooooooooooo", "r", -1},
{"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22},
{"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
// test fallback to Rabin-Karp: every leading position is a false positive
// for the first bytes of the separator.
{"000000000000000000000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000000001", 5},
}
// TestIndex is a placeholder: it has no body yet and checks nothing.
func TestIndex {
// todo
}
// TestLastIndex is a placeholder: it has no body yet and checks nothing.
func TestLastIndex {
// todo
}
// TestIndexAny is a placeholder: it has no body yet and checks nothing.
func TestIndexAny {
// todo
}
// TestLastIndexAny is a placeholder: it has no body yet and checks nothing.
func TestLastIndexAny {
// todo
}
// TestIndexByte runs every single-byte entry of indexTests through both
// IndexByte and the reference implementation indexBytePortable and asserts
// that each returns the expected index.
func TestIndexByte {
	for _, tc := range indexTests {
		// Entries with an empty or multi-byte needle do not apply here.
		if len(tc.b) == 1 {
			hay := []byte(tc.a)
			needle := tc.b[0]

			assert(IndexByte(hay, needle) == tc.i)
			assert(indexBytePortable(hay, needle) == tc.i)
		}
	}
}
// indexBytePortable is a straightforward reference implementation used by
// the tests: it returns the index of the first byte of s equal to c, or -1
// if there is none.
func indexBytePortable(s: []byte, c: byte) => int {
	for pos := 0; pos < len(s); pos++ {
		if s[pos] == c {
			return pos
		}
	}
	return -1
}
// TestLastIndexByte checks LastIndexByte against a small table: a is the
// text, the first byte of b is the byte searched for, and i is the expected
// index of its last occurrence.
func TestLastIndexByte {
	cases := []BinOpTest{
		{"", "q", -1},
		{"abcdef", "q", -1},
		{"abcdefabcdef", "a", len("abcdef")},      // something in the middle
		{"abcdefabcdef", "f", len("abcdefabcde")}, // last byte
		{"zabcdefabcdef", "z", 0},                 // first byte
		{"a☺b☻c☹d", "b", len("a☺")},               // non-ascii
	}
	for idx := 0; idx < len(cases); idx++ {
		tc := cases[idx]
		got := LastIndexByte([]byte(tc.a), tc.b[0])
		assert(got == tc.i)
	}
}
// TestIndexByteBig exercises IndexByte on windows of a larger buffer that
// start and end at every possible offset.
func TestIndexByteBig {
	// The original size of 1024 was only kept when a (disabled) short-mode
	// check was false; the reduced size is always used.
	n := 128
	buf := make([]byte, n)
	for i := 0; i < n; i++ {
		// different start alignments
		checkIndexByteWindow(buf[i:])
		// different end alignments
		checkIndexByteWindow(buf[:i])
		// different start and end alignments
		checkIndexByteWindow(buf[i/2 : n-(i+1)/2])
	}
}

// checkIndexByteWindow expects w to contain no 'x'. For each position it
// plants a single 'x', asserts that IndexByte finds it there, clears it again
// and asserts that IndexByte then reports -1.
func checkIndexByteWindow(w: []byte) {
	for j := 0; j < len(w); j++ {
		w[j] = 'x'
		assert(IndexByte(w, 'x') == j)
		w[j] = 0
		assert(IndexByte(w, 'x') == -1)
	}
}
// TestIndexByteSmall slides a 15-byte window over every offset of a buffer
// and checks IndexByte on that window.
func TestIndexByteSmall {
	window := 15
	buf := make([]byte, 5015) // bigger than a page
	last := len(buf) - window

	// Make sure we find the correct byte even when straddling a page.
	for start := 0; start <= last; start++ {
		win := buf[start : start+window]
		for k := 0; k < window; k++ {
			win[k] = byte(100 + k)
		}
		for k := 0; k < window; k++ {
			assert(IndexByte(win, byte(100+k)) == k)
		}
		for k := 0; k < window; k++ {
			win[k] = 0
		}
	}

	// Make sure matches outside the slice never trigger: the window holds
	// only ones while everything around it is zero.
	for start := 0; start <= last; start++ {
		win := buf[start : start+window]
		for k := 0; k < window; k++ {
			win[k] = 1
		}
		for k := 0; k < window; k++ {
			assert(IndexByte(win, byte(0)) == -1)
		}
		for k := 0; k < window; k++ {
			win[k] = 0
		}
	}
}
// TestIndexRune checks IndexRune against a table of (text, rune, expected
// byte offset) entries and then against a short mixed ASCII/CJK haystack.
// On a table mismatch the entry index, inputs, actual and expected values
// are printed before the assertion fails.
func TestIndexRune {
	cases := []struct {
		text: string
		r:    rune
		want: int
	}{
		{"", 'a', -1},
		{"", '☺', -1},
		{"foo", '☹', -1},
		{"foo", 'o', 1},
		{"foo☺bar", '☺', 3},
		// disabled case:
		//{"foo☺☻☹bar", '☹', 9},
		{"a A x", 'A', 2},
		{"some_text=some_value", '=', 9},
		{"☺a", 'a', 3},
		{"a☻☺b", '☺', 4},

		// RuneError should match any invalid UTF-8 byte sequence.
		{"�", '�', 0},
		// disabled case:
		//{"\xff", '�', 0},
		{"☻x�", '�', len("☻x")},
		{"☻x\xe2\x98", '�', len("☻x")},
		{"☻x\xe2\x98�", '�', len("☻x")},
		{"☻x\xe2\x98x", '�', len("☻x")},

		// Invalid rune values should never match.
		{"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", -1, -1},
		{"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", 0xD800, -1}, // Surrogate pair
		{"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", utf8.MaxRune + 1, -1},
	}
	for i, tc := range cases {
		got := IndexRune([]byte(tc.text), tc.r)
		if got != tc.want {
			println(i, tc.text, tc.r, got, tc.want)
			assert(false)
		}
	}

	haystack := []byte("test世界")
	// 's' is the third byte.
	assert(IndexRune(haystack, 's') == 2)
	// '世' starts right after the four ASCII bytes.
	assert(IndexRune(haystack, '世') == 4)
}
func TestEqualFold {
for _, tt := range EqualFoldTests {
if out := EqualFold([]byte(tt.s), []byte(tt.t)); out != tt.out {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册