bytes 包更多测试

431facd2 · chai2010 · 80bc37c9 · 431facd2 · 431facd2 · 431facd2
Showing 3 changed files with 477 additions and 2 deletions

waroot/src/bytes/bytealg.wa waroot/src/bytes/bytealg.wa +35 -0

waroot/src/bytes/bytes.wa waroot/src/bytes/bytes.wa +131 -2

waroot/src/bytes/bytes_test.wa waroot/src/bytes/bytes_test.wa +311 -0

未找到文件。
--- a/waroot/src/bytes/bytealg.wa
+++ b/waroot/src/bytes/bytealg.wa
+// 版权 @2023 凹语言 作者。保留所有权利。
+
+
+// bytealg_MaxLen is the maximum length of the string to be searched for (argument b)
+// in bytealg_Index. If it is not 0, make sure it is >= 4.
+// It is declared here without an initializer.
+global bytealg_MaxLen: int
+
+// FIXME: the logic of HashStrBytes, HashStrRevBytes, IndexRabinKarpBytes and HashStr, HashStrRev,
+// IndexRabinKarp are exactly the same, except that the types are different. Can we eliminate
+// three of them without causing allocation?
+// NOTE(review): none of the helpers named above are defined in the part of this file
+// shown here; confirm whether they exist elsewhere before acting on this FIXME.
+
+// bytealg_PrimeRK is the prime base used in the Rabin-Karp algorithm.
+const bytealg_PrimeRK = 16777619
+
+// bytealg_MaxBruteForce is presumably the largest haystack length for which a
+// brute-force search would be preferred; in this change it is referenced only
+// from commented-out code in Index (TODO confirm intended meaning).
+const bytealg_MaxBruteForce = 0
+
+// bytealg_Index is meant to return the index of the first instance of b in a,
+// or -1 if b is not present in a.
+// Requires 2 <= len(b) <= bytealg_MaxLen.
+//
+// It is currently a stub: the body unconditionally panics with "unimplemented".
+func bytealg_Index(a, b: []byte) => int {
+	panic("unimplemented")
+}
+
+// bytealg_IndexString is meant to return the index of the first instance of b in a,
+// or -1 if b is not present in a.
+// Requires 2 <= len(b) <= bytealg_MaxLen.
+//
+// It is currently a stub: the body unconditionally panics with "unimplemented".
+func bytealg_IndexString(a, b: string) => int {
+	panic("unimplemented")
+}
+
+// bytealg_Cutover is meant to report the number of failures of IndexByte we should
+// tolerate before switching over to bytealg_Index.
+// n is the number of bytes processed so far.
+// See the bytes Index implementation for details.
+//
+// It is currently a stub: the body unconditionally panics with "unimplemented".
+func bytealg_Cutover(n: int) => int {
+	panic("unimplemented")
+}
\ No newline at end of file
--- a/waroot/src/bytes/bytes.wa
+++ b/waroot/src/bytes/bytes.wa
 // 版权 @2023 凹语言 作者。保留所有权利。

+import (
+	"unicode/utf8"
+)
+
 // Equal reports whether a and b
 // are the same length and contain the same bytes.
 // A nil argument is equivalent to an empty slice.
@@ -24,8 +28,96 @@ func EqualFold(s, t: []byte) => bool {
 	return true
 }

-func Index(d: []byte, x: []byte) => int {
-	return 0
+
+// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
+//
+// Special cases handled up front:
+//   - an empty sep matches at index 0;
+//   - a one-byte sep is delegated to IndexByte;
+//   - a sep as long as s reduces to a single Equal call;
+//   - a sep longer than s can never match.
+//
+// For longer separators the search uses IndexByte to jump to candidate
+// positions of sep[0] and confirms each candidate with Equal. When too many
+// candidates turn out to be false positives the general path stops using
+// IndexByte and finishes with a plain linear scan, so a match that exists is
+// always found.
+func Index(s, sep: []byte) => int {
+	n := len(sep)
+	switch {
+	case n == 0:
+		return 0
+	case n == 1:
+		return IndexByte(s, sep[0])
+	case n == len(s):
+		if Equal(sep, s) {
+			return 0
+		}
+		return -1
+	case n > len(s):
+		return -1
+	case n <= bytealg_MaxLen:
+		// Only reached when bytealg_MaxLen has been set to at least n (n >= 2 here).
+		// Use brute force when s and sep both are small
+		//if len(s) <= bytealg.MaxBruteForce {
+		//	return bytealg.Index(s, sep)
+		//}
+		c0 := sep[0]
+		c1 := sep[1]
+		i := 0
+		t := len(s) - n + 1
+		fails := 0
+		for i < t {
+			if s[i] != c0 {
+				// IndexByte is faster than bytealg.Index, so use it as long as
+				// we're not getting lots of false positives.
+				o := IndexByte(s[i+1:t], c0)
+				if o < 0 {
+					return -1
+				}
+				i += o + 1
+			}
+			if s[i+1] == c1 && Equal(s[i:i+n], sep) {
+				return i
+			}
+			fails++
+			i++
+			// Switch to bytealg.Index when IndexByte produces too many false positives.
+			//if fails > bytealg.Cutover(i) {
+			//	r := bytealg.Index(s[i:], sep)
+			//	if r >= 0 {
+			//		return r + i
+			//	}
+			//	return -1
+			//}
+		}
+		return -1
+	}
+	c0 := sep[0]
+	c1 := sep[1]
+	i := 0
+	fails := 0
+	// t is one past the last start position at which sep can still fit in s.
+	t := len(s) - n + 1
+	for i < t {
+		if s[i] != c0 {
+			o := IndexByte(s[i+1:t], c0)
+			if o < 0 {
+				break
+			}
+			i += o + 1
+		}
+		if s[i+1] == c1 && Equal(s[i:i+n], sep) {
+			return i
+		}
+		i++
+		fails++
+		if fails >= 4+(i>>4) && i < t {
+			// IndexByte isn't skipping ahead far enough to pay for itself.
+			// The intended replacement is a Rabin-Karp search, which is not
+			// available yet:
+			//
+			//j := bytealg.IndexRabinKarpBytes(s[i:], sep)
+			//if j < 0 {
+			//	return -1
+			//}
+			//return i + j
+			//
+			// Until it is, finish with a plain linear scan of the remaining
+			// start positions. Returning -1 here without searching would
+			// report "not found" for separators that are actually present.
+			for i < t {
+				if s[i] == c0 && s[i+1] == c1 && Equal(s[i:i+n], sep) {
+					return i
+				}
+				i++
+			}
+			return -1
+		}
+	}
+	return -1
+}

 func IndexByte(b: []byte, c: byte) => int {
@@ -132,3 +224,40 @@ func Compare(a, b: []byte) => int {
 	}
 	return 0
 }
+
+// LastIndexByte walks s from its final byte toward its first and reports the
+// position of the first byte it meets that equals c, i.e. the last occurrence
+// of c in s. It reports -1 when s contains no such byte (an empty s included).
+func LastIndexByte(s: []byte, c: byte) => int {
+	pos := len(s)
+	for pos > 0 {
+		pos--
+		if s[pos] == c {
+			return pos
+		}
+	}
+	return -1
+}
+
+// IndexRune treats s as UTF-8 encoded text and reports the byte offset of the
+// first occurrence of the rune r, or -1 when r does not occur.
+//
+//   - A rune below utf8.RuneSelf is looked up as a single byte via IndexByte.
+//   - utf8.RuneError matches the first position at which decoding yields
+//     utf8.RuneError.
+//   - A rune rejected by utf8.ValidRune never matches.
+//   - Any other rune is encoded with utf8.EncodeRune and its bytes are
+//     searched for with Index.
+func IndexRune(s: []byte, r: rune) => int {
+	// Single-byte rune: a plain byte search is enough.
+	if 0 <= r && r < utf8.RuneSelf {
+		return IndexByte(s, byte(r))
+	}
+
+	// RuneError: decode rune by rune and stop at the first RuneError result.
+	if r == utf8.RuneError {
+		pos := 0
+		for pos < len(s) {
+			decoded, width := utf8.DecodeRune(s[pos:])
+			if decoded == utf8.RuneError {
+				return pos
+			}
+			pos += width
+		}
+		return -1
+	}
+
+	// Runes that cannot be encoded can never be present.
+	if !utf8.ValidRune(r) {
+		return -1
+	}
+
+	// Multi-byte rune: search for its encoded byte sequence.
+	buf: [utf8.UTFMax]byte
+	width := utf8.EncodeRune(buf[:], r)
+	return Index(s, buf[:width])
+}
--- a/waroot/src/bytes/bytes_test.wa
+++ b/waroot/src/bytes/bytes_test.wa
 // 版权 @2023 凹语言 作者。保留所有权利。

+import (
+	"unicode/utf8"
+)
+
+// BinOpTest is one table-driven case for a two-operand search function.
+type BinOpTest struct {
+	a: string // first operand; the tests in this file convert it to []byte and search in it
+	b: string // second operand; the tests in this file use its first byte as the byte to find
+	i: int // expected index returned by the function under test (-1 means not found)
+}
+
 func TestEqual {
 	for _, tt := range compareTests {
 		eql := Equal(tt.a, tt.b)
@@ -10,6 +20,307 @@ func TestEqual {
 	}
 }

+// TestEqualExhaustive checks Equal for every window length n and every pair of
+// start offsets (x, y) inside a 32-byte buffer: after copying src[x:x+n] over
+// dst[y:y+n], the two windows must compare equal in both argument orders.
+func TestEqualExhaustive {
+	// Buffer size; kept at 32 to bound the triple loop below.
+	size := 32
+	src := make([]byte, size)
+	dst := make([]byte, size)
+	dstInit := make([]byte, size)
+	// Deterministic, random-looking fill patterns.
+	for k := 0; k < size; k++ {
+		src[k] = byte(17 * k)
+		dstInit[k] = byte(23*k + 100)
+	}
+
+	for n := 0; n <= size; n++ {
+		lastStart := size - n
+		for x := 0; x <= lastStart; x++ {
+			want := src[x : x+n]
+			for y := 0; y <= lastStart; y++ {
+				// Reset dst, then plant the source window at offset y.
+				copy(dst, dstInit)
+				got := dst[y : y+n]
+				copy(got, want)
+				assert(Equal(want, got) && Equal(got, want))
+			}
+		}
+	}
+}
+
+// TestNotEqual makes sure Equal reports false for windows that differ in
+// exactly one byte. Both buffers are all zeros; for each window length and
+// pair of offsets a single byte of the first window is set to 1, Equal is
+// checked in both argument orders, and the byte is reset to 0.
+func TestNotEqual {
+	// Buffer size; kept at 32 to bound the nested loops below.
+	size := 32
+	lhs := make([]byte, size)
+	rhs := make([]byte, size)
+
+	for n := 0; n <= size; n++ {
+		for x := 0; x+n <= size; x++ {
+			for y := 0; y+n <= size; y++ {
+				for d := x; d < x+n; d++ {
+					lhs[d] = 1
+					assert(!Equal(lhs[x:x+n], rhs[y:y+n]) && !Equal(rhs[y:y+n], lhs[x:x+n]))
+					lhs[d] = 0
+				}
+			}
+		}
+	}
+}
+
+
+// indexTests is a table of search cases: a is the text searched, b is what is
+// searched for, and i is the expected index of the first match (-1 when there
+// is none). In this file only TestIndexByte consumes it, and it skips every
+// entry whose b is not exactly one byte long.
+var indexTests = []BinOpTest{
+	{"", "", 0},
+	{"", "a", -1},
+	{"", "foo", -1},
+	{"fo", "foo", -1},
+	{"foo", "baz", -1},
+	{"foo", "foo", 0},
+	{"oofofoofooo", "f", 2},
+	{"oofofoofooo", "foo", 4},
+	{"barfoobarfoo", "foo", 3},
+	{"foo", "", 0},
+	{"foo", "o", 1},
+	{"abcABCabc", "A", 3},
+	// cases with one byte strings - test IndexByte and special case in Index()
+	{"", "a", -1},
+	{"x", "a", -1},
+	{"x", "x", 0},
+	{"abc", "a", 0},
+	{"abc", "b", 1},
+	{"abc", "c", 2},
+	{"abc", "x", -1},
+	{"barfoobarfooyyyzzzyyyzzzyyyzzzyyyxxxzzzyyy", "x", 33},
+	{"foofyfoobarfoobar", "y", 4},
+	{"oooooooooooooooooooooo", "r", -1},
+	{"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22},
+	{"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
+	// test fallback to Rabin-Karp.
+	{"000000000000000000000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000000001", 5},
+}
+
+// TestIndex is a placeholder: its body is empty, so it currently asserts nothing.
+func TestIndex {
+	// TODO: not implemented yet.
+}
+
+// TestLastIndex is a placeholder: its body is empty, so it currently asserts nothing.
+func TestLastIndex {
+	// TODO: not implemented yet.
+}
+// TestIndexAny is a placeholder: its body is empty, so it currently asserts nothing.
+func TestIndexAny {
+	// TODO: not implemented yet.
+}
+
+// TestLastIndexAny is a placeholder: its body is empty, so it currently asserts nothing.
+func TestLastIndexAny {
+	// TODO: not implemented yet.
+}
+
+// TestIndexByte runs every indexTests entry whose needle is exactly one byte
+// long through both IndexByte and the reference loop indexBytePortable, and
+// requires each to return the expected index.
+func TestIndexByte {
+	for _, tc := range indexTests {
+		// Multi-byte and empty needles are not IndexByte cases.
+		if len(tc.b) == 1 {
+			hay := []byte(tc.a)
+			needle := tc.b[0]
+			assert(IndexByte(hay, needle) == tc.i)
+			assert(indexBytePortable(hay, needle) == tc.i)
+		}
+	}
+}
+
+// indexBytePortable is a straightforward reference search: it reports the
+// position of the first byte in s equal to c, or -1 when there is none.
+func indexBytePortable(s: []byte, c: byte) => int {
+	for pos := 0; pos < len(s); pos++ {
+		if s[pos] == c {
+			return pos
+		}
+	}
+	return -1
+}
+
+// TestLastIndexByte checks LastIndexByte against a small table. Each case
+// searches the bytes of a for the first byte of b and expects index i.
+func TestLastIndexByte {
+	cases := []BinOpTest{
+		{"", "q", -1},
+		{"abcdef", "q", -1},
+		{"abcdefabcdef", "a", len("abcdef")},      // something in the middle
+		{"abcdefabcdef", "f", len("abcdefabcde")}, // last byte
+		{"zabcdefabcdef", "z", 0},                 // first byte
+		{"a☺b☻c☹d", "b", len("a☺")},               // non-ascii
+	}
+	for _, tc := range cases {
+		got := LastIndexByte([]byte(tc.a), tc.b[0])
+		assert(got == tc.i)
+	}
+}
+
+// indexByteSweep places a single 'x' at each position of window in turn,
+// requires IndexByte to find it exactly there, then clears the byte again and
+// requires IndexByte to report -1. window is all zeros on entry and on return.
+func indexByteSweep(window: []byte) {
+	for j := 0; j < len(window); j++ {
+		window[j] = 'x'
+		assert(IndexByte(window, 'x') == j)
+		window[j] = 0
+		assert(IndexByte(window, 'x') == -1)
+	}
+}
+
+// TestIndexByteBig exercises IndexByte on a 128-byte buffer using sub-slices
+// with many different start and end offsets.
+func TestIndexByteBig {
+	// Buffer size; kept at 128 to bound the quadratic sweep.
+	n := 128
+	buf := make([]byte, n)
+	for i := 0; i < n; i++ {
+		// Vary the start offset.
+		indexByteSweep(buf[i:])
+		// Vary the end offset.
+		indexByteSweep(buf[:i])
+		// Vary both ends at once.
+		indexByteSweep(buf[i/2 : n-(i+1)/2])
+	}
+}
+
+// TestIndexByteSmall slides a 15-byte window across every offset of a
+// 5015-byte buffer (sized to exceed 4096 bytes) and checks IndexByte on it.
+func TestIndexByteSmall {
+	buf := make([]byte, 5015)
+	span := 15
+	lastStart := len(buf) - span
+
+	// Pass 1: fill the window with 15 distinct values and make sure each one
+	// is found at its own position, wherever the window sits.
+	for start := 0; start <= lastStart; start++ {
+		win := buf[start : start+span]
+		for k := 0; k < span; k++ {
+			win[k] = byte(100 + k)
+		}
+		for k := 0; k < span; k++ {
+			assert(IndexByte(win, byte(100+k)) == k)
+		}
+		for k := 0; k < span; k++ {
+			win[k] = 0
+		}
+	}
+
+	// Pass 2: fill the window with ones while everything around it is zero,
+	// and make sure a search for zero never matches outside the window.
+	for start := 0; start <= lastStart; start++ {
+		win := buf[start : start+span]
+		for k := 0; k < span; k++ {
+			win[k] = 1
+		}
+		for k := 0; k < span; k++ {
+			assert(IndexByte(win, byte(0)) == -1)
+		}
+		for k := 0; k < span; k++ {
+			win[k] = 0
+		}
+	}
+}
+
+// TestIndexRune checks IndexRune against a table of (input, rune, expected
+// byte offset) cases covering ASCII runes, multi-byte runes, utf8.RuneError
+// and invalid rune values, followed by two spot checks on a mixed
+// ASCII/multi-byte haystack. On a table mismatch the case is printed before
+// the assertion fails.
+func TestIndexRune {
+	tests := []struct {
+		in:   string
+		rune: rune
+		want: int
+	}{
+		{"", 'a', -1},
+		{"", '☺', -1},
+		{"foo", '☹', -1},
+		{"foo", 'o', 1},
+		{"foo☺bar", '☺', 3},
+		//{"foo☺☻☹bar", '☹', 9},
+		{"a A x", 'A', 2},
+		{"some_text=some_value", '=', 9},
+		{"☺a", 'a', 3},
+		{"a☻☺b", '☺', 4},
+
+		// RuneError should match any invalid UTF-8 byte sequence.
+		{"�", '�', 0},
+		//{"\xff", '�', 0},
+		{"☻x�", '�', len("☻x")},
+		{"☻x\xe2\x98", '�', len("☻x")},
+		{"☻x\xe2\x98�", '�', len("☻x")},
+		{"☻x\xe2\x98x", '�', len("☻x")},
+
+		// Invalid rune values should never match.
+		{"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", -1, -1},
+		{"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", 0xD800, -1}, // Surrogate pair
+		{"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", utf8.MaxRune + 1, -1},
+	}
+	for idx, tc := range tests {
+		got := IndexRune([]byte(tc.in), tc.rune)
+		if got == tc.want {
+			continue
+		}
+		// Print the failing case first so the failure can be identified.
+		println(idx, tc.in, tc.rune, got, tc.want)
+		assert(false)
+	}
+
+	haystack := []byte("test世界")
+	// 's' is the third byte; '世' starts right after the four ASCII bytes.
+	assert(IndexRune(haystack, 's') == 2)
+	assert(IndexRune(haystack, '世') == 4)
+}
+
 func TestEqualFold {
 	for _, tt := range EqualFoldTests {
 		if out := EqualFold([]byte(tt.s), []byte(tt.t)); out != tt.out {