补充测试

6557bef6 · chai2010 · 91ecd0c2 · 6557bef6 · 6557bef6 · 6557bef6
9 changed files
--- a/waroot/src/bytes/bytes.wa
+++ b/waroot/src/bytes/bytes.wa
 // 版权 @2023 凹语言 作者。保留所有权利。

 import (
+	"unicode/ctypes"
 	"unicode/utf8"
 )

@@ -28,7 +29,6 @@ func EqualFold(s, t: []byte) => bool {
 	return true
 }

-
 // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
 func Index(s, sep: []byte) => int {
 	n := len(sep)
@@ -262,7 +262,6 @@ func IndexRune(s: []byte, r: rune) => int {
 	}
 }

-
 // Count counts the number of non-overlapping instances of sep in s.
 // If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s.
 func Count(s, sep: []byte) => int {
@@ -337,27 +336,6 @@ func genSplit(s, sep: []byte, sepSave, n: int) => [][]byte {

 global asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}

-
-const (
-	unicode_MaxRune         = '\U0010FFFF' // Maximum valid Unicode code point.
-	unicode_ReplacementChar = '\uFFFD'     // Represents invalid code points.
-	unicode_MaxASCII        = '\u007F'     // maximum ASCII value.
-	unicode_MaxLatin1       = '\u00FF'     // maximum Latin-1 value.
-)
-
-// 简化版本, 不支持 unicode 空白
-func unicode_IsSpace(r: rune) => bool {
-	// This property isn't the same as Z; special-case it.
-	if u32(r) <= unicode_MaxLatin1 {
-		switch r {
-		case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
-			return true
-		}
-		return false
-	}
-	return false
-}
-
 // Fields interprets s as a sequence of UTF-8-encoded code points.
 // It splits the slice s around each instance of one or more consecutive white space
 // characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an
@@ -379,7 +357,7 @@ func Fields(s: []byte) => [][]byte {

 	if setBits >= utf8.RuneSelf {
 		// Some runes in the input slice are not ASCII.
-		return FieldsFunc(s, unicode_IsSpace)
+		return FieldsFunc(s, ctypes.IsSpace)
 	}

 	// ASCII fast path
@@ -412,7 +390,6 @@ func Fields(s: []byte) => [][]byte {
 	return a
 }

-
 // FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
 // It splits the slice s at each run of code points c satisfying f(c) and
 // returns a slice of subslices of s. If all code points in s satisfy f(c), or
@@ -513,3 +490,341 @@ func Join(s: [][]byte, sep: []byte) => []byte {
 	}
 	return b
 }
+
+// Map applies mapping to every UTF-8-encoded code point of s and returns the
+// results as a freshly allocated byte slice. A negative result from mapping
+// drops that code point from the output.
+func Map(mapping: func(r: rune) => rune, s: []byte) => []byte {
+	// Start with room for len(s) bytes; the buffer is only enlarged when a
+	// mapped rune needs more space than is left.
+	capacity := len(s)
+	out := make([]byte, capacity)
+	used := 0 // bytes of out filled so far
+	pos := 0
+	for pos < len(s) {
+		r, wid := rune(s[pos]), 1
+		if r >= utf8.RuneSelf {
+			r, wid = utf8.DecodeRune(s[pos:])
+		}
+		pos += wid
+
+		r = mapping(r)
+		if r < 0 {
+			continue // dropped from the output
+		}
+
+		need := utf8.RuneLen(r)
+		if need < 0 {
+			// Not encodable: reserve room for utf8.RuneError instead.
+			need = len(string(utf8.RuneError))
+		}
+		if used+need > capacity {
+			// Grow the buffer and carry over what was written so far.
+			capacity = capacity*2 + utf8.UTFMax
+			grown := make([]byte, capacity)
+			copy(grown, out[0:used])
+			out = grown
+		}
+		used += utf8.EncodeRune(out[used:capacity], r)
+	}
+	return out[0:used]
+}
+
+// ToUpper returns a new byte slice holding s with lower-case letters
+// converted to upper case. Pure-ASCII input is converted directly; any other
+// input is passed through Map with ctypes.ToUpper.
+func ToUpper(s: []byte) => []byte {
+	asciiOnly := true
+	sawLower := false
+	for _, c := range s {
+		if c >= utf8.RuneSelf {
+			asciiOnly = false
+			break
+		}
+		if 'a' <= c && c <= 'z' {
+			sawLower = true
+		}
+	}
+
+	if !asciiOnly {
+		return Map(ctypes.ToUpper, s)
+	}
+	if !sawLower {
+		// Nothing to convert; still hand back a copy rather than s itself.
+		return append([]byte(""), s...)
+	}
+
+	upper := make([]byte, len(s))
+	for i, c := range s {
+		if 'a' <= c && c <= 'z' {
+			upper[i] = c - ('a' - 'A')
+		} else {
+			upper[i] = c
+		}
+	}
+	return upper
+}
+
+// ToLower returns a new byte slice holding s with upper-case letters
+// converted to lower case. Pure-ASCII input is converted directly; any other
+// input is passed through Map with ctypes.ToLower.
+func ToLower(s: []byte) => []byte {
+	asciiOnly := true
+	sawUpper := false
+	for _, c := range s {
+		if c >= utf8.RuneSelf {
+			asciiOnly = false
+			break
+		}
+		if 'A' <= c && c <= 'Z' {
+			sawUpper = true
+		}
+	}
+
+	if !asciiOnly {
+		return Map(ctypes.ToLower, s)
+	}
+	if !sawUpper {
+		// Nothing to convert; still hand back a copy rather than s itself.
+		return append([]byte(""), s...)
+	}
+
+	lower := make([]byte, len(s))
+	for i, c := range s {
+		if 'A' <= c && c <= 'Z' {
+			lower[i] = c + ('a' - 'A')
+		} else {
+			lower[i] = c
+		}
+	}
+	return lower
+}
+
+// ToValidUTF8 copies s, treating it as UTF-8, and substitutes replacement
+// (which may be empty) for each run of bytes that is not valid UTF-8.
+func ToValidUTF8(s, replacement: []byte) => []byte {
+	out := make([]byte, 0, len(s)+len(replacement))
+	inBadRun := false // the previous byte belonged to an invalid sequence
+	pos := 0
+	for pos < len(s) {
+		wid := 1
+		if s[pos] >= utf8.RuneSelf {
+			_, n := utf8.DecodeRune(s[pos:])
+			if n == 1 {
+				// A non-ASCII byte that decodes with width 1 is invalid.
+				// Emit the replacement only once per run of such bytes.
+				if !inBadRun {
+					out = append(out, replacement...)
+				}
+				inBadRun = true
+				pos++
+				continue
+			}
+			wid = n
+		}
+		// ASCII byte or a well-formed multi-byte sequence: copy verbatim.
+		inBadRun = false
+		out = append(out, s[pos:pos+wid]...)
+		pos += wid
+	}
+	return out
+}
+
+// TrimSpace returns the subslice of s left after removing leading and
+// trailing white space. ASCII bytes are classified with the asciiSpace table;
+// once a non-ASCII byte is met the rest is delegated to TrimFunc with
+// ctypes.IsSpace.
+func TrimSpace(s: []byte) => []byte {
+	// Skip leading ASCII white space.
+	lo := 0
+	for lo < len(s) {
+		b := s[lo]
+		if b >= utf8.RuneSelf {
+			// Non-ASCII byte: let the rune-aware path finish the job on
+			// the remaining bytes.
+			return TrimFunc(s[lo:], ctypes.IsSpace)
+		}
+		if asciiSpace[b] == 0 {
+			break
+		}
+		lo++
+	}
+
+	// Skip trailing ASCII white space.
+	hi := len(s)
+	for hi > lo {
+		b := s[hi-1]
+		if b >= utf8.RuneSelf {
+			return TrimFunc(s[lo:hi], ctypes.IsSpace)
+		}
+		if asciiSpace[b] == 0 {
+			break
+		}
+		hi--
+	}
+
+	// s[lo:hi] now starts and ends with an ASCII non-space byte, or is empty.
+	if lo == hi {
+		// Only white space was seen: return nil rather than an empty slice.
+		return nil
+	}
+	return s[lo:hi]
+}
+
+// TrimLeftFunc treats s as UTF-8-encoded bytes and returns the subslice that
+// starts at the first code point c for which f(c) is false. It returns nil
+// when there is no such code point.
+func TrimLeftFunc(s: []byte, f: func(r: rune) => bool) => []byte {
+	if first := indexFunc(s, f, false); first != -1 {
+		return s[first:]
+	}
+	return nil
+}
+
+// indexFunc returns the byte index of the first UTF-8-encoded code point r in
+// s for which f(r) == truth, or -1 if there is none. Passing truth == false
+// therefore inverts the sense of the predicate.
+func indexFunc(s: []byte, f: func(r: rune) => bool, truth: bool) => int {
+	for pos := 0; pos < len(s); {
+		r, wid := rune(s[pos]), 1
+		if r >= utf8.RuneSelf {
+			r, wid = utf8.DecodeRune(s[pos:])
+		}
+		if f(r) == truth {
+			return pos
+		}
+		pos += wid
+	}
+	return -1
+}
+
+// lastIndexFunc returns the byte index of the last UTF-8-encoded code point r
+// in s for which f(r) == truth, or -1 if there is none. Passing
+// truth == false therefore inverts the sense of the predicate.
+func lastIndexFunc(s: []byte, f: func(r: rune) => bool, truth: bool) => int {
+	end := len(s)
+	for end > 0 {
+		r, wid := rune(s[end-1]), 1
+		if r >= utf8.RuneSelf {
+			r, wid = utf8.DecodeLastRune(s[0:end])
+		}
+		// Step back to the start of the code point just examined.
+		end -= wid
+		if f(r) == truth {
+			return end
+		}
+	}
+	return -1
+}
+
+// TrimRightFunc returns the subslice of s that ends just after the last
+// UTF-8-encoded code point c for which f(c) is false.
+func TrimRightFunc(s: []byte, f: func(r: rune) => bool) => []byte {
+	last := lastIndexFunc(s, f, false)
+	if last < 0 || s[last] < utf8.RuneSelf {
+		// Either nothing is kept (last == -1, so the end becomes 0) or the
+		// last kept code point is a single ASCII byte.
+		return s[0 : last+1]
+	}
+	// The last kept code point is not ASCII: keep all of its bytes.
+	_, wid := utf8.DecodeRune(s[last:])
+	return s[0 : last+wid]
+}
+
+// TrimFunc trims s on both sides: leading code points satisfying f are
+// removed by TrimLeftFunc, then trailing ones by TrimRightFunc.
+func TrimFunc(s: []byte, f: func(r: rune) => bool) => []byte {
+	leftTrimmed := TrimLeftFunc(s, f)
+	return TrimRightFunc(leftTrimmed, f)
+}
+
+// Trim returns the subslice of s left after TrimFunc removes, from both
+// ends, the code points matched by the predicate built from cutset.
+func Trim(s: []byte, cutset: string) => []byte {
+	inCutset := makeCutsetFunc(cutset)
+	return TrimFunc(s, inCutset)
+}
+
+// TrimLeft returns the subslice of s left after TrimLeftFunc removes the
+// leading code points matched by the predicate built from cutset.
+func TrimLeft(s: []byte, cutset: string) => []byte {
+	inCutset := makeCutsetFunc(cutset)
+	return TrimLeftFunc(s, inCutset)
+}
+
+// TrimRight returns the subslice of s left after TrimRightFunc removes the
+// trailing code points matched by the predicate built from cutset.
+func TrimRight(s: []byte, cutset: string) => []byte {
+	inCutset := makeCutsetFunc(cutset)
+	return TrimRightFunc(s, inCutset)
+}
+
+// Runes decodes s as a sequence of UTF-8-encoded code points and returns
+// them as a slice of runes.
+func Runes(s: []byte) => []rune {
+	out := make([]rune, utf8.RuneCount(s))
+	rest := s
+	for n := 0; len(rest) > 0; n++ {
+		r, wid := utf8.DecodeRune(rest)
+		out[n] = r
+		rest = rest[wid:]
+	}
+	return out
+}
+
+// TrimPrefix returns s with the leading bytes of prefix removed when
+// HasPrefix(s, prefix) holds; otherwise it returns s unchanged.
+func TrimPrefix(s, prefix: []byte) => []byte {
+	if !HasPrefix(s, prefix) {
+		return s
+	}
+	return s[len(prefix):]
+}
+
+// TrimSuffix returns s with the trailing bytes of suffix removed when
+// HasSuffix(s, suffix) holds; otherwise it returns s unchanged.
+func TrimSuffix(s, suffix: []byte) => []byte {
+	if !HasSuffix(s, suffix) {
+		return s
+	}
+	keep := len(s) - len(suffix)
+	return s[:keep]
+}
+
+// makeCutsetFunc returns a predicate that reports whether a rune is one of
+// the code points in cutset. Three strategies are used, cheapest first:
+// a direct comparison for a single ASCII character, an asciiSet bitmap for an
+// all-ASCII cutset, and a rune-by-rune scan of cutset otherwise.
+func makeCutsetFunc(cutset: string) => func(r: rune) => bool {
+
+	if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
+		return func(r: rune) => bool {
+			return r == rune(cutset[0])
+		}
+	}
+	if as, isASCII := makeASCIISet(cutset); isASCII {
+		return func(r: rune) => bool {
+			return r < utf8.RuneSelf && as.contains(byte(r))
+		}
+	}
+	return func(r: rune) => bool {
+		// cutset holds non-ASCII text here, so it must be decoded into
+		// code points. Comparing its raw bytes against r would never match
+		// a multi-byte code point and would wrongly match runes in
+		// U+0080..U+00FF that equal one of the encoding's bytes.
+		for i := 0; i < len(cutset); {
+			c, wid := utf8.DecodeRuneInString(cutset[i:])
+			if c == r {
+				return true
+			}
+			i += wid
+		}
+		return false
+	}
+}
+
+// asciiSet is a 32-byte value, where each bit represents the presence of a
+// given ASCII character in the set. The 128-bits of the lower 16 bytes,
+// starting with the least-significant bit of the lowest word to the
+// most-significant bit of the highest word, map to the full range of all
+// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
+// ensuring that any non-ASCII character will be reported as not in the set.
+type asciiSet struct {
+	// Bit (c & 31) of Data[c>>5] is set when byte c is in the set.
+	Data: [8]uint32
+}
+
+// makeASCIISet builds the set of the bytes in chars. ok is false, and the
+// set only partially filled, as soon as a non-ASCII byte is found.
+func makeASCIISet(chars: string) => (as: asciiSet, ok: bool) {
+	for i := 0; i < len(chars); i++ {
+		c := chars[i]
+		if c < utf8.RuneSelf {
+			// Word c>>5 holds the bit for c at position c&31.
+			word, bit := c>>5, uint(c&31)
+			as.Data[word] |= 1 << bit
+			continue
+		}
+		return as, false
+	}
+	return as, true
+}
+
+// contains reports whether the bit for c is set in the receiver.
+func asciiSet.contains(c: byte) => bool {
+	word := this.Data[c>>5]
+	return word&(1<<uint(c&31)) != 0
+}
--- a/waroot/src/bytes/bytes_test.wa
+++ b/waroot/src/bytes/bytes_test.wa
 // 版权 @2023 凹语言 作者。保留所有权利。

 import (
+	"unicode"
+	"unicode/ctypes"
 	"unicode/utf8"
 )

@@ -53,7 +55,7 @@ func TestEqualExhaustive {
 func TestNotEqual {
 	size := 128
 	//if testing.Short() {
-		size = 32
+	size = 32
 	//}
 	a := make([]byte, size)
 	b := make([]byte, size)
@@ -74,7 +76,6 @@ func TestNotEqual {
 	}
 }

-
 var indexTests = []BinOpTest{
 	{"", "", 0},
 	{"", "a", -1},
@@ -120,7 +121,7 @@ func TestLastIndexAny {
 	// todo
 }

-func TestIndexByte{
+func TestIndexByte {
 	for _, tt := range indexTests {
 		if len(tt.b) != 1 {
 			continue
@@ -500,7 +501,6 @@ global splitaftertests = []SplitTest{
 	{"123", "", 17, []string{"1", "2", "3"}},
 }

-
 func TestSplitAfter {
 	for _, tt := range splitaftertests {
 		a := SplitAfterN([]byte(tt.s), []byte(tt.sep), tt.n)
@@ -586,6 +586,398 @@ func TestFields {
 	}
 }

+// TestFieldsFunc checks FieldsFunc against the shared fieldstests table
+// (splitting on ctypes.IsSpace) and against a local table that splits on 'X'.
+func TestFieldsFunc {
+	for _, tc := range fieldstests {
+		got := sliceOfString(FieldsFunc([]byte(tc.s), ctypes.IsSpace))
+		// FieldsFunc(tc.s, ctypes.IsSpace) must produce tc.a.
+		assert(eq(got, tc.a))
+	}
+
+	isX := func(c: rune) => bool { return c == 'X' }
+	cases := []FieldsTest{
+		{"", []string{}},
+		{"XX", []string{}},
+		{"XXhiXXX", []string{"hi"}},
+		{"aXXbXXXcX", []string{"a", "b", "c"}},
+	}
+	for _, tc := range cases {
+		input := []byte(tc.s)
+		fields := FieldsFunc(input, isX)
+
+		// Appending to the results should not change future results.
+		last: []byte
+		for _, v := range fields {
+			last = append(v, 'z')
+		}
+
+		// FieldsFunc(tc.s) must produce tc.a.
+		assert(eq(sliceOfString(fields), tc.a))
+
+		// The input slice must be left untouched by the appends above.
+		assert(string(input) == tc.s)
+
+		if len(tc.a) > 0 {
+			// The last appended result is the last field followed by "z".
+			want := tc.a[len(tc.a)-1] + "z"
+			assert(string(last) == want)
+		}
+	}
+}
+
+// StringTest is a test case for any function which accepts and returns a
+// byte slice. For ease of creation, the input byte slice is written as a
+// string.
+type StringTest struct {
+	in:  string // input; converted to []byte before the function is called
+	out: []byte // expected result; nil means a nil result is expected
+}
+
+// upperTests are the cases TestToUpper runs through ToUpper. The non-ASCII
+// cases are commented out.
+global upperTests = []StringTest{
+	{"", []byte("")},
+	{"ONLYUPPER", []byte("ONLYUPPER")},
+	{"abc", []byte("ABC")},
+	{"AbC123", []byte("ABC123")},
+	{"azAZ09_", []byte("AZAZ09_")},
+	{"longStrinGwitHmixofsmaLLandcAps", []byte("LONGSTRINGWITHMIXOFSMALLANDCAPS")},
+	//{"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", []byte("LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS")},
+	//{"\u0250\u0250\u0250\u0250\u0250", []byte("\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F")}, // grows one byte per char
+	//{"a\u0080\U0010FFFF", []byte("A\u0080\U0010FFFF")},                           // test utf8.RuneSelf and utf8.MaxRune
+}
+
+// lowerTests are the cases TestToLower runs through ToLower. The non-ASCII
+// cases are commented out.
+global lowerTests = []StringTest{
+	{"", []byte("")},
+	{"abc", []byte("abc")},
+	{"AbC123", []byte("abc123")},
+	{"azAZ09_", []byte("azaz09_")},
+	{"longStrinGwitHmixofsmaLLandcAps", []byte("longstringwithmixofsmallandcaps")},
+	//{"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", []byte("long\u0250string\u0250with\u0250nonascii\u0250chars")},
+	//{"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", []byte("\u0251\u0251\u0251\u0251\u0251")}, // shrinks one byte per char
+	//{"A\u0080\U0010FFFF", []byte("a\u0080\U0010FFFF")},                           // test utf8.RuneSelf and utf8.MaxRune
+}
+
+// space is a run of ASCII white space used to pad inputs in trimSpaceTests.
+const space = "\t\v\r\f\n"
+
+// trimSpaceTests are the cases TestTrimSpace runs through TrimSpace. Cases
+// that need Unicode white space beyond Latin-1 are commented out.
+global trimSpaceTests = []StringTest{
+	{"", nil},
+	{"  a", []byte("a")},
+	{"b  ", []byte("b")},
+	{"abc", []byte("abc")},
+	{space + "abc" + space, []byte("abc")},
+	{" ", nil},
+	//{"\u3000 ", nil},
+	//{" \u3000", nil},
+	{" \t\r\n \t\t\r\r\n\n ", nil},
+	{" \t\r\n x\t\t\r\r\n\n ", []byte("x")},
+	//{" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", []byte("x\t\t\r\r\ny")},
+	{"1 \t\r\n2", []byte("1 \t\r\n2")},
+	{" x\x80", []byte("x\x80")},
+	{" x\xc0", []byte("x\xc0")},
+	{"x \xc0\xc0 ", []byte("x \xc0\xc0")},
+	{"x \xc0", []byte("x \xc0")},
+	{"x \xc0 ", []byte("x \xc0")},
+	{"x \xc0\xc0 ", []byte("x \xc0\xc0")},
+	{"x ☺\xc0\xc0 ", []byte("x ☺\xc0\xc0")},
+	{"x ☺ ", []byte("x ☺")},
+}
+
+// runStringTests applies f to the input of every test case and asserts that
+// the result matches the expected output, including whether it is nil.
+// funcName names f; the assertions here do not use it.
+func runStringTests(f: func([]byte) => []byte, funcName: string, testCases: []StringTest) {
+	for _, tc := range testCases {
+		got := f([]byte(tc.in))
+		// A nil result is required exactly when a nil result is expected.
+		assert((got == nil) == (tc.out == nil))
+		// The contents must match as well.
+		assert(Equal(got, tc.out))
+	}
+}
+
+// tenRunes returns a string made of ten consecutive copies of the UTF-8
+// encoding of r.
+func tenRunes(r: rune) => string {
+	// Encode r once. Converting it with byte(r) would truncate any rune
+	// above 0xFF to a single byte instead of producing its UTF-8 encoding.
+	enc := make([]byte, utf8.UTFMax)
+	enc = enc[:utf8.EncodeRune(enc, r)]
+
+	out := make([]byte, 0, 10*len(enc))
+	for i := 0; i < 10; i++ {
+		out = append(out, enc...)
+	}
+	return string(out)
+}
+
+// rot13 rotates ASCII letters by 13 places within their own case and returns
+// every other rune unchanged. Applying it twice yields the original rune.
+func rot13(r: rune) => rune {
+	const step = 13
+	if 'a' <= r && r <= 'z' {
+		offset := (r - 'a' + step) % 26
+		return offset + 'a'
+	}
+	if 'A' <= r && r <= 'Z' {
+		offset := (r - 'A' + step) % 26
+		return offset + 'A'
+	}
+	return r
+}
+
+// TestMap exercises Map with shrinking, rot13, double-rot13 and
+// rune-dropping mapping functions. The growth and invalid-rune cases are kept
+// below but commented out.
+func TestMap {
+	// Run a couple of awful growth/shrinkage tests
+	a := tenRunes('a')
+
+	// 1.  Grow. This triggers two reallocations in Map.
+	//maxRune := func(r: rune) => rune { return unicode.MaxRune }
+	//m := Map(maxRune, []byte(a))
+	//expect := tenRunes(unicode.MaxRune)
+	//if string(m) != expect {
+	//	t.Errorf("growing: expected %q got %q", expect, m)
+	//}
+
+	// 2. Shrink
+	minRune := func(r: rune) => rune { return 'a' }
+	m := Map(minRune, []byte(tenRunes(unicode.MaxRune)))
+	expect := a
+	if string(m) != expect {
+		assert(false)
+		//t.Errorf("shrinking: expected %q got %q", expect, m)
+	}
+
+	// 3. Rot13
+	m = Map(rot13, []byte("a to zed"))
+	expect = "n gb mrq"
+	if string(m) != expect {
+		assert(false)
+		//t.Errorf("rot13: expected %q got %q", expect, m)
+	}
+
+	// 4. Rot13^2
+	m = Map(rot13, Map(rot13, []byte("a to zed")))
+	expect = "a to zed"
+	if string(m) != expect {
+		assert(false)
+		//t.Errorf("rot13: expected %q got %q", expect, m)
+	}
+
+	// 5. Drop
+	dropNotLatin := func(r: rune) => rune {
+		// add by chai2010
+		// Keep every rune up to and including unicode.MaxLatin1 (U+00FF);
+		// this stands in for the unicode.Is(unicode.Latin, r) check below.
+		if r <= unicode.MaxLatin1 {
+			return r
+		}
+		//if unicode.Is(unicode.Latin, r) {
+		//	return r
+		//}
+		return -1
+	}
+	m = Map(dropNotLatin, []byte("Hello凹凹"))
+	expect = "Hello"
+	if string(m) != expect {
+		assert(false)
+		//t.Errorf("drop: expected %q got %q", expect, m)
+	}
+
+	// 6. Invalid rune
+	//invalidRune := func(r: rune) => rune {
+	//	return utf8.MaxRune + 1
+	//}
+	//m = Map(invalidRune, []byte("x"))
+	//expect = "\uFFFD"
+	//if string(m) != expect {
+	//	t.Errorf("invalidRune: expected %q got %q", expect, m)
+	//}
+}
+
+// TestToUpper runs ToUpper over the upperTests table.
+func TestToUpper {
+	runStringTests(ToUpper, "ToUpper", upperTests)
+}
+
+// TestToLower runs ToLower over the lowerTests table.
+func TestToLower {
+	runStringTests(ToLower, "ToLower", lowerTests)
+}
+
+// toValidUTF8Tests are the cases used by TestToValidUTF8: in is the input,
+// repl the replacement passed to ToValidUTF8, and out the expected result.
+global toValidUTF8Tests = []struct {
+	in:   string
+	repl: string
+	out:  string
+}{
+	{"", "\uFFFD", ""},
+	{"abc", "\uFFFD", "abc"},
+	{"\uFDDD", "\uFFFD", "\uFDDD"},
+	{"a\xffb", "\uFFFD", "a\uFFFDb"},
+	{"a\xffb\uFFFD", "X", "aXb\uFFFD"},
+	{"a☺\xffb☺\xC0\xAFc☺\xff", "", "a☺b☺c☺"},
+	{"\xC0\xAF", "\uFFFD", "\uFFFD"},
+	{"\xE0\x80\xAF", "\uFFFD", "\uFFFD"},
+	{"\xed\xa0\x80", "abc", "abc"},
+	{"\xed\xbf\xbf", "\uFFFD", "\uFFFD"},
+	{"\xF0\x80\x80\xaf", "☺", "☺"},
+	{"\xF8\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"},
+	{"\xFC\x80\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"},
+}
+
+// TestToValidUTF8 checks ToValidUTF8 against every entry of
+// toValidUTF8Tests.
+func TestToValidUTF8 {
+	for _, tc := range toValidUTF8Tests {
+		want := []byte(tc.out)
+		got := ToValidUTF8([]byte(tc.in), []byte(tc.repl))
+		// ToValidUTF8(tc.in, tc.repl) must produce tc.out.
+		assert(Equal(got, want))
+	}
+}
+
+// TestTrimSpace runs TrimSpace over the trimSpaceTests table.
+func TestTrimSpace { runStringTests(TrimSpace, "TrimSpace", trimSpaceTests) }
+
+// RepeatTest is one case for TestRepeat: Repeat(in, count) is expected to
+// equal out.
+type RepeatTest struct {
+	in, out: string
+	count:   int
+}
+
+// RepeatTests are the cases run by TestRepeat.
+global RepeatTests = []RepeatTest{
+	{"", "", 0},
+	{"", "", 1},
+	{"", "", 2},
+	{"-", "", 0},
+	{"-", "-", 1},
+	{"-", "----------", 10},
+	{"abc ", "abc abc abc ", 3},
+}
+
+// TestRepeat checks Repeat against every entry of RepeatTests.
+func TestRepeat {
+	for _, tc := range RepeatTests {
+		want := []byte(tc.out)
+		got := Repeat([]byte(tc.in), tc.count)
+		// Repeat(tc.in, tc.count) must produce tc.out.
+		assert(Equal(got, want))
+	}
+}
+
+// runesEqual reports whether a and b have the same length and hold the same
+// runes in the same order.
+func runesEqual(a, b: []rune) => bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for i := 0; i < len(a); i++ {
+		if a[i] != b[i] {
+			return false
+		}
+	}
+	return true
+}
+
+// RunesTest is one case for TestRunes: Runes(in) is expected to equal out.
+// lossy marks inputs that cannot be reassembled from the decoded runes.
+type RunesTest struct {
+	in:    string
+	out:   []rune
+	lossy: bool
+}
+
+// RunesTests are the cases run by TestRunes. The non-ASCII and invalid
+// UTF-8 cases are commented out.
+global RunesTests = []RunesTest{
+	{"", []rune{}, false},
+	{" ", []rune{32}, false},
+	{"ABC", []rune{65, 66, 67}, false},
+	{"abc", []rune{97, 98, 99}, false},
+	//{"\u65e5\u672c\u8a9e", []rune{26085, 26412, 35486}, false},
+	//{"ab\x80c", []rune{97, 98, 0xFFFD, 99}, true},
+	//{"ab\xc0c", []rune{97, 98, 0xFFFD, 99}, true},
+}
+
+// TestRunes checks Runes against every entry of RunesTests.
+func TestRunes {
+	for _, tc := range RunesTests {
+		got := Runes([]byte(tc.in))
+		// Runes(tc.in) must produce tc.out.
+		assert(runesEqual(got, tc.out))
+
+		// The reassembly check for non-lossy cases (string(got) == tc.in)
+		// is disabled for now:
+		//if !tc.lossy {
+		//	s := string(got)
+		//	if s != tc.in {
+		//		assert(false)
+		//	}
+		//}
+	}
+}
+
+// TrimTest is one case for TestTrim: f names the function to call, in is the
+// input, arg the cutset/prefix/suffix argument, and out the expected result.
+type TrimTest struct {
+	f:            string
+	in, arg, out: string
+}
+
+// trimTests are the cases run by TestTrim. The non-ASCII cases and the
+// TrimPrefix/TrimSuffix cases are commented out.
+global trimTests = []TrimTest{
+	{"Trim", "abba", "a", "bb"},
+	{"Trim", "abba", "ab", ""},
+	{"TrimLeft", "abba", "ab", ""},
+	{"TrimRight", "abba", "ab", ""},
+	{"TrimLeft", "abba", "a", "bba"},
+	{"TrimRight", "abba", "a", "abb"},
+	{"Trim", "<tag>", "<>", "tag"},
+	{"Trim", "* listitem", " *", "listitem"},
+	{"Trim", `"quote"`, `"`, "quote"},
+	//{"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
+	//{"Trim", "\x80test\xff", "\xff", "test"},
+	//{"Trim", " Ġ ", " ", "Ġ"},
+	//{"Trim", " Ġİ0", "0 ", "Ġİ"},
+	//empty string tests
+	{"Trim", "abba", "", "abba"},
+	{"Trim", "", "123", ""},
+	{"Trim", "", "", ""},
+	{"TrimLeft", "abba", "", "abba"},
+	{"TrimLeft", "", "123", ""},
+	{"TrimLeft", "", "", ""},
+	{"TrimRight", "abba", "", "abba"},
+	{"TrimRight", "", "123", ""},
+	{"TrimRight", "", "", ""},
+	//{"TrimRight", "☺\xc0", "☺", "☺\xc0"},
+	//{"TrimPrefix", "aabb", "a", "abb"}, // todo(chai2010): bug
+	//{"TrimPrefix", "aabb", "b", "aabb"},
+	//{"TrimSuffix", "aabb", "a", "aabb"},
+	//{"TrimSuffix", "aabb", "b", "aab"},
+}
+
+// TestTrim runs every entry of trimTests through the function it names and
+// compares the result with the expected output. On a mismatch the case
+// index, the actual value and the expected value are printed first.
+func TestTrim {
+	for i, tc := range trimTests {
+		// Exactly one of these is set, depending on whether the named
+		// function takes its second argument as a string or as a []byte.
+		byCutset: func([]byte, string) => []byte
+		byBytes: func([]byte, []byte) => []byte
+		switch tc.f {
+		case "Trim":
+			byCutset = Trim
+		case "TrimLeft":
+			byCutset = TrimLeft
+		case "TrimRight":
+			byCutset = TrimRight
+		case "TrimPrefix":
+			byBytes = TrimPrefix
+		case "TrimSuffix":
+			byBytes = TrimSuffix
+		default:
+			// The table names a function this test does not know.
+			assert(false)
+		}
+
+		got: string
+		if byCutset != nil {
+			got = string(byCutset([]byte(tc.in), tc.arg))
+		} else {
+			got = string(byBytes([]byte(tc.in), []byte(tc.arg)))
+		}
+		if got != tc.out {
+			println(i, got, tc.out)
+			assert(false)
+		}
+	}
+}
+
 func TestEqualFold {
 	for _, tt := range EqualFoldTests {
 		if out := EqualFold([]byte(tt.s), []byte(tt.t)); out != tt.out {
@@ -600,8 +992,8 @@ func TestEqualFold {
 }

 global EqualFoldTests = []struct {
-	s, t string
-	out  bool
+	s, t: string
+	out:  bool
 }{
 	{"abc", "abc", true},
 	{"ABcd", "ABcd", true},

--- a/waroot/src/unicode/const.wa
+++ b/waroot/src/unicode/const.wa
+// 版权 @2023 凹语言 作者。保留所有权利。
+
+// Limits and special values for Unicode code points.
+const (
+	MaxRune         = '\U0010FFFF' // Maximum valid Unicode code point.
+	ReplacementChar = '\uFFFD'     // Represents invalid code points.
+	MaxASCII        = '\u007F'     // Maximum ASCII value.
+	MaxLatin1       = '\u00FF'     // Maximum Latin-1 value.
+)
--- a/waroot/src/unicode/ctypes/ctypes.wa
+++ b/waroot/src/unicode/ctypes/ctypes.wa
+// 版权 @2023 凹语言 作者。保留所有权利。
+
+// IsAlnum reports whether IsAlpha(r) or IsDigit(r) holds.
+func IsAlnum(r: rune) => bool {
+	if IsAlpha(r) {
+		return true
+	}
+	return IsDigit(r)
+}
+
+// IsAlpha reports whether r is an ASCII letter, 'A'..'Z' or 'a'..'z'.
+func IsAlpha(r: rune) => bool {
+	// Setting bit 5 folds 'A'..'Z' onto 'a'..'z'. Both bounds are checked
+	// explicitly: rune is signed, so a single "x-'a' < 26" comparison would
+	// also accept every value below 'a'.
+	folded := r | 32
+	return folded >= 'a' && folded <= 'z'
+}
+
+// IsAsscii reports whether r is in the ASCII range 0..0x7f. Negative values
+// are not ASCII.
+func IsAsscii(r: rune) => bool {
+	return r >= 0 && r <= 0x7f
+}
+
+// IsBlank reports whether r is a space or a horizontal tab.
+func IsBlank(r: rune) => bool {
+	switch r {
+	case ' ', '\t':
+		return true
+	}
+	return false
+}
+
+// IsCntrl reports whether r is an ASCII control character: 0x00..0x1f or
+// 0x7f (DEL). Negative values are not control characters.
+func IsCntrl(r: rune) => bool {
+	return (r >= 0 && r < 0x20) || r == 0x7f
+}
+
+// IsDigit reports whether r is an ASCII decimal digit, '0'..'9'.
+func IsDigit(r: rune) => bool {
+	return '0' <= r && r <= '9'
+}
+
+// IsGraph reports whether r is an ASCII character with a visible glyph,
+// 0x21 ('!') .. 0x7e ('~'). Space and control characters are excluded.
+func IsGraph(r: rune) => bool {
+	// Both bounds are checked explicitly: rune is signed, so the C idiom
+	// "r-0x21 < 0x5e" would also accept everything below 0x21.
+	return r >= 0x21 && r <= 0x7e
+}
+
+// IsLower reports whether r is an ASCII lower-case letter, 'a'..'z'.
+func IsLower(r: rune) => bool {
+	return 'a' <= r && r <= 'z'
+}
+
+// IsPrint reports whether r is a printable ASCII character, 0x20 (space) ..
+// 0x7e ('~'). Control characters are excluded.
+func IsPrint(r: rune) => bool {
+	// Both bounds are checked explicitly: rune is signed, so the C idiom
+	// "r-0x20 < 0x5f" would also accept everything below 0x20.
+	return r >= 0x20 && r <= 0x7e
+}
+
+// IsPunct reports whether r is an ASCII punctuation character, that is, a
+// visible ASCII character that is neither a letter nor a digit.
+func IsPunct(r: rune) => bool {
+	// The visible ASCII range '!'..'~' minus digits and letters leaves
+	// these four runs.
+	return (r >= '!' && r <= '/') ||
+		(r >= ':' && r <= '@') ||
+		(r >= '[' && r <= '`') ||
+		(r >= '{' && r <= '~')
+}
+
+// IsSpace reports whether r is one of '\t', '\n', '\v', '\f', '\r', ' ',
+// U+0085 or U+00A0.
+func IsSpace(r: rune) => bool {
+	if r >= '\t' && r <= '\r' {
+		// '\t', '\n', '\v', '\f', '\r' are the contiguous codes 9..13.
+		return true
+	}
+	return r == ' ' || r == 0x85 || r == 0xA0
+}
+
+// IsUpper reports whether r is an ASCII upper-case letter, 'A'..'Z'.
+func IsUpper(r: rune) => bool {
+	return 'A' <= r && r <= 'Z'
+}
+
+// IsXdigit reports whether r is an ASCII hexadecimal digit: '0'..'9',
+// 'A'..'F' or 'a'..'f'.
+func IsXdigit(r: rune) => bool {
+	if '0' <= r && r <= '9' {
+		return true
+	}
+	if 'A' <= r && r <= 'F' {
+		return true
+	}
+	return 'a' <= r && r <= 'f'
+}
+
+// ToAscii returns r with everything but its low seven bits cleared.
+func ToAscii(r: rune) => rune {
+	const low7 = 0x7f
+	return r & low7
+}
+
+// ToLower maps the ASCII letters 'A'..'Z' to 'a'..'z' and returns every
+// other rune unchanged.
+func ToLower(r: rune) => rune {
+	if 'A' <= r && r <= 'Z' {
+		// Bit 5 is clear for 'A'..'Z', so adding 32 is the same as setting it.
+		return r + ('a' - 'A')
+	}
+	return r
+}
+
+// ToUpper maps the ASCII letters 'a'..'z' to 'A'..'Z' and returns every
+// other rune unchanged.
+func ToUpper(r: rune) => rune {
+	if 'a' <= r && r <= 'z' {
+		// Bit 5 is set for 'a'..'z', so subtracting 32 is the same as
+		// clearing it.
+		return r - ('a' - 'A')
+	}
+	return r
+}
--- a/waroot/src/unicode/ctypes/ctypes_test.wa
+++ b/waroot/src/unicode/ctypes/ctypes_test.wa
+// 版权 @2023 凹语言 作者。保留所有权利。
--- a/waroot/src/unicode/digit.wa
+++ b/waroot/src/unicode/digit.wa
-// 版权 @2023 凹语言 作者。保留所有权利。
-
-// IsDigit reports whether the rune is a decimal digit.
-func IsDigit(r: rune) => bool {
-	if r <= MaxLatin1 {
-		return '0' <= r && r <= '9'
-	}
-	return isExcludingLatin(Digit, r)
-}
--- a/waroot/src/unicode/graphic.wa
+++ b/waroot/src/unicode/graphic.wa
-// 版权 @2023 凹语言 作者。保留所有权利。
-
-// Bit masks for each code point under U+0100, for fast lookup.
-const (
-	pC     = 1 << iota // a control character.
-	pP                 // a punctuation character.
-	pN                 // a numeral.
-	pS                 // a symbolic character.
-	pZ                 // a spacing character.
-	pLu                // an upper-case letter.
-	pLl                // a lower-case letter.
-	pp                 // a printable character according to Go's definition.
-	pg     = pp | pZ   // a graphical character according to the Unicode definition.
-	pLo    = pLl | pLu // a letter that is neither upper nor lower case.
-	pLmask = pLo
-)
-
-// GraphicRanges defines the set of graphic characters according to Unicode.
-global GraphicRanges = []*RangeTable{
-	L, M, N, P, S, Zs,
-}
-
-// PrintRanges defines the set of printable characters according to Go.
-// ASCII space, U+0020, is handled separately.
-global PrintRanges = []*RangeTable{
-	L, M, N, P, S,
-}
-
-// IsGraphic reports whether the rune is defined as a Graphic by Unicode.
-// Such characters include letters, marks, numbers, punctuation, symbols, and
-// spaces, from categories L, M, N, P, S, Zs.
-func IsGraphic(r: rune) => bool {
-	// We convert to uint32 to avoid the extra test for negative,
-	// and in the index we convert to uint8 to avoid the range check.
-	if uint32(r) <= MaxLatin1 {
-		return properties[uint8(r)]&pg != 0
-	}
-	return In(r, GraphicRanges...)
-}
-
-// IsPrint reports whether the rune is defined as printable by Go. Such
-// characters include letters, marks, numbers, punctuation, symbols, and the
-// ASCII space character, from categories L, M, N, P, S and the ASCII space
-// character. This categorization is the same as IsGraphic except that the
-// only spacing character is ASCII space, U+0020.
-func IsPrint(r: rune) => bool {
-	if uint32(r) <= MaxLatin1 {
-		return properties[uint8(r)]&pp != 0
-	}
-	return In(r, PrintRanges...)
-}
-
-// IsOneOf reports whether the rune is a member of one of the ranges.
-// The function "In" provides a nicer signature and should be used in preference to IsOneOf.
-func IsOneOf(ranges: []*RangeTable, r: rune) => bool {
-	for _, inside := range ranges {
-		if Is(inside, r) {
-			return true
-		}
-	}
-	return false
-}
-
-// In reports whether the rune is a member of one of the ranges.
-func In(r: rune, ranges: ...*RangeTable) => bool {
-	for _, inside := range ranges {
-		if Is(inside, r) {
-			return true
-		}
-	}
-	return false
-}
-
-// IsControl reports whether the rune is a control character.
-// The C (Other) Unicode category includes more code points
-// such as surrogates; use Is(C, r) to test for them.
-func IsControl(r: rune) => bool {
-	if uint32(r) <= MaxLatin1 {
-		return properties[uint8(r)]&pC != 0
-	}
-	// All control characters are < MaxLatin1.
-	return false
-}
-
-// IsLetter reports whether the rune is a letter (category L).
-func IsLetter(r: rune) => bool {
-	if uint32(r) <= MaxLatin1 {
-		return properties[uint8(r)]&(pLmask) != 0
-	}
-	return isExcludingLatin(Letter, r)
-}
-
-// IsMark reports whether the rune is a mark character (category M).
-func IsMark(r: rune) => bool {
-	// There are no mark characters in Latin-1.
-	return isExcludingLatin(Mark, r)
-}
-
-// IsNumber reports whether the rune is a number (category N).
-func IsNumber(r: rune) => bool {
-	if uint32(r) <= MaxLatin1 {
-		return properties[uint8(r)]&pN != 0
-	}
-	return isExcludingLatin(Number, r)
-}
-
-// IsPunct reports whether the rune is a Unicode punctuation character
-// (category P).
-func IsPunct(r: rune) => bool {
-	if uint32(r) <= MaxLatin1 {
-		return properties[uint8(r)]&pP != 0
-	}
-	return Is(Punct, r)
-}
-
-// IsSpace reports whether the rune is a space character as defined
-// by Unicode's White Space property; in the Latin-1 space
-// this is
-//	'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
-// Other definitions of spacing characters are set by category
-// Z and property Pattern_White_Space.
-func IsSpace(r: rune) => bool {
-	// This property isn't the same as Z; special-case it.
-	if uint32(r) <= MaxLatin1 {
-		switch r {
-		case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
-			return true
-		}
-		return false
-	}
-	return isExcludingLatin(White_Space, r)
-}
-
-// IsSymbol reports whether the rune is a symbolic character.
-func IsSymbol(r: rune) => bool {
-	if uint32(r) <= MaxLatin1 {
-		return properties[uint8(r)]&pS != 0
-	}
-	return isExcludingLatin(Symbol, r)
-}
--- a/waroot/src/unicode/letter.wa
+++ b/waroot/src/unicode/letter.wa
-// 版权 @2023 凹语言 作者。保留所有权利。
-
-// Package unicode provides data and functions to test some properties of
-// Unicode code points.
-
-const (
-	MaxRune         = '\U0010FFFF' // Maximum valid Unicode code point.
-	ReplacementChar = '\uFFFD'     // Represents invalid code points.
-	MaxASCII        = '\u007F'     // maximum ASCII value.
-	MaxLatin1       = '\u00FF'     // maximum Latin-1 value.
-)
-
-// RangeTable defines a set of Unicode code points by listing the ranges of
-// code points within the set. The ranges are listed in two slices
-// to save space: a slice of 16-bit ranges and a slice of 32-bit ranges.
-// The two slices must be in sorted order and non-overlapping.
-// Also, R32 should contain only values >= 0x10000 (1<<16).
-type RangeTable struct {
-	R16         []Range16
-	R32         []Range32
-	LatinOffset int // number of entries in R16 with Hi <= MaxLatin1
-}
-
-// Range16 represents of a range of 16-bit Unicode code points. The range runs from Lo to Hi
-// inclusive and has the specified stride.
-type Range16 struct {
-	Lo     uint16
-	Hi     uint16
-	Stride uint16
-}
-
-// Range32 represents of a range of Unicode code points and is used when one or
-// more of the values will not fit in 16 bits. The range runs from Lo to Hi
-// inclusive and has the specified stride. Lo and Hi must always be >= 1<<16.
-type Range32 struct {
-	Lo     uint32
-	Hi     uint32
-	Stride uint32
-}
-
-// CaseRange represents a range of Unicode code points for simple (one
-// code point to one code point) case conversion.
-// The range runs from Lo to Hi inclusive, with a fixed stride of 1. Deltas
-// are the number to add to the code point to reach the code point for a
-// different case for that character. They may be negative. If zero, it
-// means the character is in the corresponding case. There is a special
-// case representing sequences of alternating corresponding Upper and Lower
-// pairs. It appears with a fixed Delta of
-//	{UpperLower, UpperLower, UpperLower}
-// The constant UpperLower has an otherwise impossible delta value.
-type CaseRange struct {
-	Lo    uint32 // first code point covered by this entry
-	Hi    uint32 // last code point covered by this entry (inclusive)
-	Delta d // per-case offsets, indexed by UpperCase, LowerCase or TitleCase
-}
-
-// SpecialCase represents language-specific case mappings such as Turkish.
-// Methods of SpecialCase customize (by overriding) the standard mappings:
-// ToUpper, ToTitle and ToLower look the rune up in these ranges first and
-// fall back to the package-level function only when no entry covers it.
-type SpecialCase []CaseRange
-
-// BUG(r): There is no mechanism for full case folding, that is, for
-// characters that involve multiple runes in the input or output.
-
-// Indices into the Delta arrays inside CaseRanges for case mapping.
-// They are also the values accepted as the _case argument of To.
-const (
-	UpperCase = iota // selects the delta that leads to the upper case form
-	LowerCase // selects the delta that leads to the lower case form
-	TitleCase // selects the delta that leads to the title case form
-	MaxCase // number of case selectors; also the length of the d array
-)
-
-// d is the type of CaseRange.Delta: one rune offset per case selector
-// (UpperCase, LowerCase, TitleCase).
-type d [MaxCase]rune // to make the CaseRanges text shorter
-
-// If the Delta field of a CaseRange is UpperLower, it means
-// this CaseRange represents a sequence of the form (say)
-// Upper Lower Upper Lower.
-// The value lies just above MaxRune, and to recognises the marker by
-// testing for a delta greater than MaxRune.
-const (
-	UpperLower = MaxRune + 1 // (Cannot be a valid delta.)
-)
-
-// linearMax is the maximum table size for which is16 and is32 scan the
-// ranges linearly instead of using binary search. (is16 additionally scans
-// linearly whenever the rune is <= MaxLatin1, whatever the table size.)
-// Derived by running 'go test -calibrate'.
-const linearMax = 18
-
-// is16 reports whether r belongs to one of the sorted 16-bit ranges.
-// Short tables, and any lookup of a rune up to MaxLatin1, are scanned from
-// the front; everything else is located by binary search.
-func is16(ranges: []Range16, r: uint16) => bool {
-	if r <= MaxLatin1 || len(ranges) <= linearMax {
-		for idx := range ranges {
-			entry := &ranges[idx]
-			if r < entry.Lo {
-				// The table is sorted, so every later range starts above r too.
-				return false
-			}
-			if r > entry.Hi {
-				continue
-			}
-			// r lies inside [Lo, Hi]; it is a member if it sits on the stride.
-			return entry.Stride == 1 || (r-entry.Lo)%entry.Stride == 0
-		}
-		return false
-	}
-
-	// Binary search for the range that could contain r.
-	left := 0
-	right := len(ranges)
-	for left < right {
-		mid := left + (right-left)/2
-		entry := &ranges[mid]
-		if r < entry.Lo {
-			right = mid
-			continue
-		}
-		if r > entry.Hi {
-			left = mid + 1
-			continue
-		}
-		return entry.Stride == 1 || (r-entry.Lo)%entry.Stride == 0
-	}
-	return false
-}
-
-// is32 reports whether r belongs to one of the sorted 32-bit ranges.
-// Tables of at most linearMax entries are scanned from the front; larger
-// ones are searched by bisection.
-func is32(ranges: []Range32, r: uint32) => bool {
-	if len(ranges) <= linearMax {
-		for idx := range ranges {
-			entry := &ranges[idx]
-			if r < entry.Lo {
-				// The table is sorted, so every later range starts above r too.
-				return false
-			}
-			if r > entry.Hi {
-				continue
-			}
-			// r lies inside [Lo, Hi]; it is a member if it sits on the stride.
-			return entry.Stride == 1 || (r-entry.Lo)%entry.Stride == 0
-		}
-		return false
-	}
-
-	// Binary search for the range that could contain r.
-	left := 0
-	right := len(ranges)
-	for left < right {
-		mid := left + (right-left)/2
-		entry := ranges[mid]
-		if r < entry.Lo {
-			right = mid
-			continue
-		}
-		if r > entry.Hi {
-			left = mid + 1
-			continue
-		}
-		return entry.Stride == 1 || (r-entry.Lo)%entry.Stride == 0
-	}
-	return false
-}
-
-// Is reports whether the rune r is a member of the given range table.
-func Is(rangeTab: *RangeTable, r: rune) => bool {
-	small := rangeTab.R16
-	// The comparison is done on uint32 values so that a negative rune becomes
-	// a huge number and never selects the 16-bit table.
-	if n := len(small); n > 0 && uint32(r) <= uint32(small[n-1].Hi) {
-		return is16(small, uint16(r))
-	}
-
-	large := rangeTab.R32
-	if len(large) == 0 || r < rune(large[0].Lo) {
-		return false
-	}
-	return is32(large, uint32(r))
-}
-
-// isExcludingLatin works like Is but skips the first LatinOffset entries of
-// the 16-bit table, i.e. the entries whose Hi is <= MaxLatin1.
-func isExcludingLatin(rangeTab: *RangeTable, r: rune) => bool {
-	small := rangeTab.R16
-	skip := rangeTab.LatinOffset
-	// The comparison is done on uint32 values so that a negative rune becomes
-	// a huge number and never selects the 16-bit table.
-	if len(small) > skip && uint32(r) <= uint32(small[len(small)-1].Hi) {
-		return is16(small[skip:], uint16(r))
-	}
-
-	large := rangeTab.R32
-	if len(large) == 0 || r < rune(large[0].Lo) {
-		return false
-	}
-	return is32(large, uint32(r))
-}
-
-// IsUpper reports whether the rune is an upper case letter.
-// Runes up to MaxLatin1 are answered from the properties table; all others
-// are looked up in the Upper range table.
-func IsUpper(r: rune) => bool {
-	// Converting to uint32 sends negative runes down the range-table path.
-	if uint32(r) > MaxLatin1 {
-		return isExcludingLatin(Upper, r)
-	}
-	letterBits := properties[uint8(r)] & pLmask
-	return letterBits == pLu
-}
-
-// IsLower reports whether the rune is a lower case letter.
-// Runes up to MaxLatin1 are answered from the properties table; all others
-// are looked up in the Lower range table.
-func IsLower(r: rune) => bool {
-	// Converting to uint32 sends negative runes down the range-table path.
-	if uint32(r) > MaxLatin1 {
-		return isExcludingLatin(Lower, r)
-	}
-	letterBits := properties[uint8(r)] & pLmask
-	return letterBits == pLl
-}
-
-// IsTitle reports whether the rune is a title case letter.
-// Nothing at or below MaxLatin1 is reported as title case, so only larger
-// runes reach the Title range table.
-func IsTitle(r: rune) => bool {
-	return r > MaxLatin1 && isExcludingLatin(Title, r)
-}
-
-// to maps r to the case selected by _case using the sorted table caseRange.
-// The second result reports whether caseRange had an entry covering r; when
-// it has none, r is returned unchanged. An invalid _case yields
-// ReplacementChar and false.
-func to(_case: int, r: rune, caseRange: []CaseRange) => (mappedRune: rune, foundMapping: bool) {
-	if _case >= MaxCase || _case < 0 {
-		return ReplacementChar, false // as reasonable an error as any
-	}
-
-	// Bisect the table looking for the entry whose [Lo, Hi] contains r.
-	left := 0
-	right := len(caseRange)
-	for left < right {
-		mid := left + (right-left)/2
-		entry := caseRange[mid]
-		first := rune(entry.Lo)
-		if r < first {
-			right = mid
-			continue
-		}
-		if r > rune(entry.Hi) {
-			left = mid + 1
-			continue
-		}
-
-		shift := entry.Delta[_case]
-		if shift <= MaxRune {
-			// Ordinary entry: the delta is simply added to the rune.
-			return r + shift, true
-		}
-		// UpperLower entry: the run alternates Upper, Lower, Upper, Lower and
-		// always begins with an upper case letter, so the real deltas are
-		//	{0, 1, 0}    for an upper case letter (its lower form is next)
-		//	{-1, 0, -1}  for a lower case letter (upper/title are previous)
-		// Letters at even offsets from Lo are upper case and those at odd
-		// offsets are lower case. Clearing the low bit of the offset and then
-		// setting it from _case gives the answer, because UpperCase and
-		// TitleCase are even while LowerCase is odd.
-		offset := (r - first) &^ 1
-		return first + (offset | rune(_case&1)), true
-	}
-	return r, false
-}
-
-// To maps the rune to the specified case: UpperCase, LowerCase, or TitleCase.
-// The lookup uses the package table CaseRanges; whether an entry was found
-// is not reported to the caller.
-func To(_case: int, r: rune) => rune {
-	mapped, _ := to(_case, r, CaseRanges)
-	return mapped
-}
-
-// ToUpper maps the rune to upper case.
-// Runes up to MaxASCII are converted arithmetically; anything larger goes
-// through To with UpperCase.
-func ToUpper(r: rune) => rune {
-	if r > MaxASCII {
-		return To(UpperCase, r)
-	}
-	if r < 'a' || r > 'z' {
-		return r
-	}
-	return r - ('a' - 'A')
-}
-
-// ToLower maps the rune to lower case.
-// Runes up to MaxASCII are converted arithmetically; anything larger goes
-// through To with LowerCase.
-func ToLower(r: rune) => rune {
-	if r > MaxASCII {
-		return To(LowerCase, r)
-	}
-	if r < 'A' || r > 'Z' {
-		return r
-	}
-	return r + ('a' - 'A')
-}
-
-// ToTitle maps the rune to title case.
-// For runes up to MaxASCII title case is the same as upper case and is
-// computed arithmetically; anything larger goes through To with TitleCase.
-func ToTitle(r: rune) => rune {
-	if r > MaxASCII {
-		return To(TitleCase, r)
-	}
-	if r < 'a' || r > 'z' {
-		return r
-	}
-	return r - ('a' - 'A')
-}
-
-// ToUpper maps the rune to upper case, preferring the special mapping.
-// The package-level ToUpper is used only when the special table has no
-// entry for r.
-func SpecialCase.ToUpper(r: rune) => rune {
-	mapped, found := to(UpperCase, r, []CaseRange(*this))
-	if found || mapped != r {
-		return mapped
-	}
-	return ToUpper(r)
-}
-
-// ToTitle maps the rune to title case, preferring the special mapping.
-// The package-level ToTitle is used only when the special table has no
-// entry for r.
-func SpecialCase.ToTitle(r: rune) => rune {
-	mapped, found := to(TitleCase, r, []CaseRange(*this))
-	if found || mapped != r {
-		return mapped
-	}
-	return ToTitle(r)
-}
-
-// ToLower maps the rune to lower case, preferring the special mapping.
-// The package-level ToLower is used only when the special table has no
-// entry for r.
-func SpecialCase.ToLower(r: rune) => rune {
-	mapped, found := to(LowerCase, r, []CaseRange(*this))
-	if found || mapped != r {
-		return mapped
-	}
-	return ToLower(r)
-}
-
-// foldPair is the element type of the caseOrbit table used by SimpleFold:
-// From is the key that SimpleFold binary-searches on and To is the rune it
-// returns for that key.
-// caseOrbit is defined in tables.go as []foldPair (NOTE(review): that file
-// is outside this view; confirm where the table lives in this tree).
-// Right now all the entries fit in 16 bits, so 16-bit fields are used. If
-// that changes, compilation will fail (the constants in the composite
-// literal will not fit) and the types here can change to a 32-bit type.
-type foldPair struct {
-	From :u16
-	To   :u16
-}
-
-// SimpleFold iterates over Unicode code points equivalent under
-// the Unicode-defined simple case folding. Among the code points
-// equivalent to r (including r itself), SimpleFold returns the
-// smallest rune > r if one exists, or else the smallest rune >= 0.
-// If r is not a valid Unicode code point, SimpleFold(r) returns r.
-//
-// For example:
-//	SimpleFold('A') = 'a'
-//	SimpleFold('a') = 'A'
-//
-//	SimpleFold('K') = 'k'
-//	SimpleFold('k') = '\u212A' (Kelvin symbol, K)
-//	SimpleFold('\u212A') = 'K'
-//
-//	SimpleFold('1') = '1'
-//
-//	SimpleFold(-2) = -2
-//
-func SimpleFold(r: rune) => rune {
-	if r > MaxRune || r < 0 {
-		return r
-	}
-
-	// Low code points are answered straight from the asciiFold table.
-	if int(r) < len(asciiFold) {
-		return rune(asciiFold[r])
-	}
-
-	// Find the first caseOrbit entry whose From is not below r.
-	orbitLen := len(caseOrbit)
-	left := 0
-	right := orbitLen
-	for left < right {
-		mid := left + (right-left)/2
-		if r <= rune(caseOrbit[mid].From) {
-			right = mid
-		} else {
-			left = mid + 1
-		}
-	}
-	if left < orbitLen {
-		if pair := caseOrbit[left]; rune(pair.From) == r {
-			return rune(pair.To)
-		}
-	}
-
-	// No folding specified. This is a one- or two-element
-	// equivalence class containing r and ToLower(r)
-	// and ToUpper(r) if they are different from r.
-	lower := ToLower(r)
-	if lower == r {
-		return ToUpper(r)
-	}
-	return lower
-}
--- a/waroot/src/unicode/zz_tables.wa
+++ b/waroot/src/unicode/zz_tables.wa