提交 69aaa0d6 编写于 作者: chai2010's avatar chai2010

bytes 包补充函数和测试

上级 34dbf326
// 版权 @2023 凹语言 作者。保留所有权利。
import (
//"unicode" => _
"unicode/utf8"
)
......@@ -295,6 +296,17 @@ func SplitN(s, sep: []byte, n: int) => [][]byte {
return genSplit(s, sep, 0, n)
}
// SplitAfterN slices s into subslices after each instance of sep and
// returns a slice of those subslices.
// If sep is empty, SplitAfterN splits after each UTF-8 sequence.
// The count determines the number of subslices to return:
// n > 0: at most n subslices; the last subslice will be the unsplit remainder.
// n == 0: the result is nil (zero subslices)
// n < 0: all subslices
func SplitAfterN(s, sep: []byte, n: int) => [][]byte {
return genSplit(s, sep, len(sep), n)
}
// Generic split: splits after each instance of sep,
// including sepSave bytes of sep in the subslices.
func genSplit(s, sep: []byte, sepSave, n: int) => [][]byte {
......@@ -324,6 +336,132 @@ func genSplit(s, sep: []byte, sepSave, n: int) => [][]byte {
return a[:i+1]
}
global asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
// Fields interprets s as a sequence of UTF-8-encoded code points.
// It splits the slice s around each instance of one or more consecutive white space
// characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an
// empty slice if s contains only white space.
func Fields(s: []byte) => [][]byte {
if true {
return nil
}
/*
// First count the fields.
// This is an exact count if s is ASCII, otherwise it is an approximation.
n := 0
wasSpace := 1
// setBits is used to track which bits are set in the bytes of s.
setBits := uint8(0)
for i := 0; i < len(s); i++ {
r := s[i]
setBits |= r
isSpace := int(asciiSpace[r])
n += wasSpace & ^isSpace
wasSpace = isSpace
}
if setBits >= utf8.RuneSelf {
// Some runes in the input slice are not ASCII.
return FieldsFunc(s, unicode.IsSpace)
}
// ASCII fast path
a := make([][]byte, n)
na := 0
fieldStart := 0
i := 0
// Skip spaces in the front of the input.
for i < len(s) && asciiSpace[s[i]] != 0 {
i++
}
fieldStart = i
for i < len(s) {
if asciiSpace[s[i]] == 0 {
i++
continue
}
a[na] = s[fieldStart:i:i]
na++
i++
// Skip spaces in between fields.
for i < len(s) && asciiSpace[s[i]] != 0 {
i++
}
fieldStart = i
}
if fieldStart < len(s) { // Last field might end at EOF.
a[na] = s[fieldStart:len(s):len(s)]
}
return a
*/
return nil
}
// FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
// It splits the slice s at each run of code points c satisfying f(c) and
// returns a slice of subslices of s. If all code points in s satisfy f(c), or
// len(s) == 0, an empty slice is returned.
//
// FieldsFunc makes no guarantees about the order in which it calls f(c)
// and assumes that f always returns the same value for a given c.
func FieldsFunc(s: []byte, f: func(rune) => bool) => [][]byte {
if true {
return nil
}
/*
// A span is used to record a slice of s of the form s[start:end].
// The start index is inclusive and the end index is exclusive.
type span struct {
start: int
end: int
}
spans := make([]span, 0, 32)
// Find the field start and end indices.
// Doing this in a separate pass (rather than slicing the string s
// and collecting the result substrings right away) is significantly
// more efficient, possibly due to cache effects.
start := -1 // valid span start if >= 0
for i := 0; i < len(s); {
size := 1
r := rune(s[i])
if r >= utf8.RuneSelf {
r, size = utf8.DecodeRune(s[i:])
}
if f(r) {
if start >= 0 {
spans = append(spans, span{start, i})
start = -1
}
} else {
if start < 0 {
start = i
}
}
i += size
}
// Last field might end at EOF.
if start >= 0 {
spans = append(spans, span{start, len(s)})
}
// Create subslices from recorded field indices.
a := make([][]byte, len(spans))
for i, span := range spans {
a[i] = s[span.start:span.end:span.end]
}
return a
*/
return nil
}
// explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes),
// up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes.
......
......@@ -484,6 +484,109 @@ func TestSplit {
}
}
global splitaftertests = []SplitTest{
{abcd, "a", -1, []string{"a", "bcd"}},
{abcd, "z", -1, []string{"abcd"}},
{abcd, "", -1, []string{"a", "b", "c", "d"}},
{commas, ",", -1, []string{"1,", "2,", "3,", "4"}},
{dots, "...", -1, []string{"1...", ".2...", ".3...", ".4"}},
{faces, "☹", -1, []string{"☺☻☹", ""}},
{faces, "~", -1, []string{faces}},
{faces, "", -1, []string{"☺", "☻", "☹"}},
{"1 2 3 4", " ", 3, []string{"1 ", "2 ", "3 4"}},
{"1 2 3", " ", 3, []string{"1 ", "2 ", "3"}},
{"1 2", " ", 3, []string{"1 ", "2"}},
{"123", "", 2, []string{"1", "23"}},
{"123", "", 17, []string{"1", "2", "3"}},
}
func TestSplitAfter {
for _, tt := range splitaftertests {
a := SplitAfterN([]byte(tt.s), []byte(tt.sep), tt.n)
// Appending to the results should not change future results.
x: []byte
for _, v := range a {
x = append(v, 'z')
}
result := sliceOfString(a)
if !eq(result, tt.a) {
assert(false)
//t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a)
//continue
}
if want := tt.a[len(tt.a)-1] + "z"; string(x) != want {
assert(false)
//t.Errorf("last appended result was %s; want %s", x, want)
}
s := Join(a, nil)
if string(s) != tt.s {
assert(false)
//t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s)
}
if tt.n < 0 {
//b := SplitAfter([]byte(tt.s), []byte(tt.sep))
//if !reflect.DeepEqual(a, b) {
// t.Errorf("SplitAfter disagrees withSplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
//}
}
}
}
type FieldsTest struct {
s: string
a: []string
}
global fieldstests = []FieldsTest{
{"", []string{}},
{" ", []string{}},
{" \t ", []string{}},
{" abc ", []string{"abc"}},
{"1 2 3 4", []string{"1", "2", "3", "4"}},
{"1 2 3 4", []string{"1", "2", "3", "4"}},
{"1\t\t2\t\t3\t4", []string{"1", "2", "3", "4"}},
{"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}},
{"\u2000\u2001\u2002", []string{}},
{"\n™\t™\n", []string{"™", "™"}},
{faces, []string{faces}},
}
func _TestFields {
for _, tt := range fieldstests {
b := []byte(tt.s)
a := Fields(b)
// Appending to the results should not change future results.
x: []byte
for _, v := range a {
x = append(v, 'z')
}
result := sliceOfString(a)
if !eq(result, tt.a) {
assert(false)
//t.Errorf("Fields(%q) = %v; want %v", tt.s, a, tt.a)
//continue
}
if string(b) != tt.s {
//t.Errorf("slice changed to %s; want %s", string(b), tt.s)
}
if len(tt.a) > 0 {
assert(false)
if want := tt.a[len(tt.a)-1] + "z"; string(x) != want {
assert(false)
// t.Errorf("last appended result was %s; want %s", x, want)
}
}
}
}
func TestEqualFold {
for _, tt := range EqualFoldTests {
if out := EqualFold([]byte(tt.s), []byte(tt.t)); out != tt.out {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册