Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
wa-lang
wa
提交
69aaa0d6
wa
项目概览
wa-lang
/
wa
9 个月 前同步成功
通知
68
Star
655
Fork
45
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
wa
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
69aaa0d6
编写于
8月 21, 2023
作者:
chai2010
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
bytes 包补充函数和测试
上级
34dbf326
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
241 addition
and
0 deletion
+241
-0
waroot/src/bytes/bytes.wa
waroot/src/bytes/bytes.wa
+138
-0
waroot/src/bytes/bytes_test.wa
waroot/src/bytes/bytes_test.wa
+103
-0
未找到文件。
waroot/src/bytes/bytes.wa
浏览文件 @
69aaa0d6
// 版权 @2023 凹语言 作者。保留所有权利。
import (
//"unicode" => _
"unicode/utf8"
)
...
...
@@ -295,6 +296,17 @@ func SplitN(s, sep: []byte, n: int) => [][]byte {
return genSplit(s, sep, 0, n)
}
// SplitAfterN slices s into subslices after each instance of sep and
// returns a slice of those subslices.
// If sep is empty, SplitAfterN splits after each UTF-8 sequence.
// The count determines the number of subslices to return:
// n > 0: at most n subslices; the last subslice will be the unsplit remainder.
// n == 0: the result is nil (zero subslices)
// n < 0: all subslices
func SplitAfterN(s, sep: []byte, n: int) => [][]byte {
return genSplit(s, sep, len(sep), n)
}
// Generic split: splits after each instance of sep,
// including sepSave bytes of sep in the subslices.
func genSplit(s, sep: []byte, sepSave, n: int) => [][]byte {
...
...
@@ -324,6 +336,132 @@ func genSplit(s, sep: []byte, sepSave, n: int) => [][]byte {
return a[:i+1]
}
global asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
// Fields interprets s as a sequence of UTF-8-encoded code points.
// It splits the slice s around each instance of one or more consecutive white space
// characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an
// empty slice if s contains only white space.
func Fields(s: []byte) => [][]byte {
if true {
return nil
}
/*
// First count the fields.
// This is an exact count if s is ASCII, otherwise it is an approximation.
n := 0
wasSpace := 1
// setBits is used to track which bits are set in the bytes of s.
setBits := uint8(0)
for i := 0; i < len(s); i++ {
r := s[i]
setBits |= r
isSpace := int(asciiSpace[r])
n += wasSpace & ^isSpace
wasSpace = isSpace
}
if setBits >= utf8.RuneSelf {
// Some runes in the input slice are not ASCII.
return FieldsFunc(s, unicode.IsSpace)
}
// ASCII fast path
a := make([][]byte, n)
na := 0
fieldStart := 0
i := 0
// Skip spaces in the front of the input.
for i < len(s) && asciiSpace[s[i]] != 0 {
i++
}
fieldStart = i
for i < len(s) {
if asciiSpace[s[i]] == 0 {
i++
continue
}
a[na] = s[fieldStart:i:i]
na++
i++
// Skip spaces in between fields.
for i < len(s) && asciiSpace[s[i]] != 0 {
i++
}
fieldStart = i
}
if fieldStart < len(s) { // Last field might end at EOF.
a[na] = s[fieldStart:len(s):len(s)]
}
return a
*/
return nil
}
// FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
// It splits the slice s at each run of code points c satisfying f(c) and
// returns a slice of subslices of s. If all code points in s satisfy f(c), or
// len(s) == 0, an empty slice is returned.
//
// FieldsFunc makes no guarantees about the order in which it calls f(c)
// and assumes that f always returns the same value for a given c.
func FieldsFunc(s: []byte, f: func(rune) => bool) => [][]byte {
if true {
return nil
}
/*
// A span is used to record a slice of s of the form s[start:end].
// The start index is inclusive and the end index is exclusive.
type span struct {
start: int
end: int
}
spans := make([]span, 0, 32)
// Find the field start and end indices.
// Doing this in a separate pass (rather than slicing the string s
// and collecting the result substrings right away) is significantly
// more efficient, possibly due to cache effects.
start := -1 // valid span start if >= 0
for i := 0; i < len(s); {
size := 1
r := rune(s[i])
if r >= utf8.RuneSelf {
r, size = utf8.DecodeRune(s[i:])
}
if f(r) {
if start >= 0 {
spans = append(spans, span{start, i})
start = -1
}
} else {
if start < 0 {
start = i
}
}
i += size
}
// Last field might end at EOF.
if start >= 0 {
spans = append(spans, span{start, len(s)})
}
// Create subslices from recorded field indices.
a := make([][]byte, len(spans))
for i, span := range spans {
a[i] = s[span.start:span.end:span.end]
}
return a
*/
return nil
}
// explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes),
// up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes.
...
...
waroot/src/bytes/bytes_test.wa
浏览文件 @
69aaa0d6
...
...
@@ -484,6 +484,109 @@ func TestSplit {
}
}
global splitaftertests = []SplitTest{
{abcd, "a", -1, []string{"a", "bcd"}},
{abcd, "z", -1, []string{"abcd"}},
{abcd, "", -1, []string{"a", "b", "c", "d"}},
{commas, ",", -1, []string{"1,", "2,", "3,", "4"}},
{dots, "...", -1, []string{"1...", ".2...", ".3...", ".4"}},
{faces, "☹", -1, []string{"☺☻☹", ""}},
{faces, "~", -1, []string{faces}},
{faces, "", -1, []string{"☺", "☻", "☹"}},
{"1 2 3 4", " ", 3, []string{"1 ", "2 ", "3 4"}},
{"1 2 3", " ", 3, []string{"1 ", "2 ", "3"}},
{"1 2", " ", 3, []string{"1 ", "2"}},
{"123", "", 2, []string{"1", "23"}},
{"123", "", 17, []string{"1", "2", "3"}},
}
func TestSplitAfter {
for _, tt := range splitaftertests {
a := SplitAfterN([]byte(tt.s), []byte(tt.sep), tt.n)
// Appending to the results should not change future results.
x: []byte
for _, v := range a {
x = append(v, 'z')
}
result := sliceOfString(a)
if !eq(result, tt.a) {
assert(false)
//t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a)
//continue
}
if want := tt.a[len(tt.a)-1] + "z"; string(x) != want {
assert(false)
//t.Errorf("last appended result was %s; want %s", x, want)
}
s := Join(a, nil)
if string(s) != tt.s {
assert(false)
//t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s)
}
if tt.n < 0 {
//b := SplitAfter([]byte(tt.s), []byte(tt.sep))
//if !reflect.DeepEqual(a, b) {
// t.Errorf("SplitAfter disagrees withSplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
//}
}
}
}
type FieldsTest struct {
s: string
a: []string
}
global fieldstests = []FieldsTest{
{"", []string{}},
{" ", []string{}},
{" \t ", []string{}},
{" abc ", []string{"abc"}},
{"1 2 3 4", []string{"1", "2", "3", "4"}},
{"1 2 3 4", []string{"1", "2", "3", "4"}},
{"1\t\t2\t\t3\t4", []string{"1", "2", "3", "4"}},
{"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}},
{"\u2000\u2001\u2002", []string{}},
{"\n™\t™\n", []string{"™", "™"}},
{faces, []string{faces}},
}
func _TestFields {
for _, tt := range fieldstests {
b := []byte(tt.s)
a := Fields(b)
// Appending to the results should not change future results.
x: []byte
for _, v := range a {
x = append(v, 'z')
}
result := sliceOfString(a)
if !eq(result, tt.a) {
assert(false)
//t.Errorf("Fields(%q) = %v; want %v", tt.s, a, tt.a)
//continue
}
if string(b) != tt.s {
//t.Errorf("slice changed to %s; want %s", string(b), tt.s)
}
if len(tt.a) > 0 {
assert(false)
if want := tt.a[len(tt.a)-1] + "z"; string(x) != want {
assert(false)
// t.Errorf("last appended result was %s; want %s", x, want)
}
}
}
}
func TestEqualFold {
for _, tt := range EqualFoldTests {
if out := EqualFold([]byte(tt.s), []byte(tt.t)); out != tt.out {
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录