未验证 提交 4b7aef21 编写于 作者: F fatedier 提交者: GitHub

Merge pull request #1157 from fatedier/dev

bump version to v0.25.2
...@@ -18,6 +18,8 @@ import ( ...@@ -18,6 +18,8 @@ import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"io"
"io/ioutil"
"net" "net"
"net/http" "net/http"
"time" "time"
...@@ -170,6 +172,8 @@ func (monitor *HealthCheckMonitor) doHttpCheck(ctx context.Context) error { ...@@ -170,6 +172,8 @@ func (monitor *HealthCheckMonitor) doHttpCheck(ctx context.Context) error {
if err != nil { if err != nil {
return err return err
} }
defer resp.Body.Close()
io.Copy(ioutil.Discard, resp.Body)
if resp.StatusCode/100 != 2 { if resp.StatusCode/100 != 2 {
return fmt.Errorf("do http health check, StatusCode is [%d] not 2xx", resp.StatusCode) return fmt.Errorf("do http health check, StatusCode is [%d] not 2xx", resp.StatusCode)
......
[Unit]
Description=Frp Client Service
After=network.target
[Service]
Type=simple
User=nobody
Restart=on-failure
RestartSec=5s
ExecStart=/usr/bin/frpc -c /etc/frp/frpc.ini
ExecReload=/usr/bin/frpc reload -c /etc/frp/frpc.ini
[Install]
WantedBy=multi-user.target
[Unit]
Description=Frp Client Service
After=network.target
[Service]
Type=idle
User=nobody
Restart=on-failure
RestartSec=5s
ExecStart=/usr/bin/frpc -c /etc/frp/%i.ini
ExecReload=/usr/bin/frpc reload -c /etc/frp/%i.ini
[Install]
WantedBy=multi-user.target
[Unit]
Description=Frp Server Service
After=network.target
[Service]
Type=simple
User=nobody
Restart=on-failure
RestartSec=5s
ExecStart=/usr/bin/frps -c /etc/frp/frps.ini
[Install]
WantedBy=multi-user.target
[Unit]
Description=Frp Server Service
After=network.target
[Service]
Type=simple
User=nobody
Restart=on-failure
RestartSec=5s
ExecStart=/usr/bin/frps -c /etc/frp/%i.ini
[Install]
WantedBy=multi-user.target
...@@ -7,13 +7,15 @@ require ( ...@@ -7,13 +7,15 @@ require (
github.com/davecgh/go-spew v1.1.0 // indirect github.com/davecgh/go-spew v1.1.0 // indirect
github.com/fatedier/beego v0.0.0-20171024143340-6c6a4f5bd5eb github.com/fatedier/beego v0.0.0-20171024143340-6c6a4f5bd5eb
github.com/fatedier/golib v0.0.0-20181107124048-ff8cd814b049 github.com/fatedier/golib v0.0.0-20181107124048-ff8cd814b049
github.com/fatedier/kcp-go v0.0.0-20171023144637-cd167d2f15f4 github.com/fatedier/kcp-go v2.0.4-0.20190317085623-2063a803e6fe+incompatible
github.com/golang/snappy v0.0.0-20170215233205-553a64147049 // indirect github.com/golang/snappy v0.0.0-20170215233205-553a64147049 // indirect
github.com/gorilla/context v1.1.1 // indirect github.com/gorilla/context v1.1.1 // indirect
github.com/gorilla/mux v1.6.2 github.com/gorilla/mux v1.6.2
github.com/gorilla/websocket v1.2.0 github.com/gorilla/websocket v1.2.0
github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d
github.com/inconshreveable/mousetrap v1.0.0 // indirect github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/klauspost/cpuid v1.2.0 // indirect
github.com/klauspost/reedsolomon v1.9.1 // indirect
github.com/mattn/go-runewidth v0.0.4 // indirect github.com/mattn/go-runewidth v0.0.4 // indirect
github.com/pkg/errors v0.8.0 // indirect github.com/pkg/errors v0.8.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect
...@@ -23,7 +25,6 @@ require ( ...@@ -23,7 +25,6 @@ require (
github.com/spf13/pflag v1.0.1 // indirect github.com/spf13/pflag v1.0.1 // indirect
github.com/stretchr/testify v1.2.1 github.com/stretchr/testify v1.2.1
github.com/templexxx/cpufeat v0.0.0-20170927014610-3794dfbfb047 // indirect github.com/templexxx/cpufeat v0.0.0-20170927014610-3794dfbfb047 // indirect
github.com/templexxx/reedsolomon v0.0.0-20170926020725-5e06b81a1c76 // indirect
github.com/templexxx/xor v0.0.0-20170926022130-0af8e873c554 // indirect github.com/templexxx/xor v0.0.0-20170926022130-0af8e873c554 // indirect
github.com/tjfoc/gmsm v0.0.0-20171124023159-98aa888b79d8 // indirect github.com/tjfoc/gmsm v0.0.0-20171124023159-98aa888b79d8 // indirect
github.com/vaughan0/go-ini v0.0.0-20130923145212-a98ad7ee00ec github.com/vaughan0/go-ini v0.0.0-20130923145212-a98ad7ee00ec
......
...@@ -3,7 +3,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs ...@@ -3,7 +3,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
github.com/fatedier/beego v0.0.0-20171024143340-6c6a4f5bd5eb/go.mod h1:wx3gB6dbIfBRcucp94PI9Bt3I0F2c/MyNEWuhzpWiwk= github.com/fatedier/beego v0.0.0-20171024143340-6c6a4f5bd5eb/go.mod h1:wx3gB6dbIfBRcucp94PI9Bt3I0F2c/MyNEWuhzpWiwk=
github.com/fatedier/golib v0.0.0-20181107124048-ff8cd814b049 h1:teH578mf2ii42NHhIp3PhgvjU5bv+NFMq9fSQR8NaG8= github.com/fatedier/golib v0.0.0-20181107124048-ff8cd814b049 h1:teH578mf2ii42NHhIp3PhgvjU5bv+NFMq9fSQR8NaG8=
github.com/fatedier/golib v0.0.0-20181107124048-ff8cd814b049/go.mod h1:DqIrnl0rp3Zybg9zbJmozTy1n8fYJoX+QoAj9slIkKM= github.com/fatedier/golib v0.0.0-20181107124048-ff8cd814b049/go.mod h1:DqIrnl0rp3Zybg9zbJmozTy1n8fYJoX+QoAj9slIkKM=
github.com/fatedier/kcp-go v0.0.0-20171023144637-cd167d2f15f4/go.mod h1:YpCOaxj7vvMThhIQ9AfTOPW2sfztQR5WDfs7AflSy4s= github.com/fatedier/kcp-go v2.0.4-0.20190317085623-2063a803e6fe+incompatible h1:pNNeBKz1jtMDupiwvtEGFTujA3J86xoEXGSkwVeYFsw=
github.com/fatedier/kcp-go v2.0.4-0.20190317085623-2063a803e6fe+incompatible/go.mod h1:YpCOaxj7vvMThhIQ9AfTOPW2sfztQR5WDfs7AflSy4s=
github.com/golang/snappy v0.0.0-20170215233205-553a64147049/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.0-20170215233205-553a64147049/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8= github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8=
github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
...@@ -13,6 +14,10 @@ github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d h1:kJCB4vdITiW1eC1 ...@@ -13,6 +14,10 @@ github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d h1:kJCB4vdITiW1eC1
github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM= github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM=
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/reedsolomon v1.9.1 h1:kYrT1MlR4JH6PqOpC+okdb9CDTcwEC/BqpzK4WFyXL8=
github.com/klauspost/reedsolomon v1.9.1/go.mod h1:CwCi+NUr9pqSVktrkN+Ondf06rkhYZ/pcNv7fu+8Un4=
github.com/mattn/go-runewidth v0.0.4 h1:2BvfKmzob6Bmd4YsL0zygOqfdFnK7GR4QL06Do4/p7Y= github.com/mattn/go-runewidth v0.0.4 h1:2BvfKmzob6Bmd4YsL0zygOqfdFnK7GR4QL06Do4/p7Y=
github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
...@@ -23,7 +28,6 @@ github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3 ...@@ -23,7 +28,6 @@ github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3
github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/stretchr/testify v1.2.1/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.1/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/templexxx/cpufeat v0.0.0-20170927014610-3794dfbfb047/go.mod h1:wM7WEvslTq+iOEAMDLSzhVuOt5BRZ05WirO+b09GHQU= github.com/templexxx/cpufeat v0.0.0-20170927014610-3794dfbfb047/go.mod h1:wM7WEvslTq+iOEAMDLSzhVuOt5BRZ05WirO+b09GHQU=
github.com/templexxx/reedsolomon v0.0.0-20170926020725-5e06b81a1c76/go.mod h1:ToWcj2sZ6xHl14JjZiVDktYpFtrFZJXBlsu7TV23lNg=
github.com/templexxx/xor v0.0.0-20170926022130-0af8e873c554/go.mod h1:5XA7W9S6mni3h5uvOC75dA3m9CCCaS83lltmc0ukdi4= github.com/templexxx/xor v0.0.0-20170926022130-0af8e873c554/go.mod h1:5XA7W9S6mni3h5uvOC75dA3m9CCCaS83lltmc0ukdi4=
github.com/tjfoc/gmsm v0.0.0-20171124023159-98aa888b79d8/go.mod h1:XxO4hdhhrzAd+G4CjDqaOkd0hUzmtPR/d3EiBBMn/wc= github.com/tjfoc/gmsm v0.0.0-20171124023159-98aa888b79d8/go.mod h1:XxO4hdhhrzAd+G4CjDqaOkd0hUzmtPR/d3EiBBMn/wc=
github.com/vaughan0/go-ini v0.0.0-20130923145212-a98ad7ee00ec/go.mod h1:owBmyHYMLkxyrugmfwE/DLJyW8Ro9mkphwuVErQ0iUw= github.com/vaughan0/go-ini v0.0.0-20130923145212-a98ad7ee00ec/go.mod h1:owBmyHYMLkxyrugmfwE/DLJyW8Ro9mkphwuVErQ0iUw=
......
...@@ -19,7 +19,7 @@ import ( ...@@ -19,7 +19,7 @@ import (
"strings" "strings"
) )
var version string = "0.25.1" var version string = "0.25.2"
func Full() string { func Full() string {
return version return version
......
language: go language: go
go: go:
- 1.9 - 1.9.x
- 1.10.x
- 1.11.x
before_install: before_install:
- go get -t -v ./... - go get -t -v ./...
......
...@@ -20,24 +20,20 @@ ...@@ -20,24 +20,20 @@
**kcp-go** is a **Production-Grade Reliable-UDP** library for [golang](https://golang.org/). **kcp-go** is a **Production-Grade Reliable-UDP** library for [golang](https://golang.org/).
It provides **fast, ordered and error-checked** delivery of streams over **UDP** packets, has been well tested with opensource project [kcptun](https://github.com/xtaci/kcptun). Millions of devices(from low-end MIPS routers to high-end servers) are running with **kcp-go** at present, including applications like **online games, live broadcasting, file synchronization and network acceleration**. This library intents to provide a **smooth, resilient, ordered, error-checked and anonymous** delivery of streams over **UDP** packets, it has been battle-tested with opensource project [kcptun](https://github.com/xtaci/kcptun). Millions of devices(from low-end MIPS routers to high-end servers) have deployed **kcp-go** powered program in a variety of forms like **online games, live broadcasting, file synchronization and network acceleration**.
[Lastest Release](https://github.com/xtaci/kcp-go/releases) [Lastest Release](https://github.com/xtaci/kcp-go/releases)
## Features ## Features
1. Optimized for **Realtime Online Games, Audio/Video Streaming and Latency-Sensitive Distributed Consensus**. 1. Designed for **Latency-sensitive** scenarios.
1. Compatible with [skywind3000's](https://github.com/skywind3000) C version with language specific optimizations.
1. **Cache friendly** and **Memory optimized** design, offers extremely **High Performance** core. 1. **Cache friendly** and **Memory optimized** design, offers extremely **High Performance** core.
1. Handles **>5K concurrent connections** on a single commodity server. 1. Handles **>5K concurrent connections** on a single commodity server.
1. Compatible with [net.Conn](https://golang.org/pkg/net/#Conn) and [net.Listener](https://golang.org/pkg/net/#Listener), a drop-in replacement for [net.TCPConn](https://golang.org/pkg/net/#TCPConn). 1. Compatible with [net.Conn](https://golang.org/pkg/net/#Conn) and [net.Listener](https://golang.org/pkg/net/#Listener), a drop-in replacement for [net.TCPConn](https://golang.org/pkg/net/#TCPConn).
1. [FEC(Forward Error Correction)](https://en.wikipedia.org/wiki/Forward_error_correction) Support with [Reed-Solomon Codes](https://en.wikipedia.org/wiki/Reed%E2%80%93Solomon_error_correction) 1. [FEC(Forward Error Correction)](https://en.wikipedia.org/wiki/Forward_error_correction) Support with [Reed-Solomon Codes](https://en.wikipedia.org/wiki/Reed%E2%80%93Solomon_error_correction)
1. Packet level encryption support with [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard), [TEA](https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm), [3DES](https://en.wikipedia.org/wiki/Triple_DES), [Blowfish](https://en.wikipedia.org/wiki/Blowfish_(cipher)), [Cast5](https://en.wikipedia.org/wiki/CAST-128), [Salsa20]( https://en.wikipedia.org/wiki/Salsa20), etc. in [CFB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_Feedback_.28CFB.29) mode. 1. Packet level encryption support with [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard), [TEA](https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm), [3DES](https://en.wikipedia.org/wiki/Triple_DES), [Blowfish](https://en.wikipedia.org/wiki/Blowfish_(cipher)), [Cast5](https://en.wikipedia.org/wiki/CAST-128), [Salsa20]( https://en.wikipedia.org/wiki/Salsa20), etc. in [CFB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_Feedback_.28CFB.29) mode, which generates completely anonymous packet.
1. **Fixed number of goroutines** created for the entire server application, minimized goroutine context switch. 1. Only **A fixed number of goroutines** will be created for the entire server application, costs in **context switch** between goroutines have been taken into consideration.
1. Compatible with [skywind3000's](https://github.com/skywind3000) C version with various improvements.
## Conventions
Control messages like **SYN/FIN/RST** in TCP **are not defined** in KCP, you need some **keepalive/heartbeat mechanism** in the application-level. A real world example is to use some **multiplexing** protocol over session, such as [smux](https://github.com/xtaci/smux)(with embedded keepalive mechanism), see [kcptun](https://github.com/xtaci/kcptun) for example.
## Documentation ## Documentation
...@@ -80,47 +76,59 @@ Server: [full demo](https://github.com/xtaci/kcptun/blob/master/server/main.go ...@@ -80,47 +76,59 @@ Server: [full demo](https://github.com/xtaci/kcptun/blob/master/server/main.go
lis, err := kcp.ListenWithOptions(":10000", nil, 10, 3) lis, err := kcp.ListenWithOptions(":10000", nil, 10, 3)
``` ```
## Performance ## Benchmark
``` ```
Model Name: MacBook Pro Model Name: MacBook Pro
Model Identifier: MacBookPro12,1 Model Identifier: MacBookPro14,1
Processor Name: Intel Core i5 Processor Name: Intel Core i5
Processor Speed: 2.7 GHz Processor Speed: 3.1 GHz
Number of Processors: 1 Number of Processors: 1
Total Number of Cores: 2 Total Number of Cores: 2
L2 Cache (per Core): 256 KB L2 Cache (per Core): 256 KB
L3 Cache: 3 MB L3 Cache: 4 MB
Memory: 8 GB Memory: 8 GB
``` ```
``` ```
$ go test -v -run=^$ -bench . $ go test -v -run=^$ -bench .
beginning tests, encryption:salsa20, fec:10/3 beginning tests, encryption:salsa20, fec:10/3
BenchmarkAES128-4 200000 8256 ns/op 363.33 MB/s 0 B/op 0 allocs/op goos: darwin
BenchmarkAES192-4 200000 9153 ns/op 327.74 MB/s 0 B/op 0 allocs/op goarch: amd64
BenchmarkAES256-4 200000 10079 ns/op 297.64 MB/s 0 B/op 0 allocs/op pkg: github.com/xtaci/kcp-go
BenchmarkTEA-4 100000 18643 ns/op 160.91 MB/s 0 B/op 0 allocs/op BenchmarkSM4-4 50000 32180 ns/op 93.23 MB/s 0 B/op 0 allocs/op
BenchmarkXOR-4 5000000 316 ns/op 9486.46 MB/s 0 B/op 0 allocs/op BenchmarkAES128-4 500000 3285 ns/op 913.21 MB/s 0 B/op 0 allocs/op
BenchmarkBlowfish-4 50000 35643 ns/op 84.17 MB/s 0 B/op 0 allocs/op BenchmarkAES192-4 300000 3623 ns/op 827.85 MB/s 0 B/op 0 allocs/op
BenchmarkNone-4 30000000 56.2 ns/op 53371.83 MB/s 0 B/op 0 allocs/op BenchmarkAES256-4 300000 3874 ns/op 774.20 MB/s 0 B/op 0 allocs/op
BenchmarkCast5-4 30000 44744 ns/op 67.05 MB/s 0 B/op 0 allocs/op BenchmarkTEA-4 100000 15384 ns/op 195.00 MB/s 0 B/op 0 allocs/op
Benchmark3DES-4 2000 639839 ns/op 4.69 MB/s 2 B/op 0 allocs/op BenchmarkXOR-4 20000000 89.9 ns/op 33372.00 MB/s 0 B/op 0 allocs/op
BenchmarkTwofish-4 30000 43368 ns/op 69.17 MB/s 0 B/op 0 allocs/op BenchmarkBlowfish-4 50000 26927 ns/op 111.41 MB/s 0 B/op 0 allocs/op
BenchmarkXTEA-4 30000 57673 ns/op 52.02 MB/s 0 B/op 0 allocs/op BenchmarkNone-4 30000000 45.7 ns/op 65597.94 MB/s 0 B/op 0 allocs/op
BenchmarkSalsa20-4 300000 3917 ns/op 765.80 MB/s 0 B/op 0 allocs/op BenchmarkCast5-4 50000 34258 ns/op 87.57 MB/s 0 B/op 0 allocs/op
BenchmarkFlush-4 10000000 226 ns/op 0 B/op 0 allocs/op Benchmark3DES-4 10000 117149 ns/op 25.61 MB/s 0 B/op 0 allocs/op
BenchmarkEchoSpeed4K-4 5000 300030 ns/op 13.65 MB/s 5672 B/op 177 allocs/op BenchmarkTwofish-4 50000 33538 ns/op 89.45 MB/s 0 B/op 0 allocs/op
BenchmarkEchoSpeed64K-4 500 3202335 ns/op 20.47 MB/s 73295 B/op 2198 allocs/op BenchmarkXTEA-4 30000 45666 ns/op 65.69 MB/s 0 B/op 0 allocs/op
BenchmarkEchoSpeed512K-4 50 24926924 ns/op 21.03 MB/s 659339 B/op 17602 allocs/op BenchmarkSalsa20-4 500000 3308 ns/op 906.76 MB/s 0 B/op 0 allocs/op
BenchmarkEchoSpeed1M-4 20 64857821 ns/op 16.17 MB/s 1772437 B/op 42869 allocs/op BenchmarkCRC32-4 20000000 65.2 ns/op 15712.43 MB/s
BenchmarkSinkSpeed4K-4 30000 50230 ns/op 81.54 MB/s 2058 B/op 48 allocs/op BenchmarkCsprngSystem-4 1000000 1150 ns/op 13.91 MB/s
BenchmarkSinkSpeed64K-4 2000 648718 ns/op 101.02 MB/s 31165 B/op 687 allocs/op BenchmarkCsprngMD5-4 10000000 145 ns/op 110.26 MB/s
BenchmarkSinkSpeed256K-4 300 4635905 ns/op 113.09 MB/s 286229 B/op 5516 allocs/op BenchmarkCsprngSHA1-4 10000000 158 ns/op 126.54 MB/s
BenchmarkSinkSpeed1M-4 200 9566933 ns/op 109.60 MB/s 463771 B/op 10701 allocs/op BenchmarkCsprngNonceMD5-4 10000000 153 ns/op 104.22 MB/s
BenchmarkCsprngNonceAES128-4 100000000 19.1 ns/op 837.81 MB/s
BenchmarkFECDecode-4 1000000 1119 ns/op 1339.61 MB/s 1606 B/op 2 allocs/op
BenchmarkFECEncode-4 2000000 832 ns/op 1801.83 MB/s 17 B/op 0 allocs/op
BenchmarkFlush-4 5000000 272 ns/op 0 B/op 0 allocs/op
BenchmarkEchoSpeed4K-4 5000 259617 ns/op 15.78 MB/s 5451 B/op 149 allocs/op
BenchmarkEchoSpeed64K-4 1000 1706084 ns/op 38.41 MB/s 56002 B/op 1604 allocs/op
BenchmarkEchoSpeed512K-4 100 14345505 ns/op 36.55 MB/s 482597 B/op 13045 allocs/op
BenchmarkEchoSpeed1M-4 30 34859104 ns/op 30.08 MB/s 1143773 B/op 27186 allocs/op
BenchmarkSinkSpeed4K-4 50000 31369 ns/op 130.57 MB/s 1566 B/op 30 allocs/op
BenchmarkSinkSpeed64K-4 5000 329065 ns/op 199.16 MB/s 21529 B/op 453 allocs/op
BenchmarkSinkSpeed256K-4 500 2373354 ns/op 220.91 MB/s 166332 B/op 3554 allocs/op
BenchmarkSinkSpeed1M-4 300 5117927 ns/op 204.88 MB/s 310378 B/op 6988 allocs/op
PASS PASS
ok _/Users/xtaci/.godeps/src/github.com/xtaci/kcp-go 39.689s ok github.com/xtaci/kcp-go 50.349s
``` ```
## Design Considerations ## Key Design Considerations
1. slice vs. container/list 1. slice vs. container/list
...@@ -139,7 +147,9 @@ List structure introduces **heavy cache misses** compared to slice which owns be ...@@ -139,7 +147,9 @@ List structure introduces **heavy cache misses** compared to slice which owns be
2. Timing accuracy vs. syscall clock_gettime 2. Timing accuracy vs. syscall clock_gettime
Timing is **critical** to **RTT estimator**, inaccurate timing introduces false retransmissions in KCP, but calling `time.Now()` costs 42 cycles(10.5ns on 4GHz CPU, 15.6ns on my MacBook Pro 2.7GHz), the benchmark for time.Now(): Timing is **critical** to **RTT estimator**, inaccurate timing leads to false retransmissions in KCP, but calling `time.Now()` costs 42 cycles(10.5ns on 4GHz CPU, 15.6ns on my MacBook Pro 2.7GHz).
The benchmark for time.Now() lies here:
https://github.com/xtaci/notes/blob/master/golang/benchmark2/syscall_test.go https://github.com/xtaci/notes/blob/master/golang/benchmark2/syscall_test.go
...@@ -147,14 +157,17 @@ https://github.com/xtaci/notes/blob/master/golang/benchmark2/syscall_test.go ...@@ -147,14 +157,17 @@ https://github.com/xtaci/notes/blob/master/golang/benchmark2/syscall_test.go
BenchmarkNow-4 100000000 15.6 ns/op BenchmarkNow-4 100000000 15.6 ns/op
``` ```
In kcp-go, after each `kcp.output()` function call, current time will be updated upon return, and each `kcp.flush()` will get current time once. For most of the time, 5000 connections costs 5000 * 15.6ns = 78us(no packet needs to be sent by `kcp.output()`), as for 10MB/s data transfering with 1400 MTU, `kcp.output()` will be called around 7500 times and costs 117us for `time.Now()` in **every second**. In kcp-go, after each `kcp.output()` function call, current clock time will be updated upon return, and for a single `kcp.flush()` operation, current time will be queried from system once. For most of the time, 5000 connections costs 5000 * 15.6ns = 78us(a fixed cost while no packet needs to be sent), as for 10MB/s data transfering with 1400 MTU, `kcp.output()` will be called around 7500 times and costs 117us for `time.Now()` in **every second**.
## Connection Termination
Control messages like **SYN/FIN/RST** in TCP **are not defined** in KCP, you need some **keepalive/heartbeat mechanism** in the application-level. A real world example is to use some **multiplexing** protocol over session, such as [smux](https://github.com/xtaci/smux)(with embedded keepalive mechanism), see [kcptun](https://github.com/xtaci/kcptun) for example.
## Tuning ## FAQ
Q: I'm handling >5K connections on my server. the CPU utilization is high. Q: I'm handling >5K connections on my server, the CPU utilization is so high.
A: A standalone `agent` or `gate` server for kcp-go is suggested, not only for CPU utilization, but also important to the **precision** of RTT measurements which indirectly affects retransmission. By increasing update `interval` with `SetNoDelay` like `conn.SetNoDelay(1, 40, 1, 1)` will dramatically reduce system load. A: A standalone `agent` or `gate` server for running kcp-go is suggested, not only for CPU utilization, but also important to the **precision** of RTT measurements(timing) which indirectly affects retransmission. By increasing update `interval` with `SetNoDelay` like `conn.SetNoDelay(1, 40, 1, 1)` will dramatically reduce system load, but lower the performance.
## Who is using this? ## Who is using this?
...@@ -163,10 +176,9 @@ A: A standalone `agent` or `gate` server for kcp-go is suggested, not only for C ...@@ -163,10 +176,9 @@ A: A standalone `agent` or `gate` server for kcp-go is suggested, not only for C
3. https://github.com/smallnest/rpcx -- A RPC service framework based on net/rpc like alibaba Dubbo and weibo Motan. 3. https://github.com/smallnest/rpcx -- A RPC service framework based on net/rpc like alibaba Dubbo and weibo Motan.
4. https://github.com/gonet2/agent -- A gateway for games with stream multiplexing. 4. https://github.com/gonet2/agent -- A gateway for games with stream multiplexing.
5. https://github.com/syncthing/syncthing -- Open Source Continuous File Synchronization. 5. https://github.com/syncthing/syncthing -- Open Source Continuous File Synchronization.
6. https://play.google.com/store/apps/details?id=com.k17game.k3 -- Battle Zone - Earth 2048, a world-wide strategy game.
## Links ## Links
1. https://github.com/xtaci/libkcp -- FEC enhanced KCP session library for iOS/Android in C++ 1. https://github.com/xtaci/libkcp -- FEC enhanced KCP session library for iOS/Android in C++
2. https://github.com/skywind3000/kcp -- A Fast and Reliable ARQ Protocol 2. https://github.com/skywind3000/kcp -- A Fast and Reliable ARQ Protocol
3. https://github.com/templexxx/reedsolomon -- Reed-Solomon Erasure Coding in Go 3. https://github.com/klauspost/reedsolomon -- Reed-Solomon Erasure Coding in Go
...@@ -57,8 +57,8 @@ func (c *salsa20BlockCrypt) Decrypt(dst, src []byte) { ...@@ -57,8 +57,8 @@ func (c *salsa20BlockCrypt) Decrypt(dst, src []byte) {
} }
type sm4BlockCrypt struct { type sm4BlockCrypt struct {
encbuf []byte encbuf [sm4.BlockSize]byte
decbuf []byte decbuf [2 * sm4.BlockSize]byte
block cipher.Block block cipher.Block
} }
...@@ -70,17 +70,15 @@ func NewSM4BlockCrypt(key []byte) (BlockCrypt, error) { ...@@ -70,17 +70,15 @@ func NewSM4BlockCrypt(key []byte) (BlockCrypt, error) {
return nil, err return nil, err
} }
c.block = block c.block = block
c.encbuf = make([]byte, sm4.BlockSize)
c.decbuf = make([]byte, 2*sm4.BlockSize)
return c, nil return c, nil
} }
func (c *sm4BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } func (c *sm4BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
func (c *sm4BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } func (c *sm4BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
type twofishBlockCrypt struct { type twofishBlockCrypt struct {
encbuf []byte encbuf [twofish.BlockSize]byte
decbuf []byte decbuf [2 * twofish.BlockSize]byte
block cipher.Block block cipher.Block
} }
...@@ -92,17 +90,15 @@ func NewTwofishBlockCrypt(key []byte) (BlockCrypt, error) { ...@@ -92,17 +90,15 @@ func NewTwofishBlockCrypt(key []byte) (BlockCrypt, error) {
return nil, err return nil, err
} }
c.block = block c.block = block
c.encbuf = make([]byte, twofish.BlockSize)
c.decbuf = make([]byte, 2*twofish.BlockSize)
return c, nil return c, nil
} }
func (c *twofishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } func (c *twofishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
func (c *twofishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } func (c *twofishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
type tripleDESBlockCrypt struct { type tripleDESBlockCrypt struct {
encbuf []byte encbuf [des.BlockSize]byte
decbuf []byte decbuf [2 * des.BlockSize]byte
block cipher.Block block cipher.Block
} }
...@@ -114,17 +110,15 @@ func NewTripleDESBlockCrypt(key []byte) (BlockCrypt, error) { ...@@ -114,17 +110,15 @@ func NewTripleDESBlockCrypt(key []byte) (BlockCrypt, error) {
return nil, err return nil, err
} }
c.block = block c.block = block
c.encbuf = make([]byte, des.BlockSize)
c.decbuf = make([]byte, 2*des.BlockSize)
return c, nil return c, nil
} }
func (c *tripleDESBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } func (c *tripleDESBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
func (c *tripleDESBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } func (c *tripleDESBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
type cast5BlockCrypt struct { type cast5BlockCrypt struct {
encbuf []byte encbuf [cast5.BlockSize]byte
decbuf []byte decbuf [2 * cast5.BlockSize]byte
block cipher.Block block cipher.Block
} }
...@@ -136,17 +130,15 @@ func NewCast5BlockCrypt(key []byte) (BlockCrypt, error) { ...@@ -136,17 +130,15 @@ func NewCast5BlockCrypt(key []byte) (BlockCrypt, error) {
return nil, err return nil, err
} }
c.block = block c.block = block
c.encbuf = make([]byte, cast5.BlockSize)
c.decbuf = make([]byte, 2*cast5.BlockSize)
return c, nil return c, nil
} }
func (c *cast5BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } func (c *cast5BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
func (c *cast5BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } func (c *cast5BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
type blowfishBlockCrypt struct { type blowfishBlockCrypt struct {
encbuf []byte encbuf [blowfish.BlockSize]byte
decbuf []byte decbuf [2 * blowfish.BlockSize]byte
block cipher.Block block cipher.Block
} }
...@@ -158,17 +150,15 @@ func NewBlowfishBlockCrypt(key []byte) (BlockCrypt, error) { ...@@ -158,17 +150,15 @@ func NewBlowfishBlockCrypt(key []byte) (BlockCrypt, error) {
return nil, err return nil, err
} }
c.block = block c.block = block
c.encbuf = make([]byte, blowfish.BlockSize)
c.decbuf = make([]byte, 2*blowfish.BlockSize)
return c, nil return c, nil
} }
func (c *blowfishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } func (c *blowfishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
func (c *blowfishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } func (c *blowfishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
type aesBlockCrypt struct { type aesBlockCrypt struct {
encbuf []byte encbuf [aes.BlockSize]byte
decbuf []byte decbuf [2 * aes.BlockSize]byte
block cipher.Block block cipher.Block
} }
...@@ -180,17 +170,15 @@ func NewAESBlockCrypt(key []byte) (BlockCrypt, error) { ...@@ -180,17 +170,15 @@ func NewAESBlockCrypt(key []byte) (BlockCrypt, error) {
return nil, err return nil, err
} }
c.block = block c.block = block
c.encbuf = make([]byte, aes.BlockSize)
c.decbuf = make([]byte, 2*aes.BlockSize)
return c, nil return c, nil
} }
func (c *aesBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } func (c *aesBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
func (c *aesBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } func (c *aesBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
type teaBlockCrypt struct { type teaBlockCrypt struct {
encbuf []byte encbuf [tea.BlockSize]byte
decbuf []byte decbuf [2 * tea.BlockSize]byte
block cipher.Block block cipher.Block
} }
...@@ -202,17 +190,15 @@ func NewTEABlockCrypt(key []byte) (BlockCrypt, error) { ...@@ -202,17 +190,15 @@ func NewTEABlockCrypt(key []byte) (BlockCrypt, error) {
return nil, err return nil, err
} }
c.block = block c.block = block
c.encbuf = make([]byte, tea.BlockSize)
c.decbuf = make([]byte, 2*tea.BlockSize)
return c, nil return c, nil
} }
func (c *teaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } func (c *teaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
func (c *teaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } func (c *teaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
type xteaBlockCrypt struct { type xteaBlockCrypt struct {
encbuf []byte encbuf [xtea.BlockSize]byte
decbuf []byte decbuf [2 * xtea.BlockSize]byte
block cipher.Block block cipher.Block
} }
...@@ -224,13 +210,11 @@ func NewXTEABlockCrypt(key []byte) (BlockCrypt, error) { ...@@ -224,13 +210,11 @@ func NewXTEABlockCrypt(key []byte) (BlockCrypt, error) {
return nil, err return nil, err
} }
c.block = block c.block = block
c.encbuf = make([]byte, xtea.BlockSize)
c.decbuf = make([]byte, 2*xtea.BlockSize)
return c, nil return c, nil
} }
func (c *xteaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } func (c *xteaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
func (c *xteaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } func (c *xteaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
type simpleXORBlockCrypt struct { type simpleXORBlockCrypt struct {
xortbl []byte xortbl []byte
...@@ -258,31 +242,544 @@ func (c *noneBlockCrypt) Decrypt(dst, src []byte) { copy(dst, src) } ...@@ -258,31 +242,544 @@ func (c *noneBlockCrypt) Decrypt(dst, src []byte) { copy(dst, src) }
// packet encryption with local CFB mode // packet encryption with local CFB mode
func encrypt(block cipher.Block, dst, src, buf []byte) { func encrypt(block cipher.Block, dst, src, buf []byte) {
switch block.BlockSize() {
case 8:
encrypt8(block, dst, src, buf)
case 16:
encrypt16(block, dst, src, buf)
default:
encryptVariant(block, dst, src, buf)
}
}
// optimized encryption for the ciphers which works in 8-bytes
func encrypt8(block cipher.Block, dst, src, buf []byte) {
tbl := buf[:8]
block.Encrypt(tbl, initialVector)
n := len(src) / 8
base := 0
repeat := n / 8
left := n % 8
for i := 0; i < repeat; i++ {
s := src[base:][0:64]
d := dst[base:][0:64]
// 1
xor.BytesSrc1(d[0:8], s[0:8], tbl)
block.Encrypt(tbl, d[0:8])
// 2
xor.BytesSrc1(d[8:16], s[8:16], tbl)
block.Encrypt(tbl, d[8:16])
// 3
xor.BytesSrc1(d[16:24], s[16:24], tbl)
block.Encrypt(tbl, d[16:24])
// 4
xor.BytesSrc1(d[24:32], s[24:32], tbl)
block.Encrypt(tbl, d[24:32])
// 5
xor.BytesSrc1(d[32:40], s[32:40], tbl)
block.Encrypt(tbl, d[32:40])
// 6
xor.BytesSrc1(d[40:48], s[40:48], tbl)
block.Encrypt(tbl, d[40:48])
// 7
xor.BytesSrc1(d[48:56], s[48:56], tbl)
block.Encrypt(tbl, d[48:56])
// 8
xor.BytesSrc1(d[56:64], s[56:64], tbl)
block.Encrypt(tbl, d[56:64])
base += 64
}
switch left {
case 7:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += 8
fallthrough
case 6:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += 8
fallthrough
case 5:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += 8
fallthrough
case 4:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += 8
fallthrough
case 3:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += 8
fallthrough
case 2:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += 8
fallthrough
case 1:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += 8
fallthrough
case 0:
xor.BytesSrc0(dst[base:], src[base:], tbl)
}
}
// optimized encryption for the ciphers which works in 16-bytes
func encrypt16(block cipher.Block, dst, src, buf []byte) {
tbl := buf[:16]
block.Encrypt(tbl, initialVector)
n := len(src) / 16
base := 0
repeat := n / 8
left := n % 8
for i := 0; i < repeat; i++ {
s := src[base:][0:128]
d := dst[base:][0:128]
// 1
xor.BytesSrc1(d[0:16], s[0:16], tbl)
block.Encrypt(tbl, d[0:16])
// 2
xor.BytesSrc1(d[16:32], s[16:32], tbl)
block.Encrypt(tbl, d[16:32])
// 3
xor.BytesSrc1(d[32:48], s[32:48], tbl)
block.Encrypt(tbl, d[32:48])
// 4
xor.BytesSrc1(d[48:64], s[48:64], tbl)
block.Encrypt(tbl, d[48:64])
// 5
xor.BytesSrc1(d[64:80], s[64:80], tbl)
block.Encrypt(tbl, d[64:80])
// 6
xor.BytesSrc1(d[80:96], s[80:96], tbl)
block.Encrypt(tbl, d[80:96])
// 7
xor.BytesSrc1(d[96:112], s[96:112], tbl)
block.Encrypt(tbl, d[96:112])
// 8
xor.BytesSrc1(d[112:128], s[112:128], tbl)
block.Encrypt(tbl, d[112:128])
base += 128
}
switch left {
case 7:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += 16
fallthrough
case 6:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += 16
fallthrough
case 5:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += 16
fallthrough
case 4:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += 16
fallthrough
case 3:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += 16
fallthrough
case 2:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += 16
fallthrough
case 1:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += 16
fallthrough
case 0:
xor.BytesSrc0(dst[base:], src[base:], tbl)
}
}
func encryptVariant(block cipher.Block, dst, src, buf []byte) {
blocksize := block.BlockSize() blocksize := block.BlockSize()
tbl := buf[:blocksize] tbl := buf[:blocksize]
block.Encrypt(tbl, initialVector) block.Encrypt(tbl, initialVector)
n := len(src) / blocksize n := len(src) / blocksize
base := 0 base := 0
for i := 0; i < n; i++ { repeat := n / 8
left := n % 8
for i := 0; i < repeat; i++ {
// 1
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += blocksize
// 2
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += blocksize
// 3
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += blocksize
// 4
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += blocksize
// 5
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += blocksize
// 6
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += blocksize
// 7
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += blocksize
// 8
xor.BytesSrc1(dst[base:], src[base:], tbl) xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:]) block.Encrypt(tbl, dst[base:])
base += blocksize base += blocksize
} }
xor.BytesSrc0(dst[base:], src[base:], tbl)
switch left {
case 7:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += blocksize
fallthrough
case 6:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += blocksize
fallthrough
case 5:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += blocksize
fallthrough
case 4:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += blocksize
fallthrough
case 3:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += blocksize
fallthrough
case 2:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += blocksize
fallthrough
case 1:
xor.BytesSrc1(dst[base:], src[base:], tbl)
block.Encrypt(tbl, dst[base:])
base += blocksize
fallthrough
case 0:
xor.BytesSrc0(dst[base:], src[base:], tbl)
}
} }
// decryption
func decrypt(block cipher.Block, dst, src, buf []byte) { func decrypt(block cipher.Block, dst, src, buf []byte) {
switch block.BlockSize() {
case 8:
decrypt8(block, dst, src, buf)
case 16:
decrypt16(block, dst, src, buf)
default:
decryptVariant(block, dst, src, buf)
}
}
func decrypt8(block cipher.Block, dst, src, buf []byte) {
tbl := buf[0:8]
next := buf[8:16]
block.Encrypt(tbl, initialVector)
n := len(src) / 8
base := 0
repeat := n / 8
left := n % 8
for i := 0; i < repeat; i++ {
s := src[base:][0:64]
d := dst[base:][0:64]
// 1
block.Encrypt(next, s[0:8])
xor.BytesSrc1(d[0:8], s[0:8], tbl)
// 2
block.Encrypt(tbl, s[8:16])
xor.BytesSrc1(d[8:16], s[8:16], next)
// 3
block.Encrypt(next, s[16:24])
xor.BytesSrc1(d[16:24], s[16:24], tbl)
// 4
block.Encrypt(tbl, s[24:32])
xor.BytesSrc1(d[24:32], s[24:32], next)
// 5
block.Encrypt(next, s[32:40])
xor.BytesSrc1(d[32:40], s[32:40], tbl)
// 6
block.Encrypt(tbl, s[40:48])
xor.BytesSrc1(d[40:48], s[40:48], next)
// 7
block.Encrypt(next, s[48:56])
xor.BytesSrc1(d[48:56], s[48:56], tbl)
// 8
block.Encrypt(tbl, s[56:64])
xor.BytesSrc1(d[56:64], s[56:64], next)
base += 64
}
switch left {
case 7:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += 8
fallthrough
case 6:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += 8
fallthrough
case 5:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += 8
fallthrough
case 4:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += 8
fallthrough
case 3:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += 8
fallthrough
case 2:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += 8
fallthrough
case 1:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += 8
fallthrough
case 0:
xor.BytesSrc0(dst[base:], src[base:], tbl)
}
}
func decrypt16(block cipher.Block, dst, src, buf []byte) {
tbl := buf[0:16]
next := buf[16:32]
block.Encrypt(tbl, initialVector)
n := len(src) / 16
base := 0
repeat := n / 8
left := n % 8
for i := 0; i < repeat; i++ {
s := src[base:][0:128]
d := dst[base:][0:128]
// 1
block.Encrypt(next, s[0:16])
xor.BytesSrc1(d[0:16], s[0:16], tbl)
// 2
block.Encrypt(tbl, s[16:32])
xor.BytesSrc1(d[16:32], s[16:32], next)
// 3
block.Encrypt(next, s[32:48])
xor.BytesSrc1(d[32:48], s[32:48], tbl)
// 4
block.Encrypt(tbl, s[48:64])
xor.BytesSrc1(d[48:64], s[48:64], next)
// 5
block.Encrypt(next, s[64:80])
xor.BytesSrc1(d[64:80], s[64:80], tbl)
// 6
block.Encrypt(tbl, s[80:96])
xor.BytesSrc1(d[80:96], s[80:96], next)
// 7
block.Encrypt(next, s[96:112])
xor.BytesSrc1(d[96:112], s[96:112], tbl)
// 8
block.Encrypt(tbl, s[112:128])
xor.BytesSrc1(d[112:128], s[112:128], next)
base += 128
}
switch left {
case 7:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += 16
fallthrough
case 6:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += 16
fallthrough
case 5:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += 16
fallthrough
case 4:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += 16
fallthrough
case 3:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += 16
fallthrough
case 2:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += 16
fallthrough
case 1:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += 16
fallthrough
case 0:
xor.BytesSrc0(dst[base:], src[base:], tbl)
}
}
func decryptVariant(block cipher.Block, dst, src, buf []byte) {
blocksize := block.BlockSize() blocksize := block.BlockSize()
tbl := buf[:blocksize] tbl := buf[:blocksize]
next := buf[blocksize:] next := buf[blocksize:]
block.Encrypt(tbl, initialVector) block.Encrypt(tbl, initialVector)
n := len(src) / blocksize n := len(src) / blocksize
base := 0 base := 0
for i := 0; i < n; i++ { repeat := n / 8
left := n % 8
for i := 0; i < repeat; i++ {
// 1
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
base += blocksize
// 2
block.Encrypt(tbl, src[base:])
xor.BytesSrc1(dst[base:], src[base:], next)
base += blocksize
// 3
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
base += blocksize
// 4
block.Encrypt(tbl, src[base:])
xor.BytesSrc1(dst[base:], src[base:], next)
base += blocksize
// 5
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
base += blocksize
// 6
block.Encrypt(tbl, src[base:])
xor.BytesSrc1(dst[base:], src[base:], next)
base += blocksize
// 7
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
base += blocksize
// 8
block.Encrypt(tbl, src[base:])
xor.BytesSrc1(dst[base:], src[base:], next)
base += blocksize
}
switch left {
case 7:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += blocksize
fallthrough
case 6:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += blocksize
fallthrough
case 5:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += blocksize
fallthrough
case 4:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += blocksize
fallthrough
case 3:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += blocksize
fallthrough
case 2:
block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl
base += blocksize
fallthrough
case 1:
block.Encrypt(next, src[base:]) block.Encrypt(next, src[base:])
xor.BytesSrc1(dst[base:], src[base:], tbl) xor.BytesSrc1(dst[base:], src[base:], tbl)
tbl, next = next, tbl tbl, next = next, tbl
base += blocksize base += blocksize
fallthrough
case 0:
xor.BytesSrc0(dst[base:], src[base:], tbl)
} }
xor.BytesSrc0(dst[base:], src[base:], tbl)
} }
package kcp
import (
"crypto/aes"
"crypto/cipher"
"crypto/md5"
"crypto/rand"
"io"
)
// Entropy defines a entropy source
type Entropy interface {
Init()
Fill(nonce []byte)
}
// nonceMD5 nonce generator for packet header
type nonceMD5 struct {
seed [md5.Size]byte
}
func (n *nonceMD5) Init() { /*nothing required*/ }
func (n *nonceMD5) Fill(nonce []byte) {
if n.seed[0] == 0 { // entropy update
io.ReadFull(rand.Reader, n.seed[:])
}
n.seed = md5.Sum(n.seed[:])
copy(nonce, n.seed[:])
}
// nonceAES128 nonce generator for packet headers
type nonceAES128 struct {
seed [aes.BlockSize]byte
block cipher.Block
}
func (n *nonceAES128) Init() {
var key [16]byte //aes-128
io.ReadFull(rand.Reader, key[:])
io.ReadFull(rand.Reader, n.seed[:])
block, _ := aes.NewCipher(key[:])
n.block = block
}
func (n *nonceAES128) Fill(nonce []byte) {
if n.seed[0] == 0 { // entropy update
io.ReadFull(rand.Reader, n.seed[:])
}
n.block.Encrypt(n.seed[:], n.seed[:])
copy(nonce, n.seed[:])
}
...@@ -4,7 +4,7 @@ import ( ...@@ -4,7 +4,7 @@ import (
"encoding/binary" "encoding/binary"
"sync/atomic" "sync/atomic"
"github.com/templexxx/reedsolomon" "github.com/klauspost/reedsolomon"
) )
const ( const (
...@@ -34,6 +34,9 @@ type ( ...@@ -34,6 +34,9 @@ type (
decodeCache [][]byte decodeCache [][]byte
flagCache []bool flagCache []bool
// zeros
zeros []byte
// RS decoder // RS decoder
codec reedsolomon.Encoder codec reedsolomon.Encoder
} }
...@@ -47,19 +50,20 @@ func newFECDecoder(rxlimit, dataShards, parityShards int) *fecDecoder { ...@@ -47,19 +50,20 @@ func newFECDecoder(rxlimit, dataShards, parityShards int) *fecDecoder {
return nil return nil
} }
fec := new(fecDecoder) dec := new(fecDecoder)
fec.rxlimit = rxlimit dec.rxlimit = rxlimit
fec.dataShards = dataShards dec.dataShards = dataShards
fec.parityShards = parityShards dec.parityShards = parityShards
fec.shardSize = dataShards + parityShards dec.shardSize = dataShards + parityShards
enc, err := reedsolomon.New(dataShards, parityShards) codec, err := reedsolomon.New(dataShards, parityShards)
if err != nil { if err != nil {
return nil return nil
} }
fec.codec = enc dec.codec = codec
fec.decodeCache = make([][]byte, fec.shardSize) dec.decodeCache = make([][]byte, dec.shardSize)
fec.flagCache = make([]bool, fec.shardSize) dec.flagCache = make([]bool, dec.shardSize)
return fec dec.zeros = make([]byte, mtuLimit)
return dec
} }
// decodeBytes a fec packet // decodeBytes a fec packet
...@@ -116,7 +120,7 @@ func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) { ...@@ -116,7 +120,7 @@ func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
if searchEnd-searchBegin+1 >= dec.dataShards { if searchEnd-searchBegin+1 >= dec.dataShards {
var numshard, numDataShard, first, maxlen int var numshard, numDataShard, first, maxlen int
// zero cache // zero caches
shards := dec.decodeCache shards := dec.decodeCache
shardsflag := dec.flagCache shardsflag := dec.flagCache
for k := range dec.decodeCache { for k := range dec.decodeCache {
...@@ -146,15 +150,15 @@ func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) { ...@@ -146,15 +150,15 @@ func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
} }
if numDataShard == dec.dataShards { if numDataShard == dec.dataShards {
// case 1: no lost data shards // case 1: no loss on data shards
dec.rx = dec.freeRange(first, numshard, dec.rx) dec.rx = dec.freeRange(first, numshard, dec.rx)
} else if numshard >= dec.dataShards { } else if numshard >= dec.dataShards {
// case 2: data shard lost, but recoverable from parity shard // case 2: loss on data shards, but it's recoverable from parity shards
for k := range shards { for k := range shards {
if shards[k] != nil { if shards[k] != nil {
dlen := len(shards[k]) dlen := len(shards[k])
shards[k] = shards[k][:maxlen] shards[k] = shards[k][:maxlen]
xorBytes(shards[k][dlen:], shards[k][dlen:], shards[k][dlen:]) copy(shards[k][dlen:], dec.zeros)
} }
} }
if err := dec.codec.ReconstructData(shards); err == nil { if err := dec.codec.ReconstructData(shards); err == nil {
...@@ -170,7 +174,7 @@ func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) { ...@@ -170,7 +174,7 @@ func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
// keep rxlimit // keep rxlimit
if len(dec.rx) > dec.rxlimit { if len(dec.rx) > dec.rxlimit {
if dec.rx[0].flag == typeData { // record unrecoverable data if dec.rx[0].flag == typeData { // track the unrecoverable data
atomic.AddUint64(&DefaultSnmp.FECShortShards, 1) atomic.AddUint64(&DefaultSnmp.FECShortShards, 1)
} }
dec.rx = dec.freeRange(0, 1, dec.rx) dec.rx = dec.freeRange(0, 1, dec.rx)
...@@ -180,7 +184,7 @@ func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) { ...@@ -180,7 +184,7 @@ func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
// free a range of fecPacket, and zero for GC recycling // free a range of fecPacket, and zero for GC recycling
func (dec *fecDecoder) freeRange(first, n int, q []fecPacket) []fecPacket { func (dec *fecDecoder) freeRange(first, n int, q []fecPacket) []fecPacket {
for i := first; i < first+n; i++ { // free for i := first; i < first+n; i++ { // recycle buffer
xmitBuf.Put(q[i].data) xmitBuf.Put(q[i].data)
} }
copy(q[first:], q[first+n:]) copy(q[first:], q[first+n:])
...@@ -200,7 +204,7 @@ type ( ...@@ -200,7 +204,7 @@ type (
next uint32 // next seqid next uint32 // next seqid
shardCount int // count the number of datashards collected shardCount int // count the number of datashards collected
maxSize int // record maximum data length in datashard maxSize int // track maximum data length in datashard
headerOffset int // FEC header offset headerOffset int // FEC header offset
payloadOffset int // FEC payload offset payloadOffset int // FEC payload offset
...@@ -209,6 +213,9 @@ type ( ...@@ -209,6 +213,9 @@ type (
shardCache [][]byte shardCache [][]byte
encodeCache [][]byte encodeCache [][]byte
// zeros
zeros []byte
// RS encoder // RS encoder
codec reedsolomon.Encoder codec reedsolomon.Encoder
} }
...@@ -218,31 +225,32 @@ func newFECEncoder(dataShards, parityShards, offset int) *fecEncoder { ...@@ -218,31 +225,32 @@ func newFECEncoder(dataShards, parityShards, offset int) *fecEncoder {
if dataShards <= 0 || parityShards <= 0 { if dataShards <= 0 || parityShards <= 0 {
return nil return nil
} }
fec := new(fecEncoder) enc := new(fecEncoder)
fec.dataShards = dataShards enc.dataShards = dataShards
fec.parityShards = parityShards enc.parityShards = parityShards
fec.shardSize = dataShards + parityShards enc.shardSize = dataShards + parityShards
fec.paws = (0xffffffff/uint32(fec.shardSize) - 1) * uint32(fec.shardSize) enc.paws = (0xffffffff/uint32(enc.shardSize) - 1) * uint32(enc.shardSize)
fec.headerOffset = offset enc.headerOffset = offset
fec.payloadOffset = fec.headerOffset + fecHeaderSize enc.payloadOffset = enc.headerOffset + fecHeaderSize
enc, err := reedsolomon.New(dataShards, parityShards) codec, err := reedsolomon.New(dataShards, parityShards)
if err != nil { if err != nil {
return nil return nil
} }
fec.codec = enc enc.codec = codec
// caches // caches
fec.encodeCache = make([][]byte, fec.shardSize) enc.encodeCache = make([][]byte, enc.shardSize)
fec.shardCache = make([][]byte, fec.shardSize) enc.shardCache = make([][]byte, enc.shardSize)
for k := range fec.shardCache { for k := range enc.shardCache {
fec.shardCache[k] = make([]byte, mtuLimit) enc.shardCache[k] = make([]byte, mtuLimit)
} }
return fec enc.zeros = make([]byte, mtuLimit)
return enc
} }
// encode the packet, output parity shards if we have enough datashards // encodes the packet, outputs parity shards if we have collected quorum datashards
// the content of returned parityshards will change in next encode // notice: the contents of 'ps' will be re-written in successive calling
func (enc *fecEncoder) encode(b []byte) (ps [][]byte) { func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
enc.markData(b[enc.headerOffset:]) enc.markData(b[enc.headerOffset:])
binary.LittleEndian.PutUint16(b[enc.payloadOffset:], uint16(len(b[enc.payloadOffset:]))) binary.LittleEndian.PutUint16(b[enc.payloadOffset:], uint16(len(b[enc.payloadOffset:])))
...@@ -253,18 +261,18 @@ func (enc *fecEncoder) encode(b []byte) (ps [][]byte) { ...@@ -253,18 +261,18 @@ func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
copy(enc.shardCache[enc.shardCount], b) copy(enc.shardCache[enc.shardCount], b)
enc.shardCount++ enc.shardCount++
// record max datashard length // track max datashard length
if sz > enc.maxSize { if sz > enc.maxSize {
enc.maxSize = sz enc.maxSize = sz
} }
// calculate Reed-Solomon Erasure Code // Generation of Reed-Solomon Erasure Code
if enc.shardCount == enc.dataShards { if enc.shardCount == enc.dataShards {
// bzero each datashard's tail // fill '0' into the tail of each datashard
for i := 0; i < enc.dataShards; i++ { for i := 0; i < enc.dataShards; i++ {
shard := enc.shardCache[i] shard := enc.shardCache[i]
slen := len(shard) slen := len(shard)
xorBytes(shard[slen:enc.maxSize], shard[slen:enc.maxSize], shard[slen:enc.maxSize]) copy(shard[slen:enc.maxSize], enc.zeros)
} }
// construct equal-sized slice with stripped header // construct equal-sized slice with stripped header
...@@ -273,7 +281,7 @@ func (enc *fecEncoder) encode(b []byte) (ps [][]byte) { ...@@ -273,7 +281,7 @@ func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
cache[k] = enc.shardCache[k][enc.payloadOffset:enc.maxSize] cache[k] = enc.shardCache[k][enc.payloadOffset:enc.maxSize]
} }
// rs encode // encoding
if err := enc.codec.Encode(cache); err == nil { if err := enc.codec.Encode(cache); err == nil {
ps = enc.shardCache[enc.dataShards:] ps = enc.shardCache[enc.dataShards:]
for k := range ps { for k := range ps {
...@@ -282,7 +290,7 @@ func (enc *fecEncoder) encode(b []byte) (ps [][]byte) { ...@@ -282,7 +290,7 @@ func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
} }
} }
// reset counters to zero // counters resetting
enc.shardCount = 0 enc.shardCount = 0
enc.maxSize = 0 enc.maxSize = 0
} }
......
...@@ -104,6 +104,7 @@ type segment struct { ...@@ -104,6 +104,7 @@ type segment struct {
xmit uint32 xmit uint32
resendts uint32 resendts uint32
fastack uint32 fastack uint32
acked uint32 // mark if the seg has acked
data []byte data []byte
} }
...@@ -181,8 +182,11 @@ func (kcp *KCP) newSegment(size int) (seg segment) { ...@@ -181,8 +182,11 @@ func (kcp *KCP) newSegment(size int) (seg segment) {
} }
// delSegment recycles a KCP segment // delSegment recycles a KCP segment
func (kcp *KCP) delSegment(seg segment) { func (kcp *KCP) delSegment(seg *segment) {
xmitBuf.Put(seg.data) if seg.data != nil {
xmitBuf.Put(seg.data)
seg.data = nil
}
} }
// PeekSize checks the size of next message in the recv queue // PeekSize checks the size of next message in the recv queue
...@@ -238,7 +242,7 @@ func (kcp *KCP) Recv(buffer []byte) (n int) { ...@@ -238,7 +242,7 @@ func (kcp *KCP) Recv(buffer []byte) (n int) {
buffer = buffer[len(seg.data):] buffer = buffer[len(seg.data):]
n += len(seg.data) n += len(seg.data)
count++ count++
kcp.delSegment(*seg) kcp.delSegment(seg)
if seg.frg == 0 { if seg.frg == 0 {
break break
} }
...@@ -382,10 +386,8 @@ func (kcp *KCP) parse_ack(sn uint32) { ...@@ -382,10 +386,8 @@ func (kcp *KCP) parse_ack(sn uint32) {
for k := range kcp.snd_buf { for k := range kcp.snd_buf {
seg := &kcp.snd_buf[k] seg := &kcp.snd_buf[k]
if sn == seg.sn { if sn == seg.sn {
kcp.delSegment(*seg) seg.acked = 1
copy(kcp.snd_buf[k:], kcp.snd_buf[k+1:]) kcp.delSegment(seg)
kcp.snd_buf[len(kcp.snd_buf)-1] = segment{}
kcp.snd_buf = kcp.snd_buf[:len(kcp.snd_buf)-1]
break break
} }
if _itimediff(sn, seg.sn) < 0 { if _itimediff(sn, seg.sn) < 0 {
...@@ -394,7 +396,7 @@ func (kcp *KCP) parse_ack(sn uint32) { ...@@ -394,7 +396,7 @@ func (kcp *KCP) parse_ack(sn uint32) {
} }
} }
func (kcp *KCP) parse_fastack(sn uint32) { func (kcp *KCP) parse_fastack(sn, ts uint32) {
if _itimediff(sn, kcp.snd_una) < 0 || _itimediff(sn, kcp.snd_nxt) >= 0 { if _itimediff(sn, kcp.snd_una) < 0 || _itimediff(sn, kcp.snd_nxt) >= 0 {
return return
} }
...@@ -403,7 +405,7 @@ func (kcp *KCP) parse_fastack(sn uint32) { ...@@ -403,7 +405,7 @@ func (kcp *KCP) parse_fastack(sn uint32) {
seg := &kcp.snd_buf[k] seg := &kcp.snd_buf[k]
if _itimediff(sn, seg.sn) < 0 { if _itimediff(sn, seg.sn) < 0 {
break break
} else if sn != seg.sn { } else if sn != seg.sn && _itimediff(seg.ts, ts) <= 0 {
seg.fastack++ seg.fastack++
} }
} }
...@@ -414,7 +416,7 @@ func (kcp *KCP) parse_una(una uint32) { ...@@ -414,7 +416,7 @@ func (kcp *KCP) parse_una(una uint32) {
for k := range kcp.snd_buf { for k := range kcp.snd_buf {
seg := &kcp.snd_buf[k] seg := &kcp.snd_buf[k]
if _itimediff(una, seg.sn) > 0 { if _itimediff(una, seg.sn) > 0 {
kcp.delSegment(*seg) kcp.delSegment(seg)
count++ count++
} else { } else {
break break
...@@ -430,12 +432,12 @@ func (kcp *KCP) ack_push(sn, ts uint32) { ...@@ -430,12 +432,12 @@ func (kcp *KCP) ack_push(sn, ts uint32) {
kcp.acklist = append(kcp.acklist, ackItem{sn, ts}) kcp.acklist = append(kcp.acklist, ackItem{sn, ts})
} }
func (kcp *KCP) parse_data(newseg segment) { // returns true if data has repeated
func (kcp *KCP) parse_data(newseg segment) bool {
sn := newseg.sn sn := newseg.sn
if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) >= 0 || if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) >= 0 ||
_itimediff(sn, kcp.rcv_nxt) < 0 { _itimediff(sn, kcp.rcv_nxt) < 0 {
kcp.delSegment(newseg) return true
return
} }
n := len(kcp.rcv_buf) - 1 n := len(kcp.rcv_buf) - 1
...@@ -445,7 +447,6 @@ func (kcp *KCP) parse_data(newseg segment) { ...@@ -445,7 +447,6 @@ func (kcp *KCP) parse_data(newseg segment) {
seg := &kcp.rcv_buf[i] seg := &kcp.rcv_buf[i]
if seg.sn == sn { if seg.sn == sn {
repeat = true repeat = true
atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
break break
} }
if _itimediff(sn, seg.sn) > 0 { if _itimediff(sn, seg.sn) > 0 {
...@@ -455,6 +456,11 @@ func (kcp *KCP) parse_data(newseg segment) { ...@@ -455,6 +456,11 @@ func (kcp *KCP) parse_data(newseg segment) {
} }
if !repeat { if !repeat {
// replicate the content if it's new
dataCopy := xmitBuf.Get().([]byte)[:len(newseg.data)]
copy(dataCopy, newseg.data)
newseg.data = dataCopy
if insert_idx == n+1 { if insert_idx == n+1 {
kcp.rcv_buf = append(kcp.rcv_buf, newseg) kcp.rcv_buf = append(kcp.rcv_buf, newseg)
} else { } else {
...@@ -462,8 +468,6 @@ func (kcp *KCP) parse_data(newseg segment) { ...@@ -462,8 +468,6 @@ func (kcp *KCP) parse_data(newseg segment) {
copy(kcp.rcv_buf[insert_idx+1:], kcp.rcv_buf[insert_idx:]) copy(kcp.rcv_buf[insert_idx+1:], kcp.rcv_buf[insert_idx:])
kcp.rcv_buf[insert_idx] = newseg kcp.rcv_buf[insert_idx] = newseg
} }
} else {
kcp.delSegment(newseg)
} }
// move available data from rcv_buf -> rcv_queue // move available data from rcv_buf -> rcv_queue
...@@ -481,18 +485,19 @@ func (kcp *KCP) parse_data(newseg segment) { ...@@ -481,18 +485,19 @@ func (kcp *KCP) parse_data(newseg segment) {
kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[:count]...) kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[:count]...)
kcp.rcv_buf = kcp.remove_front(kcp.rcv_buf, count) kcp.rcv_buf = kcp.remove_front(kcp.rcv_buf, count)
} }
return repeat
} }
// Input when you received a low level packet (eg. UDP packet), call it // Input when you received a low level packet (eg. UDP packet), call it
// regular indicates a regular packet has received(not from FEC) // regular indicates a regular packet has received(not from FEC)
func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int { func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
una := kcp.snd_una snd_una := kcp.snd_una
if len(data) < IKCP_OVERHEAD { if len(data) < IKCP_OVERHEAD {
return -1 return -1
} }
var maxack uint32 var latest uint32 // the latest ack packet
var lastackts uint32
var flag int var flag int
var inSegs uint64 var inSegs uint64
...@@ -535,19 +540,15 @@ func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int { ...@@ -535,19 +540,15 @@ func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
if cmd == IKCP_CMD_ACK { if cmd == IKCP_CMD_ACK {
kcp.parse_ack(sn) kcp.parse_ack(sn)
kcp.shrink_buf() kcp.parse_fastack(sn, ts)
if flag == 0 { flag |= 1
flag = 1 latest = ts
maxack = sn
} else if _itimediff(sn, maxack) > 0 {
maxack = sn
}
lastackts = ts
} else if cmd == IKCP_CMD_PUSH { } else if cmd == IKCP_CMD_PUSH {
repeat := true
if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) < 0 { if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) < 0 {
kcp.ack_push(sn, ts) kcp.ack_push(sn, ts)
if _itimediff(sn, kcp.rcv_nxt) >= 0 { if _itimediff(sn, kcp.rcv_nxt) >= 0 {
seg := kcp.newSegment(int(length)) var seg segment
seg.conv = conv seg.conv = conv
seg.cmd = cmd seg.cmd = cmd
seg.frg = frg seg.frg = frg
...@@ -555,12 +556,11 @@ func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int { ...@@ -555,12 +556,11 @@ func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
seg.ts = ts seg.ts = ts
seg.sn = sn seg.sn = sn
seg.una = una seg.una = una
copy(seg.data, data[:length]) seg.data = data[:length] // delayed data copying
kcp.parse_data(seg) repeat = kcp.parse_data(seg)
} else {
atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
} }
} else { }
if regular && repeat {
atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1) atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
} }
} else if cmd == IKCP_CMD_WASK { } else if cmd == IKCP_CMD_WASK {
...@@ -578,40 +578,42 @@ func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int { ...@@ -578,40 +578,42 @@ func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
} }
atomic.AddUint64(&DefaultSnmp.InSegs, inSegs) atomic.AddUint64(&DefaultSnmp.InSegs, inSegs)
// update rtt with the latest ts
// ignore the FEC packet
if flag != 0 && regular { if flag != 0 && regular {
kcp.parse_fastack(maxack)
current := currentMs() current := currentMs()
if _itimediff(current, lastackts) >= 0 { if _itimediff(current, latest) >= 0 {
kcp.update_ack(_itimediff(current, lastackts)) kcp.update_ack(_itimediff(current, latest))
} }
} }
if _itimediff(kcp.snd_una, una) > 0 { // cwnd update when packet arrived
if kcp.cwnd < kcp.rmt_wnd { if kcp.nocwnd == 0 {
mss := kcp.mss if _itimediff(kcp.snd_una, snd_una) > 0 {
if kcp.cwnd < kcp.ssthresh { if kcp.cwnd < kcp.rmt_wnd {
kcp.cwnd++ mss := kcp.mss
kcp.incr += mss if kcp.cwnd < kcp.ssthresh {
} else {
if kcp.incr < mss {
kcp.incr = mss
}
kcp.incr += (mss*mss)/kcp.incr + (mss / 16)
if (kcp.cwnd+1)*mss <= kcp.incr {
kcp.cwnd++ kcp.cwnd++
kcp.incr += mss
} else {
if kcp.incr < mss {
kcp.incr = mss
}
kcp.incr += (mss*mss)/kcp.incr + (mss / 16)
if (kcp.cwnd+1)*mss <= kcp.incr {
kcp.cwnd++
}
}
if kcp.cwnd > kcp.rmt_wnd {
kcp.cwnd = kcp.rmt_wnd
kcp.incr = kcp.rmt_wnd * mss
} }
}
if kcp.cwnd > kcp.rmt_wnd {
kcp.cwnd = kcp.rmt_wnd
kcp.incr = kcp.rmt_wnd * mss
} }
} }
} }
if ackNoDelay && len(kcp.acklist) > 0 { // ack immediately if ackNoDelay && len(kcp.acklist) > 0 { // ack immediately
kcp.flush(true) kcp.flush(true)
} else if kcp.rmt_wnd == 0 && len(kcp.acklist) > 0 { // window zero
kcp.flush(true)
} }
return 0 return 0
} }
...@@ -624,7 +626,7 @@ func (kcp *KCP) wnd_unused() uint16 { ...@@ -624,7 +626,7 @@ func (kcp *KCP) wnd_unused() uint16 {
} }
// flush pending data // flush pending data
func (kcp *KCP) flush(ackOnly bool) { func (kcp *KCP) flush(ackOnly bool) uint32 {
var seg segment var seg segment
seg.conv = kcp.conv seg.conv = kcp.conv
seg.cmd = IKCP_CMD_ACK seg.cmd = IKCP_CMD_ACK
...@@ -653,7 +655,7 @@ func (kcp *KCP) flush(ackOnly bool) { ...@@ -653,7 +655,7 @@ func (kcp *KCP) flush(ackOnly bool) {
if size > 0 { if size > 0 {
kcp.output(buffer, size) kcp.output(buffer, size)
} }
return return kcp.interval
} }
// probe window size (if remote window size equals zero) // probe window size (if remote window size equals zero)
...@@ -723,7 +725,6 @@ func (kcp *KCP) flush(ackOnly bool) { ...@@ -723,7 +725,6 @@ func (kcp *KCP) flush(ackOnly bool) {
kcp.snd_buf = append(kcp.snd_buf, newseg) kcp.snd_buf = append(kcp.snd_buf, newseg)
kcp.snd_nxt++ kcp.snd_nxt++
newSegsCount++ newSegsCount++
kcp.snd_queue[k].data = nil
} }
if newSegsCount > 0 { if newSegsCount > 0 {
kcp.snd_queue = kcp.remove_front(kcp.snd_queue, newSegsCount) kcp.snd_queue = kcp.remove_front(kcp.snd_queue, newSegsCount)
...@@ -738,9 +739,15 @@ func (kcp *KCP) flush(ackOnly bool) { ...@@ -738,9 +739,15 @@ func (kcp *KCP) flush(ackOnly bool) {
// check for retransmissions // check for retransmissions
current := currentMs() current := currentMs()
var change, lost, lostSegs, fastRetransSegs, earlyRetransSegs uint64 var change, lost, lostSegs, fastRetransSegs, earlyRetransSegs uint64
for k := range kcp.snd_buf { minrto := int32(kcp.interval)
segment := &kcp.snd_buf[k]
ref := kcp.snd_buf[:len(kcp.snd_buf)] // for bounds check elimination
for k := range ref {
segment := &ref[k]
needsend := false needsend := false
if segment.acked == 1 {
continue
}
if segment.xmit == 0 { // initial transmit if segment.xmit == 0 { // initial transmit
needsend = true needsend = true
segment.rto = kcp.rx_rto segment.rto = kcp.rx_rto
...@@ -772,6 +779,7 @@ func (kcp *KCP) flush(ackOnly bool) { ...@@ -772,6 +779,7 @@ func (kcp *KCP) flush(ackOnly bool) {
} }
if needsend { if needsend {
current = currentMs() // time update for a blocking call
segment.xmit++ segment.xmit++
segment.ts = current segment.ts = current
segment.wnd = seg.wnd segment.wnd = seg.wnd
...@@ -782,7 +790,6 @@ func (kcp *KCP) flush(ackOnly bool) { ...@@ -782,7 +790,6 @@ func (kcp *KCP) flush(ackOnly bool) {
if size+need > int(kcp.mtu) { if size+need > int(kcp.mtu) {
kcp.output(buffer, size) kcp.output(buffer, size)
current = currentMs() // time update for a blocking call
ptr = buffer ptr = buffer
} }
...@@ -794,6 +801,11 @@ func (kcp *KCP) flush(ackOnly bool) { ...@@ -794,6 +801,11 @@ func (kcp *KCP) flush(ackOnly bool) {
kcp.state = 0xFFFFFFFF kcp.state = 0xFFFFFFFF
} }
} }
// get the nearest rto
if rto := _itimediff(segment.resendts, current); rto > 0 && rto < minrto {
minrto = rto
}
} }
// flash remain segments // flash remain segments
...@@ -819,32 +831,37 @@ func (kcp *KCP) flush(ackOnly bool) { ...@@ -819,32 +831,37 @@ func (kcp *KCP) flush(ackOnly bool) {
atomic.AddUint64(&DefaultSnmp.RetransSegs, sum) atomic.AddUint64(&DefaultSnmp.RetransSegs, sum)
} }
// update ssthresh // cwnd update
// rate halving, https://tools.ietf.org/html/rfc6937 if kcp.nocwnd == 0 {
if change > 0 { // update ssthresh
inflight := kcp.snd_nxt - kcp.snd_una // rate halving, https://tools.ietf.org/html/rfc6937
kcp.ssthresh = inflight / 2 if change > 0 {
if kcp.ssthresh < IKCP_THRESH_MIN { inflight := kcp.snd_nxt - kcp.snd_una
kcp.ssthresh = IKCP_THRESH_MIN kcp.ssthresh = inflight / 2
if kcp.ssthresh < IKCP_THRESH_MIN {
kcp.ssthresh = IKCP_THRESH_MIN
}
kcp.cwnd = kcp.ssthresh + resent
kcp.incr = kcp.cwnd * kcp.mss
} }
kcp.cwnd = kcp.ssthresh + resent
kcp.incr = kcp.cwnd * kcp.mss
}
// congestion control, https://tools.ietf.org/html/rfc5681 // congestion control, https://tools.ietf.org/html/rfc5681
if lost > 0 { if lost > 0 {
kcp.ssthresh = cwnd / 2 kcp.ssthresh = cwnd / 2
if kcp.ssthresh < IKCP_THRESH_MIN { if kcp.ssthresh < IKCP_THRESH_MIN {
kcp.ssthresh = IKCP_THRESH_MIN kcp.ssthresh = IKCP_THRESH_MIN
}
kcp.cwnd = 1
kcp.incr = kcp.mss
} }
kcp.cwnd = 1
kcp.incr = kcp.mss
}
if kcp.cwnd < 1 { if kcp.cwnd < 1 {
kcp.cwnd = 1 kcp.cwnd = 1
kcp.incr = kcp.mss kcp.incr = kcp.mss
}
} }
return uint32(minrto)
} }
// Update updates state (call it repeatedly, every 10ms-100ms), or you can ask // Update updates state (call it repeatedly, every 10ms-100ms), or you can ask
...@@ -991,8 +1008,5 @@ func (kcp *KCP) WaitSnd() int { ...@@ -991,8 +1008,5 @@ func (kcp *KCP) WaitSnd() int {
// remove front n elements from queue // remove front n elements from queue
func (kcp *KCP) remove_front(q []segment, n int) []segment { func (kcp *KCP) remove_front(q []segment, n int) []segment {
newn := copy(q, q[n:]) newn := copy(q, q[n:])
for i := newn; i < len(q); i++ {
q[i] = segment{} // manual set nil for GC
}
return q[:newn] return q[:newn]
} }
...@@ -85,20 +85,19 @@ func (h *updateHeap) updateTask() { ...@@ -85,20 +85,19 @@ func (h *updateHeap) updateTask() {
h.mu.Lock() h.mu.Lock()
hlen := h.Len() hlen := h.Len()
now := time.Now()
for i := 0; i < hlen; i++ { for i := 0; i < hlen; i++ {
entry := heap.Pop(h).(entry) entry := &h.entries[0]
if now.After(entry.ts) { if time.Now().After(entry.ts) {
entry.ts = now.Add(entry.s.update()) interval := entry.s.update()
heap.Push(h, entry) entry.ts = time.Now().Add(interval)
heap.Fix(h, 0)
} else { } else {
heap.Push(h, entry)
break break
} }
} }
if hlen > 0 { if hlen > 0 {
timer = time.After(h.entries[0].ts.Sub(now)) timer = time.After(h.entries[0].ts.Sub(time.Now()))
} }
h.mu.Unlock() h.mu.Unlock()
} }
......
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package kcp
import (
"runtime"
"unsafe"
)
const wordSize = int(unsafe.Sizeof(uintptr(0)))
const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"
// fastXORBytes xors in bulk. It only works on architectures that
// support unaligned read/writes.
func fastXORBytes(dst, a, b []byte) int {
n := len(a)
if len(b) < n {
n = len(b)
}
w := n / wordSize
if w > 0 {
wordBytes := w * wordSize
fastXORWords(dst[:wordBytes], a[:wordBytes], b[:wordBytes])
}
for i := (n - n%wordSize); i < n; i++ {
dst[i] = a[i] ^ b[i]
}
return n
}
func safeXORBytes(dst, a, b []byte) int {
n := len(a)
if len(b) < n {
n = len(b)
}
ex := n % 8
for i := 0; i < ex; i++ {
dst[i] = a[i] ^ b[i]
}
for i := ex; i < n; i += 8 {
_dst := dst[i : i+8]
_a := a[i : i+8]
_b := b[i : i+8]
_dst[0] = _a[0] ^ _b[0]
_dst[1] = _a[1] ^ _b[1]
_dst[2] = _a[2] ^ _b[2]
_dst[3] = _a[3] ^ _b[3]
_dst[4] = _a[4] ^ _b[4]
_dst[5] = _a[5] ^ _b[5]
_dst[6] = _a[6] ^ _b[6]
_dst[7] = _a[7] ^ _b[7]
}
return n
}
// xorBytes xors the bytes in a and b. The destination is assumed to have enough
// space. Returns the number of bytes xor'd.
func xorBytes(dst, a, b []byte) int {
if supportsUnaligned {
return fastXORBytes(dst, a, b)
}
// TODO(hanwen): if (dst, a, b) have common alignment
// we could still try fastXORBytes. It is not clear
// how often this happens, and it's only worth it if
// the block encryption itself is hardware
// accelerated.
return safeXORBytes(dst, a, b)
}
// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
// The arguments are assumed to be of equal length.
func fastXORWords(dst, a, b []byte) {
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
aw := *(*[]uintptr)(unsafe.Pointer(&a))
bw := *(*[]uintptr)(unsafe.Pointer(&b))
n := len(b) / wordSize
ex := n % 8
for i := 0; i < ex; i++ {
dw[i] = aw[i] ^ bw[i]
}
for i := ex; i < n; i += 8 {
_dw := dw[i : i+8]
_aw := aw[i : i+8]
_bw := bw[i : i+8]
_dw[0] = _aw[0] ^ _bw[0]
_dw[1] = _aw[1] ^ _bw[1]
_dw[2] = _aw[2] ^ _bw[2]
_dw[3] = _aw[3] ^ _bw[3]
_dw[4] = _aw[4] ^ _bw[4]
_dw[5] = _aw[5] ^ _bw[5]
_dw[6] = _aw[6] ^ _bw[6]
_dw[7] = _aw[7] ^ _bw[7]
}
}
func xorWords(dst, a, b []byte) {
if supportsUnaligned {
fastXORWords(dst, a, b)
} else {
safeXORBytes(dst, a, b)
}
}
...@@ -22,19 +22,3 @@ _testmain.go ...@@ -22,19 +22,3 @@ _testmain.go
*.exe *.exe
*.test *.test
*.prof *.prof
/.idea
/backup
/loopunroll/
cpu.out
mathtool/galois/
mathtool/matrix/
mem.out
/examples/
/.DS_Store
/mathtool/cntinverse
/invert
/bakcup
/buf.svg
*.svg
*.out
/escape
language: go
sudo: false
os:
- linux
- osx
go:
- 1.8.x
- 1.9.x
- 1.10.x
- master
script:
- go vet ./...
- go test -v ./...
- go test -race ./...
- diff <(gofmt -d .) <("")
matrix:
allow_failures:
- go: 'master'
fast_finish: true
Developer Certificate of Origin
Version 1.1
Copyright (C) 2015- Klaus Post & Contributors.
Email: klauspost@gmail.com
Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.
Developer's Certificate of Origin 1.1
By making a contribution to this project, I certify that:
(a) The contribution was created in whole or in part by me and I
have the right to submit it under the open source license
indicated in the file; or
(b) The contribution is based upon previous work that, to the best
of my knowledge, is covered under an appropriate open source
license and I have the right under that license to submit that
work with modifications, whether created in whole or in part
by me, under the same open source license (unless I am
permitted to submit under a different license), as indicated
in the file; or
(c) The contribution was provided directly to me by some other
person who certified (a), (b) or (c) and I have not modified
it.
(d) I understand and agree that this project and the contribution
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
this project or the open source license(s) involved.
The MIT License (MIT)
Copyright (c) 2015 Klaus Post
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# cpuid
Package cpuid provides information about the CPU running the current program.
CPU features are detected on startup, and kept for fast access through the life of the application.
Currently x86 / x64 (AMD64) is supported, and no external C (cgo) code is used, which should make the library very easy to use.
You can access the CPU information by accessing the shared CPU variable of the cpuid library.
Package home: https://github.com/klauspost/cpuid
[![GoDoc][1]][2] [![Build Status][3]][4]
[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg
[2]: https://godoc.org/github.com/klauspost/cpuid
[3]: https://travis-ci.org/klauspost/cpuid.svg
[4]: https://travis-ci.org/klauspost/cpuid
# features
## CPU Instructions
* **CMOV** (i686 CMOV)
* **NX** (NX (No-Execute) bit)
* **AMD3DNOW** (AMD 3DNOW)
* **AMD3DNOWEXT** (AMD 3DNowExt)
* **MMX** (standard MMX)
* **MMXEXT** (SSE integer functions or AMD MMX ext)
* **SSE** (SSE functions)
* **SSE2** (P4 SSE functions)
* **SSE3** (Prescott SSE3 functions)
* **SSSE3** (Conroe SSSE3 functions)
* **SSE4** (Penryn SSE4.1 functions)
* **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions)
* **SSE42** (Nehalem SSE4.2 functions)
* **AVX** (AVX functions)
* **AVX2** (AVX2 functions)
* **FMA3** (Intel FMA 3)
* **FMA4** (Bulldozer FMA4 functions)
* **XOP** (Bulldozer XOP functions)
* **F16C** (Half-precision floating-point conversion)
* **BMI1** (Bit Manipulation Instruction Set 1)
* **BMI2** (Bit Manipulation Instruction Set 2)
* **TBM** (AMD Trailing Bit Manipulation)
* **LZCNT** (LZCNT instruction)
* **POPCNT** (POPCNT instruction)
* **AESNI** (Advanced Encryption Standard New Instructions)
* **CLMUL** (Carry-less Multiplication)
* **HTT** (Hyperthreading (enabled))
* **HLE** (Hardware Lock Elision)
* **RTM** (Restricted Transactional Memory)
* **RDRAND** (RDRAND instruction is available)
* **RDSEED** (RDSEED instruction is available)
* **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions))
* **SHA** (Intel SHA Extensions)
* **AVX512F** (AVX-512 Foundation)
* **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions)
* **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions)
* **AVX512PF** (AVX-512 Prefetch Instructions)
* **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions)
* **AVX512CD** (AVX-512 Conflict Detection Instructions)
* **AVX512BW** (AVX-512 Byte and Word Instructions)
* **AVX512VL** (AVX-512 Vector Length Extensions)
* **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions)
* **MPX** (Intel MPX (Memory Protection Extensions))
* **ERMS** (Enhanced REP MOVSB/STOSB)
* **RDTSCP** (RDTSCP Instruction)
* **CX16** (CMPXCHG16B Instruction)
* **SGX** (Software Guard Extensions, with activation details)
## Performance
* **RDTSCP()** Returns current cycle count. Can be used for benchmarking.
* **SSE2SLOW** (SSE2 is supported, but usually not faster)
* **SSE3SLOW** (SSE3 is supported, but usually not faster)
* **ATOM** (Atom processor, some SSSE3 instructions are slower)
* **Cache line** (Probable size of a cache line).
* **L1, L2, L3 Cache size** on newer Intel/AMD CPUs.
## Cpu Vendor/VM
* **Intel**
* **AMD**
* **VIA**
* **Transmeta**
* **NSC**
* **KVM** (Kernel-based Virtual Machine)
* **MSVM** (Microsoft Hyper-V or Windows Virtual PC)
* **VMware**
* **XenHVM**
# installing
```go get github.com/klauspost/cpuid```
# example
```Go
package main
import (
"fmt"
"github.com/klauspost/cpuid"
)
func main() {
// Print basic CPU information:
fmt.Println("Name:", cpuid.CPU.BrandName)
fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores)
fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore)
fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores)
fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model)
fmt.Println("Features:", cpuid.CPU.Features)
fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine)
fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes")
fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1D, "bytes")
fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes")
fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes")
// Test if we have a specific feature:
if cpuid.CPU.SSE() {
fmt.Println("We have Streaming SIMD Extensions")
}
}
```
Sample output:
```
>go run main.go
Name: Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz
PhysicalCores: 2
ThreadsPerCore: 2
LogicalCores: 4
Family 6 Model: 42
Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL
Cacheline bytes: 64
We have Streaming SIMD Extensions
```
# private package
In the "private" folder you can find an autogenerated version of the library you can include in your own packages.
For this purpose all exports are removed, and functions and constants are lowercased.
This is not a recommended way of using the library, but provided for convenience, if it is difficult for you to use external packages.
# license
This code is published under an MIT license. See LICENSE file for more information.
此差异已折叠。
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
// +build 386,!gccgo
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
TEXT ·asmCpuid(SB), 7, $0
XORL CX, CX
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+4(FP)
MOVL BX, ebx+8(FP)
MOVL CX, ecx+12(FP)
MOVL DX, edx+16(FP)
RET
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
TEXT ·asmCpuidex(SB), 7, $0
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func xgetbv(index uint32) (eax, edx uint32)
TEXT ·asmXgetbv(SB), 7, $0
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+4(FP)
MOVL DX, edx+8(FP)
RET
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
TEXT ·asmRdtscpAsm(SB), 7, $0
BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
MOVL AX, eax+0(FP)
MOVL BX, ebx+4(FP)
MOVL CX, ecx+8(FP)
MOVL DX, edx+12(FP)
RET
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//+build amd64,!gccgo
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
TEXT ·asmCpuid(SB), 7, $0
XORQ CX, CX
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
TEXT ·asmCpuidex(SB), 7, $0
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func asmXgetbv(index uint32) (eax, edx uint32)
TEXT ·asmXgetbv(SB), 7, $0
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+8(FP)
MOVL DX, edx+12(FP)
RET
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
TEXT ·asmRdtscpAsm(SB), 7, $0
BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
MOVL AX, eax+0(FP)
MOVL BX, ebx+4(FP)
MOVL CX, ecx+8(FP)
MOVL DX, edx+12(FP)
RET
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
// +build 386,!gccgo amd64,!gccgo
package cpuid
func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
func asmXgetbv(index uint32) (eax, edx uint32)
func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
func initCPU() {
cpuid = asmCpuid
cpuidex = asmCpuidex
xgetbv = asmXgetbv
rdtscpAsm = asmRdtscpAsm
}
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
// +build !amd64,!386 gccgo
package cpuid
func initCPU() {
cpuid = func(op uint32) (eax, ebx, ecx, edx uint32) {
return 0, 0, 0, 0
}
cpuidex = func(op, op2 uint32) (eax, ebx, ecx, edx uint32) {
return 0, 0, 0, 0
}
xgetbv = func(index uint32) (eax, edx uint32) {
return 0, 0
}
rdtscpAsm = func() (eax, ebx, ecx, edx uint32) {
return 0, 0, 0, 0
}
}
package cpuid
//go:generate go run private-gen.go
//go:generate gofmt -w ./private
// +build ignore
package main
import (
"bytes"
"fmt"
"go/ast"
"go/parser"
"go/printer"
"go/token"
"io"
"io/ioutil"
"log"
"os"
"reflect"
"strings"
"unicode"
"unicode/utf8"
)
var inFiles = []string{"cpuid.go", "cpuid_test.go"}
var copyFiles = []string{"cpuid_amd64.s", "cpuid_386.s", "detect_ref.go", "detect_intel.go"}
var fileSet = token.NewFileSet()
var reWrites = []rewrite{
initRewrite("CPUInfo -> cpuInfo"),
initRewrite("Vendor -> vendor"),
initRewrite("Flags -> flags"),
initRewrite("Detect -> detect"),
initRewrite("CPU -> cpu"),
}
var excludeNames = map[string]bool{"string": true, "join": true, "trim": true,
// cpuid_test.go
"t": true, "println": true, "logf": true, "log": true, "fatalf": true, "fatal": true,
}
var excludePrefixes = []string{"test", "benchmark"}
func main() {
Package := "private"
parserMode := parser.ParseComments
exported := make(map[string]rewrite)
for _, file := range inFiles {
in, err := os.Open(file)
if err != nil {
log.Fatalf("opening input", err)
}
src, err := ioutil.ReadAll(in)
if err != nil {
log.Fatalf("reading input", err)
}
astfile, err := parser.ParseFile(fileSet, file, src, parserMode)
if err != nil {
log.Fatalf("parsing input", err)
}
for _, rw := range reWrites {
astfile = rw(astfile)
}
// Inspect the AST and print all identifiers and literals.
var startDecl token.Pos
var endDecl token.Pos
ast.Inspect(astfile, func(n ast.Node) bool {
var s string
switch x := n.(type) {
case *ast.Ident:
if x.IsExported() {
t := strings.ToLower(x.Name)
for _, pre := range excludePrefixes {
if strings.HasPrefix(t, pre) {
return true
}
}
if excludeNames[t] != true {
//if x.Pos() > startDecl && x.Pos() < endDecl {
exported[x.Name] = initRewrite(x.Name + " -> " + t)
}
}
case *ast.GenDecl:
if x.Tok == token.CONST && x.Lparen > 0 {
startDecl = x.Lparen
endDecl = x.Rparen
// fmt.Printf("Decl:%s -> %s\n", fileSet.Position(startDecl), fileSet.Position(endDecl))
}
}
if s != "" {
fmt.Printf("%s:\t%s\n", fileSet.Position(n.Pos()), s)
}
return true
})
for _, rw := range exported {
astfile = rw(astfile)
}
var buf bytes.Buffer
printer.Fprint(&buf, fileSet, astfile)
// Remove package documentation and insert information
s := buf.String()
ind := strings.Index(buf.String(), "\npackage cpuid")
s = s[ind:]
s = "// Generated, DO NOT EDIT,\n" +
"// but copy it to your own project and rename the package.\n" +
"// See more at http://github.com/klauspost/cpuid\n" +
s
outputName := Package + string(os.PathSeparator) + file
err = ioutil.WriteFile(outputName, []byte(s), 0644)
if err != nil {
log.Fatalf("writing output: %s", err)
}
log.Println("Generated", outputName)
}
for _, file := range copyFiles {
dst := ""
if strings.HasPrefix(file, "cpuid") {
dst = Package + string(os.PathSeparator) + file
} else {
dst = Package + string(os.PathSeparator) + "cpuid_" + file
}
err := copyFile(file, dst)
if err != nil {
log.Fatalf("copying file: %s", err)
}
log.Println("Copied", dst)
}
}
// CopyFile copies a file from src to dst. If src and dst files exist, and are
// the same, then return success. Copy the file contents from src to dst.
func copyFile(src, dst string) (err error) {
sfi, err := os.Stat(src)
if err != nil {
return
}
if !sfi.Mode().IsRegular() {
// cannot copy non-regular files (e.g., directories,
// symlinks, devices, etc.)
return fmt.Errorf("CopyFile: non-regular source file %s (%q)", sfi.Name(), sfi.Mode().String())
}
dfi, err := os.Stat(dst)
if err != nil {
if !os.IsNotExist(err) {
return
}
} else {
if !(dfi.Mode().IsRegular()) {
return fmt.Errorf("CopyFile: non-regular destination file %s (%q)", dfi.Name(), dfi.Mode().String())
}
if os.SameFile(sfi, dfi) {
return
}
}
err = copyFileContents(src, dst)
return
}
// copyFileContents copies the contents of the file named src to the file named
// by dst. The file will be created if it does not already exist. If the
// destination file exists, all it's contents will be replaced by the contents
// of the source file.
func copyFileContents(src, dst string) (err error) {
in, err := os.Open(src)
if err != nil {
return
}
defer in.Close()
out, err := os.Create(dst)
if err != nil {
return
}
defer func() {
cerr := out.Close()
if err == nil {
err = cerr
}
}()
if _, err = io.Copy(out, in); err != nil {
return
}
err = out.Sync()
return
}
type rewrite func(*ast.File) *ast.File
// Mostly copied from gofmt
func initRewrite(rewriteRule string) rewrite {
f := strings.Split(rewriteRule, "->")
if len(f) != 2 {
fmt.Fprintf(os.Stderr, "rewrite rule must be of the form 'pattern -> replacement'\n")
os.Exit(2)
}
pattern := parseExpr(f[0], "pattern")
replace := parseExpr(f[1], "replacement")
return func(p *ast.File) *ast.File { return rewriteFile(pattern, replace, p) }
}
// parseExpr parses s as an expression.
// It might make sense to expand this to allow statement patterns,
// but there are problems with preserving formatting and also
// with what a wildcard for a statement looks like.
func parseExpr(s, what string) ast.Expr {
x, err := parser.ParseExpr(s)
if err != nil {
fmt.Fprintf(os.Stderr, "parsing %s %s at %s\n", what, s, err)
os.Exit(2)
}
return x
}
// Keep this function for debugging.
/*
func dump(msg string, val reflect.Value) {
fmt.Printf("%s:\n", msg)
ast.Print(fileSet, val.Interface())
fmt.Println()
}
*/
// rewriteFile applies the rewrite rule 'pattern -> replace' to an entire file.
func rewriteFile(pattern, replace ast.Expr, p *ast.File) *ast.File {
cmap := ast.NewCommentMap(fileSet, p, p.Comments)
m := make(map[string]reflect.Value)
pat := reflect.ValueOf(pattern)
repl := reflect.ValueOf(replace)
var rewriteVal func(val reflect.Value) reflect.Value
rewriteVal = func(val reflect.Value) reflect.Value {
// don't bother if val is invalid to start with
if !val.IsValid() {
return reflect.Value{}
}
for k := range m {
delete(m, k)
}
val = apply(rewriteVal, val)
if match(m, pat, val) {
val = subst(m, repl, reflect.ValueOf(val.Interface().(ast.Node).Pos()))
}
return val
}
r := apply(rewriteVal, reflect.ValueOf(p)).Interface().(*ast.File)
r.Comments = cmap.Filter(r).Comments() // recreate comments list
return r
}
// set is a wrapper for x.Set(y); it protects the caller from panics if x cannot be changed to y.
func set(x, y reflect.Value) {
// don't bother if x cannot be set or y is invalid
if !x.CanSet() || !y.IsValid() {
return
}
defer func() {
if x := recover(); x != nil {
if s, ok := x.(string); ok &&
(strings.Contains(s, "type mismatch") || strings.Contains(s, "not assignable")) {
// x cannot be set to y - ignore this rewrite
return
}
panic(x)
}
}()
x.Set(y)
}
// Values/types for special cases.
var (
objectPtrNil = reflect.ValueOf((*ast.Object)(nil))
scopePtrNil = reflect.ValueOf((*ast.Scope)(nil))
identType = reflect.TypeOf((*ast.Ident)(nil))
objectPtrType = reflect.TypeOf((*ast.Object)(nil))
positionType = reflect.TypeOf(token.NoPos)
callExprType = reflect.TypeOf((*ast.CallExpr)(nil))
scopePtrType = reflect.TypeOf((*ast.Scope)(nil))
)
// apply replaces each AST field x in val with f(x), returning val.
// To avoid extra conversions, f operates on the reflect.Value form.
func apply(f func(reflect.Value) reflect.Value, val reflect.Value) reflect.Value {
if !val.IsValid() {
return reflect.Value{}
}
// *ast.Objects introduce cycles and are likely incorrect after
// rewrite; don't follow them but replace with nil instead
if val.Type() == objectPtrType {
return objectPtrNil
}
// similarly for scopes: they are likely incorrect after a rewrite;
// replace them with nil
if val.Type() == scopePtrType {
return scopePtrNil
}
switch v := reflect.Indirect(val); v.Kind() {
case reflect.Slice:
for i := 0; i < v.Len(); i++ {
e := v.Index(i)
set(e, f(e))
}
case reflect.Struct:
for i := 0; i < v.NumField(); i++ {
e := v.Field(i)
set(e, f(e))
}
case reflect.Interface:
e := v.Elem()
set(v, f(e))
}
return val
}
func isWildcard(s string) bool {
rune, size := utf8.DecodeRuneInString(s)
return size == len(s) && unicode.IsLower(rune)
}
// match returns true if pattern matches val,
// recording wildcard submatches in m.
// If m == nil, match checks whether pattern == val.
func match(m map[string]reflect.Value, pattern, val reflect.Value) bool {
// Wildcard matches any expression. If it appears multiple
// times in the pattern, it must match the same expression
// each time.
if m != nil && pattern.IsValid() && pattern.Type() == identType {
name := pattern.Interface().(*ast.Ident).Name
if isWildcard(name) && val.IsValid() {
// wildcards only match valid (non-nil) expressions.
if _, ok := val.Interface().(ast.Expr); ok && !val.IsNil() {
if old, ok := m[name]; ok {
return match(nil, old, val)
}
m[name] = val
return true
}
}
}
// Otherwise, pattern and val must match recursively.
if !pattern.IsValid() || !val.IsValid() {
return !pattern.IsValid() && !val.IsValid()
}
if pattern.Type() != val.Type() {
return false
}
// Special cases.
switch pattern.Type() {
case identType:
// For identifiers, only the names need to match
// (and none of the other *ast.Object information).
// This is a common case, handle it all here instead
// of recursing down any further via reflection.
p := pattern.Interface().(*ast.Ident)
v := val.Interface().(*ast.Ident)
return p == nil && v == nil || p != nil && v != nil && p.Name == v.Name
case objectPtrType, positionType:
// object pointers and token positions always match
return true
case callExprType:
// For calls, the Ellipsis fields (token.Position) must
// match since that is how f(x) and f(x...) are different.
// Check them here but fall through for the remaining fields.
p := pattern.Interface().(*ast.CallExpr)
v := val.Interface().(*ast.CallExpr)
if p.Ellipsis.IsValid() != v.Ellipsis.IsValid() {
return false
}
}
p := reflect.Indirect(pattern)
v := reflect.Indirect(val)
if !p.IsValid() || !v.IsValid() {
return !p.IsValid() && !v.IsValid()
}
switch p.Kind() {
case reflect.Slice:
if p.Len() != v.Len() {
return false
}
for i := 0; i < p.Len(); i++ {
if !match(m, p.Index(i), v.Index(i)) {
return false
}
}
return true
case reflect.Struct:
for i := 0; i < p.NumField(); i++ {
if !match(m, p.Field(i), v.Field(i)) {
return false
}
}
return true
case reflect.Interface:
return match(m, p.Elem(), v.Elem())
}
// Handle token integers, etc.
return p.Interface() == v.Interface()
}
// subst returns a copy of pattern with values from m substituted in place
// of wildcards and pos used as the position of tokens from the pattern.
// if m == nil, subst returns a copy of pattern and doesn't change the line
// number information.
func subst(m map[string]reflect.Value, pattern reflect.Value, pos reflect.Value) reflect.Value {
if !pattern.IsValid() {
return reflect.Value{}
}
// Wildcard gets replaced with map value.
if m != nil && pattern.Type() == identType {
name := pattern.Interface().(*ast.Ident).Name
if isWildcard(name) {
if old, ok := m[name]; ok {
return subst(nil, old, reflect.Value{})
}
}
}
if pos.IsValid() && pattern.Type() == positionType {
// use new position only if old position was valid in the first place
if old := pattern.Interface().(token.Pos); !old.IsValid() {
return pattern
}
return pos
}
// Otherwise copy.
switch p := pattern; p.Kind() {
case reflect.Slice:
v := reflect.MakeSlice(p.Type(), p.Len(), p.Len())
for i := 0; i < p.Len(); i++ {
v.Index(i).Set(subst(m, p.Index(i), pos))
}
return v
case reflect.Struct:
v := reflect.New(p.Type()).Elem()
for i := 0; i < p.NumField(); i++ {
v.Field(i).Set(subst(m, p.Field(i), pos))
}
return v
case reflect.Ptr:
v := reflect.New(p.Type()).Elem()
if elem := p.Elem(); elem.IsValid() {
v.Set(subst(m, elem, pos).Addr())
}
return v
case reflect.Interface:
v := reflect.New(p.Type()).Elem()
if elem := p.Elem(); elem.IsValid() {
v.Set(subst(m, elem, pos))
}
return v
}
return pattern
}
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof
.idea
\ No newline at end of file
language: go
sudo: false
os:
- linux
- osx
go:
- 1.9.x
- 1.10.x
- 1.11.x
- 1.12.x
- master
install:
- go get ./...
script:
- go vet ./...
- go test -v -cpu=1,2,4 .
- go test -v -cpu=1,2,4 -short -race .
- go test -tags=noasm -v -cpu=1,2,4 -short -race .
- go build examples/simple-decoder.go
- go build examples/simple-encoder.go
- go build examples/stream-decoder.go
- go build examples/stream-encoder.go
- diff <(gofmt -d .) <("")
matrix:
allow_failures:
- go: 'master'
fast_finish: true
MIT License The MIT License (MIT)
Copyright (c) 2017 Templexxx
Copyright (c) 2015 Klaus Post Copyright (c) 2015 Klaus Post
Copyright (c) 2015 Backblaze Copyright (c) 2015 Backblaze
...@@ -21,3 +20,4 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ...@@ -21,3 +20,4 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE. SOFTWARE.
# Reed-Solomon
[![GoDoc][1]][2] [![Build Status][3]][4]
[1]: https://godoc.org/github.com/klauspost/reedsolomon?status.svg
[2]: https://godoc.org/github.com/klauspost/reedsolomon
[3]: https://travis-ci.org/klauspost/reedsolomon.svg?branch=master
[4]: https://travis-ci.org/klauspost/reedsolomon
Reed-Solomon Erasure Coding in Go, with speeds exceeding 1GB/s/cpu core implemented in pure Go.
This is a Go port of the [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) library released by [Backblaze](http://backblaze.com), with some additional optimizations.
For an introduction on erasure coding, see the post on the [Backblaze blog](https://www.backblaze.com/blog/reed-solomon/).
Package home: https://github.com/klauspost/reedsolomon
Godoc: https://godoc.org/github.com/klauspost/reedsolomon
# Installation
To get the package use the standard:
```bash
go get -u github.com/klauspost/reedsolomon
```
# Changes
## March 6, 2019
The pure Go implementation is about 30% faster. Minor tweaks to assembler implementations.
## February 8, 2019
AVX512 accelerated version added for Intel Skylake CPUs. This can give up to a 4x speed improvement as compared to AVX2. See [here](https://github.com/klauspost/reedsolomon#performance-on-avx512) for more details.
## December 18, 2018
Assembly code for ppc64le has been contributed, this boosts performance by about 10x on this platform.
## November 18, 2017
Added [WithAutoGoroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithAutoGoroutines) which will attempt to calculate the optimal number of goroutines to use based on your expected shard size and detected CPU.
## October 1, 2017
* [Cauchy Matrix](https://godoc.org/github.com/klauspost/reedsolomon#WithCauchyMatrix) is now an option. Thanks to [templexxx](https://github.com/templexxx) for the basis of this.
* Default maximum number of [goroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithMaxGoroutines) has been increased for better multi-core scaling.
* After several requests the Reconstruct and ReconstructData now slices of zero length but sufficient capacity to be used instead of allocating new memory.
## August 26, 2017
* The [`Encoder()`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) now contains an `Update` function contributed by [chenzhongtao](https://github.com/chenzhongtao).
* [Frank Wessels](https://github.com/fwessels) kindly contributed ARM 64 bit assembly, which gives a huge performance boost on this platform.
## July 20, 2017
`ReconstructData` added to [`Encoder`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) interface. This can cause compatibility issues if you implement your own Encoder. A simple workaround can be added:
```Go
func (e *YourEnc) ReconstructData(shards [][]byte) error {
return ReconstructData(shards)
}
```
You can of course also do your own implementation. The [`StreamEncoder`](https://godoc.org/github.com/klauspost/reedsolomon#StreamEncoder) handles this without modifying the interface. This is a good lesson on why returning interfaces is not a good design.
# Usage
This section assumes you know the basics of Reed-Solomon encoding. A good start is this [Backblaze blog post](https://www.backblaze.com/blog/reed-solomon/).
This package performs the calculation of the parity sets. The usage is therefore relatively simple.
First of all, you need to choose your distribution of data and parity shards. A 'good' distribution is very subjective, and will depend a lot on your usage scenario. A good starting point is above 5 and below 257 data shards (the maximum supported number), and the number of parity shards to be 2 or above, and below the number of data shards.
To create an encoder with 10 data shards (where your data goes) and 3 parity shards (calculated):
```Go
enc, err := reedsolomon.New(10, 3)
```
This encoder will work for all parity sets with this distribution of data and parity shards. The error will only be set if you specify 0 or negative values in any of the parameters, or if you specify more than 256 data shards.
The you send and receive data is a simple slice of byte slices; `[][]byte`. In the example above, the top slice must have a length of 13.
```Go
data := make([][]byte, 13)
```
You should then fill the 10 first slices with *equally sized* data, and create parity shards that will be populated with parity data. In this case we create the data in memory, but you could for instance also use [mmap](https://github.com/edsrzf/mmap-go) to map files.
```Go
// Create all shards, size them at 50000 each
for i := range input {
data[i] := make([]byte, 50000)
}
// Fill some data into the data shards
for i, in := range data[:10] {
for j:= range in {
in[j] = byte((i+j)&0xff)
}
}
```
To populate the parity shards, you simply call `Encode()` with your data.
```Go
err = enc.Encode(data)
```
The only cases where you should get an error is, if the data shards aren't of equal size. The last 3 shards now contain parity data. You can verify this by calling `Verify()`:
```Go
ok, err = enc.Verify(data)
```
The final (and important) part is to be able to reconstruct missing shards. For this to work, you need to know which parts of your data is missing. The encoder *does not know which parts are invalid*, so if data corruption is a likely scenario, you need to implement a hash check for each shard. If a byte has changed in your set, and you don't know which it is, there is no way to reconstruct the data set.
To indicate missing data, you set the shard to nil before calling `Reconstruct()`:
```Go
// Delete two data shards
data[3] = nil
data[7] = nil
// Reconstruct the missing shards
err := enc.Reconstruct(data)
```
The missing data and parity shards will be recreated. If more than 3 shards are missing, the reconstruction will fail.
If you are only interested in the data shards (for reading purposes) you can call `ReconstructData()`:
```Go
// Delete two data shards
data[3] = nil
data[7] = nil
// Reconstruct just the missing data shards
err := enc.ReconstructData(data)
```
So to sum up reconstruction:
* The number of data/parity shards must match the numbers used for encoding.
* The order of shards must be the same as used when encoding.
* You may only supply data you know is valid.
* Invalid shards should be set to nil.
For complete examples of an encoder and decoder see the [examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
# Splitting/Joining Data
You might have a large slice of data. To help you split this, there are some helper functions that can split and join a single byte slice.
```Go
bigfile, _ := ioutil.Readfile("myfile.data")
// Split the file
split, err := enc.Split(bigfile)
```
This will split the file into the number of data shards set when creating the encoder and create empty parity shards.
An important thing to note is that you have to *keep track of the exact input size*. If the size of the input isn't divisible by the number of data shards, extra zeros will be inserted in the last shard.
To join a data set, use the `Join()` function, which will join the shards and write it to the `io.Writer` you supply:
```Go
// Join a data set and write it to io.Discard.
err = enc.Join(io.Discard, data, len(bigfile))
```
# Streaming/Merging
It might seem like a limitation that all data should be in memory, but an important property is that *as long as the number of data/parity shards are the same, you can merge/split data sets*, and they will remain valid as a separate set.
```Go
// Split the data set of 50000 elements into two of 25000
splitA := make([][]byte, 13)
splitB := make([][]byte, 13)
// Merge into a 100000 element set
merged := make([][]byte, 13)
for i := range data {
splitA[i] = data[i][:25000]
splitB[i] = data[i][25000:]
// Concatenate it to itself
merged[i] = append(make([]byte, 0, len(data[i])*2), data[i]...)
merged[i] = append(merged[i], data[i]...)
}
// Each part should still verify as ok.
ok, err := enc.Verify(splitA)
if ok && err == nil {
log.Println("splitA ok")
}
ok, err = enc.Verify(splitB)
if ok && err == nil {
log.Println("splitB ok")
}
ok, err = enc.Verify(merge)
if ok && err == nil {
log.Println("merge ok")
}
```
This means that if you have a data set that may not fit into memory, you can split processing into smaller blocks. For the best throughput, don't use too small blocks.
This also means that you can divide big input up into smaller blocks, and do reconstruction on parts of your data. This doesn't give the same flexibility of a higher number of data shards, but it will be much more performant.
# Streaming API
There has been added support for a streaming API, to help perform fully streaming operations, which enables you to do the same operations, but on streams. To use the stream API, use [`NewStream`](https://godoc.org/github.com/klauspost/reedsolomon#NewStream) function to create the encoding/decoding interfaces. You can use [`NewStreamC`](https://godoc.org/github.com/klauspost/reedsolomon#NewStreamC) to ready an interface that reads/writes concurrently from the streams.
Input is delivered as `[]io.Reader`, output as `[]io.Writer`, and functionality corresponds to the in-memory API. Each stream must supply the same amount of data, similar to how each slice must be similar size with the in-memory API.
If an error occurs in relation to a stream, a [`StreamReadError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamReadError) or [`StreamWriteError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamWriteError) will help you determine which stream was the offender.
There is no buffering or timeouts/retry specified. If you want to add that, you need to add it to the Reader/Writer.
For complete examples of a streaming encoder and decoder see the [examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
# Advanced Options
You can modify internal options which affects how jobs are split between and processed by goroutines.
To create options, use the WithXXX functions. You can supply options to `New`, `NewStream` and `NewStreamC`. If no Options are supplied, default options are used.
Example of how to supply options:
```Go
enc, err := reedsolomon.New(10, 3, WithMaxGoroutines(25))
```
# Performance
Performance depends mainly on the number of parity shards. In rough terms, doubling the number of parity shards will double the encoding time.
Here are the throughput numbers with some different selections of data and parity shards. For reference each shard is 1MB random data, and 2 CPU cores are used for encoding.
| Data | Parity | Parity | MB/s | SSSE3 MB/s | SSSE3 Speed | Rel. Speed |
|------|--------|--------|--------|-------------|-------------|------------|
| 5 | 2 | 40% | 576,11 | 2599,2 | 451% | 100,00% |
| 10 | 2 | 20% | 587,73 | 3100,28 | 528% | 102,02% |
| 10 | 4 | 40% | 298,38 | 2470,97 | 828% | 51,79% |
| 50 | 20 | 40% | 59,81 | 713,28 | 1193% | 10,38% |
If `runtime.GOMAXPROCS()` is set to a value higher than 1, the encoder will use multiple goroutines to perform the calculations in `Verify`, `Encode` and `Reconstruct`.
Example of performance scaling on Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz - 4 physical cores, 8 logical cores. The example uses 10 blocks with 16MB data each and 4 parity blocks.
| Threads | MB/s | Speed |
|---------|---------|-------|
| 1 | 1355,11 | 100% |
| 2 | 2339,78 | 172% |
| 4 | 3179,33 | 235% |
| 8 | 4346,18 | 321% |
Benchmarking `Reconstruct()` followed by a `Verify()` (=`all`) versus just calling `ReconstructData()` (=`data`) gives the following result:
```
benchmark all MB/s data MB/s speedup
BenchmarkReconstruct10x2x10000-8 2011.67 10530.10 5.23x
BenchmarkReconstruct50x5x50000-8 4585.41 14301.60 3.12x
BenchmarkReconstruct10x2x1M-8 8081.15 28216.41 3.49x
BenchmarkReconstruct5x2x1M-8 5780.07 28015.37 4.85x
BenchmarkReconstruct10x4x1M-8 4352.56 14367.61 3.30x
BenchmarkReconstruct50x20x1M-8 1364.35 4189.79 3.07x
BenchmarkReconstruct10x4x16M-8 1484.35 5779.53 3.89x
```
# Performance on AVX512
The performance on AVX512 has been accelerated for Intel CPUs. This gives speedups on a per-core basis of up to 4x compared to AVX2 as can be seen in the following table:
```
$ benchcmp avx2.txt avx512.txt
benchmark AVX2 MB/s AVX512 MB/s speedup
BenchmarkEncode8x8x1M-72 1681.35 4125.64 2.45x
BenchmarkEncode8x4x8M-72 1529.36 5507.97 3.60x
BenchmarkEncode8x8x8M-72 791.16 2952.29 3.73x
BenchmarkEncode8x8x32M-72 573.26 2168.61 3.78x
BenchmarkEncode12x4x12M-72 1234.41 4912.37 3.98x
BenchmarkEncode16x4x16M-72 1189.59 5138.01 4.32x
BenchmarkEncode24x8x24M-72 690.68 2583.70 3.74x
BenchmarkEncode24x8x48M-72 674.20 2643.31 3.92x
```
This speedup has been achieved by computing multiple parity blocks in parallel as opposed to one after the other. In doing so it is possible to minimize the memory bandwidth required for loading all data shards. At the same time the calculations are performed in the 512-bit wide ZMM registers and the surplus of ZMM registers (32 in total) is used to keep more data around (most notably the matrix coefficients).
# Performance on ARM64 NEON
By exploiting NEON instructions the performance for ARM has been accelerated. Below are the performance numbers for a single core on an ARM Cortex-A53 CPU @ 1.2GHz (Debian 8.0 Jessie running Go: 1.7.4):
| Data | Parity | Parity | ARM64 Go MB/s | ARM64 NEON MB/s | NEON Speed |
|------|--------|--------|--------------:|----------------:|-----------:|
| 5 | 2 | 40% | 189 | 1304 | 588% |
| 10 | 2 | 20% | 188 | 1738 | 925% |
| 10 | 4 | 40% | 96 | 839 | 877% |
# Performance on ppc64le
The performance for ppc64le has been accelerated. This gives roughly a 10x performance improvement on this architecture as can been seen below:
```
benchmark old MB/s new MB/s speedup
BenchmarkGalois128K-160 948.87 8878.85 9.36x
BenchmarkGalois1M-160 968.85 9041.92 9.33x
BenchmarkGaloisXor128K-160 862.02 7905.00 9.17x
BenchmarkGaloisXor1M-160 784.60 6296.65 8.03x
```
# asm2plan9s
[asm2plan9s](https://github.com/fwessels/asm2plan9s) is used for assembling the AVX2 instructions into their BYTE/WORD/LONG equivalents.
# Links
* [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/).
* [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon). Compatible java library by Backblaze.
* [ocaml-reed-solomon-erasure](https://gitlab.com/darrenldl/ocaml-reed-solomon-erasure). Compatible OCaml implementation.
* [reedsolomon-c](https://github.com/jannson/reedsolomon-c). C version, compatible with output from this package.
* [Reed-Solomon Erasure Coding in Haskell](https://github.com/NicolasT/reedsolomon). Haskell port of the package with similar performance.
* [reed-solomon-erasure](https://github.com/darrenldl/reed-solomon-erasure). Compatible Rust implementation.
* [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests.
* [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations.
# License
This code, as the original [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) is published under an MIT license. See LICENSE file for more information.
os: Visual Studio 2015
platform: x64
clone_folder: c:\gopath\src\github.com\klauspost\reedsolomon
# environment variables
environment:
GOPATH: c:\gopath
install:
- echo %PATH%
- echo %GOPATH%
- go version
- go env
- go get -d ./...
build_script:
- go test -v -cpu=2 ./...
- go test -cpu=1,2,4 -short -race ./...
此差异已折叠。
//+build !noasm
//+build !appengine
//+build !gccgo
// Copyright 2015, Klaus Post, see LICENSE for details.
package reedsolomon
//go:noescape
func galMulSSSE3(low, high, in, out []byte)
//go:noescape
func galMulSSSE3Xor(low, high, in, out []byte)
//go:noescape
func galMulAVX2Xor(low, high, in, out []byte)
//go:noescape
func galMulAVX2(low, high, in, out []byte)
//go:noescape
func sSE2XorSlice(in, out []byte)
// This is what the assembler routines do in blocks of 16 bytes:
/*
func galMulSSSE3(low, high, in, out []byte) {
for n, input := range in {
l := input & 0xf
h := input >> 4
out[n] = low[l] ^ high[h]
}
}
func galMulSSSE3Xor(low, high, in, out []byte) {
for n, input := range in {
l := input & 0xf
h := input >> 4
out[n] ^= low[l] ^ high[h]
}
}
*/
func galMulSlice(c byte, in, out []byte, o *options) {
var done int
if o.useAVX2 {
galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 5) << 5
} else if o.useSSSE3 {
galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 4) << 4
}
remain := len(in) - done
if remain > 0 {
mt := mulTable[c][:256]
for i := done; i < len(in); i++ {
out[i] = mt[in[i]]
}
}
}
func galMulSliceXor(c byte, in, out []byte, o *options) {
var done int
if o.useAVX2 {
galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 5) << 5
} else if o.useSSSE3 {
galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 4) << 4
}
remain := len(in) - done
if remain > 0 {
mt := mulTable[c][:256]
for i := done; i < len(in); i++ {
out[i] ^= mt[in[i]]
}
}
}
// slice galois add
func sliceXor(in, out []byte, sse2 bool) {
var done int
if sse2 {
sSE2XorSlice(in, out)
done = (len(in) >> 4) << 4
}
remain := len(in) - done
if remain > 0 {
for i := done; i < len(in); i++ {
out[i] ^= in[i]
}
}
}
此差异已折叠。
此差异已折叠。
此差异已折叠。
//+build !amd64 noasm appengine gccgo
//+build !arm64 noasm appengine gccgo
//+build !ppc64le noasm appengine gccgo
// Copyright 2015, Klaus Post, see LICENSE for details.
package reedsolomon
func galMulSlice(c byte, in, out []byte, o *options) {
mt := mulTable[c][:256]
out = out[:len(in)]
for n, input := range in {
out[n] = mt[input]
}
}
func galMulSliceXor(c byte, in, out []byte, o *options) {
mt := mulTable[c][:256]
out = out[:len(in)]
for n, input := range in {
out[n] ^= mt[input]
}
}
// slice galois add
func sliceXor(in, out []byte, sse2 bool) {
for n, input := range in {
out[n] ^= input
}
}
func (r reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
panic("unreachable")
}
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
language: go
go:
- 1.9
install:
- go get github.com/templexxx/reedsolomon
script:
- go test -v
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册