Merge pull request #1157 from fatedier/dev

bump version to v0.25.2

Merge pull request #1157 from fatedier/dev
bump version to v0.25.2
4b7aef21 · fatedier · GitHub · e945c166 · f6d0046b · 4b7aef21
128 changed file
--- a/client/health/health.go
+++ b/client/health/health.go
@@ -18,6 +18,8 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"io"
+	"io/ioutil"
 	"net"
 	"net/http"
 	"time"
@@ -170,6 +172,8 @@ func (monitor *HealthCheckMonitor) doHttpCheck(ctx context.Context) error {
 	if err != nil {
 		return err
 	}
+	defer resp.Body.Close()
+	io.Copy(ioutil.Discard, resp.Body)
 	if resp.StatusCode/100 != 2 {
 		return fmt.Errorf("do http health check, StatusCode is [%d] not 2xx", resp.StatusCode)

--- a/conf/systemd/frpc.service
+++ b/conf/systemd/frpc.service
+[Unit]
+Description=Frp Client Service
+After=network.target
+[Service]
+Type=simple
+User=nobody
+Restart=on-failure
+RestartSec=5s
+ExecStart=/usr/bin/frpc -c /etc/frp/frpc.ini
+ExecReload=/usr/bin/frpc reload -c /etc/frp/frpc.ini
+[Install]
+WantedBy=multi-user.target
--- a/conf/systemd/frpc@.service
+++ b/conf/systemd/frpc@.service
+[Unit]
+Description=Frp Client Service
+After=network.target
+[Service]
+Type=idle
+User=nobody
+Restart=on-failure
+RestartSec=5s
+ExecStart=/usr/bin/frpc -c /etc/frp/%i.ini
+ExecReload=/usr/bin/frpc reload -c /etc/frp/%i.ini
+[Install]
+WantedBy=multi-user.target
--- a/conf/systemd/frps.service
+++ b/conf/systemd/frps.service
+[Unit]
+Description=Frp Server Service
+After=network.target
+[Service]
+Type=simple
+User=nobody
+Restart=on-failure
+RestartSec=5s
+ExecStart=/usr/bin/frps -c /etc/frp/frps.ini
+[Install]
+WantedBy=multi-user.target
--- a/conf/systemd/frps@.service
+++ b/conf/systemd/frps@.service
+[Unit]
+Description=Frp Server Service
+After=network.target
+[Service]
+Type=simple
+User=nobody
+Restart=on-failure
+RestartSec=5s
+ExecStart=/usr/bin/frps -c /etc/frp/%i.ini
+[Install]
+WantedBy=multi-user.target
--- a/go.mod
+++ b/go.mod
@@ -7,13 +7,15 @@ require (
 	github.com/davecgh/go-spew v1.1.0 // indirect
 	github.com/fatedier/beego v0.0.0-20171024143340-6c6a4f5bd5eb
 	github.com/fatedier/golib v0.0.0-20181107124048-ff8cd814b049
-	github.com/fatedier/kcp-go v0.0.0-20171023144637-cd167d2f15f4
+	github.com/fatedier/kcp-go v2.0.4-0.20190317085623-2063a803e6fe+incompatible
 	github.com/golang/snappy v0.0.0-20170215233205-553a64147049 // indirect
 	github.com/gorilla/context v1.1.1 // indirect
 	github.com/gorilla/mux v1.6.2
 	github.com/gorilla/websocket v1.2.0
 	github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d
 	github.com/inconshreveable/mousetrap v1.0.0 // indirect
+	github.com/klauspost/cpuid v1.2.0 // indirect
+	github.com/klauspost/reedsolomon v1.9.1 // indirect
 	github.com/mattn/go-runewidth v0.0.4 // indirect
 	github.com/pkg/errors v0.8.0 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
@@ -23,7 +25,6 @@ require (
 	github.com/spf13/pflag v1.0.1 // indirect
 	github.com/stretchr/testify v1.2.1
 	github.com/templexxx/cpufeat v0.0.0-20170927014610-3794dfbfb047 // indirect
-	github.com/templexxx/reedsolomon v0.0.0-20170926020725-5e06b81a1c76 // indirect
 	github.com/templexxx/xor v0.0.0-20170926022130-0af8e873c554 // indirect
 	github.com/tjfoc/gmsm v0.0.0-20171124023159-98aa888b79d8 // indirect
 	github.com/vaughan0/go-ini v0.0.0-20130923145212-a98ad7ee00ec

--- a/go.sum
+++ b/go.sum
@@ -3,7 +3,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
 github.com/fatedier/beego v0.0.0-20171024143340-6c6a4f5bd5eb/go.mod h1:wx3gB6dbIfBRcucp94PI9Bt3I0F2c/MyNEWuhzpWiwk=
 github.com/fatedier/golib v0.0.0-20181107124048-ff8cd814b049 h1:teH578mf2ii42NHhIp3PhgvjU5bv+NFMq9fSQR8NaG8=
 github.com/fatedier/golib v0.0.0-20181107124048-ff8cd814b049/go.mod h1:DqIrnl0rp3Zybg9zbJmozTy1n8fYJoX+QoAj9slIkKM=
-github.com/fatedier/kcp-go v0.0.0-20171023144637-cd167d2f15f4/go.mod h1:YpCOaxj7vvMThhIQ9AfTOPW2sfztQR5WDfs7AflSy4s=
+github.com/fatedier/kcp-go v2.0.4-0.20190317085623-2063a803e6fe+incompatible h1:pNNeBKz1jtMDupiwvtEGFTujA3J86xoEXGSkwVeYFsw=
+github.com/fatedier/kcp-go v2.0.4-0.20190317085623-2063a803e6fe+incompatible/go.mod h1:YpCOaxj7vvMThhIQ9AfTOPW2sfztQR5WDfs7AflSy4s=
 github.com/golang/snappy v0.0.0-20170215233205-553a64147049/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8=
 github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
@@ -13,6 +14,10 @@ github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d h1:kJCB4vdITiW1eC1
 github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM=
 github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
 github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
+github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
+github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
+github.com/klauspost/reedsolomon v1.9.1 h1:kYrT1MlR4JH6PqOpC+okdb9CDTcwEC/BqpzK4WFyXL8=
+github.com/klauspost/reedsolomon v1.9.1/go.mod h1:CwCi+NUr9pqSVktrkN+Ondf06rkhYZ/pcNv7fu+8Un4=
 github.com/mattn/go-runewidth v0.0.4 h1:2BvfKmzob6Bmd4YsL0zygOqfdFnK7GR4QL06Do4/p7Y=
 github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
 github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@@ -23,7 +28,6 @@ github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3
 github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
 github.com/stretchr/testify v1.2.1/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/templexxx/cpufeat v0.0.0-20170927014610-3794dfbfb047/go.mod h1:wM7WEvslTq+iOEAMDLSzhVuOt5BRZ05WirO+b09GHQU=
-github.com/templexxx/reedsolomon v0.0.0-20170926020725-5e06b81a1c76/go.mod h1:ToWcj2sZ6xHl14JjZiVDktYpFtrFZJXBlsu7TV23lNg=
 github.com/templexxx/xor v0.0.0-20170926022130-0af8e873c554/go.mod h1:5XA7W9S6mni3h5uvOC75dA3m9CCCaS83lltmc0ukdi4=
 github.com/tjfoc/gmsm v0.0.0-20171124023159-98aa888b79d8/go.mod h1:XxO4hdhhrzAd+G4CjDqaOkd0hUzmtPR/d3EiBBMn/wc=
 github.com/vaughan0/go-ini v0.0.0-20130923145212-a98ad7ee00ec/go.mod h1:owBmyHYMLkxyrugmfwE/DLJyW8Ro9mkphwuVErQ0iUw=

--- a/utils/version/version.go
+++ b/utils/version/version.go
@@ -19,7 +19,7 @@ import (
 	"strings"
 )
-var version string = "0.25.1"
+var version string = "0.25.2"
 func Full() string {
 	return version

--- a/vendor/github.com/fatedier/kcp-go/.travis.yml
+++ b/vendor/github.com/fatedier/kcp-go/.travis.yml
 language: go
 go:
-    - 1.9
+    - 1.9.x
+    - 1.10.x
+    - 1.11.x
 before_install:
    - go get -t -v ./...

--- a/vendor/github.com/fatedier/kcp-go/README.md
+++ b/vendor/github.com/fatedier/kcp-go/README.md
@@ -20,24 +20,20 @@
 **kcp-go** is a **Production-Grade Reliable-UDP** library for [golang](https://golang.org/). 
-It provides **fast, ordered and error-checked** delivery of streams over **UDP** packets, has been well tested with opensource project [kcptun](https://github.com/xtaci/kcptun). Millions of devices(from low-end MIPS routers to high-end servers) are running with **kcp-go** at present, including applications like **online games, live broadcasting, file synchronization and network acceleration**.
+This library intents to provide a **smooth, resilient, ordered, error-checked and anonymous** delivery of streams over **UDP** packets, it has been battle-tested with opensource project [kcptun](https://github.com/xtaci/kcptun). Millions of devices(from low-end MIPS routers to high-end servers) have deployed **kcp-go** powered program in a variety of forms like **online games, live broadcasting, file synchronization and network acceleration**.
 [Lastest Release](https://github.com/xtaci/kcp-go/releases)
 ## Features
-1. Optimized for **Realtime Online Games, Audio/Video Streaming and Latency-Sensitive Distributed Consensus**.
+1. Designed for **Latency-sensitive** scenarios.
-1. Compatible with [skywind3000's](https://github.com/skywind3000) C version with language specific optimizations.
 1. **Cache friendly** and **Memory optimized** design, offers extremely **High Performance** core.
 1. Handles **>5K concurrent connections** on a single commodity server.
 1. Compatible with [net.Conn](https://golang.org/pkg/net/#Conn) and [net.Listener](https://golang.org/pkg/net/#Listener), a drop-in replacement for [net.TCPConn](https://golang.org/pkg/net/#TCPConn).
 1. [FEC(Forward Error Correction)](https://en.wikipedia.org/wiki/Forward_error_correction) Support with [Reed-Solomon Codes](https://en.wikipedia.org/wiki/Reed%E2%80%93Solomon_error_correction)
-1. Packet level encryption support with [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard), [TEA](https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm), [3DES](https://en.wikipedia.org/wiki/Triple_DES), [Blowfish](https://en.wikipedia.org/wiki/Blowfish_(cipher)), [Cast5](https://en.wikipedia.org/wiki/CAST-128), [Salsa20]( https://en.wikipedia.org/wiki/Salsa20), etc. in [CFB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_Feedback_.28CFB.29) mode.
+1. Packet level encryption support with [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard), [TEA](https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm), [3DES](https://en.wikipedia.org/wiki/Triple_DES), [Blowfish](https://en.wikipedia.org/wiki/Blowfish_(cipher)), [Cast5](https://en.wikipedia.org/wiki/CAST-128), [Salsa20]( https://en.wikipedia.org/wiki/Salsa20), etc. in [CFB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_Feedback_.28CFB.29) mode, which generates completely anonymous packet.
-1. **Fixed number of goroutines** created for the entire server application, minimized goroutine context switch.
+1. Only **A fixed number of goroutines** will be created for the entire server application, costs in **context switch** between goroutines have been taken into consideration.
+1. Compatible with [skywind3000's](https://github.com/skywind3000) C version with various improvements.
-## Conventions
-Control messages like **SYN/FIN/RST** in TCP **are not defined** in KCP, you need some **keepalive/heartbeat mechanism** in the application-level. A real world example is to use some **multiplexing** protocol over session, such as [smux](https://github.com/xtaci/smux)(with embedded keepalive mechanism), see [kcptun](https://github.com/xtaci/kcptun) for example.
 ## Documentation
@@ -80,47 +76,59 @@ Server:   [full demo](https://github.com/xtaci/kcptun/blob/master/server/main.go
 lis, err := kcp.ListenWithOptions(":10000", nil, 10, 3)
 ```
-## Performance
+## Benchmark
 ```
  Model Name:	MacBook Pro
-  Model Identifier:	MacBookPro12,1
+  Model Identifier:	MacBookPro14,1
  Processor Name:	Intel Core i5
-  Processor Speed:	2.7 GHz
+  Processor Speed:	3.1 GHz
  Number of Processors:	1
  Total Number of Cores:	2
  L2 Cache (per Core):	256 KB
-  L3 Cache:	3 MB
+  L3 Cache:	4 MB
  Memory:	8 GB
 ```
 ```
 $ go test -v -run=^$ -bench .
 beginning tests, encryption:salsa20, fec:10/3
-BenchmarkAES128-4          	  200000	      8256 ns/op	 363.33 MB/s	       0 B/op	       0 allocs/op
+goos: darwin
-BenchmarkAES192-4          	  200000	      9153 ns/op	 327.74 MB/s	       0 B/op	       0 allocs/op
+goarch: amd64
-BenchmarkAES256-4          	  200000	     10079 ns/op	 297.64 MB/s	       0 B/op	       0 allocs/op
+pkg: github.com/xtaci/kcp-go
-BenchmarkTEA-4             	  100000	     18643 ns/op	 160.91 MB/s	       0 B/op	       0 allocs/op
+BenchmarkSM4-4                 	   50000	     32180 ns/op	  93.23 MB/s	       0 B/op	       0 allocs/op
-BenchmarkXOR-4             	 5000000	       316 ns/op	9486.46 MB/s	       0 B/op	       0 allocs/op
+BenchmarkAES128-4              	  500000	      3285 ns/op	 913.21 MB/s	       0 B/op	       0 allocs/op
-BenchmarkBlowfish-4        	   50000	     35643 ns/op	  84.17 MB/s	       0 B/op	       0 allocs/op
+BenchmarkAES192-4              	  300000	      3623 ns/op	 827.85 MB/s	       0 B/op	       0 allocs/op
-BenchmarkNone-4            	30000000	        56.2 ns/op	53371.83 MB/s	       0 B/op	       0 allocs/op
+BenchmarkAES256-4              	  300000	      3874 ns/op	 774.20 MB/s	       0 B/op	       0 allocs/op
-BenchmarkCast5-4           	   30000	     44744 ns/op	  67.05 MB/s	       0 B/op	       0 allocs/op
+BenchmarkTEA-4                 	  100000	     15384 ns/op	 195.00 MB/s	       0 B/op	       0 allocs/op
-Benchmark3DES-4            	    2000	    639839 ns/op	   4.69 MB/s	       2 B/op	       0 allocs/op
+BenchmarkXOR-4                 	20000000	        89.9 ns/op	33372.00 MB/s	       0 B/op	       0 allocs/op
-BenchmarkTwofish-4         	   30000	     43368 ns/op	  69.17 MB/s	       0 B/op	       0 allocs/op
+BenchmarkBlowfish-4            	   50000	     26927 ns/op	 111.41 MB/s	       0 B/op	       0 allocs/op
-BenchmarkXTEA-4            	   30000	     57673 ns/op	  52.02 MB/s	       0 B/op	       0 allocs/op
+BenchmarkNone-4                	30000000	        45.7 ns/op	65597.94 MB/s	       0 B/op	       0 allocs/op
-BenchmarkSalsa20-4         	  300000	      3917 ns/op	 765.80 MB/s	       0 B/op	       0 allocs/op
+BenchmarkCast5-4               	   50000	     34258 ns/op	  87.57 MB/s	       0 B/op	       0 allocs/op
-BenchmarkFlush-4           	10000000	       226 ns/op	       0 B/op	       0 allocs/op
+Benchmark3DES-4                	   10000	    117149 ns/op	  25.61 MB/s	       0 B/op	       0 allocs/op
-BenchmarkEchoSpeed4K-4     	    5000	    300030 ns/op	  13.65 MB/s	    5672 B/op	     177 allocs/op
+BenchmarkTwofish-4             	   50000	     33538 ns/op	  89.45 MB/s	       0 B/op	       0 allocs/op
-BenchmarkEchoSpeed64K-4    	     500	   3202335 ns/op	  20.47 MB/s	   73295 B/op	    2198 allocs/op
+BenchmarkXTEA-4                	   30000	     45666 ns/op	  65.69 MB/s	       0 B/op	       0 allocs/op
-BenchmarkEchoSpeed512K-4   	      50	  24926924 ns/op	  21.03 MB/s	  659339 B/op	   17602 allocs/op
+BenchmarkSalsa20-4             	  500000	      3308 ns/op	 906.76 MB/s	       0 B/op	       0 allocs/op
-BenchmarkEchoSpeed1M-4     	      20	  64857821 ns/op	  16.17 MB/s	 1772437 B/op	   42869 allocs/op
+BenchmarkCRC32-4               	20000000	        65.2 ns/op	15712.43 MB/s
-BenchmarkSinkSpeed4K-4     	   30000	     50230 ns/op	  81.54 MB/s	    2058 B/op	      48 allocs/op
+BenchmarkCsprngSystem-4        	 1000000	      1150 ns/op	  13.91 MB/s
-BenchmarkSinkSpeed64K-4    	    2000	    648718 ns/op	 101.02 MB/s	   31165 B/op	     687 allocs/op
+BenchmarkCsprngMD5-4           	10000000	       145 ns/op	 110.26 MB/s
-BenchmarkSinkSpeed256K-4   	     300	   4635905 ns/op	 113.09 MB/s	  286229 B/op	    5516 allocs/op
+BenchmarkCsprngSHA1-4          	10000000	       158 ns/op	 126.54 MB/s
-BenchmarkSinkSpeed1M-4     	     200	   9566933 ns/op	 109.60 MB/s	  463771 B/op	   10701 allocs/op
+BenchmarkCsprngNonceMD5-4      	10000000	       153 ns/op	 104.22 MB/s
+BenchmarkCsprngNonceAES128-4   	100000000	        19.1 ns/op	 837.81 MB/s
+BenchmarkFECDecode-4           	 1000000	      1119 ns/op	1339.61 MB/s	    1606 B/op	       2 allocs/op
+BenchmarkFECEncode-4           	 2000000	       832 ns/op	1801.83 MB/s	      17 B/op	       0 allocs/op
+BenchmarkFlush-4               	 5000000	       272 ns/op	       0 B/op	       0 allocs/op
+BenchmarkEchoSpeed4K-4         	    5000	    259617 ns/op	  15.78 MB/s	    5451 B/op	     149 allocs/op
+BenchmarkEchoSpeed64K-4        	    1000	   1706084 ns/op	  38.41 MB/s	   56002 B/op	    1604 allocs/op
+BenchmarkEchoSpeed512K-4       	     100	  14345505 ns/op	  36.55 MB/s	  482597 B/op	   13045 allocs/op
+BenchmarkEchoSpeed1M-4         	      30	  34859104 ns/op	  30.08 MB/s	 1143773 B/op	   27186 allocs/op
+BenchmarkSinkSpeed4K-4         	   50000	     31369 ns/op	 130.57 MB/s	    1566 B/op	      30 allocs/op
+BenchmarkSinkSpeed64K-4        	    5000	    329065 ns/op	 199.16 MB/s	   21529 B/op	     453 allocs/op
+BenchmarkSinkSpeed256K-4       	     500	   2373354 ns/op	 220.91 MB/s	  166332 B/op	    3554 allocs/op
+BenchmarkSinkSpeed1M-4         	     300	   5117927 ns/op	 204.88 MB/s	  310378 B/op	    6988 allocs/op
 PASS
-ok  	_/Users/xtaci/.godeps/src/github.com/xtaci/kcp-go	39.689s
+ok  	github.com/xtaci/kcp-go	50.349s
 ```
-## Design Considerations
+## Key Design Considerations
 1. slice vs. container/list
@@ -139,7 +147,9 @@ List structure introduces **heavy cache misses** compared to slice which owns be
 2. Timing accuracy vs. syscall clock_gettime
-Timing is **critical** to **RTT estimator**, inaccurate timing introduces false retransmissions in KCP, but calling `time.Now()` costs 42 cycles(10.5ns on 4GHz CPU, 15.6ns on my MacBook Pro 2.7GHz), the benchmark for time.Now():
+Timing is **critical** to **RTT estimator**, inaccurate timing leads to false retransmissions in KCP, but calling `time.Now()` costs 42 cycles(10.5ns on 4GHz CPU, 15.6ns on my MacBook Pro 2.7GHz). 
+The benchmark for time.Now() lies here:
 https://github.com/xtaci/notes/blob/master/golang/benchmark2/syscall_test.go
@@ -147,14 +157,17 @@ https://github.com/xtaci/notes/blob/master/golang/benchmark2/syscall_test.go
 BenchmarkNow-4         	100000000	        15.6 ns/op
 ```
-In kcp-go, after each `kcp.output()` function call, current time will be updated upon return, and each `kcp.flush()` will get current time once. For most of the time, 5000 connections costs 5000 * 15.6ns = 78us(no packet needs to be sent by `kcp.output()`), as for 10MB/s data transfering with 1400 MTU, `kcp.output()` will be called around 7500 times and costs 117us for `time.Now()` in **every second**.
+In kcp-go, after each `kcp.output()` function call, current clock time will be updated upon return, and for a single `kcp.flush()` operation, current time will be queried from system once. For most of the time, 5000 connections costs 5000 * 15.6ns = 78us(a fixed cost while no packet needs to be sent), as for 10MB/s data transfering with 1400 MTU, `kcp.output()` will be called around 7500 times and costs 117us for `time.Now()` in **every second**.
+## Connection Termination
+Control messages like **SYN/FIN/RST** in TCP **are not defined** in KCP, you need some **keepalive/heartbeat mechanism** in the application-level. A real world example is to use some **multiplexing** protocol over session, such as [smux](https://github.com/xtaci/smux)(with embedded keepalive mechanism), see [kcptun](https://github.com/xtaci/kcptun) for example.
-## Tuning
+## FAQ
-Q: I'm handling >5K connections on my server. the CPU utilization is high.
+Q: I'm handling >5K connections on my server, the CPU utilization is so high.
-A: A standalone `agent` or `gate` server for kcp-go is suggested, not only for CPU utilization, but also important to the **precision** of RTT measurements which indirectly affects retransmission. By increasing update `interval` with `SetNoDelay` like `conn.SetNoDelay(1, 40, 1, 1)` will dramatically reduce system load.
+A: A standalone `agent` or `gate` server for running kcp-go is suggested, not only for CPU utilization, but also important to the **precision** of RTT measurements(timing) which indirectly affects retransmission. By increasing update `interval` with `SetNoDelay` like `conn.SetNoDelay(1, 40, 1, 1)` will dramatically reduce system load, but lower the performance.
 ## Who is using this?
@@ -163,10 +176,9 @@ A: A standalone `agent` or `gate` server for kcp-go is suggested, not only for C
 3. https://github.com/smallnest/rpcx -- A RPC service framework based on net/rpc like alibaba Dubbo and weibo Motan.
 4. https://github.com/gonet2/agent -- A gateway for games with stream multiplexing.
 5. https://github.com/syncthing/syncthing -- Open Source Continuous File Synchronization.
-6. https://play.google.com/store/apps/details?id=com.k17game.k3 -- Battle Zone - Earth 2048, a world-wide strategy game.
 ## Links
 1. https://github.com/xtaci/libkcp -- FEC enhanced KCP session library for iOS/Android in C++
 2. https://github.com/skywind3000/kcp -- A Fast and Reliable ARQ Protocol
-3. https://github.com/templexxx/reedsolomon -- Reed-Solomon Erasure Coding in Go
+3. https://github.com/klauspost/reedsolomon -- Reed-Solomon Erasure Coding in Go
--- a/vendor/github.com/fatedier/kcp-go/crypt.go
+++ b/vendor/github.com/fatedier/kcp-go/crypt.go
@@ -57,8 +57,8 @@ func (c *salsa20BlockCrypt) Decrypt(dst, src []byte) {
 }
 type sm4BlockCrypt struct {
-	encbuf []byte
+	encbuf [sm4.BlockSize]byte
-	decbuf []byte
+	decbuf [2 * sm4.BlockSize]byte
 	block  cipher.Block
 }
@@ -70,17 +70,15 @@ func NewSM4BlockCrypt(key []byte) (BlockCrypt, error) {
 		return nil, err
 	}
 	c.block = block
-	c.encbuf = make([]byte, sm4.BlockSize)
-	c.decbuf = make([]byte, 2*sm4.BlockSize)
 	return c, nil
 }
-func (c *sm4BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *sm4BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
-func (c *sm4BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+func (c *sm4BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
 type twofishBlockCrypt struct {
-	encbuf []byte
+	encbuf [twofish.BlockSize]byte
-	decbuf []byte
+	decbuf [2 * twofish.BlockSize]byte
 	block  cipher.Block
 }
@@ -92,17 +90,15 @@ func NewTwofishBlockCrypt(key []byte) (BlockCrypt, error) {
 		return nil, err
 	}
 	c.block = block
-	c.encbuf = make([]byte, twofish.BlockSize)
-	c.decbuf = make([]byte, 2*twofish.BlockSize)
 	return c, nil
 }
-func (c *twofishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *twofishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
-func (c *twofishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+func (c *twofishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
 type tripleDESBlockCrypt struct {
-	encbuf []byte
+	encbuf [des.BlockSize]byte
-	decbuf []byte
+	decbuf [2 * des.BlockSize]byte
 	block  cipher.Block
 }
@@ -114,17 +110,15 @@ func NewTripleDESBlockCrypt(key []byte) (BlockCrypt, error) {
 		return nil, err
 	}
 	c.block = block
-	c.encbuf = make([]byte, des.BlockSize)
-	c.decbuf = make([]byte, 2*des.BlockSize)
 	return c, nil
 }
-func (c *tripleDESBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *tripleDESBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
-func (c *tripleDESBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+func (c *tripleDESBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
 type cast5BlockCrypt struct {
-	encbuf []byte
+	encbuf [cast5.BlockSize]byte
-	decbuf []byte
+	decbuf [2 * cast5.BlockSize]byte
 	block  cipher.Block
 }
@@ -136,17 +130,15 @@ func NewCast5BlockCrypt(key []byte) (BlockCrypt, error) {
 		return nil, err
 	}
 	c.block = block
-	c.encbuf = make([]byte, cast5.BlockSize)
-	c.decbuf = make([]byte, 2*cast5.BlockSize)
 	return c, nil
 }
-func (c *cast5BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *cast5BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
-func (c *cast5BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+func (c *cast5BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
 type blowfishBlockCrypt struct {
-	encbuf []byte
+	encbuf [blowfish.BlockSize]byte
-	decbuf []byte
+	decbuf [2 * blowfish.BlockSize]byte
 	block  cipher.Block
 }
@@ -158,17 +150,15 @@ func NewBlowfishBlockCrypt(key []byte) (BlockCrypt, error) {
 		return nil, err
 	}
 	c.block = block
-	c.encbuf = make([]byte, blowfish.BlockSize)
-	c.decbuf = make([]byte, 2*blowfish.BlockSize)
 	return c, nil
 }
-func (c *blowfishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *blowfishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
-func (c *blowfishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+func (c *blowfishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
 type aesBlockCrypt struct {
-	encbuf []byte
+	encbuf [aes.BlockSize]byte
-	decbuf []byte
+	decbuf [2 * aes.BlockSize]byte
 	block  cipher.Block
 }
@@ -180,17 +170,15 @@ func NewAESBlockCrypt(key []byte) (BlockCrypt, error) {
 		return nil, err
 	}
 	c.block = block
-	c.encbuf = make([]byte, aes.BlockSize)
-	c.decbuf = make([]byte, 2*aes.BlockSize)
 	return c, nil
 }
-func (c *aesBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *aesBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
-func (c *aesBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+func (c *aesBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
 type teaBlockCrypt struct {
-	encbuf []byte
+	encbuf [tea.BlockSize]byte
-	decbuf []byte
+	decbuf [2 * tea.BlockSize]byte
 	block  cipher.Block
 }
@@ -202,17 +190,15 @@ func NewTEABlockCrypt(key []byte) (BlockCrypt, error) {
 		return nil, err
 	}
 	c.block = block
-	c.encbuf = make([]byte, tea.BlockSize)
-	c.decbuf = make([]byte, 2*tea.BlockSize)
 	return c, nil
 }
-func (c *teaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *teaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
-func (c *teaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+func (c *teaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
 type xteaBlockCrypt struct {
-	encbuf []byte
+	encbuf [xtea.BlockSize]byte
-	decbuf []byte
+	decbuf [2 * xtea.BlockSize]byte
 	block  cipher.Block
 }
@@ -224,13 +210,11 @@ func NewXTEABlockCrypt(key []byte) (BlockCrypt, error) {
 		return nil, err
 	}
 	c.block = block
-	c.encbuf = make([]byte, xtea.BlockSize)
-	c.decbuf = make([]byte, 2*xtea.BlockSize)
 	return c, nil
 }
-func (c *xteaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *xteaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
-func (c *xteaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+func (c *xteaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
 type simpleXORBlockCrypt struct {
 	xortbl []byte
@@ -258,31 +242,544 @@ func (c *noneBlockCrypt) Decrypt(dst, src []byte) { copy(dst, src) }
 // packet encryption with local CFB mode
 func encrypt(block cipher.Block, dst, src, buf []byte) {
+	switch block.BlockSize() {
+	case 8:
+		encrypt8(block, dst, src, buf)
+	case 16:
+		encrypt16(block, dst, src, buf)
+	default:
+		encryptVariant(block, dst, src, buf)
+	}
+}
+// optimized encryption for the ciphers which works in 8-bytes
+func encrypt8(block cipher.Block, dst, src, buf []byte) {
+	tbl := buf[:8]
+	block.Encrypt(tbl, initialVector)
+	n := len(src) / 8
+	base := 0
+	repeat := n / 8
+	left := n % 8
+	for i := 0; i < repeat; i++ {
+		s := src[base:][0:64]
+		d := dst[base:][0:64]
+		// 1
+		xor.BytesSrc1(d[0:8], s[0:8], tbl)
+		block.Encrypt(tbl, d[0:8])
+		// 2
+		xor.BytesSrc1(d[8:16], s[8:16], tbl)
+		block.Encrypt(tbl, d[8:16])
+		// 3
+		xor.BytesSrc1(d[16:24], s[16:24], tbl)
+		block.Encrypt(tbl, d[16:24])
+		// 4
+		xor.BytesSrc1(d[24:32], s[24:32], tbl)
+		block.Encrypt(tbl, d[24:32])
+		// 5
+		xor.BytesSrc1(d[32:40], s[32:40], tbl)
+		block.Encrypt(tbl, d[32:40])
+		// 6
+		xor.BytesSrc1(d[40:48], s[40:48], tbl)
+		block.Encrypt(tbl, d[40:48])
+		// 7
+		xor.BytesSrc1(d[48:56], s[48:56], tbl)
+		block.Encrypt(tbl, d[48:56])
+		// 8
+		xor.BytesSrc1(d[56:64], s[56:64], tbl)
+		block.Encrypt(tbl, d[56:64])
+		base += 64
+	}
+	switch left {
+	case 7:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 8
+		fallthrough
+	case 6:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 8
+		fallthrough
+	case 5:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 8
+		fallthrough
+	case 4:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 8
+		fallthrough
+	case 3:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 8
+		fallthrough
+	case 2:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 8
+		fallthrough
+	case 1:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 8
+		fallthrough
+	case 0:
+		xor.BytesSrc0(dst[base:], src[base:], tbl)
+	}
+}
+// optimized encryption for the ciphers which works in 16-bytes
+func encrypt16(block cipher.Block, dst, src, buf []byte) {
+	tbl := buf[:16]
+	block.Encrypt(tbl, initialVector)
+	n := len(src) / 16
+	base := 0
+	repeat := n / 8
+	left := n % 8
+	for i := 0; i < repeat; i++ {
+		s := src[base:][0:128]
+		d := dst[base:][0:128]
+		// 1
+		xor.BytesSrc1(d[0:16], s[0:16], tbl)
+		block.Encrypt(tbl, d[0:16])
+		// 2
+		xor.BytesSrc1(d[16:32], s[16:32], tbl)
+		block.Encrypt(tbl, d[16:32])
+		// 3
+		xor.BytesSrc1(d[32:48], s[32:48], tbl)
+		block.Encrypt(tbl, d[32:48])
+		// 4
+		xor.BytesSrc1(d[48:64], s[48:64], tbl)
+		block.Encrypt(tbl, d[48:64])
+		// 5
+		xor.BytesSrc1(d[64:80], s[64:80], tbl)
+		block.Encrypt(tbl, d[64:80])
+		// 6
+		xor.BytesSrc1(d[80:96], s[80:96], tbl)
+		block.Encrypt(tbl, d[80:96])
+		// 7
+		xor.BytesSrc1(d[96:112], s[96:112], tbl)
+		block.Encrypt(tbl, d[96:112])
+		// 8
+		xor.BytesSrc1(d[112:128], s[112:128], tbl)
+		block.Encrypt(tbl, d[112:128])
+		base += 128
+	}
+	switch left {
+	case 7:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 16
+		fallthrough
+	case 6:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 16
+		fallthrough
+	case 5:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 16
+		fallthrough
+	case 4:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 16
+		fallthrough
+	case 3:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 16
+		fallthrough
+	case 2:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 16
+		fallthrough
+	case 1:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 16
+		fallthrough
+	case 0:
+		xor.BytesSrc0(dst[base:], src[base:], tbl)
+	}
+}
+func encryptVariant(block cipher.Block, dst, src, buf []byte) {
 	blocksize := block.BlockSize()
 	tbl := buf[:blocksize]
 	block.Encrypt(tbl, initialVector)
 	n := len(src) / blocksize
 	base := 0
-	for i := 0; i < n; i++ {
+	repeat := n / 8
+	left := n % 8
+	for i := 0; i < repeat; i++ {
+		// 1
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		// 2
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		// 3
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		// 4
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		// 5
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		// 6
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		// 7
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		// 8
 		xor.BytesSrc1(dst[base:], src[base:], tbl)
 		block.Encrypt(tbl, dst[base:])
 		base += blocksize
 	}
-	xor.BytesSrc0(dst[base:], src[base:], tbl)
+	switch left {
+	case 7:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		fallthrough
+	case 6:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		fallthrough
+	case 5:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		fallthrough
+	case 4:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		fallthrough
+	case 3:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		fallthrough
+	case 2:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		fallthrough
+	case 1:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		fallthrough
+	case 0:
+		xor.BytesSrc0(dst[base:], src[base:], tbl)
+	}
 }
+// decryption
 func decrypt(block cipher.Block, dst, src, buf []byte) {
+	switch block.BlockSize() {
+	case 8:
+		decrypt8(block, dst, src, buf)
+	case 16:
+		decrypt16(block, dst, src, buf)
+	default:
+		decryptVariant(block, dst, src, buf)
+	}
+}
+func decrypt8(block cipher.Block, dst, src, buf []byte) {
+	tbl := buf[0:8]
+	next := buf[8:16]
+	block.Encrypt(tbl, initialVector)
+	n := len(src) / 8
+	base := 0
+	repeat := n / 8
+	left := n % 8
+	for i := 0; i < repeat; i++ {
+		s := src[base:][0:64]
+		d := dst[base:][0:64]
+		// 1
+		block.Encrypt(next, s[0:8])
+		xor.BytesSrc1(d[0:8], s[0:8], tbl)
+		// 2
+		block.Encrypt(tbl, s[8:16])
+		xor.BytesSrc1(d[8:16], s[8:16], next)
+		// 3
+		block.Encrypt(next, s[16:24])
+		xor.BytesSrc1(d[16:24], s[16:24], tbl)
+		// 4
+		block.Encrypt(tbl, s[24:32])
+		xor.BytesSrc1(d[24:32], s[24:32], next)
+		// 5
+		block.Encrypt(next, s[32:40])
+		xor.BytesSrc1(d[32:40], s[32:40], tbl)
+		// 6
+		block.Encrypt(tbl, s[40:48])
+		xor.BytesSrc1(d[40:48], s[40:48], next)
+		// 7
+		block.Encrypt(next, s[48:56])
+		xor.BytesSrc1(d[48:56], s[48:56], tbl)
+		// 8
+		block.Encrypt(tbl, s[56:64])
+		xor.BytesSrc1(d[56:64], s[56:64], next)
+		base += 64
+	}
+	switch left {
+	case 7:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 8
+		fallthrough
+	case 6:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 8
+		fallthrough
+	case 5:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 8
+		fallthrough
+	case 4:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 8
+		fallthrough
+	case 3:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 8
+		fallthrough
+	case 2:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 8
+		fallthrough
+	case 1:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 8
+		fallthrough
+	case 0:
+		xor.BytesSrc0(dst[base:], src[base:], tbl)
+	}
+}
+func decrypt16(block cipher.Block, dst, src, buf []byte) {
+	tbl := buf[0:16]
+	next := buf[16:32]
+	block.Encrypt(tbl, initialVector)
+	n := len(src) / 16
+	base := 0
+	repeat := n / 8
+	left := n % 8
+	for i := 0; i < repeat; i++ {
+		s := src[base:][0:128]
+		d := dst[base:][0:128]
+		// 1
+		block.Encrypt(next, s[0:16])
+		xor.BytesSrc1(d[0:16], s[0:16], tbl)
+		// 2
+		block.Encrypt(tbl, s[16:32])
+		xor.BytesSrc1(d[16:32], s[16:32], next)
+		// 3
+		block.Encrypt(next, s[32:48])
+		xor.BytesSrc1(d[32:48], s[32:48], tbl)
+		// 4
+		block.Encrypt(tbl, s[48:64])
+		xor.BytesSrc1(d[48:64], s[48:64], next)
+		// 5
+		block.Encrypt(next, s[64:80])
+		xor.BytesSrc1(d[64:80], s[64:80], tbl)
+		// 6
+		block.Encrypt(tbl, s[80:96])
+		xor.BytesSrc1(d[80:96], s[80:96], next)
+		// 7
+		block.Encrypt(next, s[96:112])
+		xor.BytesSrc1(d[96:112], s[96:112], tbl)
+		// 8
+		block.Encrypt(tbl, s[112:128])
+		xor.BytesSrc1(d[112:128], s[112:128], next)
+		base += 128
+	}
+	switch left {
+	case 7:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 16
+		fallthrough
+	case 6:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 16
+		fallthrough
+	case 5:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 16
+		fallthrough
+	case 4:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 16
+		fallthrough
+	case 3:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 16
+		fallthrough
+	case 2:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 16
+		fallthrough
+	case 1:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 16
+		fallthrough
+	case 0:
+		xor.BytesSrc0(dst[base:], src[base:], tbl)
+	}
+}
+func decryptVariant(block cipher.Block, dst, src, buf []byte) {
 	blocksize := block.BlockSize()
 	tbl := buf[:blocksize]
 	next := buf[blocksize:]
 	block.Encrypt(tbl, initialVector)
 	n := len(src) / blocksize
 	base := 0
-	for i := 0; i < n; i++ {
+	repeat := n / 8
+	left := n % 8
+	for i := 0; i < repeat; i++ {
+		// 1
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		base += blocksize
+		// 2
+		block.Encrypt(tbl, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], next)
+		base += blocksize
+		// 3
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		base += blocksize
+		// 4
+		block.Encrypt(tbl, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], next)
+		base += blocksize
+		// 5
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		base += blocksize
+		// 6
+		block.Encrypt(tbl, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], next)
+		base += blocksize
+		// 7
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		base += blocksize
+		// 8
+		block.Encrypt(tbl, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], next)
+		base += blocksize
+	}
+	switch left {
+	case 7:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += blocksize
+		fallthrough
+	case 6:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += blocksize
+		fallthrough
+	case 5:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += blocksize
+		fallthrough
+	case 4:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += blocksize
+		fallthrough
+	case 3:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += blocksize
+		fallthrough
+	case 2:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += blocksize
+		fallthrough
+	case 1:
 		block.Encrypt(next, src[base:])
 		xor.BytesSrc1(dst[base:], src[base:], tbl)
 		tbl, next = next, tbl
 		base += blocksize
+		fallthrough
+	case 0:
+		xor.BytesSrc0(dst[base:], src[base:], tbl)
 	}
-	xor.BytesSrc0(dst[base:], src[base:], tbl)
 }
--- a/vendor/github.com/fatedier/kcp-go/entropy.go
+++ b/vendor/github.com/fatedier/kcp-go/entropy.go
+package kcp
+import (
+	"crypto/aes"
+	"crypto/cipher"
+	"crypto/md5"
+	"crypto/rand"
+	"io"
+)
+// Entropy defines a entropy source
+type Entropy interface {
+	Init()
+	Fill(nonce []byte)
+}
+// nonceMD5 nonce generator for packet header
+type nonceMD5 struct {
+	seed [md5.Size]byte
+}
+func (n *nonceMD5) Init() { /*nothing required*/ }
+func (n *nonceMD5) Fill(nonce []byte) {
+	if n.seed[0] == 0 { // entropy update
+		io.ReadFull(rand.Reader, n.seed[:])
+	}
+	n.seed = md5.Sum(n.seed[:])
+	copy(nonce, n.seed[:])
+}
+// nonceAES128 nonce generator for packet headers
+type nonceAES128 struct {
+	seed  [aes.BlockSize]byte
+	block cipher.Block
+}
+func (n *nonceAES128) Init() {
+	var key [16]byte //aes-128
+	io.ReadFull(rand.Reader, key[:])
+	io.ReadFull(rand.Reader, n.seed[:])
+	block, _ := aes.NewCipher(key[:])
+	n.block = block
+}
+func (n *nonceAES128) Fill(nonce []byte) {
+	if n.seed[0] == 0 { // entropy update
+		io.ReadFull(rand.Reader, n.seed[:])
+	}
+	n.block.Encrypt(n.seed[:], n.seed[:])
+	copy(nonce, n.seed[:])
+}
--- a/vendor/github.com/fatedier/kcp-go/fec.go
+++ b/vendor/github.com/fatedier/kcp-go/fec.go
@@ -4,7 +4,7 @@ import (
 	"encoding/binary"
 	"sync/atomic"
-	"github.com/templexxx/reedsolomon"
+	"github.com/klauspost/reedsolomon"
 )
 const (
@@ -34,6 +34,9 @@ type (
 		decodeCache [][]byte
 		flagCache   []bool
+		// zeros
+		zeros []byte
 		// RS decoder
 		codec reedsolomon.Encoder
 	}
@@ -47,19 +50,20 @@ func newFECDecoder(rxlimit, dataShards, parityShards int) *fecDecoder {
 		return nil
 	}
-	fec := new(fecDecoder)
+	dec := new(fecDecoder)
-	fec.rxlimit = rxlimit
+	dec.rxlimit = rxlimit
-	fec.dataShards = dataShards
+	dec.dataShards = dataShards
-	fec.parityShards = parityShards
+	dec.parityShards = parityShards
-	fec.shardSize = dataShards + parityShards
+	dec.shardSize = dataShards + parityShards
-	enc, err := reedsolomon.New(dataShards, parityShards)
+	codec, err := reedsolomon.New(dataShards, parityShards)
 	if err != nil {
 		return nil
 	}
-	fec.codec = enc
+	dec.codec = codec
-	fec.decodeCache = make([][]byte, fec.shardSize)
+	dec.decodeCache = make([][]byte, dec.shardSize)
-	fec.flagCache = make([]bool, fec.shardSize)
+	dec.flagCache = make([]bool, dec.shardSize)
-	return fec
+	dec.zeros = make([]byte, mtuLimit)
+	return dec
 }
 // decodeBytes a fec packet
@@ -116,7 +120,7 @@ func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
 	if searchEnd-searchBegin+1 >= dec.dataShards {
 		var numshard, numDataShard, first, maxlen int
-		// zero cache
+		// zero caches
 		shards := dec.decodeCache
 		shardsflag := dec.flagCache
 		for k := range dec.decodeCache {
@@ -146,15 +150,15 @@ func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
 		}
 		if numDataShard == dec.dataShards {
-			// case 1:  no lost data shards
+			// case 1: no loss on data shards
 			dec.rx = dec.freeRange(first, numshard, dec.rx)
 		} else if numshard >= dec.dataShards {
-			// case 2: data shard lost, but  recoverable from parity shard
+			// case 2: loss on data shards, but it's recoverable from parity shards
 			for k := range shards {
 				if shards[k] != nil {
 					dlen := len(shards[k])
 					shards[k] = shards[k][:maxlen]
-					xorBytes(shards[k][dlen:], shards[k][dlen:], shards[k][dlen:])
+					copy(shards[k][dlen:], dec.zeros)
 				}
 			}
 			if err := dec.codec.ReconstructData(shards); err == nil {
@@ -170,7 +174,7 @@ func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
 	// keep rxlimit
 	if len(dec.rx) > dec.rxlimit {
-		if dec.rx[0].flag == typeData { // record unrecoverable data
+		if dec.rx[0].flag == typeData { // track the unrecoverable data
 			atomic.AddUint64(&DefaultSnmp.FECShortShards, 1)
 		}
 		dec.rx = dec.freeRange(0, 1, dec.rx)
@@ -180,7 +184,7 @@ func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
 // free a range of fecPacket, and zero for GC recycling
 func (dec *fecDecoder) freeRange(first, n int, q []fecPacket) []fecPacket {
-	for i := first; i < first+n; i++ { // free
+	for i := first; i < first+n; i++ { // recycle buffer
 		xmitBuf.Put(q[i].data)
 	}
 	copy(q[first:], q[first+n:])
@@ -200,7 +204,7 @@ type (
 		next         uint32 // next seqid
 		shardCount int // count the number of datashards collected
-		maxSize    int // record maximum data length in datashard
+		maxSize    int // track maximum data length in datashard
 		headerOffset  int // FEC header offset
 		payloadOffset int // FEC payload offset
@@ -209,6 +213,9 @@ type (
 		shardCache  [][]byte
 		encodeCache [][]byte
+		// zeros
+		zeros []byte
 		// RS encoder
 		codec reedsolomon.Encoder
 	}
@@ -218,31 +225,32 @@ func newFECEncoder(dataShards, parityShards, offset int) *fecEncoder {
 	if dataShards <= 0 || parityShards <= 0 {
 		return nil
 	}
-	fec := new(fecEncoder)
+	enc := new(fecEncoder)
-	fec.dataShards = dataShards
+	enc.dataShards = dataShards
-	fec.parityShards = parityShards
+	enc.parityShards = parityShards
-	fec.shardSize = dataShards + parityShards
+	enc.shardSize = dataShards + parityShards
-	fec.paws = (0xffffffff/uint32(fec.shardSize) - 1) * uint32(fec.shardSize)
+	enc.paws = (0xffffffff/uint32(enc.shardSize) - 1) * uint32(enc.shardSize)
-	fec.headerOffset = offset
+	enc.headerOffset = offset
-	fec.payloadOffset = fec.headerOffset + fecHeaderSize
+	enc.payloadOffset = enc.headerOffset + fecHeaderSize
-	enc, err := reedsolomon.New(dataShards, parityShards)
+	codec, err := reedsolomon.New(dataShards, parityShards)
 	if err != nil {
 		return nil
 	}
-	fec.codec = enc
+	enc.codec = codec
 	// caches
-	fec.encodeCache = make([][]byte, fec.shardSize)
+	enc.encodeCache = make([][]byte, enc.shardSize)
-	fec.shardCache = make([][]byte, fec.shardSize)
+	enc.shardCache = make([][]byte, enc.shardSize)
-	for k := range fec.shardCache {
+	for k := range enc.shardCache {
-		fec.shardCache[k] = make([]byte, mtuLimit)
+		enc.shardCache[k] = make([]byte, mtuLimit)
 	}
-	return fec
+	enc.zeros = make([]byte, mtuLimit)
+	return enc
 }
-// encode the packet, output parity shards if we have enough datashards
+// encodes the packet, outputs parity shards if we have collected quorum datashards
-// the content of returned parityshards will change in next encode
+// notice: the contents of 'ps' will be re-written in successive calling
 func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
 	enc.markData(b[enc.headerOffset:])
 	binary.LittleEndian.PutUint16(b[enc.payloadOffset:], uint16(len(b[enc.payloadOffset:])))
@@ -253,18 +261,18 @@ func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
 	copy(enc.shardCache[enc.shardCount], b)
 	enc.shardCount++
-	// record max datashard length
+	// track max datashard length
 	if sz > enc.maxSize {
 		enc.maxSize = sz
 	}
-	//  calculate Reed-Solomon Erasure Code
+	//  Generation of Reed-Solomon Erasure Code
 	if enc.shardCount == enc.dataShards {
-		// bzero each datashard's tail
+		// fill '0' into the tail of each datashard
 		for i := 0; i < enc.dataShards; i++ {
 			shard := enc.shardCache[i]
 			slen := len(shard)
-			xorBytes(shard[slen:enc.maxSize], shard[slen:enc.maxSize], shard[slen:enc.maxSize])
+			copy(shard[slen:enc.maxSize], enc.zeros)
 		}
 		// construct equal-sized slice with stripped header
@@ -273,7 +281,7 @@ func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
 			cache[k] = enc.shardCache[k][enc.payloadOffset:enc.maxSize]
 		}
-		// rs encode
+		// encoding
 		if err := enc.codec.Encode(cache); err == nil {
 			ps = enc.shardCache[enc.dataShards:]
 			for k := range ps {
@@ -282,7 +290,7 @@ func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
 			}
 		}
-		// reset counters to zero
+		// counters resetting
 		enc.shardCount = 0
 		enc.maxSize = 0
 	}

--- a/vendor/github.com/fatedier/kcp-go/kcp.go
+++ b/vendor/github.com/fatedier/kcp-go/kcp.go
@@ -104,6 +104,7 @@ type segment struct {
 	xmit     uint32
 	resendts uint32
 	fastack  uint32
+	acked    uint32 // mark if the seg has acked
 	data     []byte
 }
@@ -181,8 +182,11 @@ func (kcp *KCP) newSegment(size int) (seg segment) {
 }
 // delSegment recycles a KCP segment
-func (kcp *KCP) delSegment(seg segment) {
+func (kcp *KCP) delSegment(seg *segment) {
-	xmitBuf.Put(seg.data)
+	if seg.data != nil {
+		xmitBuf.Put(seg.data)
+		seg.data = nil
+	}
 }
 // PeekSize checks the size of next message in the recv queue
@@ -238,7 +242,7 @@ func (kcp *KCP) Recv(buffer []byte) (n int) {
 		buffer = buffer[len(seg.data):]
 		n += len(seg.data)
 		count++
-		kcp.delSegment(*seg)
+		kcp.delSegment(seg)
 		if seg.frg == 0 {
 			break
 		}
@@ -382,10 +386,8 @@ func (kcp *KCP) parse_ack(sn uint32) {
 	for k := range kcp.snd_buf {
 		seg := &kcp.snd_buf[k]
 		if sn == seg.sn {
-			kcp.delSegment(*seg)
+			seg.acked = 1
-			copy(kcp.snd_buf[k:], kcp.snd_buf[k+1:])
+			kcp.delSegment(seg)
-			kcp.snd_buf[len(kcp.snd_buf)-1] = segment{}
-			kcp.snd_buf = kcp.snd_buf[:len(kcp.snd_buf)-1]
 			break
 		}
 		if _itimediff(sn, seg.sn) < 0 {
@@ -394,7 +396,7 @@ func (kcp *KCP) parse_ack(sn uint32) {
 	}
 }
-func (kcp *KCP) parse_fastack(sn uint32) {
+func (kcp *KCP) parse_fastack(sn, ts uint32) {
 	if _itimediff(sn, kcp.snd_una) < 0 || _itimediff(sn, kcp.snd_nxt) >= 0 {
 		return
 	}
@@ -403,7 +405,7 @@ func (kcp *KCP) parse_fastack(sn uint32) {
 		seg := &kcp.snd_buf[k]
 		if _itimediff(sn, seg.sn) < 0 {
 			break
-		} else if sn != seg.sn {
+		} else if sn != seg.sn && _itimediff(seg.ts, ts) <= 0 {
 			seg.fastack++
 		}
 	}
@@ -414,7 +416,7 @@ func (kcp *KCP) parse_una(una uint32) {
 	for k := range kcp.snd_buf {
 		seg := &kcp.snd_buf[k]
 		if _itimediff(una, seg.sn) > 0 {
-			kcp.delSegment(*seg)
+			kcp.delSegment(seg)
 			count++
 		} else {
 			break
@@ -430,12 +432,12 @@ func (kcp *KCP) ack_push(sn, ts uint32) {
 	kcp.acklist = append(kcp.acklist, ackItem{sn, ts})
 }
-func (kcp *KCP) parse_data(newseg segment) {
+// returns true if data has repeated
+func (kcp *KCP) parse_data(newseg segment) bool {
 	sn := newseg.sn
 	if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) >= 0 ||
 		_itimediff(sn, kcp.rcv_nxt) < 0 {
-		kcp.delSegment(newseg)
+		return true
-		return
 	}
 	n := len(kcp.rcv_buf) - 1
@@ -445,7 +447,6 @@ func (kcp *KCP) parse_data(newseg segment) {
 		seg := &kcp.rcv_buf[i]
 		if seg.sn == sn {
 			repeat = true
-			atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
 			break
 		}
 		if _itimediff(sn, seg.sn) > 0 {
@@ -455,6 +456,11 @@ func (kcp *KCP) parse_data(newseg segment) {
 	}
 	if !repeat {
+		// replicate the content if it's new
+		dataCopy := xmitBuf.Get().([]byte)[:len(newseg.data)]
+		copy(dataCopy, newseg.data)
+		newseg.data = dataCopy
 		if insert_idx == n+1 {
 			kcp.rcv_buf = append(kcp.rcv_buf, newseg)
 		} else {
@@ -462,8 +468,6 @@ func (kcp *KCP) parse_data(newseg segment) {
 			copy(kcp.rcv_buf[insert_idx+1:], kcp.rcv_buf[insert_idx:])
 			kcp.rcv_buf[insert_idx] = newseg
 		}
-	} else {
-		kcp.delSegment(newseg)
 	}
 	// move available data from rcv_buf -> rcv_queue
@@ -481,18 +485,19 @@ func (kcp *KCP) parse_data(newseg segment) {
 		kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[:count]...)
 		kcp.rcv_buf = kcp.remove_front(kcp.rcv_buf, count)
 	}
+	return repeat
 }
 // Input when you received a low level packet (eg. UDP packet), call it
 // regular indicates a regular packet has received(not from FEC)
 func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
-	una := kcp.snd_una
+	snd_una := kcp.snd_una
 	if len(data) < IKCP_OVERHEAD {
 		return -1
 	}
-	var maxack uint32
+	var latest uint32 // the latest ack packet
-	var lastackts uint32
 	var flag int
 	var inSegs uint64
@@ -535,19 +540,15 @@ func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
 		if cmd == IKCP_CMD_ACK {
 			kcp.parse_ack(sn)
-			kcp.shrink_buf()
+			kcp.parse_fastack(sn, ts)
-			if flag == 0 {
+			flag |= 1
-				flag = 1
+			latest = ts
-				maxack = sn
-			} else if _itimediff(sn, maxack) > 0 {
-				maxack = sn
-			}
-			lastackts = ts
 		} else if cmd == IKCP_CMD_PUSH {
+			repeat := true
 			if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) < 0 {
 				kcp.ack_push(sn, ts)
 				if _itimediff(sn, kcp.rcv_nxt) >= 0 {
-					seg := kcp.newSegment(int(length))
+					var seg segment
 					seg.conv = conv
 					seg.cmd = cmd
 					seg.frg = frg
@@ -555,12 +556,11 @@ func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
 					seg.ts = ts
 					seg.sn = sn
 					seg.una = una
-					copy(seg.data, data[:length])
+					seg.data = data[:length] // delayed data copying
-					kcp.parse_data(seg)
+					repeat = kcp.parse_data(seg)
-				} else {
-					atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
 				}
-			} else {
+			}
+			if regular && repeat {
 				atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
 			}
 		} else if cmd == IKCP_CMD_WASK {
@@ -578,40 +578,42 @@ func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
 	}
 	atomic.AddUint64(&DefaultSnmp.InSegs, inSegs)
+	// update rtt with the latest ts
+	// ignore the FEC packet
 	if flag != 0 && regular {
-		kcp.parse_fastack(maxack)
 		current := currentMs()
-		if _itimediff(current, lastackts) >= 0 {
+		if _itimediff(current, latest) >= 0 {
-			kcp.update_ack(_itimediff(current, lastackts))
+			kcp.update_ack(_itimediff(current, latest))
 		}
 	}
-	if _itimediff(kcp.snd_una, una) > 0 {
+	// cwnd update when packet arrived
-		if kcp.cwnd < kcp.rmt_wnd {
+	if kcp.nocwnd == 0 {
-			mss := kcp.mss
+		if _itimediff(kcp.snd_una, snd_una) > 0 {
-			if kcp.cwnd < kcp.ssthresh {
+			if kcp.cwnd < kcp.rmt_wnd {
-				kcp.cwnd++
+				mss := kcp.mss
-				kcp.incr += mss
+				if kcp.cwnd < kcp.ssthresh {
-			} else {
-				if kcp.incr < mss {
-					kcp.incr = mss
-				}
-				kcp.incr += (mss*mss)/kcp.incr + (mss / 16)
-				if (kcp.cwnd+1)*mss <= kcp.incr {
 					kcp.cwnd++
+					kcp.incr += mss
+				} else {
+					if kcp.incr < mss {
+						kcp.incr = mss
+					}
+					kcp.incr += (mss*mss)/kcp.incr + (mss / 16)
+					if (kcp.cwnd+1)*mss <= kcp.incr {
+						kcp.cwnd++
+					}
+				}
+				if kcp.cwnd > kcp.rmt_wnd {
+					kcp.cwnd = kcp.rmt_wnd
+					kcp.incr = kcp.rmt_wnd * mss
 				}
-			}
-			if kcp.cwnd > kcp.rmt_wnd {
-				kcp.cwnd = kcp.rmt_wnd
-				kcp.incr = kcp.rmt_wnd * mss
 			}
 		}
 	}
 	if ackNoDelay && len(kcp.acklist) > 0 { // ack immediately
 		kcp.flush(true)
-	} else if kcp.rmt_wnd == 0 && len(kcp.acklist) > 0 { // window zero
-		kcp.flush(true)
 	}
 	return 0
 }
@@ -624,7 +626,7 @@ func (kcp *KCP) wnd_unused() uint16 {
 }
 // flush pending data
-func (kcp *KCP) flush(ackOnly bool) {
+func (kcp *KCP) flush(ackOnly bool) uint32 {
 	var seg segment
 	seg.conv = kcp.conv
 	seg.cmd = IKCP_CMD_ACK
@@ -653,7 +655,7 @@ func (kcp *KCP) flush(ackOnly bool) {
 		if size > 0 {
 			kcp.output(buffer, size)
 		}
-		return
+		return kcp.interval
 	}
 	// probe window size (if remote window size equals zero)
@@ -723,7 +725,6 @@ func (kcp *KCP) flush(ackOnly bool) {
 		kcp.snd_buf = append(kcp.snd_buf, newseg)
 		kcp.snd_nxt++
 		newSegsCount++
-		kcp.snd_queue[k].data = nil
 	}
 	if newSegsCount > 0 {
 		kcp.snd_queue = kcp.remove_front(kcp.snd_queue, newSegsCount)
@@ -738,9 +739,15 @@ func (kcp *KCP) flush(ackOnly bool) {
 	// check for retransmissions
 	current := currentMs()
 	var change, lost, lostSegs, fastRetransSegs, earlyRetransSegs uint64
-	for k := range kcp.snd_buf {
+	minrto := int32(kcp.interval)
-		segment := &kcp.snd_buf[k]
+	ref := kcp.snd_buf[:len(kcp.snd_buf)] // for bounds check elimination
+	for k := range ref {
+		segment := &ref[k]
 		needsend := false
+		if segment.acked == 1 {
+			continue
+		}
 		if segment.xmit == 0 { // initial transmit
 			needsend = true
 			segment.rto = kcp.rx_rto
@@ -772,6 +779,7 @@ func (kcp *KCP) flush(ackOnly bool) {
 		}
 		if needsend {
+			current = currentMs() // time update for a blocking call
 			segment.xmit++
 			segment.ts = current
 			segment.wnd = seg.wnd
@@ -782,7 +790,6 @@ func (kcp *KCP) flush(ackOnly bool) {
 			if size+need > int(kcp.mtu) {
 				kcp.output(buffer, size)
-				current = currentMs() // time update for a blocking call
 				ptr = buffer
 			}
@@ -794,6 +801,11 @@ func (kcp *KCP) flush(ackOnly bool) {
 				kcp.state = 0xFFFFFFFF
 			}
 		}
+		// get the nearest rto
+		if rto := _itimediff(segment.resendts, current); rto > 0 && rto < minrto {
+			minrto = rto
+		}
 	}
 	// flash remain segments
@@ -819,32 +831,37 @@ func (kcp *KCP) flush(ackOnly bool) {
 		atomic.AddUint64(&DefaultSnmp.RetransSegs, sum)
 	}
-	// update ssthresh
+	// cwnd update
-	// rate halving, https://tools.ietf.org/html/rfc6937
+	if kcp.nocwnd == 0 {
-	if change > 0 {
+		// update ssthresh
-		inflight := kcp.snd_nxt - kcp.snd_una
+		// rate halving, https://tools.ietf.org/html/rfc6937
-		kcp.ssthresh = inflight / 2
+		if change > 0 {
-		if kcp.ssthresh < IKCP_THRESH_MIN {
+			inflight := kcp.snd_nxt - kcp.snd_una
-			kcp.ssthresh = IKCP_THRESH_MIN
+			kcp.ssthresh = inflight / 2
+			if kcp.ssthresh < IKCP_THRESH_MIN {
+				kcp.ssthresh = IKCP_THRESH_MIN
+			}
+			kcp.cwnd = kcp.ssthresh + resent
+			kcp.incr = kcp.cwnd * kcp.mss
 		}
-		kcp.cwnd = kcp.ssthresh + resent
-		kcp.incr = kcp.cwnd * kcp.mss
-	}
-	// congestion control, https://tools.ietf.org/html/rfc5681
+		// congestion control, https://tools.ietf.org/html/rfc5681
-	if lost > 0 {
+		if lost > 0 {
-		kcp.ssthresh = cwnd / 2
+			kcp.ssthresh = cwnd / 2
-		if kcp.ssthresh < IKCP_THRESH_MIN {
+			if kcp.ssthresh < IKCP_THRESH_MIN {
-			kcp.ssthresh = IKCP_THRESH_MIN
+				kcp.ssthresh = IKCP_THRESH_MIN
+			}
+			kcp.cwnd = 1
+			kcp.incr = kcp.mss
 		}
-		kcp.cwnd = 1
-		kcp.incr = kcp.mss
-	}
-	if kcp.cwnd < 1 {
+		if kcp.cwnd < 1 {
-		kcp.cwnd = 1
+			kcp.cwnd = 1
-		kcp.incr = kcp.mss
+			kcp.incr = kcp.mss
+		}
 	}
+	return uint32(minrto)
 }
 // Update updates state (call it repeatedly, every 10ms-100ms), or you can ask
@@ -991,8 +1008,5 @@ func (kcp *KCP) WaitSnd() int {
 // remove front n elements from queue
 func (kcp *KCP) remove_front(q []segment, n int) []segment {
 	newn := copy(q, q[n:])
-	for i := newn; i < len(q); i++ {
-		q[i] = segment{} // manual set nil for GC
-	}
 	return q[:newn]
 }
--- a/vendor/github.com/fatedier/kcp-go/sess.go
+++ b/vendor/github.com/fatedier/kcp-go/sess.go
--- a/vendor/github.com/fatedier/kcp-go/updater.go
+++ b/vendor/github.com/fatedier/kcp-go/updater.go
@@ -85,20 +85,19 @@ func (h *updateHeap) updateTask() {
 		h.mu.Lock()
 		hlen := h.Len()
-		now := time.Now()
 		for i := 0; i < hlen; i++ {
-			entry := heap.Pop(h).(entry)
+			entry := &h.entries[0]
-			if now.After(entry.ts) {
+			if time.Now().After(entry.ts) {
-				entry.ts = now.Add(entry.s.update())
+				interval := entry.s.update()
-				heap.Push(h, entry)
+				entry.ts = time.Now().Add(interval)
+				heap.Fix(h, 0)
 			} else {
-				heap.Push(h, entry)
 				break
 			}
 		}
 		if hlen > 0 {
-			timer = time.After(h.entries[0].ts.Sub(now))
+			timer = time.After(h.entries[0].ts.Sub(time.Now()))
 		}
 		h.mu.Unlock()
 	}

--- a/vendor/github.com/fatedier/kcp-go/xor.go
+++ b/vendor/github.com/fatedier/kcp-go/xor.go
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-package kcp
-import (
-	"runtime"
-	"unsafe"
-)
-const wordSize = int(unsafe.Sizeof(uintptr(0)))
-const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"
-// fastXORBytes xors in bulk. It only works on architectures that
-// support unaligned read/writes.
-func fastXORBytes(dst, a, b []byte) int {
-	n := len(a)
-	if len(b) < n {
-		n = len(b)
-	}
-	w := n / wordSize
-	if w > 0 {
-		wordBytes := w * wordSize
-		fastXORWords(dst[:wordBytes], a[:wordBytes], b[:wordBytes])
-	}
-	for i := (n - n%wordSize); i < n; i++ {
-		dst[i] = a[i] ^ b[i]
-	}
-	return n
-}
-func safeXORBytes(dst, a, b []byte) int {
-	n := len(a)
-	if len(b) < n {
-		n = len(b)
-	}
-	ex := n % 8
-	for i := 0; i < ex; i++ {
-		dst[i] = a[i] ^ b[i]
-	}
-	for i := ex; i < n; i += 8 {
-		_dst := dst[i : i+8]
-		_a := a[i : i+8]
-		_b := b[i : i+8]
-		_dst[0] = _a[0] ^ _b[0]
-		_dst[1] = _a[1] ^ _b[1]
-		_dst[2] = _a[2] ^ _b[2]
-		_dst[3] = _a[3] ^ _b[3]
-		_dst[4] = _a[4] ^ _b[4]
-		_dst[5] = _a[5] ^ _b[5]
-		_dst[6] = _a[6] ^ _b[6]
-		_dst[7] = _a[7] ^ _b[7]
-	}
-	return n
-}
-// xorBytes xors the bytes in a and b. The destination is assumed to have enough
-// space. Returns the number of bytes xor'd.
-func xorBytes(dst, a, b []byte) int {
-	if supportsUnaligned {
-		return fastXORBytes(dst, a, b)
-	}
-	// TODO(hanwen): if (dst, a, b) have common alignment
-	// we could still try fastXORBytes. It is not clear
-	// how often this happens, and it's only worth it if
-	// the block encryption itself is hardware
-	// accelerated.
-	return safeXORBytes(dst, a, b)
-}
-// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
-// The arguments are assumed to be of equal length.
-func fastXORWords(dst, a, b []byte) {
-	dw := *(*[]uintptr)(unsafe.Pointer(&dst))
-	aw := *(*[]uintptr)(unsafe.Pointer(&a))
-	bw := *(*[]uintptr)(unsafe.Pointer(&b))
-	n := len(b) / wordSize
-	ex := n % 8
-	for i := 0; i < ex; i++ {
-		dw[i] = aw[i] ^ bw[i]
-	}
-	for i := ex; i < n; i += 8 {
-		_dw := dw[i : i+8]
-		_aw := aw[i : i+8]
-		_bw := bw[i : i+8]
-		_dw[0] = _aw[0] ^ _bw[0]
-		_dw[1] = _aw[1] ^ _bw[1]
-		_dw[2] = _aw[2] ^ _bw[2]
-		_dw[3] = _aw[3] ^ _bw[3]
-		_dw[4] = _aw[4] ^ _bw[4]
-		_dw[5] = _aw[5] ^ _bw[5]
-		_dw[6] = _aw[6] ^ _bw[6]
-		_dw[7] = _aw[7] ^ _bw[7]
-	}
-}
-func xorWords(dst, a, b []byte) {
-	if supportsUnaligned {
-		fastXORWords(dst, a, b)
-	} else {
-		safeXORBytes(dst, a, b)
-	}
-}
--- a/vendor/github.com/templexxx/reedsolomon/.gitignore
+++ b/vendor/github.com/templexxx/reedsolomon/.gitignore
@@ -22,19 +22,3 @@ _testmain.go
 *.exe
 *.test
 *.prof
-/.idea
-/backup
-/loopunroll/
-cpu.out
-mathtool/galois/
-mathtool/matrix/
-mem.out
-/examples/
-/.DS_Store
-/mathtool/cntinverse
-/invert
-/bakcup
-/buf.svg
-*.svg
-*.out
-/escape
--- a/vendor/github.com/klauspost/cpuid/.travis.yml
+++ b/vendor/github.com/klauspost/cpuid/.travis.yml
+language: go
+sudo: false
+os:
+  - linux
+  - osx  
+go:
+  - 1.8.x
+  - 1.9.x
+  - 1.10.x
+  - master
+script: 
+ - go vet ./...
+ - go test -v ./...
+ - go test -race ./...
+ - diff <(gofmt -d .) <("") 
+matrix:
+  allow_failures:
+    - go: 'master'
+  fast_finish: true 
--- a/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt
+++ b/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt
+Developer Certificate of Origin
+Version 1.1
+Copyright (C) 2015- Klaus Post & Contributors.
+Email: klauspost@gmail.com
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+Developer's Certificate of Origin 1.1
+By making a contribution to this project, I certify that:
+(a) The contribution was created in whole or in part by me and I
+    have the right to submit it under the open source license
+    indicated in the file; or
+(b) The contribution is based upon previous work that, to the best
+    of my knowledge, is covered under an appropriate open source
+    license and I have the right under that license to submit that
+    work with modifications, whether created in whole or in part
+    by me, under the same open source license (unless I am
+    permitted to submit under a different license), as indicated
+    in the file; or
+(c) The contribution was provided directly to me by some other
+    person who certified (a), (b) or (c) and I have not modified
+    it.
+(d) I understand and agree that this project and the contribution
+    are public and that a record of the contribution (including all
+    personal information I submit with it, including my sign-off) is
+    maintained indefinitely and may be redistributed consistent with
+    this project or the open source license(s) involved.
--- a/vendor/github.com/klauspost/cpuid/LICENSE
+++ b/vendor/github.com/klauspost/cpuid/LICENSE
+The MIT License (MIT)
+Copyright (c) 2015 Klaus Post
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/vendor/github.com/klauspost/cpuid/README.md
+++ b/vendor/github.com/klauspost/cpuid/README.md
+# cpuid
+Package cpuid provides information about the CPU running the current program.
+CPU features are detected on startup, and kept for fast access through the life of the application.
+Currently x86 / x64 (AMD64) is supported, and no external C (cgo) code is used, which should make the library very easy to use.
+You can access the CPU information by accessing the shared CPU variable of the cpuid library.
+Package home: https://github.com/klauspost/cpuid
+[![GoDoc][1]][2] [![Build Status][3]][4]
+[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg
+[2]: https://godoc.org/github.com/klauspost/cpuid
+[3]: https://travis-ci.org/klauspost/cpuid.svg
+[4]: https://travis-ci.org/klauspost/cpuid
+# features
+## CPU Instructions
+*  **CMOV** (i686 CMOV)
+*  **NX** (NX (No-Execute) bit)
+*  **AMD3DNOW** (AMD 3DNOW)
+*  **AMD3DNOWEXT** (AMD 3DNowExt)
+*  **MMX** (standard MMX)
+*  **MMXEXT** (SSE integer functions or AMD MMX ext)
+*  **SSE** (SSE functions)
+*  **SSE2** (P4 SSE functions)
+*  **SSE3** (Prescott SSE3 functions)
+*  **SSSE3** (Conroe SSSE3 functions)
+*  **SSE4** (Penryn SSE4.1 functions)
+*  **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions)
+*  **SSE42** (Nehalem SSE4.2 functions)
+*  **AVX** (AVX functions)
+*  **AVX2** (AVX2 functions)
+*  **FMA3** (Intel FMA 3)
+*  **FMA4** (Bulldozer FMA4 functions)
+*  **XOP** (Bulldozer XOP functions)
+*  **F16C** (Half-precision floating-point conversion)
+*  **BMI1** (Bit Manipulation Instruction Set 1)
+*  **BMI2** (Bit Manipulation Instruction Set 2)
+*  **TBM** (AMD Trailing Bit Manipulation)
+*  **LZCNT** (LZCNT instruction)
+*  **POPCNT** (POPCNT instruction)
+*  **AESNI** (Advanced Encryption Standard New Instructions)
+*  **CLMUL** (Carry-less Multiplication)
+*  **HTT** (Hyperthreading (enabled))
+*  **HLE** (Hardware Lock Elision)
+*  **RTM** (Restricted Transactional Memory)
+*  **RDRAND** (RDRAND instruction is available)
+*  **RDSEED** (RDSEED instruction is available)
+*  **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions))
+*  **SHA** (Intel SHA Extensions)
+*  **AVX512F** (AVX-512 Foundation)
+*  **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions)
+*  **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions)
+*  **AVX512PF** (AVX-512 Prefetch Instructions)
+*  **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions)
+*  **AVX512CD** (AVX-512 Conflict Detection Instructions)
+*  **AVX512BW** (AVX-512 Byte and Word Instructions)
+*  **AVX512VL** (AVX-512 Vector Length Extensions)
+*  **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions)
+*  **MPX** (Intel MPX (Memory Protection Extensions))
+*  **ERMS** (Enhanced REP MOVSB/STOSB)
+*  **RDTSCP** (RDTSCP Instruction)
+*  **CX16** (CMPXCHG16B Instruction)
+*  **SGX** (Software Guard Extensions, with activation details)
+## Performance
+*  **RDTSCP()** Returns current cycle count. Can be used for benchmarking.
+*  **SSE2SLOW** (SSE2 is supported, but usually not faster)
+*  **SSE3SLOW** (SSE3 is supported, but usually not faster)
+*  **ATOM** (Atom processor, some SSSE3 instructions are slower)
+*  **Cache line** (Probable size of a cache line).
+*  **L1, L2, L3 Cache size** on newer Intel/AMD CPUs.
+## Cpu Vendor/VM
+* **Intel**
+* **AMD**
+* **VIA**
+* **Transmeta**
+* **NSC**
+* **KVM**  (Kernel-based Virtual Machine)
+* **MSVM** (Microsoft Hyper-V or Windows Virtual PC)
+* **VMware**
+* **XenHVM**
+# installing
+```go get github.com/klauspost/cpuid```
+# example
+```Go
+package main
+import (
+	"fmt"
+	"github.com/klauspost/cpuid"
+)
+func main() {
+	// Print basic CPU information:
+	fmt.Println("Name:", cpuid.CPU.BrandName)
+	fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores)
+	fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore)
+	fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores)
+	fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model)
+	fmt.Println("Features:", cpuid.CPU.Features)
+	fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine)
+	fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes")
+	fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1D, "bytes")
+	fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes")
+	fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes")
+	// Test if we have a specific feature:
+	if cpuid.CPU.SSE() {
+		fmt.Println("We have Streaming SIMD Extensions")
+	}
+}
+```
+Sample output:
+```
+>go run main.go
+Name: Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz
+PhysicalCores: 2
+ThreadsPerCore: 2
+LogicalCores: 4
+Family 6 Model: 42
+Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL
+Cacheline bytes: 64
+We have Streaming SIMD Extensions
+```
+# private package
+In the "private" folder you can find an autogenerated version of the library you can include in your own packages.
+For this purpose all exports are removed, and functions and constants are lowercased.
+This is not a recommended way of using the library, but provided for convenience, if it is difficult for you to use external packages.
+# license
+This code is published under an MIT license. See LICENSE file for more information.
--- a/vendor/github.com/klauspost/cpuid/cpuid.go
+++ b/vendor/github.com/klauspost/cpuid/cpuid.go
--- a/vendor/github.com/klauspost/cpuid/cpuid_386.s
+++ b/vendor/github.com/klauspost/cpuid/cpuid_386.s
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+// +build 386,!gccgo
+// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·asmCpuid(SB), 7, $0
+	XORL CX, CX
+	MOVL op+0(FP), AX
+	CPUID
+	MOVL AX, eax+4(FP)
+	MOVL BX, ebx+8(FP)
+	MOVL CX, ecx+12(FP)
+	MOVL DX, edx+16(FP)
+	RET
+// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·asmCpuidex(SB), 7, $0
+	MOVL op+0(FP), AX
+	MOVL op2+4(FP), CX
+	CPUID
+	MOVL AX, eax+8(FP)
+	MOVL BX, ebx+12(FP)
+	MOVL CX, ecx+16(FP)
+	MOVL DX, edx+20(FP)
+	RET
+// func xgetbv(index uint32) (eax, edx uint32)
+TEXT ·asmXgetbv(SB), 7, $0
+	MOVL index+0(FP), CX
+	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
+	MOVL AX, eax+4(FP)
+	MOVL DX, edx+8(FP)
+	RET
+// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
+TEXT ·asmRdtscpAsm(SB), 7, $0
+	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
+	MOVL AX, eax+0(FP)
+	MOVL BX, ebx+4(FP)
+	MOVL CX, ecx+8(FP)
+	MOVL DX, edx+12(FP)
+	RET
--- a/vendor/github.com/klauspost/cpuid/cpuid_amd64.s
+++ b/vendor/github.com/klauspost/cpuid/cpuid_amd64.s
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+//+build amd64,!gccgo
+// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·asmCpuid(SB), 7, $0
+	XORQ CX, CX
+	MOVL op+0(FP), AX
+	CPUID
+	MOVL AX, eax+8(FP)
+	MOVL BX, ebx+12(FP)
+	MOVL CX, ecx+16(FP)
+	MOVL DX, edx+20(FP)
+	RET
+// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·asmCpuidex(SB), 7, $0
+	MOVL op+0(FP), AX
+	MOVL op2+4(FP), CX
+	CPUID
+	MOVL AX, eax+8(FP)
+	MOVL BX, ebx+12(FP)
+	MOVL CX, ecx+16(FP)
+	MOVL DX, edx+20(FP)
+	RET
+// func asmXgetbv(index uint32) (eax, edx uint32)
+TEXT ·asmXgetbv(SB), 7, $0
+	MOVL index+0(FP), CX
+	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
+	MOVL AX, eax+8(FP)
+	MOVL DX, edx+12(FP)
+	RET
+// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
+TEXT ·asmRdtscpAsm(SB), 7, $0
+	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
+	MOVL AX, eax+0(FP)
+	MOVL BX, ebx+4(FP)
+	MOVL CX, ecx+8(FP)
+	MOVL DX, edx+12(FP)
+	RET
--- a/vendor/github.com/klauspost/cpuid/detect_intel.go
+++ b/vendor/github.com/klauspost/cpuid/detect_intel.go
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+// +build 386,!gccgo amd64,!gccgo
+package cpuid
+func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
+func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+func asmXgetbv(index uint32) (eax, edx uint32)
+func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
+func initCPU() {
+	cpuid = asmCpuid
+	cpuidex = asmCpuidex
+	xgetbv = asmXgetbv
+	rdtscpAsm = asmRdtscpAsm
+}
--- a/vendor/github.com/klauspost/cpuid/detect_ref.go
+++ b/vendor/github.com/klauspost/cpuid/detect_ref.go
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+// +build !amd64,!386 gccgo
+package cpuid
+func initCPU() {
+	cpuid = func(op uint32) (eax, ebx, ecx, edx uint32) {
+		return 0, 0, 0, 0
+	}
+	cpuidex = func(op, op2 uint32) (eax, ebx, ecx, edx uint32) {
+		return 0, 0, 0, 0
+	}
+	xgetbv = func(index uint32) (eax, edx uint32) {
+		return 0, 0
+	}
+	rdtscpAsm = func() (eax, ebx, ecx, edx uint32) {
+		return 0, 0, 0, 0
+	}
+}
--- a/vendor/github.com/klauspost/cpuid/generate.go
+++ b/vendor/github.com/klauspost/cpuid/generate.go
+package cpuid
+//go:generate go run private-gen.go
+//go:generate gofmt -w ./private
--- a/vendor/github.com/klauspost/cpuid/private-gen.go
+++ b/vendor/github.com/klauspost/cpuid/private-gen.go
+// +build ignore
+package main
+import (
+	"bytes"
+	"fmt"
+	"go/ast"
+	"go/parser"
+	"go/printer"
+	"go/token"
+	"io"
+	"io/ioutil"
+	"log"
+	"os"
+	"reflect"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+var inFiles = []string{"cpuid.go", "cpuid_test.go"}
+var copyFiles = []string{"cpuid_amd64.s", "cpuid_386.s", "detect_ref.go", "detect_intel.go"}
+var fileSet = token.NewFileSet()
+var reWrites = []rewrite{
+	initRewrite("CPUInfo -> cpuInfo"),
+	initRewrite("Vendor -> vendor"),
+	initRewrite("Flags -> flags"),
+	initRewrite("Detect -> detect"),
+	initRewrite("CPU -> cpu"),
+}
+var excludeNames = map[string]bool{"string": true, "join": true, "trim": true,
+	// cpuid_test.go
+	"t": true, "println": true, "logf": true, "log": true, "fatalf": true, "fatal": true,
+}
+var excludePrefixes = []string{"test", "benchmark"}
+func main() {
+	Package := "private"
+	parserMode := parser.ParseComments
+	exported := make(map[string]rewrite)
+	for _, file := range inFiles {
+		in, err := os.Open(file)
+		if err != nil {
+			log.Fatalf("opening input", err)
+		}
+		src, err := ioutil.ReadAll(in)
+		if err != nil {
+			log.Fatalf("reading input", err)
+		}
+		astfile, err := parser.ParseFile(fileSet, file, src, parserMode)
+		if err != nil {
+			log.Fatalf("parsing input", err)
+		}
+		for _, rw := range reWrites {
+			astfile = rw(astfile)
+		}
+		// Inspect the AST and print all identifiers and literals.
+		var startDecl token.Pos
+		var endDecl token.Pos
+		ast.Inspect(astfile, func(n ast.Node) bool {
+			var s string
+			switch x := n.(type) {
+			case *ast.Ident:
+				if x.IsExported() {
+					t := strings.ToLower(x.Name)
+					for _, pre := range excludePrefixes {
+						if strings.HasPrefix(t, pre) {
+							return true
+						}
+					}
+					if excludeNames[t] != true {
+						//if x.Pos() > startDecl && x.Pos() < endDecl {
+						exported[x.Name] = initRewrite(x.Name + " -> " + t)
+					}
+				}
+			case *ast.GenDecl:
+				if x.Tok == token.CONST && x.Lparen > 0 {
+					startDecl = x.Lparen
+					endDecl = x.Rparen
+					// fmt.Printf("Decl:%s -> %s\n", fileSet.Position(startDecl), fileSet.Position(endDecl))
+				}
+			}
+			if s != "" {
+				fmt.Printf("%s:\t%s\n", fileSet.Position(n.Pos()), s)
+			}
+			return true
+		})
+		for _, rw := range exported {
+			astfile = rw(astfile)
+		}
+		var buf bytes.Buffer
+		printer.Fprint(&buf, fileSet, astfile)
+		// Remove package documentation and insert information
+		s := buf.String()
+		ind := strings.Index(buf.String(), "\npackage cpuid")
+		s = s[ind:]
+		s = "// Generated, DO NOT EDIT,\n" +
+			"// but copy it to your own project and rename the package.\n" +
+			"// See more at http://github.com/klauspost/cpuid\n" +
+			s
+		outputName := Package + string(os.PathSeparator) + file
+		err = ioutil.WriteFile(outputName, []byte(s), 0644)
+		if err != nil {
+			log.Fatalf("writing output: %s", err)
+		}
+		log.Println("Generated", outputName)
+	}
+	for _, file := range copyFiles {
+		dst := ""
+		if strings.HasPrefix(file, "cpuid") {
+			dst = Package + string(os.PathSeparator) + file
+		} else {
+			dst = Package + string(os.PathSeparator) + "cpuid_" + file
+		}
+		err := copyFile(file, dst)
+		if err != nil {
+			log.Fatalf("copying file: %s", err)
+		}
+		log.Println("Copied", dst)
+	}
+}
+// CopyFile copies a file from src to dst. If src and dst files exist, and are
+// the same, then return success. Copy the file contents from src to dst.
+func copyFile(src, dst string) (err error) {
+	sfi, err := os.Stat(src)
+	if err != nil {
+		return
+	}
+	if !sfi.Mode().IsRegular() {
+		// cannot copy non-regular files (e.g., directories,
+		// symlinks, devices, etc.)
+		return fmt.Errorf("CopyFile: non-regular source file %s (%q)", sfi.Name(), sfi.Mode().String())
+	}
+	dfi, err := os.Stat(dst)
+	if err != nil {
+		if !os.IsNotExist(err) {
+			return
+		}
+	} else {
+		if !(dfi.Mode().IsRegular()) {
+			return fmt.Errorf("CopyFile: non-regular destination file %s (%q)", dfi.Name(), dfi.Mode().String())
+		}
+		if os.SameFile(sfi, dfi) {
+			return
+		}
+	}
+	err = copyFileContents(src, dst)
+	return
+}
+// copyFileContents copies the contents of the file named src to the file named
+// by dst. The file will be created if it does not already exist. If the
+// destination file exists, all it's contents will be replaced by the contents
+// of the source file.
+func copyFileContents(src, dst string) (err error) {
+	in, err := os.Open(src)
+	if err != nil {
+		return
+	}
+	defer in.Close()
+	out, err := os.Create(dst)
+	if err != nil {
+		return
+	}
+	defer func() {
+		cerr := out.Close()
+		if err == nil {
+			err = cerr
+		}
+	}()
+	if _, err = io.Copy(out, in); err != nil {
+		return
+	}
+	err = out.Sync()
+	return
+}
+type rewrite func(*ast.File) *ast.File
+// Mostly copied from gofmt
+func initRewrite(rewriteRule string) rewrite {
+	f := strings.Split(rewriteRule, "->")
+	if len(f) != 2 {
+		fmt.Fprintf(os.Stderr, "rewrite rule must be of the form 'pattern -> replacement'\n")
+		os.Exit(2)
+	}
+	pattern := parseExpr(f[0], "pattern")
+	replace := parseExpr(f[1], "replacement")
+	return func(p *ast.File) *ast.File { return rewriteFile(pattern, replace, p) }
+}
+// parseExpr parses s as an expression.
+// It might make sense to expand this to allow statement patterns,
+// but there are problems with preserving formatting and also
+// with what a wildcard for a statement looks like.
+func parseExpr(s, what string) ast.Expr {
+	x, err := parser.ParseExpr(s)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "parsing %s %s at %s\n", what, s, err)
+		os.Exit(2)
+	}
+	return x
+}
+// Keep this function for debugging.
+/*
+func dump(msg string, val reflect.Value) {
+	fmt.Printf("%s:\n", msg)
+	ast.Print(fileSet, val.Interface())
+	fmt.Println()
+}
+*/
+// rewriteFile applies the rewrite rule 'pattern -> replace' to an entire file.
+func rewriteFile(pattern, replace ast.Expr, p *ast.File) *ast.File {
+	cmap := ast.NewCommentMap(fileSet, p, p.Comments)
+	m := make(map[string]reflect.Value)
+	pat := reflect.ValueOf(pattern)
+	repl := reflect.ValueOf(replace)
+	var rewriteVal func(val reflect.Value) reflect.Value
+	rewriteVal = func(val reflect.Value) reflect.Value {
+		// don't bother if val is invalid to start with
+		if !val.IsValid() {
+			return reflect.Value{}
+		}
+		for k := range m {
+			delete(m, k)
+		}
+		val = apply(rewriteVal, val)
+		if match(m, pat, val) {
+			val = subst(m, repl, reflect.ValueOf(val.Interface().(ast.Node).Pos()))
+		}
+		return val
+	}
+	r := apply(rewriteVal, reflect.ValueOf(p)).Interface().(*ast.File)
+	r.Comments = cmap.Filter(r).Comments() // recreate comments list
+	return r
+}
+// set is a wrapper for x.Set(y); it protects the caller from panics if x cannot be changed to y.
+func set(x, y reflect.Value) {
+	// don't bother if x cannot be set or y is invalid
+	if !x.CanSet() || !y.IsValid() {
+		return
+	}
+	defer func() {
+		if x := recover(); x != nil {
+			if s, ok := x.(string); ok &&
+				(strings.Contains(s, "type mismatch") || strings.Contains(s, "not assignable")) {
+				// x cannot be set to y - ignore this rewrite
+				return
+			}
+			panic(x)
+		}
+	}()
+	x.Set(y)
+}
+// Values/types for special cases.
+var (
+	objectPtrNil = reflect.ValueOf((*ast.Object)(nil))
+	scopePtrNil  = reflect.ValueOf((*ast.Scope)(nil))
+	identType     = reflect.TypeOf((*ast.Ident)(nil))
+	objectPtrType = reflect.TypeOf((*ast.Object)(nil))
+	positionType  = reflect.TypeOf(token.NoPos)
+	callExprType  = reflect.TypeOf((*ast.CallExpr)(nil))
+	scopePtrType  = reflect.TypeOf((*ast.Scope)(nil))
+)
+// apply replaces each AST field x in val with f(x), returning val.
+// To avoid extra conversions, f operates on the reflect.Value form.
+func apply(f func(reflect.Value) reflect.Value, val reflect.Value) reflect.Value {
+	if !val.IsValid() {
+		return reflect.Value{}
+	}
+	// *ast.Objects introduce cycles and are likely incorrect after
+	// rewrite; don't follow them but replace with nil instead
+	if val.Type() == objectPtrType {
+		return objectPtrNil
+	}
+	// similarly for scopes: they are likely incorrect after a rewrite;
+	// replace them with nil
+	if val.Type() == scopePtrType {
+		return scopePtrNil
+	}
+	switch v := reflect.Indirect(val); v.Kind() {
+	case reflect.Slice:
+		for i := 0; i < v.Len(); i++ {
+			e := v.Index(i)
+			set(e, f(e))
+		}
+	case reflect.Struct:
+		for i := 0; i < v.NumField(); i++ {
+			e := v.Field(i)
+			set(e, f(e))
+		}
+	case reflect.Interface:
+		e := v.Elem()
+		set(v, f(e))
+	}
+	return val
+}
+func isWildcard(s string) bool {
+	rune, size := utf8.DecodeRuneInString(s)
+	return size == len(s) && unicode.IsLower(rune)
+}
+// match returns true if pattern matches val,
+// recording wildcard submatches in m.
+// If m == nil, match checks whether pattern == val.
+func match(m map[string]reflect.Value, pattern, val reflect.Value) bool {
+	// Wildcard matches any expression.  If it appears multiple
+	// times in the pattern, it must match the same expression
+	// each time.
+	if m != nil && pattern.IsValid() && pattern.Type() == identType {
+		name := pattern.Interface().(*ast.Ident).Name
+		if isWildcard(name) && val.IsValid() {
+			// wildcards only match valid (non-nil) expressions.
+			if _, ok := val.Interface().(ast.Expr); ok && !val.IsNil() {
+				if old, ok := m[name]; ok {
+					return match(nil, old, val)
+				}
+				m[name] = val
+				return true
+			}
+		}
+	}
+	// Otherwise, pattern and val must match recursively.
+	if !pattern.IsValid() || !val.IsValid() {
+		return !pattern.IsValid() && !val.IsValid()
+	}
+	if pattern.Type() != val.Type() {
+		return false
+	}
+	// Special cases.
+	switch pattern.Type() {
+	case identType:
+		// For identifiers, only the names need to match
+		// (and none of the other *ast.Object information).
+		// This is a common case, handle it all here instead
+		// of recursing down any further via reflection.
+		p := pattern.Interface().(*ast.Ident)
+		v := val.Interface().(*ast.Ident)
+		return p == nil && v == nil || p != nil && v != nil && p.Name == v.Name
+	case objectPtrType, positionType:
+		// object pointers and token positions always match
+		return true
+	case callExprType:
+		// For calls, the Ellipsis fields (token.Position) must
+		// match since that is how f(x) and f(x...) are different.
+		// Check them here but fall through for the remaining fields.
+		p := pattern.Interface().(*ast.CallExpr)
+		v := val.Interface().(*ast.CallExpr)
+		if p.Ellipsis.IsValid() != v.Ellipsis.IsValid() {
+			return false
+		}
+	}
+	p := reflect.Indirect(pattern)
+	v := reflect.Indirect(val)
+	if !p.IsValid() || !v.IsValid() {
+		return !p.IsValid() && !v.IsValid()
+	}
+	switch p.Kind() {
+	case reflect.Slice:
+		if p.Len() != v.Len() {
+			return false
+		}
+		for i := 0; i < p.Len(); i++ {
+			if !match(m, p.Index(i), v.Index(i)) {
+				return false
+			}
+		}
+		return true
+	case reflect.Struct:
+		for i := 0; i < p.NumField(); i++ {
+			if !match(m, p.Field(i), v.Field(i)) {
+				return false
+			}
+		}
+		return true
+	case reflect.Interface:
+		return match(m, p.Elem(), v.Elem())
+	}
+	// Handle token integers, etc.
+	return p.Interface() == v.Interface()
+}
+// subst returns a copy of pattern with values from m substituted in place
+// of wildcards and pos used as the position of tokens from the pattern.
+// if m == nil, subst returns a copy of pattern and doesn't change the line
+// number information.
+func subst(m map[string]reflect.Value, pattern reflect.Value, pos reflect.Value) reflect.Value {
+	if !pattern.IsValid() {
+		return reflect.Value{}
+	}
+	// Wildcard gets replaced with map value.
+	if m != nil && pattern.Type() == identType {
+		name := pattern.Interface().(*ast.Ident).Name
+		if isWildcard(name) {
+			if old, ok := m[name]; ok {
+				return subst(nil, old, reflect.Value{})
+			}
+		}
+	}
+	if pos.IsValid() && pattern.Type() == positionType {
+		// use new position only if old position was valid in the first place
+		if old := pattern.Interface().(token.Pos); !old.IsValid() {
+			return pattern
+		}
+		return pos
+	}
+	// Otherwise copy.
+	switch p := pattern; p.Kind() {
+	case reflect.Slice:
+		v := reflect.MakeSlice(p.Type(), p.Len(), p.Len())
+		for i := 0; i < p.Len(); i++ {
+			v.Index(i).Set(subst(m, p.Index(i), pos))
+		}
+		return v
+	case reflect.Struct:
+		v := reflect.New(p.Type()).Elem()
+		for i := 0; i < p.NumField(); i++ {
+			v.Field(i).Set(subst(m, p.Field(i), pos))
+		}
+		return v
+	case reflect.Ptr:
+		v := reflect.New(p.Type()).Elem()
+		if elem := p.Elem(); elem.IsValid() {
+			v.Set(subst(m, elem, pos).Addr())
+		}
+		return v
+	case reflect.Interface:
+		v := reflect.New(p.Type()).Elem()
+		if elem := p.Elem(); elem.IsValid() {
+			v.Set(subst(m, elem, pos))
+		}
+		return v
+	}
+	return pattern
+}
--- a/vendor/github.com/klauspost/reedsolomon/.gitignore
+++ b/vendor/github.com/klauspost/reedsolomon/.gitignore
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+# Folders
+_obj
+_test
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+_testmain.go
+*.exe
+*.test
+*.prof
+.idea
\ No newline at end of file
--- a/vendor/github.com/klauspost/reedsolomon/.travis.yml
+++ b/vendor/github.com/klauspost/reedsolomon/.travis.yml
+language: go
+sudo: false
+os:
+  - linux
+  - osx 
+go:
+  - 1.9.x
+  - 1.10.x
+  - 1.11.x
+  - 1.12.x
+  - master
+install:
+ - go get ./...
+script: 
+ - go vet ./...
+ - go test -v -cpu=1,2,4 .
+ - go test -v -cpu=1,2,4 -short -race .
+ - go test -tags=noasm -v -cpu=1,2,4 -short -race .
+ - go build examples/simple-decoder.go
+ - go build examples/simple-encoder.go
+ - go build examples/stream-decoder.go
+ - go build examples/stream-encoder.go
+ - diff <(gofmt -d .) <("")
+matrix:
+  allow_failures:
+    - go: 'master'
+  fast_finish: true
--- a/vendor/github.com/templexxx/reedsolomon/LICENSE
+++ b/vendor/github.com/templexxx/reedsolomon/LICENSE
-MIT License
+The MIT License (MIT)
-Copyright (c) 2017 Templexxx
 Copyright (c) 2015 Klaus Post
 Copyright (c) 2015 Backblaze
@@ -21,3 +20,4 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/vendor/github.com/klauspost/reedsolomon/README.md
+++ b/vendor/github.com/klauspost/reedsolomon/README.md
+# Reed-Solomon
+[![GoDoc][1]][2] [![Build Status][3]][4]
+[1]: https://godoc.org/github.com/klauspost/reedsolomon?status.svg
+[2]: https://godoc.org/github.com/klauspost/reedsolomon
+[3]: https://travis-ci.org/klauspost/reedsolomon.svg?branch=master
+[4]: https://travis-ci.org/klauspost/reedsolomon
+Reed-Solomon Erasure Coding in Go, with speeds exceeding 1GB/s/cpu core implemented in pure Go.
+This is a Go port of the [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) library released by [Backblaze](http://backblaze.com), with some additional optimizations.
+For an introduction on erasure coding, see the post on the [Backblaze blog](https://www.backblaze.com/blog/reed-solomon/).
+Package home: https://github.com/klauspost/reedsolomon
+Godoc: https://godoc.org/github.com/klauspost/reedsolomon
+# Installation
+To get the package use the standard:
+```bash
+go get -u github.com/klauspost/reedsolomon
+```
+# Changes
+## March 6, 2019
+The pure Go implementation is about 30% faster. Minor tweaks to assembler implementations.
+## February 8, 2019
+AVX512 accelerated version added for Intel Skylake CPUs. This can give up to a 4x speed improvement as compared to AVX2. See [here](https://github.com/klauspost/reedsolomon#performance-on-avx512) for more details.
+## December 18, 2018
+Assembly code for ppc64le has been contributed, this boosts performance by about 10x on this platform.
+## November 18, 2017
+Added [WithAutoGoroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithAutoGoroutines) which will attempt to calculate the optimal number of goroutines to use based on your expected shard size and detected CPU.
+## October 1, 2017
+* [Cauchy Matrix](https://godoc.org/github.com/klauspost/reedsolomon#WithCauchyMatrix) is now an option. Thanks to [templexxx](https://github.com/templexxx) for the basis of this.
+* Default maximum number of [goroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithMaxGoroutines) has been increased for better multi-core scaling.
+* After several requests the Reconstruct and ReconstructData now slices of zero length but sufficient capacity to be used instead of allocating new memory.
+## August 26, 2017
+*  The [`Encoder()`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) now contains an `Update` function contributed by [chenzhongtao](https://github.com/chenzhongtao).
+* [Frank Wessels](https://github.com/fwessels) kindly contributed ARM 64 bit assembly, which gives a huge performance boost on this platform.
+## July 20, 2017
+`ReconstructData` added to [`Encoder`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) interface. This can cause compatibility issues if you implement your own Encoder. A simple workaround can be added:
+```Go
+func (e *YourEnc) ReconstructData(shards [][]byte) error {
+	return ReconstructData(shards)
+}
+```
+You can of course also do your own implementation. The [`StreamEncoder`](https://godoc.org/github.com/klauspost/reedsolomon#StreamEncoder) handles this without modifying the interface. This is a good lesson on why returning interfaces is not a good design.
+# Usage
+This section assumes you know the basics of Reed-Solomon encoding. A good start is this [Backblaze blog post](https://www.backblaze.com/blog/reed-solomon/).
+This package performs the calculation of the parity sets. The usage is therefore relatively simple.
+First of all, you need to choose your distribution of data and parity shards. A 'good' distribution is very subjective, and will depend a lot on your usage scenario. A good starting point is above 5 and below 257 data shards (the maximum supported number), and the number of parity shards to be 2 or above, and below the number of data shards.
+To create an encoder with 10 data shards (where your data goes) and 3 parity shards (calculated):
+```Go
+    enc, err := reedsolomon.New(10, 3)
+```
+This encoder will work for all parity sets with this distribution of data and parity shards. The error will only be set if you specify 0 or negative values in any of the parameters, or if you specify more than 256 data shards.
+The you send and receive data  is a simple slice of byte slices; `[][]byte`. In the example above, the top slice must have a length of 13.
+```Go
+    data := make([][]byte, 13)
+```
+You should then fill the 10 first slices with *equally sized* data, and create parity shards that will be populated with parity data. In this case we create the data in memory, but you could for instance also use [mmap](https://github.com/edsrzf/mmap-go) to map files.
+```Go
+    // Create all shards, size them at 50000 each
+    for i := range input {
+      data[i] := make([]byte, 50000)
+    }
+  // Fill some data into the data shards
+    for i, in := range data[:10] {
+      for j:= range in {
+         in[j] = byte((i+j)&0xff)
+      }
+    }
+```
+To populate the parity shards, you simply call `Encode()` with your data.
+```Go
+    err = enc.Encode(data)
+```
+The only cases where you should get an error is, if the data shards aren't of equal size. The last 3 shards now contain parity data. You can verify this by calling `Verify()`:
+```Go
+    ok, err = enc.Verify(data)
+```
+The final (and important) part is to be able to reconstruct missing shards. For this to work, you need to know which parts of your data is missing. The encoder *does not know which parts are invalid*, so if data corruption is a likely scenario, you need to implement a hash check for each shard. If a byte has changed in your set, and you don't know which it is, there is no way to reconstruct the data set.
+To indicate missing data, you set the shard to nil before calling `Reconstruct()`:
+```Go
+    // Delete two data shards
+    data[3] = nil
+    data[7] = nil
+    // Reconstruct the missing shards
+    err := enc.Reconstruct(data)
+```
+The missing data and parity shards will be recreated. If more than 3 shards are missing, the reconstruction will fail.
+If you are only interested in the data shards (for reading purposes) you can call `ReconstructData()`:
+```Go
+    // Delete two data shards
+    data[3] = nil
+    data[7] = nil
+    // Reconstruct just the missing data shards
+    err := enc.ReconstructData(data)
+```
+So to sum up reconstruction:
+* The number of data/parity shards must match the numbers used for encoding.
+* The order of shards must be the same as used when encoding.
+* You may only supply data you know is valid.
+* Invalid shards should be set to nil.
+For complete examples of an encoder and decoder see the [examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
+# Splitting/Joining Data
+You might have a large slice of data. To help you split this, there are some helper functions that can split and join a single byte slice.
+```Go
+   bigfile, _ := ioutil.Readfile("myfile.data")
+   // Split the file
+   split, err := enc.Split(bigfile)
+```
+This will split the file into the number of data shards set when creating the encoder and create empty parity shards. 
+An important thing to note is that you have to *keep track of the exact input size*. If the size of the input isn't divisible by the number of data shards, extra zeros will be inserted in the last shard.
+To join a data set, use the `Join()` function, which will join the shards and write it to the `io.Writer` you supply: 
+```Go
+   // Join a data set and write it to io.Discard.
+   err = enc.Join(io.Discard, data, len(bigfile))
+```
+# Streaming/Merging
+It might seem like a limitation that all data should be in memory, but an important property is that *as long as the number of data/parity shards are the same, you can merge/split data sets*, and they will remain valid as a separate set.
+```Go
+    // Split the data set of 50000 elements into two of 25000
+    splitA := make([][]byte, 13)
+    splitB := make([][]byte, 13)
+    // Merge into a 100000 element set
+    merged := make([][]byte, 13)
+    for i := range data {
+      splitA[i] = data[i][:25000]
+      splitB[i] = data[i][25000:]
+      // Concatenate it to itself
+	  merged[i] = append(make([]byte, 0, len(data[i])*2), data[i]...)
+	  merged[i] = append(merged[i], data[i]...)
+    }
+    // Each part should still verify as ok.
+    ok, err := enc.Verify(splitA)
+    if ok && err == nil {
+        log.Println("splitA ok")
+    }
+    ok, err = enc.Verify(splitB)
+    if ok && err == nil {
+        log.Println("splitB ok")
+    }
+    ok, err = enc.Verify(merge)
+    if ok && err == nil {
+        log.Println("merge ok")
+    }
+```
+This means that if you have a data set that may not fit into memory, you can split processing into smaller blocks. For the best throughput, don't use too small blocks.
+This also means that you can divide big input up into smaller blocks, and do reconstruction on parts of your data. This doesn't give the same flexibility of a higher number of data shards, but it will be much more performant.
+# Streaming API
+There has been added support for a streaming API, to help perform fully streaming operations, which enables you to do the same operations, but on streams. To use the stream API, use [`NewStream`](https://godoc.org/github.com/klauspost/reedsolomon#NewStream) function to create the encoding/decoding interfaces. You can use [`NewStreamC`](https://godoc.org/github.com/klauspost/reedsolomon#NewStreamC) to ready an interface that reads/writes concurrently from the streams.
+Input is delivered as `[]io.Reader`, output as `[]io.Writer`, and functionality corresponds to the in-memory API. Each stream must supply the same amount of data, similar to how each slice must be similar size with the in-memory API. 
+If an error occurs in relation to a stream, a [`StreamReadError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamReadError) or [`StreamWriteError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamWriteError) will help you determine which stream was the offender.
+There is no buffering or timeouts/retry specified. If you want to add that, you need to add it to the Reader/Writer.
+For complete examples of a streaming encoder and decoder see the [examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
+# Advanced Options
+You can modify internal options which affects how jobs are split between and processed by goroutines.
+To create options, use the WithXXX functions. You can supply options to `New`, `NewStream` and `NewStreamC`. If no Options are supplied, default options are used.
+Example of how to supply options:
+ ```Go
+     enc, err := reedsolomon.New(10, 3, WithMaxGoroutines(25))
+ ```
+# Performance
+Performance depends mainly on the number of parity shards. In rough terms, doubling the number of parity shards will double the encoding time.
+Here are the throughput numbers with some different selections of data and parity shards. For reference each shard is 1MB random data, and 2 CPU cores are used for encoding.
+| Data | Parity | Parity | MB/s   | SSSE3 MB/s  | SSSE3 Speed | Rel. Speed |
+|------|--------|--------|--------|-------------|-------------|------------|
+| 5    | 2      | 40%    | 576,11 | 2599,2      | 451%        | 100,00%    |
+| 10   | 2      | 20%    | 587,73 | 3100,28     | 528%        | 102,02%    |
+| 10   | 4      | 40%    | 298,38 | 2470,97     | 828%        | 51,79%     |
+| 50   | 20     | 40%    | 59,81  | 713,28      | 1193%       | 10,38%     |
+If `runtime.GOMAXPROCS()` is set to a value higher than 1, the encoder will use multiple goroutines to perform the calculations in `Verify`, `Encode` and `Reconstruct`.
+Example of performance scaling on Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz - 4 physical cores, 8 logical cores. The example uses 10 blocks with 16MB data each and 4 parity blocks.
+| Threads | MB/s    | Speed |
+|---------|---------|-------|
+| 1       | 1355,11 | 100%  |
+| 2       | 2339,78 | 172%  |
+| 4       | 3179,33 | 235%  |
+| 8       | 4346,18 | 321%  |
+Benchmarking `Reconstruct()` followed by a `Verify()` (=`all`) versus just calling `ReconstructData()` (=`data`) gives the following result:
+```
+benchmark                            all MB/s     data MB/s    speedup
+BenchmarkReconstruct10x2x10000-8     2011.67      10530.10     5.23x
+BenchmarkReconstruct50x5x50000-8     4585.41      14301.60     3.12x
+BenchmarkReconstruct10x2x1M-8        8081.15      28216.41     3.49x
+BenchmarkReconstruct5x2x1M-8         5780.07      28015.37     4.85x
+BenchmarkReconstruct10x4x1M-8        4352.56      14367.61     3.30x
+BenchmarkReconstruct50x20x1M-8       1364.35      4189.79      3.07x
+BenchmarkReconstruct10x4x16M-8       1484.35      5779.53      3.89x
+```
+# Performance on AVX512
+The performance on AVX512 has been accelerated for Intel CPUs. This gives speedups on a per-core basis of up to 4x compared to AVX2 as can be seen in the following table:
+```
+$ benchcmp avx2.txt avx512.txt
+benchmark                      AVX2 MB/s    AVX512 MB/s   speedup
+BenchmarkEncode8x8x1M-72       1681.35      4125.64       2.45x
+BenchmarkEncode8x4x8M-72       1529.36      5507.97       3.60x
+BenchmarkEncode8x8x8M-72        791.16      2952.29       3.73x
+BenchmarkEncode8x8x32M-72       573.26      2168.61       3.78x
+BenchmarkEncode12x4x12M-72     1234.41      4912.37       3.98x
+BenchmarkEncode16x4x16M-72     1189.59      5138.01       4.32x
+BenchmarkEncode24x8x24M-72      690.68      2583.70       3.74x
+BenchmarkEncode24x8x48M-72      674.20      2643.31       3.92x
+```
+This speedup has been achieved by computing multiple parity blocks in parallel as opposed to one after the other. In doing so it is possible to minimize the memory bandwidth required for loading all data shards. At the same time the calculations are performed in the 512-bit wide ZMM registers and the surplus of ZMM registers (32 in total) is used to keep more data around (most notably the matrix coefficients).
+# Performance on ARM64 NEON
+By exploiting NEON instructions the performance for ARM has been accelerated. Below are the performance numbers for a single core on an ARM Cortex-A53 CPU @ 1.2GHz (Debian 8.0 Jessie running Go: 1.7.4):
+| Data | Parity | Parity | ARM64 Go MB/s | ARM64 NEON MB/s | NEON Speed |
+|------|--------|--------|--------------:|----------------:|-----------:|
+| 5    | 2      | 40%    |           189 |            1304 |       588% |
+| 10   | 2      | 20%    |           188 |            1738 |       925% |
+| 10   | 4      | 40%    |            96 |             839 |       877% |
+# Performance on ppc64le
+The performance for ppc64le has been accelerated. This gives roughly a 10x performance improvement on this architecture as can been seen below:
+```
+benchmark                      old MB/s     new MB/s     speedup
+BenchmarkGalois128K-160        948.87       8878.85      9.36x
+BenchmarkGalois1M-160          968.85       9041.92      9.33x
+BenchmarkGaloisXor128K-160     862.02       7905.00      9.17x
+BenchmarkGaloisXor1M-160       784.60       6296.65      8.03x
+```
+# asm2plan9s
+[asm2plan9s](https://github.com/fwessels/asm2plan9s) is used for assembling the AVX2 instructions into their BYTE/WORD/LONG equivalents.
+# Links
+* [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/).
+* [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon). Compatible java library by Backblaze.
+* [ocaml-reed-solomon-erasure](https://gitlab.com/darrenldl/ocaml-reed-solomon-erasure). Compatible OCaml implementation.
+* [reedsolomon-c](https://github.com/jannson/reedsolomon-c). C version, compatible with output from this package.
+* [Reed-Solomon Erasure Coding in Haskell](https://github.com/NicolasT/reedsolomon). Haskell port of the package with similar performance.
+* [reed-solomon-erasure](https://github.com/darrenldl/reed-solomon-erasure). Compatible Rust implementation.
+* [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests.
+* [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations.
+# License
+This code, as the original [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) is published under an MIT license. See LICENSE file for more information.
--- a/vendor/github.com/klauspost/reedsolomon/appveyor.yml
+++ b/vendor/github.com/klauspost/reedsolomon/appveyor.yml
+os: Visual Studio 2015
+platform: x64
+clone_folder: c:\gopath\src\github.com\klauspost\reedsolomon
+# environment variables
+environment:
+  GOPATH: c:\gopath
+install:
+  - echo %PATH%
+  - echo %GOPATH%
+  - go version
+  - go env
+  - go get -d ./...
+build_script:
+  - go test -v -cpu=2 ./...
+  - go test -cpu=1,2,4 -short -race ./...
--- a/vendor/github.com/klauspost/reedsolomon/galois.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois.go
--- a/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go
--- a/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s
+++ b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s
--- a/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+// Copyright 2015, Klaus Post, see LICENSE for details.
+package reedsolomon
+//go:noescape
+func galMulSSSE3(low, high, in, out []byte)
+//go:noescape
+func galMulSSSE3Xor(low, high, in, out []byte)
+//go:noescape
+func galMulAVX2Xor(low, high, in, out []byte)
+//go:noescape
+func galMulAVX2(low, high, in, out []byte)
+//go:noescape
+func sSE2XorSlice(in, out []byte)
+// This is what the assembler routines do in blocks of 16 bytes:
+/*
+func galMulSSSE3(low, high, in, out []byte) {
+	for n, input := range in {
+		l := input & 0xf
+		h := input >> 4
+		out[n] = low[l] ^ high[h]
+	}
+}
+func galMulSSSE3Xor(low, high, in, out []byte) {
+	for n, input := range in {
+		l := input & 0xf
+		h := input >> 4
+		out[n] ^= low[l] ^ high[h]
+	}
+}
+*/
+func galMulSlice(c byte, in, out []byte, o *options) {
+	var done int
+	if o.useAVX2 {
+		galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+		done = (len(in) >> 5) << 5
+	} else if o.useSSSE3 {
+		galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+		done = (len(in) >> 4) << 4
+	}
+	remain := len(in) - done
+	if remain > 0 {
+		mt := mulTable[c][:256]
+		for i := done; i < len(in); i++ {
+			out[i] = mt[in[i]]
+		}
+	}
+}
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+	var done int
+	if o.useAVX2 {
+		galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+		done = (len(in) >> 5) << 5
+	} else if o.useSSSE3 {
+		galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+		done = (len(in) >> 4) << 4
+	}
+	remain := len(in) - done
+	if remain > 0 {
+		mt := mulTable[c][:256]
+		for i := done; i < len(in); i++ {
+			out[i] ^= mt[in[i]]
+		}
+	}
+}
+// slice galois add
+func sliceXor(in, out []byte, sse2 bool) {
+	var done int
+	if sse2 {
+		sSE2XorSlice(in, out)
+		done = (len(in) >> 4) << 4
+	}
+	remain := len(in) - done
+	if remain > 0 {
+		for i := done; i < len(in); i++ {
+			out[i] ^= in[i]
+		}
+	}
+}
--- a/vendor/github.com/klauspost/reedsolomon/galois_amd64.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.s
--- a/vendor/github.com/klauspost/reedsolomon/galois_arm64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.go
--- a/vendor/github.com/klauspost/reedsolomon/galois_arm64.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.s
--- a/vendor/github.com/klauspost/reedsolomon/galois_noasm.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_noasm.go
+//+build !amd64 noasm appengine gccgo
+//+build !arm64 noasm appengine gccgo
+//+build !ppc64le noasm appengine gccgo
+// Copyright 2015, Klaus Post, see LICENSE for details.
+package reedsolomon
+func galMulSlice(c byte, in, out []byte, o *options) {
+	mt := mulTable[c][:256]
+	out = out[:len(in)]
+	for n, input := range in {
+		out[n] = mt[input]
+	}
+}
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+	mt := mulTable[c][:256]
+	out = out[:len(in)]
+	for n, input := range in {
+		out[n] ^= mt[input]
+	}
+}
+// slice galois add
+func sliceXor(in, out []byte, sse2 bool) {
+	for n, input := range in {
+		out[n] ^= input
+	}
+}
+func (r reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+	panic("unreachable")
+}
--- a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
--- a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
--- a/vendor/github.com/klauspost/reedsolomon/gentables.go
+++ b/vendor/github.com/klauspost/reedsolomon/gentables.go
--- a/vendor/github.com/klauspost/reedsolomon/inversion_tree.go
+++ b/vendor/github.com/klauspost/reedsolomon/inversion_tree.go
--- a/vendor/github.com/klauspost/reedsolomon/matrix.go
+++ b/vendor/github.com/klauspost/reedsolomon/matrix.go
--- a/vendor/github.com/klauspost/reedsolomon/options.go
+++ b/vendor/github.com/klauspost/reedsolomon/options.go
--- a/vendor/github.com/klauspost/reedsolomon/reedsolomon.go
+++ b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go
--- a/vendor/github.com/klauspost/reedsolomon/streaming.go
+++ b/vendor/github.com/klauspost/reedsolomon/streaming.go
--- a/vendor/github.com/templexxx/reedsolomon/.travis.yml
+++ b/vendor/github.com/templexxx/reedsolomon/.travis.yml
-language: go
-go:
-    - 1.9
-install:
-    - go get github.com/templexxx/reedsolomon
-script:
-    - go test -v
--- a/vendor/github.com/templexxx/reedsolomon/README.md
+++ b/vendor/github.com/templexxx/reedsolomon/README.md
--- a/vendor/github.com/templexxx/reedsolomon/matrix.go
+++ b/vendor/github.com/templexxx/reedsolomon/matrix.go
--- a/vendor/github.com/templexxx/reedsolomon/rs.go
+++ b/vendor/github.com/templexxx/reedsolomon/rs.go
--- a/vendor/github.com/templexxx/reedsolomon/rs_amd64.go
+++ b/vendor/github.com/templexxx/reedsolomon/rs_amd64.go
--- a/vendor/github.com/templexxx/reedsolomon/rs_amd64.s
+++ b/vendor/github.com/templexxx/reedsolomon/rs_amd64.s
--- a/vendor/github.com/templexxx/reedsolomon/rs_other.go
+++ b/vendor/github.com/templexxx/reedsolomon/rs_other.go
--- a/vendor/github.com/templexxx/reedsolomon/tbl.go
+++ b/vendor/github.com/templexxx/reedsolomon/tbl.go
--- a/vendor/golang.org/x/net/ipv6/batch.go
+++ b/vendor/golang.org/x/net/ipv6/batch.go
--- a/vendor/golang.org/x/net/ipv6/control.go
+++ b/vendor/golang.org/x/net/ipv6/control.go
--- a/vendor/golang.org/x/net/ipv6/control_rfc2292_unix.go
+++ b/vendor/golang.org/x/net/ipv6/control_rfc2292_unix.go
--- a/vendor/golang.org/x/net/ipv6/control_rfc3542_unix.go
+++ b/vendor/golang.org/x/net/ipv6/control_rfc3542_unix.go
--- a/vendor/golang.org/x/net/ipv6/control_stub.go
+++ b/vendor/golang.org/x/net/ipv6/control_stub.go
--- a/vendor/golang.org/x/net/ipv6/control_unix.go
+++ b/vendor/golang.org/x/net/ipv6/control_unix.go
--- a/vendor/golang.org/x/net/ipv6/control_windows.go
+++ b/vendor/golang.org/x/net/ipv6/control_windows.go
--- a/vendor/golang.org/x/net/ipv6/defs_darwin.go
+++ b/vendor/golang.org/x/net/ipv6/defs_darwin.go
--- a/vendor/golang.org/x/net/ipv6/defs_dragonfly.go
+++ b/vendor/golang.org/x/net/ipv6/defs_dragonfly.go
--- a/vendor/golang.org/x/net/ipv6/defs_freebsd.go
+++ b/vendor/golang.org/x/net/ipv6/defs_freebsd.go
--- a/vendor/golang.org/x/net/ipv6/defs_linux.go
+++ b/vendor/golang.org/x/net/ipv6/defs_linux.go
--- a/vendor/golang.org/x/net/ipv6/defs_netbsd.go
+++ b/vendor/golang.org/x/net/ipv6/defs_netbsd.go
--- a/vendor/golang.org/x/net/ipv6/defs_openbsd.go
+++ b/vendor/golang.org/x/net/ipv6/defs_openbsd.go
--- a/vendor/golang.org/x/net/ipv6/defs_solaris.go
+++ b/vendor/golang.org/x/net/ipv6/defs_solaris.go
--- a/vendor/golang.org/x/net/ipv6/dgramopt.go
+++ b/vendor/golang.org/x/net/ipv6/dgramopt.go
--- a/vendor/golang.org/x/net/ipv6/doc.go
+++ b/vendor/golang.org/x/net/ipv6/doc.go
--- a/vendor/golang.org/x/net/ipv6/endpoint.go
+++ b/vendor/golang.org/x/net/ipv6/endpoint.go
--- a/vendor/golang.org/x/net/ipv6/gen.go
+++ b/vendor/golang.org/x/net/ipv6/gen.go
--- a/vendor/golang.org/x/net/ipv6/genericopt.go
+++ b/vendor/golang.org/x/net/ipv6/genericopt.go
--- a/vendor/golang.org/x/net/ipv6/header.go
+++ b/vendor/golang.org/x/net/ipv6/header.go
--- a/vendor/golang.org/x/net/ipv6/helper.go
+++ b/vendor/golang.org/x/net/ipv6/helper.go
--- a/vendor/golang.org/x/net/ipv6/iana.go
+++ b/vendor/golang.org/x/net/ipv6/iana.go
--- a/vendor/golang.org/x/net/ipv6/icmp.go
+++ b/vendor/golang.org/x/net/ipv6/icmp.go
--- a/vendor/golang.org/x/net/ipv6/icmp_bsd.go
+++ b/vendor/golang.org/x/net/ipv6/icmp_bsd.go
--- a/vendor/golang.org/x/net/ipv6/icmp_linux.go
+++ b/vendor/golang.org/x/net/ipv6/icmp_linux.go
--- a/vendor/golang.org/x/net/ipv6/icmp_solaris.go
+++ b/vendor/golang.org/x/net/ipv6/icmp_solaris.go
--- a/vendor/golang.org/x/net/ipv6/icmp_stub.go
+++ b/vendor/golang.org/x/net/ipv6/icmp_stub.go
--- a/vendor/golang.org/x/net/ipv6/icmp_windows.go
+++ b/vendor/golang.org/x/net/ipv6/icmp_windows.go
--- a/vendor/golang.org/x/net/ipv6/payload.go
+++ b/vendor/golang.org/x/net/ipv6/payload.go
--- a/vendor/golang.org/x/net/ipv6/payload_cmsg.go
+++ b/vendor/golang.org/x/net/ipv6/payload_cmsg.go
--- a/vendor/golang.org/x/net/ipv6/payload_cmsg_go1_8.go
+++ b/vendor/golang.org/x/net/ipv6/payload_cmsg_go1_8.go
--- a/vendor/golang.org/x/net/ipv6/payload_cmsg_go1_9.go
+++ b/vendor/golang.org/x/net/ipv6/payload_cmsg_go1_9.go
--- a/vendor/golang.org/x/net/ipv6/payload_nocmsg.go
+++ b/vendor/golang.org/x/net/ipv6/payload_nocmsg.go
--- a/vendor/golang.org/x/net/ipv6/sockopt.go
+++ b/vendor/golang.org/x/net/ipv6/sockopt.go
--- a/vendor/golang.org/x/net/ipv6/sockopt_posix.go
+++ b/vendor/golang.org/x/net/ipv6/sockopt_posix.go
--- a/vendor/golang.org/x/net/ipv6/sockopt_stub.go
+++ b/vendor/golang.org/x/net/ipv6/sockopt_stub.go
--- a/vendor/golang.org/x/net/ipv6/sys_asmreq.go
+++ b/vendor/golang.org/x/net/ipv6/sys_asmreq.go
--- a/vendor/golang.org/x/net/ipv6/sys_asmreq_stub.go
+++ b/vendor/golang.org/x/net/ipv6/sys_asmreq_stub.go
--- a/vendor/golang.org/x/net/ipv6/sys_bpf.go
+++ b/vendor/golang.org/x/net/ipv6/sys_bpf.go
--- a/vendor/golang.org/x/net/ipv6/sys_bpf_stub.go
+++ b/vendor/golang.org/x/net/ipv6/sys_bpf_stub.go
--- a/vendor/golang.org/x/net/ipv6/sys_bsd.go
+++ b/vendor/golang.org/x/net/ipv6/sys_bsd.go
--- a/vendor/golang.org/x/net/ipv6/sys_darwin.go
+++ b/vendor/golang.org/x/net/ipv6/sys_darwin.go
--- a/vendor/golang.org/x/net/ipv6/sys_freebsd.go
+++ b/vendor/golang.org/x/net/ipv6/sys_freebsd.go
--- a/vendor/golang.org/x/net/ipv6/sys_linux.go
+++ b/vendor/golang.org/x/net/ipv6/sys_linux.go
--- a/vendor/golang.org/x/net/ipv6/sys_solaris.go
+++ b/vendor/golang.org/x/net/ipv6/sys_solaris.go
--- a/vendor/golang.org/x/net/ipv6/sys_ssmreq.go
+++ b/vendor/golang.org/x/net/ipv6/sys_ssmreq.go
--- a/vendor/golang.org/x/net/ipv6/sys_ssmreq_stub.go
+++ b/vendor/golang.org/x/net/ipv6/sys_ssmreq_stub.go
--- a/vendor/golang.org/x/net/ipv6/sys_stub.go
+++ b/vendor/golang.org/x/net/ipv6/sys_stub.go
--- a/vendor/golang.org/x/net/ipv6/sys_windows.go
+++ b/vendor/golang.org/x/net/ipv6/sys_windows.go
--- a/vendor/golang.org/x/net/ipv6/zsys_darwin.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_darwin.go
--- a/vendor/golang.org/x/net/ipv6/zsys_dragonfly.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_dragonfly.go
--- a/vendor/golang.org/x/net/ipv6/zsys_freebsd_386.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_freebsd_386.go
--- a/vendor/golang.org/x/net/ipv6/zsys_freebsd_amd64.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_freebsd_amd64.go
--- a/vendor/golang.org/x/net/ipv6/zsys_freebsd_arm.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_freebsd_arm.go
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_386.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_linux_386.go
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_amd64.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_linux_amd64.go
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_arm.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_linux_arm.go
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_arm64.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_linux_arm64.go
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_mips.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_linux_mips.go
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_mips64.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_linux_mips64.go
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_mips64le.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_linux_mips64le.go
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_mipsle.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_linux_mipsle.go
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_ppc.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_linux_ppc.go
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_ppc64.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_linux_ppc64.go
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_ppc64le.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_linux_ppc64le.go
--- a/vendor/golang.org/x/net/ipv6/zsys_linux_s390x.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_linux_s390x.go
--- a/vendor/golang.org/x/net/ipv6/zsys_netbsd.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_netbsd.go
--- a/vendor/golang.org/x/net/ipv6/zsys_openbsd.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_openbsd.go
--- a/vendor/golang.org/x/net/ipv6/zsys_solaris.go
+++ b/vendor/golang.org/x/net/ipv6/zsys_solaris.go
--- a/vendor/modules.txt
+++ b/vendor/modules.txt