未验证 提交 6036ef11 编写于 作者: D Davies Liu 提交者: GitHub

Export metrics to Prometheus (#181)

* Export metrics to Prometheus

* ignore error

* listen in background

* add metrics for request to object store

* metrics for open files/dirs

* fix lint
上级 4ff517c6
......@@ -31,17 +31,68 @@ import (
"github.com/google/gops/agent"
"github.com/juicedata/godaemon"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/urfave/cli/v2"
"github.com/juicedata/juicefs/pkg/chunk"
"github.com/juicedata/juicefs/pkg/fuse"
"github.com/juicedata/juicefs/pkg/meta"
"github.com/juicedata/juicefs/pkg/object"
"github.com/juicedata/juicefs/pkg/usage"
"github.com/juicedata/juicefs/pkg/utils"
"github.com/juicedata/juicefs/pkg/version"
"github.com/juicedata/juicefs/pkg/vfs"
)
// Gauges describing the mount process and the mounted volume.

// cpu is the "cpu_usage" gauge: accumulated CPU usage in seconds.
var cpu = prometheus.NewGauge(prometheus.GaugeOpts{
	Name: "cpu_usage",
	Help: "Accumulated CPU usage in seconds.",
})

// memory is the "memory" gauge: used memory in bytes.
var memory = prometheus.NewGauge(prometheus.GaugeOpts{
	Name: "memory",
	Help: "Used memory in bytes.",
})

// uptime is the "uptime" gauge: total running time in seconds.
var uptime = prometheus.NewGauge(prometheus.GaugeOpts{
	Name: "uptime",
	Help: "Total running time in seconds.",
})

// usedSpace is the "used_space" gauge: total used space in bytes.
var usedSpace = prometheus.NewGauge(prometheus.GaugeOpts{
	Name: "used_space",
	Help: "Total used space in bytes.",
})

// usedInodes is the "used_inodes" gauge: total number of inodes in use.
var usedInodes = prometheus.NewGauge(prometheus.GaugeOpts{
	Name: "used_inodes",
	Help: "Total number of inodes.",
})
// updateMetrics registers the process and volume gauges with the default
// Prometheus registry and then refreshes them every five seconds.
//
// The loop never terminates, so the function does not return; run it in
// its own goroutine. Because MustRegister panics on a failed registration,
// it should be started at most once per process.
func updateMetrics(m meta.Meta) {
	prometheus.MustRegister(cpu, memory, uptime, usedSpace, usedInodes)

	ctx := meta.Background
	begin := time.Now()
	// The post statement provides the pause between two refresh rounds.
	for ; ; time.Sleep(5 * time.Second) {
		uptime.Set(time.Since(begin).Seconds())

		// CPU time is the sum of system and user time of this process.
		usage := utils.GetRusage()
		cpu.Set(usage.GetStime() + usage.GetUtime())

		_, resident := utils.MemoryUsage()
		memory.Set(float64(resident))

		// Volume statistics are only published when StatFS returns zero;
		// otherwise the gauges keep their previous values.
		var total, avail, inodesUsed, inodesAvail uint64
		if errno := m.StatFS(ctx, &total, &avail, &inodesUsed, &inodesAvail); errno == 0 {
			usedSpace.Set(float64(total - avail))
			usedInodes.Set(float64(inodesUsed))
		}
	}
}
func makeDaemon(onExit func(int) error) error {
_, _, err := godaemon.MakeDaemon(&godaemon.DaemonAttr{OnExit: onExit})
return err
......@@ -140,8 +191,9 @@ func mount(c *cli.Context) error {
logger.Fatalf("object storage: %s", err)
}
logger.Infof("Data use %s", blob)
logger.Infof("Mounting volume %s at %s ...", format.Name, mp)
blob = object.WithMetrics(blob)
logger.Infof("Mounting volume %s at %s ...", format.Name, mp)
if c.Bool("background") && os.Getenv("JFS_FOREGROUND") == "" {
err := makeDaemon(func(stage int) error {
if stage != 0 {
......@@ -188,6 +240,22 @@ func mount(c *cli.Context) error {
vfs.Init(conf, m, store)
installHandler(mp)
go updateMetrics(m)
http.Handle("/metrics", promhttp.HandlerFor(
prometheus.DefaultGatherer,
promhttp.HandlerOpts{
// Opt into OpenMetrics to support exemplars.
EnableOpenMetrics: true,
},
))
prometheus.MustRegister(prometheus.NewBuildInfoCollector())
go func() {
err = http.ListenAndServe(c.String("metrics"), nil)
if err != nil {
logger.Errorf("listen and serve for metrics: %s", err)
}
}()
if !c.Bool("no-usage-report") {
go usage.ReportUsage(m, version.Version())
}
......@@ -303,6 +371,11 @@ func mountFlags() *cli.Command {
Usage: "cache only random/small read",
},
&cli.StringFlag{
Name: "metrics",
Value: ":9567",
Usage: "address to export metrics",
},
&cli.BoolFlag{
Name: "no-usage-report",
Usage: "do not send usage report",
......
......@@ -11,7 +11,7 @@ require (
github.com/IBM/ibm-cos-sdk-go v1.6.0
github.com/NetEase-Object-Storage/nos-golang-sdk v0.0.0-20171031020902-cc8892cb2b05
github.com/aliyun/aliyun-oss-go-sdk v2.1.0+incompatible
github.com/aws/aws-sdk-go v1.12.10
github.com/aws/aws-sdk-go v1.27.0
github.com/baidubce/bce-sdk-go v0.0.0-20180401121131-aa0c7bd66b01
github.com/ceph/go-ceph v0.4.0
github.com/colinmarc/hdfs/v2 v2.2.0
......@@ -33,6 +33,7 @@ require (
github.com/pengsrc/go-shared v0.2.0 // indirect
github.com/pkg/errors v0.9.1
github.com/pkg/sftp v1.10.0
github.com/prometheus/client_golang v1.9.0
github.com/qiniu/api.v7/v7 v7.8.0
github.com/satori/uuid v1.2.0 // indirect
github.com/sirupsen/logrus v1.7.0
......@@ -42,6 +43,6 @@ require (
github.com/yunify/qingstor-sdk-go v2.2.15+incompatible
golang.org/x/crypto v0.0.0-20201112155050-0c6587e931a9
golang.org/x/oauth2 v0.0.0-20190517181255-950ef44c6e07
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f
golang.org/x/sys v0.0.0-20201214210602-f9fddec55a1e
google.golang.org/api v0.5.0
)
此差异已折叠。
......@@ -560,11 +560,17 @@ func (r *redisMeta) txn(ctx Context, txf func(tx *redis.Tx) error, keys ...strin
var khash = fnv.New32()
_, _ = khash.Write([]byte(keys[0]))
l := &r.txlocks[int(khash.Sum32())%len(r.txlocks)]
start := time.Now()
defer func() {
used := time.Since(start)
redisTxDist.Observe(used.Seconds())
}()
l.Lock()
defer l.Unlock()
for i := 0; i < 50; i++ {
err = r.rdb.Watch(ctx, txf, keys...)
if err == redis.TxFailedErr {
redisTxRestart.Add(1)
time.Sleep(time.Microsecond * 100 * time.Duration(rand.Int()%(i+1)))
continue
}
......
/*
* JuiceFS, Copyright (C) 2021 Juicedata, Inc.
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package meta
import "github.com/prometheus/client_golang/prometheus"
// redisTxDist is the histogram "redis_tx_durations_histogram_seconds". It
// uses 30 exponential buckets starting at 0.0001 with a growth factor of
// 1.5.
var redisTxDist = prometheus.NewHistogram(prometheus.HistogramOpts{
	Name:    "redis_tx_durations_histogram_seconds",
	Help:    "Redis transactions latency distributions.",
	Buckets: prometheus.ExponentialBuckets(0.0001, 1.5, 30),
})

// redisTxRestart is the counter "redis_transaction_restart", the number of
// times a Redis transaction is restarted.
var redisTxRestart = prometheus.NewCounter(prometheus.CounterOpts{
	Name: "redis_transaction_restart",
	Help: "The number of times a Redis transaction is restarted.",
})
// init registers the Redis transaction metrics with the default Prometheus
// registry when the package is loaded. MustRegister panics if a
// registration fails.
func init() {
	prometheus.MustRegister(redisTxDist, redisTxRestart)
}
/*
* JuiceFS, Copyright (C) 2021 Juicedata, Inc.
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package object
import (
"io"
"time"
"github.com/prometheus/client_golang/prometheus"
)
// NOTE(review): no prometheus.MustRegister/Register call for the two
// collectors below is visible in this file; confirm they are registered
// somewhere, since unregistered collectors are not gathered.

// reqsHistogram is the histogram vector
// "object_request_durations_histogram_seconds", partitioned by the "method"
// label. It uses 20 exponential buckets starting at 0.01 with a growth
// factor of 1.5.
var reqsHistogram = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:    "object_request_durations_histogram_seconds",
		Help:    "Object requests latency distributions.",
		Buckets: prometheus.ExponentialBuckets(0.01, 1.5, 20),
	},
	[]string{"method"},
)

// reqErrors is the counter "object_request_errors", counting failed
// requests to the object store.
var reqErrors = prometheus.NewCounter(prometheus.CounterOpts{
	Name: "object_request_errors",
	Help: "failed requests to object store",
})
// withMetrics decorates an ObjectStorage. Methods it does not redefine are
// promoted unchanged from the embedded value.
type withMetrics struct {
	ObjectStorage
}

// WithMetrics returns an object storage that wraps os and exposes metrics
// of the requests sent through it.
func WithMetrics(os ObjectStorage) ObjectStorage {
	wrapped := withMetrics{ObjectStorage: os}
	return &wrapped
}
// track runs fn, records how long it took in reqsHistogram under the given
// method label, and increments reqErrors when fn reports an error.
// The error returned by fn is passed back to the caller unchanged.
func (p *withMetrics) track(method string, fn func() error) error {
	begin := time.Now()
	result := fn()
	elapsed := time.Since(begin).Seconds()

	reqsHistogram.WithLabelValues(method).Observe(elapsed)
	if result == nil {
		return nil
	}
	reqErrors.Add(1)
	return result
}
// Head forwards to the wrapped storage's Head and records the call under
// the "HEAD" method label.
func (p *withMetrics) Head(key string) (obj *Object, err error) {
	call := func() error {
		var callErr error
		obj, callErr = p.ObjectStorage.Head(key)
		return callErr
	}
	err = p.track("HEAD", call)
	return obj, err
}
// Get forwards to the wrapped storage's Get and records the call under the
// "GET" method label. Only the call that returns the reader is timed;
// reading from the returned io.ReadCloser happens outside of track.
func (p *withMetrics) Get(key string, off, limit int64) (r io.ReadCloser, err error) {
	call := func() error {
		var callErr error
		r, callErr = p.ObjectStorage.Get(key, off, limit)
		return callErr
	}
	err = p.track("GET", call)
	return r, err
}
// Put forwards to the wrapped storage's Put and records the call under the
// "PUT" method label.
func (p *withMetrics) Put(key string, in io.Reader) error {
	upload := func() error { return p.ObjectStorage.Put(key, in) }
	return p.track("PUT", upload)
}
// Delete forwards to the wrapped storage's Delete and records the call
// under the "DELETE" method label.
func (p *withMetrics) Delete(key string) error {
	remove := func() error { return p.ObjectStorage.Delete(key) }
	return p.track("DELETE", remove)
}
// Compile-time check that *withMetrics implements ObjectStorage.
var _ ObjectStorage = (*withMetrics)(nil)
/*
* JuiceFS, Copyright (C) 2021 Juicedata, Inc.
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package utils
import (
"bytes"
"io/ioutil"
"strconv"
"syscall"
)
// MemoryUsage reports the virtual and resident memory sizes of the current
// process, in bytes.
//
// It first reads /proc/self/stat. If that file is missing or cannot be
// parsed, it falls back to getrusage(2) and returns ru_maxrss for both
// values. Both results are zero when neither source is usable.
//
// NOTE(review): ru_maxrss is a peak value and, per the platform man pages,
// its unit differs between systems (kilobytes on Linux, bytes on macOS);
// the fallback returns it unscaled, as before — confirm whether it should
// be converted.
func MemoryUsage() (virt, rss uint64) {
	if stat, err := ioutil.ReadFile("/proc/self/stat"); err == nil {
		// Use the real page size instead of assuming 4 KiB pages.
		pageSize := uint64(syscall.Getpagesize())
		if v, r, ok := parseProcStat(stat, pageSize); ok {
			return v, r
		}
	}

	var ru syscall.Rusage
	if err := syscall.Getrusage(syscall.RUSAGE_SELF, &ru); err == nil {
		return uint64(ru.Maxrss), uint64(ru.Maxrss)
	}
	return 0, 0
}

// parseProcStat extracts vsize (field 23, bytes) and rss (field 24, pages)
// from the contents of a /proc/[pid]/stat file and converts rss to bytes
// using pageSize. ok is false when the content is malformed.
func parseProcStat(stat []byte, pageSize uint64) (virt, rss uint64, ok bool) {
	// Field 2 (comm) is wrapped in parentheses and may itself contain
	// spaces or parentheses, so skip past the last ')' before splitting.
	end := bytes.LastIndexByte(stat, ')')
	if end < 0 {
		return 0, 0, false
	}
	fields := bytes.Fields(stat[end+1:])

	// fields[0] is field 3 (state), hence the offset of 3.
	const vsizeIdx, rssIdx = 23 - 3, 24 - 3
	if len(fields) <= rssIdx {
		return 0, 0, false
	}
	v, err := strconv.ParseUint(string(fields[vsizeIdx]), 10, 64)
	if err != nil {
		return 0, 0, false
	}
	pages, err := strconv.ParseUint(string(fields[rssIdx]), 10, 64)
	if err != nil {
		return 0, 0, false
	}
	return v, pages * pageSize, true
}
/*
* JuiceFS, Copyright (C) 2021 Juicedata, Inc.
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package utils
import "syscall"
// Rusage wraps syscall.Rusage and adds helpers that expose the CPU times
// as fractional seconds.
type Rusage struct {
	syscall.Rusage
}

// timevalSeconds converts a seconds/microseconds pair into seconds.
func timevalSeconds(tv syscall.Timeval) float64 {
	whole, micros := float64(tv.Sec), float64(tv.Usec)
	return whole + micros/1e6
}

// GetUtime returns the user CPU time in seconds.
func (ru *Rusage) GetUtime() float64 {
	return timevalSeconds(ru.Utime)
}

// GetStime returns the system CPU time in seconds.
func (ru *Rusage) GetStime() float64 {
	return timevalSeconds(ru.Stime)
}

// GetRusage returns the resource usage of the current process. An error
// from getrusage is deliberately ignored; in that case the returned value
// holds whatever the call left in the zero-initialised struct.
func GetRusage() *Rusage {
	usage := new(Rusage)
	_ = syscall.Getrusage(syscall.RUSAGE_SELF, &usage.Rusage)
	return usage
}
......@@ -19,8 +19,22 @@ import (
"fmt"
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
)
// opsDurationsHistogram is the histogram
// "fuse_ops_durations_histogram_seconds". It uses 30 exponential buckets
// starting at 0.0001 with a growth factor of 1.5.
var opsDurationsHistogram = prometheus.NewHistogram(
	prometheus.HistogramOpts{
		Name:    "fuse_ops_durations_histogram_seconds",
		Help:    "Operations latency distributions.",
		Buckets: prometheus.ExponentialBuckets(0.0001, 1.5, 30),
	},
)

// init registers opsDurationsHistogram with the default Prometheus
// registry; MustRegister panics if the registration fails.
func init() {
	prometheus.MustRegister(opsDurationsHistogram)
}
type logReader struct {
sync.Mutex
buffer chan []byte
......@@ -38,6 +52,7 @@ func init() {
func logit(ctx Context, format string, args ...interface{}) {
used := ctx.Duration()
opsDurationsHistogram.Observe(used.Seconds())
readerLock.Lock()
defer readerLock.Unlock()
if len(readers) == 0 && used < time.Second*10 {
......
......@@ -22,6 +22,18 @@ import (
"github.com/juicedata/juicefs/pkg/meta"
"github.com/juicedata/juicefs/pkg/utils"
"github.com/prometheus/client_golang/prometheus"
)
// handlersGause is the gauge "fuse_open_handlers". Its value is computed
// on demand as the current length of the handles map, read while holding
// hanleLock.
var handlersGause = prometheus.NewGaugeFunc(
	prometheus.GaugeOpts{
		Name: "fuse_open_handlers",
		Help: "number of open files and directories.",
	},
	func() float64 {
		hanleLock.Lock()
		count := len(handles)
		hanleLock.Unlock()
		return float64(count)
	},
)
type handle struct {
......
......@@ -23,6 +23,7 @@ import (
"github.com/juicedata/juicefs/pkg/chunk"
"github.com/juicedata/juicefs/pkg/meta"
"github.com/juicedata/juicefs/pkg/utils"
"github.com/prometheus/client_golang/prometheus"
)
type Ino = meta.Ino
......@@ -51,6 +52,19 @@ var (
writer DataWriter
)
// readSizeHistogram is the histogram "fuse_read_size_bytes". It uses 32
// linear buckets, 4096 wide, starting at 4096.
var readSizeHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
	Name:    "fuse_read_size_bytes",
	Help:    "size of read distributions.",
	Buckets: prometheus.LinearBuckets(4096, 4096, 32),
})

// writtenSizeHistogram is the histogram "fuse_written_size_bytes". It uses
// the same bucket layout as readSizeHistogram.
var writtenSizeHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
	Name:    "fuse_written_size_bytes",
	Help:    "size of write distributions.",
	Buckets: prometheus.LinearBuckets(4096, 4096, 32),
})
func Lookup(ctx Context, parent Ino, name string) (entry *meta.Entry, err syscall.Errno) {
defer func() {
logit(ctx, "lookup (%d,%s): %s%s", parent, name, strerr(err), (*Entry)(entry))
......@@ -495,6 +509,7 @@ func Read(ctx Context, ino Ino, buf []byte, off uint64, fh uint64) (n int, err s
}
defer func() {
readSizeHistogram.Observe(float64(n))
logit(ctx, "read (%d,%d,%d): %s (%d)", ino, size, off, strerr(err), n)
}()
h := findHandle(ino, fh)
......@@ -561,6 +576,7 @@ func Write(ctx Context, ino Ino, buf []byte, off, fh uint64) (err syscall.Errno)
if err != 0 {
return
}
writtenSizeHistogram.Observe(float64(len(buf)))
reader.Truncate(ino, writer.GetLength(ino))
reader.Invalidate(ino, off, uint64(len(buf)))
return
......@@ -848,4 +864,7 @@ func Init(conf *Config, m_ meta.Meta, store chunk.ChunkStore) {
reader = NewDataReader(conf, m, store)
writer = NewDataWriter(conf, m, store)
handles = make(map[Ino][]*handle)
prometheus.MustRegister(readSizeHistogram)
prometheus.MustRegister(writtenSizeHistogram)
prometheus.MustRegister(handlersGause)
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册