未验证 提交 02d8b408 编写于 作者: D Davies Liu 提交者: GitHub

push metrics to Prometheus Pushgateway from Java SDK (#327)

* push metrics to Prometheus Pushgateway from Java SDK

* add logging for failed push

* fix metrics

* remove print

* add license header

* add docs
上级 6f3115b1
......@@ -34,6 +34,7 @@ import (
"github.com/juicedata/juicefs/pkg/chunk"
"github.com/juicedata/juicefs/pkg/meta"
"github.com/juicedata/juicefs/pkg/metric"
"github.com/juicedata/juicefs/pkg/object"
"github.com/juicedata/juicefs/pkg/usage"
"github.com/juicedata/juicefs/pkg/utils"
......@@ -41,54 +42,6 @@ import (
"github.com/juicedata/juicefs/pkg/vfs"
)
// Gauges describing the running process and the mounted volume.
// They are registered and refreshed periodically by updateMetrics.
var (
// cpu is set to the sum of system and user CPU time from utils.GetRusage.
cpu = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "cpu_usage",
Help: "Accumulated CPU usage in seconds.",
})
// memory is set to the second value returned by utils.MemoryUsage
// (named rss at the call site; presumably the resident set size).
memory = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "memory",
Help: "Used memory in bytes.",
})
// uptime is set to the seconds elapsed since updateMetrics started.
uptime = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "uptime",
Help: "Total running time in seconds.",
})
// usedSpace is total minus available space as reported by Meta.StatFS.
usedSpace = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "used_space",
Help: "Total used space in bytes.",
})
// usedInodes is the used-inode count as reported by Meta.StatFS.
usedInodes = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "used_inodes",
Help: "Total number of inodes.",
})
)
// updateMetrics registers the process and volume gauges with the default
// Prometheus registerer and then refreshes all of them every five seconds.
//
// It never returns, so it is meant to be started in its own goroutine.
// MustRegister panics if any of the gauges cannot be registered (for example
// when this function is invoked a second time).
func updateMetrics(m meta.Meta) {
	prometheus.MustRegister(cpu, memory, uptime, usedSpace, usedInodes)

	const period = 5 * time.Second
	background := meta.Background
	begin := time.Now()
	// The post statement sleeps after every pass, so the first sample is
	// taken immediately.
	for ; ; time.Sleep(period) {
		uptime.Set(time.Since(begin).Seconds())

		usage := utils.GetRusage()
		cpu.Set(usage.GetStime() + usage.GetUtime())

		_, rss := utils.MemoryUsage()
		memory.Set(float64(rss))

		// Volume statistics are only published when StatFS succeeds (errno 0);
		// on failure the gauges keep their previous values.
		var total, avail, inodesUsed, inodesFree uint64
		if st := m.StatFS(background, &total, &avail, &inodesUsed, &inodesFree); st == 0 {
			usedSpace.Set(float64(total - avail))
			usedInodes.Set(float64(inodesUsed))
		}
	}
}
func installHandler(mp string) {
// Go will catch all the signals
signal.Ignore(syscall.SIGPIPE)
......@@ -150,12 +103,12 @@ func mount(c *cli.Context) error {
logger.Fatalf("load setting: %s", err)
}
mntLabels := prometheus.Labels{
metricLabels := prometheus.Labels{
"vol_name": format.Name,
"mp": mp,
}
// Wrap the default registry, all prometheus.MustRegister() calls should be afterwards
prometheus.DefaultRegisterer = prometheus.WrapRegistererWith(mntLabels,
prometheus.DefaultRegisterer = prometheus.WrapRegistererWith(metricLabels,
prometheus.WrapRegistererWithPrefix("juicefs_", prometheus.DefaultRegisterer))
chunkConf := chunk.Config{
......@@ -244,7 +197,7 @@ func mount(c *cli.Context) error {
meta.InitMetrics()
vfs.InitMetrics()
go updateMetrics(m)
go metric.UpdateMetrics(m)
http.Handle("/metrics", promhttp.HandlerFor(
prometheus.DefaultGatherer,
promhttp.HandlerOpts{
......
......@@ -76,6 +76,7 @@ $ make
| ------------------ | ------ | ------------------------------------------------------------ |
| `juicefs.access-log` | | 访问日志的路径。需要所有应用都有写权限,可以配置为 `/tmp/juicefs.access.log`。该文件会自动轮转,保留最近 7 个文件。 |
| `juicefs.superuser` | `hdfs` | 超级用户 |
| `juicefs.push-gateway` | | Prometheus Push Gateway 的地址,`host:port` 形式。 |
| `juicefs.no-usage-report` | `false` | 是否上报数据,它只上报诸如版本号等使用量数据,不包含任何用户信息。 |
当使用多个 JuiceFS 文件系统时,上述所有配置项均可对单个文件系统指定,需要将文件系统名字 `{JFS_NAME}` 放在配置项的中间,比如:
......@@ -178,6 +179,25 @@ CREATE TABLE IF NOT EXISTS person
) LOCATION 'jfs://{JFS_NAME}/tmp/person';
```
## 指标收集
JuiceFS SDK 支持把运行指标以 [Prometheus](https://prometheus.io/) 格式上报到 [Push Gateway](https://github.com/prometheus/pushgateway),然后可以通过 [Grafana](https://grafana.com/) 以及我们[预定义的模板](../en/k8s_grafana_template.json)来展示收集的运行指标。
请用如下参数启用指标收集:
```xml
<property>
<name>juicefs.push-gateway</name>
<value>host:port</value>
</property>
```
**注意**:每一个使用 JuiceFS Java SDK 的进程会有唯一的指标,而 Push Gateway 会一直记住所有收集到的指标,导致指标数持续积累占用过多内存,也会使得 Prometheus 抓取指标时变慢,建议定期清理 Push Gateway 上 `job` 为 `juicefs` 的指标。建议每个小时使用下面的命令清理一次,运行中的 Java SDK 会在指标清空后继续更新,基本不影响使用。
```bash
curl -X DELETE http://host:9091/metrics/job/juicefs
```
## Benchmark
当部署完成后,可以运行 JuiceFS 自带的压测工具进行性能测试。
......
......@@ -158,13 +158,14 @@ func NewFileSystem(conf *vfs.Config, m meta.Meta, d chunk.ChunkStore) (*FileSyst
}
func (fs *FileSystem) log(ctx LogContext, format string, args ...interface{}) {
used := ctx.Duration()
opsDurationsHistogram.Observe(used.Seconds())
if fs.logBuffer == nil {
return
}
now := utils.Now()
cmd := fmt.Sprintf(format, args...)
ts := now.Format("2006.01.02 15:04:05.000000")
used := ctx.Duration()
cmd += fmt.Sprintf(" <%.6f>", used.Seconds())
line := fmt.Sprintf("%s [uid:%d,gid:%d,pid:%d] %s\n", ts, ctx.Uid(), ctx.Gid(), ctx.Pid(), cmd)
select {
......@@ -771,6 +772,7 @@ func (f *File) pread(ctx meta.Context, b []byte, offset int64) (n int, err error
if got == 0 {
return 0, io.EOF
}
readSizeHistogram.Observe(float64(got))
return got, nil
}
......@@ -805,6 +807,7 @@ func (f *File) pwrite(ctx meta.Context, b []byte, offset int64) (n int, err sysc
f.wdata = nil
return
}
writtenSizeHistogram.Observe(float64(len(b)))
return len(b), 0
}
......
/*
* JuiceFS, Copyright (C) 2021 Juicedata, Inc.
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package fs
import "github.com/prometheus/client_golang/prometheus"
// Histograms recorded by the SDK file-system layer.
// NOTE(review): no registration of these collectors is visible here — confirm
// they are registered with a Prometheus registerer elsewhere.
var (
// readSizeHistogram observes the number of bytes returned by each
// successful File.pread call. Buckets are 32 linear steps of 4096 bytes,
// i.e. upper bounds from 4 KiB to 128 KiB.
readSizeHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
Name: "sdk_read_size_bytes",
Help: "size of read distributions.",
Buckets: prometheus.LinearBuckets(4096, 4096, 32),
})
// writtenSizeHistogram observes len(b) for each successful File.pwrite
// call, using the same 4 KiB..128 KiB linear buckets as reads.
writtenSizeHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
Name: "sdk_written_size_bytes",
Help: "size of write distributions.",
Buckets: prometheus.LinearBuckets(4096, 4096, 32),
})
// opsDurationsHistogram observes the duration, in seconds, of every
// operation passed to FileSystem.log. Buckets are 30 exponential steps
// starting at 0.1 ms and growing by a factor of 1.5 (up to roughly 12.8 s).
opsDurationsHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
Name: "sdk_ops_durations_histogram_seconds",
Help: "Operations latency distributions.",
Buckets: prometheus.ExponentialBuckets(0.0001, 1.5, 30),
})
)
/*
* JuiceFS, Copyright (C) 2020 Juicedata, Inc.
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package metric
import (
"time"
"github.com/juicedata/juicefs/pkg/meta"
"github.com/juicedata/juicefs/pkg/utils"
"github.com/prometheus/client_golang/prometheus"
)
// Process-level and volume-level gauges shared by the mount command and the
// SDK. They are registered by UpdateMetrics.
var (
// start is captured when the package is initialized and is the reference
// point for the uptime gauge.
start = time.Now()
// cpu is computed on every collection as the sum of system and user CPU
// time from utils.GetRusage.
cpu = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Name: "cpu_usage",
Help: "Accumulated CPU usage in seconds.",
}, func() float64 {
ru := utils.GetRusage()
return ru.GetStime() + ru.GetUtime()
})
// memory is computed on every collection from the second value returned
// by utils.MemoryUsage (named rss here; presumably the resident set size).
memory = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Name: "memory",
Help: "Used memory in bytes.",
}, func() float64 {
_, rss := utils.MemoryUsage()
return float64(rss)
})
// uptime is computed on every collection as the seconds elapsed since start.
uptime = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Name: "uptime",
Help: "Total running time in seconds.",
}, func() float64 {
return time.Since(start).Seconds()
})
// usedSpace is set by UpdateMetrics to total minus available space as
// reported by Meta.StatFS.
usedSpace = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "used_space",
Help: "Total used space in bytes.",
})
// usedInodes is set by UpdateMetrics to the used-inode count reported by
// Meta.StatFS.
usedInodes = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "used_inodes",
Help: "Total number of inodes.",
})
)
// UpdateMetrics registers the process gauges (cpu, memory, uptime) and the
// volume gauges (usedSpace, usedInodes) with the default Prometheus
// registerer, then refreshes the volume gauges from m.StatFS every ten
// seconds.
//
// It never returns, so callers are expected to run it in its own goroutine.
// MustRegister panics if any collector cannot be registered, e.g. when
// UpdateMetrics is called more than once against the same registerer.
func UpdateMetrics(m meta.Meta) {
	prometheus.MustRegister(cpu, memory, uptime, usedSpace, usedInodes)

	const refreshInterval = 10 * time.Second
	background := meta.Background
	// The post statement sleeps after every pass, so the first sample is
	// taken immediately.
	for ; ; time.Sleep(refreshInterval) {
		var total, avail, inodesUsed, inodesFree uint64
		// Only publish new values when StatFS succeeds (errno 0); on failure
		// the gauges keep their previous values.
		if st := m.StatFS(background, &total, &avail, &inodesUsed, &inodesFree); st == 0 {
			usedSpace.Set(float64(total - avail))
			usedInodes.Set(float64(inodesUsed))
		}
	}
}
......@@ -43,11 +43,14 @@ import (
"github.com/juicedata/juicefs/pkg/chunk"
"github.com/juicedata/juicefs/pkg/fs"
"github.com/juicedata/juicefs/pkg/meta"
"github.com/juicedata/juicefs/pkg/metric"
"github.com/juicedata/juicefs/pkg/object"
"github.com/juicedata/juicefs/pkg/usage"
"github.com/juicedata/juicefs/pkg/utils"
"github.com/juicedata/juicefs/pkg/version"
"github.com/juicedata/juicefs/pkg/vfs"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/push"
"github.com/sirupsen/logrus"
)
......@@ -61,6 +64,7 @@ var (
handlers = make(map[uintptr]*wrapper)
activefs = make(map[string][]*wrapper)
logger = utils.GetLogger("juicefs")
pusher *push.Pusher
)
func errno(err error) int {
......@@ -175,6 +179,9 @@ type javaConf struct {
Debug bool `json:"debug"`
NoUsageReport bool `json:"noUsageReport"`
AccessLog string `json:"accessLog"`
PushGateway string `json:"pushGateway"`
PushInterval int `json:"pushInterval"`
PushAuth string `json:"pushAuth"`
}
func getOrCreate(name, user, group, superuser, supergroup string, f func() *fs.FileSystem) uintptr {
......@@ -260,11 +267,44 @@ func jfs_init(cname, jsonConf, user, group, superuser, supergroup *C.char) uintp
if err != nil {
logger.Fatalf("load setting: %s", err)
}
if jConf.PushGateway != "" && pusher == nil {
prometheus.DefaultRegisterer = prometheus.WrapRegistererWithPrefix("juicefs_", prometheus.DefaultRegisterer)
// TODO: support multiple volumes
pusher = push.New(jConf.PushGateway, "juicefs").Gatherer(prometheus.DefaultGatherer)
pusher = pusher.Grouping("vol_name", format.Name).Grouping("mp", "sdk-"+strconv.Itoa(os.Getpid()))
if h, err := os.Hostname(); err == nil {
pusher = pusher.Grouping("instance", h)
} else {
logger.Warnf("cannot get hostname: %s", err)
}
if strings.Contains(jConf.PushAuth, ":") {
parts := strings.Split(jConf.PushAuth, ":")
pusher = pusher.BasicAuth(parts[0], parts[1])
}
interval := time.Second * 10
if jConf.PushInterval > 0 {
interval = time.Second * time.Duration(jConf.PushInterval)
}
go func() {
for {
time.Sleep(interval)
if err := pusher.Push(); err != nil {
logger.Warnf("push metrics to %s: %s", jConf.PushGateway, err)
}
}
}()
meta.InitMetrics()
vfs.InitMetrics()
go metric.UpdateMetrics(m)
}
blob, err := createStorage(format)
if err != nil {
logger.Fatalf("object storage: %s", err)
}
logger.Infof("Data use %s", blob)
blob = object.WithMetrics(blob)
var freeSpaceRatio = 0.2
if jConf.FreeSpace != "" {
......@@ -379,6 +419,11 @@ func jfs_term(pid int, h uintptr) int {
}
}
}
if pusher != nil {
if err := pusher.Push(); err != nil {
logger.Warnf("push metrics: %s", err)
}
}
return 0
}
......
......@@ -327,6 +327,9 @@ public class JuiceFileSystemImpl extends FileSystem {
obj.put("putTimeout", Integer.valueOf(getConf(conf, "put-timeout", getConf(conf, "object-timeout", "60"))));
obj.put("memorySize", Integer.valueOf(getConf(conf, "memory-size", "300")));
obj.put("readahead", Integer.valueOf(getConf(conf, "max-readahead", "0")));
obj.put("pushGateway", getConf(conf, "push-gateway", ""));
obj.put("pushInterval", Integer.valueOf(getConf(conf, "push-interval", "10")));
obj.put("pushAuth", getConf(conf, "push-auth", ""));
obj.put("noUsageReport", Boolean.valueOf(getConf(conf, "no-usage-report", "false")));
obj.put("freeSpace", getConf(conf, "free-space", ""));
obj.put("accessLog", getConf(conf, "access-log", ""));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册