> JuiceFS uses a local mapping of user names to UIDs. So you should [sync all the needed users and their UIDs](sync_accounts_between_multiple_hosts.md) across the whole Hadoop cluster to avoid permission errors.
## Hadoop Compatibility
...
...
```shell
$ cd sdk/java
$ make
```
> **Tip**: For users in China, it's recommended to set a local Maven mirror to speed up compilation, e.g. [Aliyun Maven Mirror](https://maven.aliyun.com).
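As a sketch, a mirror can be configured in Maven's `~/.m2/settings.xml`; the repository URL below is the Aliyun public mirror, and should be verified against the mirror's own documentation:

```xml
<settings>
  <mirrors>
    <!-- Route requests for Maven Central through the Aliyun mirror -->
    <mirror>
      <id>aliyun-public</id>
      <mirrorOf>central</mirrorOf>
      <url>https://maven.aliyun.com/repository/public</url>
    </mirror>
  </mirrors>
</settings>
```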
## Deploy JuiceFS Hadoop Java SDK
After compiling, you can find the JAR file in the `sdk/java/target` directory, e.g. `juicefs-hadoop-0.10.0.jar`. Note that the file with the `original-` prefix doesn't contain third-party dependencies; it's recommended to use the JAR file that bundles third-party dependencies.
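For example, deploying to a single node might look like this (the Hadoop library directory below is an assumption; use the actual classpath directory of your distribution):

```shell
# Assumed layout: adjust /opt/hadoop to your installation.
cp sdk/java/target/juicefs-hadoop-0.10.0.jar /opt/hadoop/share/hadoop/common/lib/
cp "$JAVA_HOME"/lib/tools.jar /opt/hadoop/share/hadoop/common/lib/
```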
...
...
Then put the JAR file and `$JAVA_HOME/lib/tools.jar` to the classpath of each Hadoop ecosystem component.

### Cache Configurations

| Configuration | Default Value | Description |
| ------------- | ------------- | ----------- |
| `juicefs.cache-dir` | | Directory paths of local cache. Use a colon to separate multiple paths. Wildcards in paths are also supported. **It's recommended to create these directories manually and set `0777` permission so that different applications can share the cache data.** |
| `juicefs.cache-size` | 0 | Maximum size of local cache in MiB. It's the total size across all cache directories when multiple are set. |
| `juicefs.cache-full-block` | `true` | Whether to cache every read block; `false` means only cache random/small read blocks. |
| `juicefs.free-space` | 0.2 | Min free space ratio of cache directory |
| `juicefs.discover-nodes-url` | | The URL to discover cluster nodes, refresh every 10 minutes.<br/><br/>YARN: `yarn`<br/>Spark Standalone: `http://spark-master:web-ui-port/json/`<br/>Spark ThriftServer: `http://thrift-server:4040/api/v1/applications/`<br/>Presto: `http://coordinator:discovery-uri-port/v1/service/presto/` |
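For instance, the local cache could be enabled in `core-site.xml` like this (the paths and size are illustrative values, not recommendations):

```xml
<property>
  <name>juicefs.cache-dir</name>
  <value>/data1/jfs-cache:/data2/jfs-cache</value>
</property>
<property>
  <name>juicefs.cache-size</name>
  <!-- 102400 MiB = 100 GiB in total across both directories -->
  <value>102400</value>
</property>
```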
### I/O Configurations
| Configuration | Default Value | Description |
| ------------- | ------------- | ----------- |
| `juicefs.max-uploads` | 50 | The max number of connections to upload |
| `juicefs.get-timeout` | 5 | The max number of seconds to download an object |
| `juicefs.put-timeout` | 60 | The max number of seconds to upload an object |
| `juicefs.memory-size` | 300 | Total read/write buffering in MiB |
| `juicefs.prefetch` | 3 | Prefetch N blocks in parallel |
| `juicefs.access-log` | | Access log path. Ensure Hadoop application has write permission, e.g. `/tmp/juicefs.access.log`. The log file will rotate automatically to keep at most 7 files. |
| `juicefs.superuser` | `hdfs` | The super user |
| `juicefs.push-gateway` | | [Prometheus Pushgateway](https://github.com/prometheus/pushgateway) address, format is `<host>:<port>`. |
| `juicefs.no-usage-report` | `false` | Whether to disable usage reporting. JuiceFS only collects anonymous usage data (e.g. version number); no user data or any sensitive data will be collected. |
When you use multiple JuiceFS file systems, all these configurations can be set for a specific file system alone. Put the file system name in the middle of the configuration key, for example (replace `{JFS_NAME}` with the appropriate value):
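For example, assuming a file system named `jfs1` (a hypothetical name), its cache size could be set independently of other file systems:

```xml
<property>
  <name>juicefs.jfs1.cache-size</name>
  <value>102400</value>
</property>
```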
...
...
```sql
CREATE TABLE IF NOT EXISTS person
(
  ...
)
LOCATION 'jfs://{JFS_NAME}/tmp/person';
```
## Metrics
JuiceFS Hadoop Java SDK supports reporting metrics to [Prometheus Pushgateway](https://github.com/prometheus/pushgateway), then you can use [Grafana](https://grafana.com) and [dashboard template](k8s_grafana_template.json) to visualize these metrics.
Enable metrics reporting through the following configuration:
```xml
<property>
<name>juicefs.push-gateway</name>
<value>host:port</value>
</property>
```
> **Note**: Each process using the JuiceFS Hadoop Java SDK reports its own set of metrics, and Pushgateway remembers all metrics it has ever collected. Metrics therefore accumulate continuously, taking up too much memory and also slowing down Prometheus scraping. It is recommended to regularly (e.g. once every hour) clean up the metrics whose `job` is `juicefs` on Pushgateway. A running Hadoop Java SDK will resume updating after the metrics are cleared, so this basically does not affect usage.
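As a sketch, Pushgateway's delete endpoint for a metric group can be called from cron; the host name is an assumption (`9091` is Pushgateway's default port), and the endpoint's behavior should be verified against your Pushgateway version's API documentation:

```shell
# Crontab entry: delete the metrics of the "juicefs" job group once per hour.
0 * * * * curl -s -X DELETE http://pushgateway-host:9091/metrics/job/juicefs
```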
## Sync Accounts between Multiple Hosts

JuiceFS supports POSIX-compatible ACLs to manage permissions at the granularity of a directory or file. The behavior is the same as a local file system.

In order to make the permission experience intuitive to users (e.g. files accessible by user A on host X should be accessible on host Y by the same user), a user who wants to access JuiceFS should have the same UID and GID on all hosts.
Here we provide a simple [Ansible](https://www.ansible.com/community) playbook to demonstrate how to ensure an account with the same UID and GID on multiple hosts.

## Install Ansible
Select a host as the [control node](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html#managed-node-requirements), which can access all hosts via `ssh` with the same privileged account, like `root` or another sudo account. Install Ansible on this host; read [Installing Ansible](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html#installing-ansible) for more installation details.
## Ensure the same account on all hosts
Create an empty directory named `account-sync`, then save the following content as `play.yaml` under this directory.
```yaml
---
- hosts: all
  tasks:
    - name: "Ensure group {{ group }} with gid {{ gid }} exists"
      group:
        name: "{{ group }}"
        gid: "{{ gid }}"
        state: present

    - name: "Ensure user {{ user }} with uid {{ uid }} exists"
      user:
        name: "{{ user }}"
        uid: "{{ uid }}"
        group: "{{ group }}"
        state: present
```
Create a file named `hosts` in this directory and put the IP addresses of all hosts that need the account in it, one IP per line.
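For two hosts, the `hosts` inventory would simply be two lines (addresses illustrative):

```
172.16.255.163
172.16.255.180
```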
Here we ensure an account `alice` with UID 1200 and group `staff` with GID 500 on two hosts:
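Under these assumptions, the playbook could be run from the `account-sync` directory as follows (a sketch; the SSH user and flags may need adjusting for your environment):

```shell
ansible-playbook -i hosts -u root \
  --extra-vars "user=alice uid=1200 group=staff gid=500" \
  play.yaml
```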
In the above example, the group ID 1000 has already been allocated to another group on host `172.16.255.180`. We should either **change the GID** or **delete the group with GID 1000** on host `172.16.255.180`, then run the playbook again.
> **CAUTION**
>
> If the user account already exists on the host and we change it to another UID or GID value, the user may lose permissions to the files and directories they previously had access to. For example:
>
> ```
> $ ls -l /tmp/hello.txt
> -rw-r--r-- 1 alice staff 6 Apr 26 21:43 /tmp/hello.txt
> $ rm /tmp/hello.txt
> rm: remove write-protected regular file '/tmp/hello.txt'? y
> rm: cannot remove '/tmp/hello.txt': Operation not permitted
> ```