diff --git a/doc/HOT_LOADING_IN_SERVING.md b/doc/HOT_LOADING_IN_SERVING.md index 093b703786c228558739a87be948e46ee4575045..c4aae6ba884cc35654fbc4d472c8eaa4921a1e4f 100644 --- a/doc/HOT_LOADING_IN_SERVING.md +++ b/doc/HOT_LOADING_IN_SERVING.md @@ -1,32 +1,37 @@ # Hot Loading in Paddle Serving +([简体中文](HOT_LOADING_IN_SERVING_CN.md)|English) + ## Background In the industrial scenario, it is usually the remote periodic output model, and the online server needs to pull down the new model to update the old model without service interruption. +## Server Monitor + Paddle Serving provides an automatic monitoring script. After the remote address updates the model, the new model will be pulled to update the local model. At the same time, the `fluid_time_stamp` in the local model folder will be updated to realize model hot loading. Currently, the following types of Monitors are supported: | Monitor Type | Description | Specific options | | :----------: | :----------------------------------------------------------: | :----------------------------------------------------------: | -| General | Without authentication, you can directly access the download file by `wget` (such as FTP and BOS which do not need authentication) | `general_host` General remote host. | +| general | Without authentication, you can directly access the download file by `wget` (such as FTP and BOS which do not need authentication) | `general_host` General remote host. | | HDFS | The remote is HDFS, and relevant commands are executed through HDFS binary | `hdfs_bin` Path of HDFS binary file. | -| FTP | The remote is FTP, and relevant commands are executed through `ftplib`(Using this monitor, you need to install `ftplib` with command `pip install ftplib`) | `ftp_host` FTP remote host.
`ftp_port` FTP remote port.
`ftp_username` FTP username. Not used if anonymous access.
`ftp_password` FTP password. Not used if anonymous access. | -| AFS | The remote is AFS, and relevant commands are executed through Hadoop-client | `hadoop_bin` Path of Hadoop binary file.
`hadoop_host` AFS host. Not used if set in Hadoop-client.
`hadoop_ugi` AFS ugi, Not used if set in Hadoop-client. | - -| Monitor Shared options | Description | Default | -| :--------------------: | :----------------------------------------------------------: | :--------------------: | -| `type` | Specify the type of monitor | / | -| `remote_path` | Specify the base path for the remote | / | -| `remote_model_name` | Specify the model name to be pulled from the remote | / | -| `remote_donefile_name` | Specify the donefile name that marks the completion of the remote model update | / | -| `local_path` | Specify local work path | / | -| `local_model_name` | Specify local model name | / | -| `local_timestamp_file` | Specify the timestamp file used locally for hot loading, The file is considered to be placed in the `local_path/local_model_name` folder. | `fluid_time_file` | -| `local_tmp_path` | Specify the path of the folder where temporary files are stored locally. If it does not exist, it will be created automatically. | `_serving_monitor_tmp` | -| `interval` | Specify the polling interval in seconds. | `10` | -| `unpacked_filename` | Monitor supports the `tarfile` packaged remote model file. If the remote model is in a packaged format, you need to set this option to tell monitor the name of the extracted file. | `None` | +| ftp | The remote is FTP, and relevant commands are executed through `ftplib`(Using this monitor, you need to install `ftplib` with command `pip install ftplib`) | `ftp_host` FTP remote host.
`ftp_port` FTP remote port.
`ftp_username` FTP username. Not used if anonymous access.
`ftp_password` FTP password. Not used if anonymous access. | +| Afs | The remote is AFS, and relevant commands are executed through Hadoop-client | `hadoop_bin` Path of Hadoop binary file.
`hadoop_host` AFS host. Not used if set in Hadoop-client.
`hadoop_ugi` AFS ugi. Not used if set in Hadoop-client. | + +| Monitor Shared options | Description | Default | +| :--------------------: | :----------------------------------------------------------: | :----------------------------------: | +| `type` | Specify the type of monitor | / | +| `remote_path` | Specify the base path for the remote | / | +| `remote_model_name` | Specify the model name to be pulled from the remote | / | +| `remote_donefile_name` | Specify the donefile name that marks the completion of the remote model update | / | +| `local_path` | Specify local work path | / | +| `local_model_name` | Specify local model name | / | +| `local_timestamp_file` | Specify the timestamp file used locally for hot loading. The file is considered to be placed in the `local_path/local_model_name` folder. | `fluid_time_file` | +| `local_tmp_path` | Specify the path of the folder where temporary files are stored locally. If it does not exist, it will be created automatically. | `_serving_monitor_tmp` | +| `interval` | Specify the polling interval in seconds. | `10` | +| `unpacked_filename` | Monitor supports the `tarfile` packaged remote model file. If the remote model is in a packaged format, you need to set this option to tell monitor the name of the extracted file. | `None` | +| `debug` | If the `--debug` option is added, more detailed intermediate information will be output. | This option is not added by default. | The following is an example of HDFSMonitor to show the model hot loading of Paddle Serving. @@ -150,11 +155,44 @@ python -m paddle_serving_server.monitor \ --remote_model_name='uci_housing.tar.gz' --remote_donefile_name='donefile' \ --local_path='.' 
--local_model_name='uci_housing_model' \ --local_timestamp_file='fluid_time_file' --local_tmp_path='_tmp' \ - --unpacked_filename='uci_housing_model' + --unpacked_filename='uci_housing_model' --debug ``` The above code monitors the remote timestamp file `/donefile` of the remote HDFS address `/` every 10 seconds by polling. When the remote timestamp file changes, the remote model is considered to have been updated. Pull the remote packaging model `/uci_housing.tar.gz` to the local temporary path `./_tmp/uci_housing.tar.gz`. After unpacking to get the model file `./_tmp/uci_housing_model`, update the local model `./uci_housing_model` and the model timestamp file `./uci_housing_model/fluid_time_file` of Paddle Serving. +The expected output is as follows: + +```shell +2020-04-02 08:38 INFO [monitor.py:85] _hdfs_bin: /hadoop-3.1.2/bin/hdfs +2020-04-02 08:38 INFO [monitor.py:244] HDFS prefix cmd: /hadoop-3.1.2/bin/hdfs dfs +2020-04-02 08:38 INFO [monitor.py:85] _remote_path: / +2020-04-02 08:38 INFO [monitor.py:85] _remote_model_name: uci_housing.tar.gz +2020-04-02 08:38 INFO [monitor.py:85] _remote_donefile_name: donefile +2020-04-02 08:38 INFO [monitor.py:85] _local_model_name: uci_housing_model +2020-04-02 08:38 INFO [monitor.py:85] _local_path: . +2020-04-02 08:38 INFO [monitor.py:85] _local_timestamp_file: fluid_time_file +2020-04-02 08:38 INFO [monitor.py:85] _local_tmp_path: _tmp +2020-04-02 08:38 INFO [monitor.py:85] _interval: 10 +2020-04-02 08:38 DEBUG [monitor.py:249] check cmd: /hadoop-3.1.2/bin/hdfs dfs -stat "%Y" /donefile +2020-04-02 08:38 DEBUG [monitor.py:251] resp: 1585816693193 +2020-04-02 08:38 INFO [monitor.py:138] doneilfe(donefile) changed. +2020-04-02 08:38 DEBUG [monitor.py:261] pull cmd: /hadoop-3.1.2/bin/hdfs dfs -get -f /uci_housing.tar.gz _tmp +2020-04-02 08:38 INFO [monitor.py:144] pull remote model(uci_housing.tar.gz). +2020-04-02 08:38 INFO [monitor.py:98] unpack remote file(uci_housing.tar.gz). 
+2020-04-02 08:38 DEBUG [monitor.py:108] remove packed file(uci_housing.tar.gz). +2020-04-02 08:38 INFO [monitor.py:110] using unpacked filename: uci_housing_model. +2020-04-02 08:38 DEBUG [monitor.py:175] update model cmd: cp -r _tmp/uci_housing_model/* ./uci_housing_model +2020-04-02 08:38 INFO [monitor.py:152] update local model(uci_housing_model). +2020-04-02 08:38 DEBUG [monitor.py:184] update timestamp cmd: touch ./uci_housing_model/fluid_time_file +2020-04-02 08:38 INFO [monitor.py:157] update model timestamp(fluid_time_file). +2020-04-02 08:38 INFO [monitor.py:161] sleep 10s. +2020-04-02 08:38 DEBUG [monitor.py:249] check cmd: /hadoop-3.1.2/bin/hdfs dfs -stat "%Y" /donefile +2020-04-02 08:38 DEBUG [monitor.py:251] resp: 1585816693193 +2020-04-02 08:38 INFO [monitor.py:161] sleep 10s. +``` + + + #### View server logs View the running log of the server with the following command: diff --git a/doc/HOT_LOADING_IN_SERVING_CN.md b/doc/HOT_LOADING_IN_SERVING_CN.md index c210bf90b4b982ef4b777ee44e85a1684eacc9cb..688bd6dccf368dad97f3423cfbe6ddaf111defa2 100644 --- a/doc/HOT_LOADING_IN_SERVING_CN.md +++ b/doc/HOT_LOADING_IN_SERVING_CN.md @@ -1,5 +1,7 @@ # Paddle Serving中的模型热加载 +(简体中文|[English](HOT_LOADING_IN_SERVING.md)) + ## 背景 在实际的工业场景下,通常是远端定期不间断产出模型,线上服务端需要在服务不中断的情况下拉取新模型对旧模型进行更新迭代。 @@ -29,6 +31,7 @@ Paddle Serving提供了一个自动监控脚本,远端地址更新模型后会 | `local_tmp_path` | 指定本地存放临时文件的文件夹路径,若不存在则自动创建。 | `_serving_monitor_tmp` | | `interval` | 指定轮询间隔时间,单位为秒。 | `10` | | `unpacked_filename` | Monitor支持tarfile打包的远程模型。如果远程模型是打包格式,则需要设置该选项来告知Monitor解压后的文件名。 | `None` | +| `debug` | 如果添加`--debug`选项,则输出更详细的中间信息。 | 默认不添加该选项 | 下面通过HDFSMonitor示例来展示Paddle Serving的模型热加载功能。 @@ -148,15 +151,46 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po ```shell python -m paddle_serving_server.monitor \ ---type='hdfs' --hdfs_bin='/hadoop-3.1.2/bin/hdfs' --remote_path='/' \ ---remote_model_name='uci_housing.tar.gz' --remote_donefile_name='donefile' \ ---local_path='.' 
--local_model_name='uci_housing_model' \ ---local_timestamp_file='fluid_time_file' --local_tmp_path='_tmp' \ ---unpacked_filename='uci_housing_model' + --type='hdfs' --hdfs_bin='/hadoop-3.1.2/bin/hdfs' --remote_path='/' \ + --remote_model_name='uci_housing.tar.gz' --remote_donefile_name='donefile' \ + --local_path='.' --local_model_name='uci_housing_model' \ + --local_timestamp_file='fluid_time_file' --local_tmp_path='_tmp' \ + --unpacked_filename='uci_housing_model' --debug ``` 上面代码通过轮询方式监控远程HDFS地址`/`的时间戳文件`/donefile`,当时间戳变更则认为远程模型已经更新,将远程打包模型`/uci_housing.tar.gz`拉取到本地临时路径`./_tmp/uci_housing.tar.gz`下,解包出模型文件`./_tmp/uci_housing_model`后,更新本地模型`./uci_housing_model`以及Paddle Serving的时间戳文件`./uci_housing_model/fluid_time_file`。 +预计输出如下: + +```shell +2020-04-02 08:38 INFO [monitor.py:85] _hdfs_bin: /hadoop-3.1.2/bin/hdfs +2020-04-02 08:38 INFO [monitor.py:244] HDFS prefix cmd: /hadoop-3.1.2/bin/hdfs dfs +2020-04-02 08:38 INFO [monitor.py:85] _remote_path: / +2020-04-02 08:38 INFO [monitor.py:85] _remote_model_name: uci_housing.tar.gz +2020-04-02 08:38 INFO [monitor.py:85] _remote_donefile_name: donefile +2020-04-02 08:38 INFO [monitor.py:85] _local_model_name: uci_housing_model +2020-04-02 08:38 INFO [monitor.py:85] _local_path: . +2020-04-02 08:38 INFO [monitor.py:85] _local_timestamp_file: fluid_time_file +2020-04-02 08:38 INFO [monitor.py:85] _local_tmp_path: _tmp +2020-04-02 08:38 INFO [monitor.py:85] _interval: 10 +2020-04-02 08:38 DEBUG [monitor.py:249] check cmd: /hadoop-3.1.2/bin/hdfs dfs -stat "%Y" /donefile +2020-04-02 08:38 DEBUG [monitor.py:251] resp: 1585816693193 +2020-04-02 08:38 INFO [monitor.py:138] doneilfe(donefile) changed. +2020-04-02 08:38 DEBUG [monitor.py:261] pull cmd: /hadoop-3.1.2/bin/hdfs dfs -get -f /uci_housing.tar.gz _tmp +2020-04-02 08:38 INFO [monitor.py:144] pull remote model(uci_housing.tar.gz). +2020-04-02 08:38 INFO [monitor.py:98] unpack remote file(uci_housing.tar.gz). 
+2020-04-02 08:38 DEBUG [monitor.py:108] remove packed file(uci_housing.tar.gz). +2020-04-02 08:38 INFO [monitor.py:110] using unpacked filename: uci_housing_model. +2020-04-02 08:38 DEBUG [monitor.py:175] update model cmd: cp -r _tmp/uci_housing_model/* ./uci_housing_model +2020-04-02 08:38 INFO [monitor.py:152] update local model(uci_housing_model). +2020-04-02 08:38 DEBUG [monitor.py:184] update timestamp cmd: touch ./uci_housing_model/fluid_time_file +2020-04-02 08:38 INFO [monitor.py:157] update model timestamp(fluid_time_file). +2020-04-02 08:38 INFO [monitor.py:161] sleep 10s. +2020-04-02 08:38 DEBUG [monitor.py:249] check cmd: /hadoop-3.1.2/bin/hdfs dfs -stat "%Y" /donefile +2020-04-02 08:38 DEBUG [monitor.py:251] resp: 1585816693193 +2020-04-02 08:38 INFO [monitor.py:161] sleep 10s. +``` + #### 查看Server日志 通过下面命令查看Server的运行日志: diff --git a/python/paddle_serving_server/monitor.py b/python/paddle_serving_server/monitor.py index 5cd0803931b857e278ff4934f80f388ad3207e19..146bde1f9cd7c9a7336c3a036f93ba17b9c23f4f 100644 --- a/python/paddle_serving_server/monitor.py +++ b/python/paddle_serving_server/monitor.py @@ -240,15 +240,15 @@ class HDFSMonitor(Monitor): super(HDFSMonitor, self).__init__(interval) self._hdfs_bin = hdfs_bin self._print_params(['_hdfs_bin']) - self._prefix_cmd = '{} dfs '.format(self._hdfs_bin_path) - _LOGGER.info('HDFS prefix cmd: {}'.format(self._cmd_prefix)) + self._prefix_cmd = '{} dfs '.format(self._hdfs_bin) + _LOGGER.info('HDFS prefix cmd: {}'.format(self._prefix_cmd)) def _exist_remote_file(self, path, filename, local_tmp_path): remote_filepath = os.path.join(path, filename) cmd = '{} -stat "%Y" {}'.format(self._prefix_cmd, remote_filepath) _LOGGER.debug('check cmd: {}'.format(cmd)) [status, timestamp] = commands.getstatusoutput(cmd) - _LOGGER.debug('resp: {}'.format(output)) + _LOGGER.debug('resp: {}'.format(timestamp)) if status == 0: return [True, timestamp] else: @@ -302,9 +302,10 @@ class FTPMonitor(Monitor): return else: with 
open(local_fullpath, 'wb') as f: - _LOGGER.debug('cwd: {}'.format(path)) + _LOGGER.debug('cwd: {}'.format(remote_path)) self._ftp.cwd(remote_path) - _LOGGER.debug('download remote file({})'.format(remote_path)) + _LOGGER.debug('download remote file({})'.format( + remote_filename)) self._ftp.retrbinary('RETR {}'.format(remote_filename), f.write) def _download_remote_files(self, @@ -423,6 +424,9 @@ def parse_args(): ) parser.add_argument( "--interval", type=int, default=10, help="Time interval") + parser.add_argument( + "--debug", action='store_true', help="If true, output more details") + parser.set_defaults(debug=False) # general monitor parser.add_argument( "--general_host", type=str, help="Host of general remote server") @@ -488,10 +492,16 @@ def start_monitor(monitor, args): if __name__ == "__main__": - logging.basicConfig( - format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', - datefmt='%Y-%m-%d %H:%M', - level=logging.INFO) args = parse_args() + if args.debug: + logging.basicConfig( + format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', + datefmt='%Y-%m-%d %H:%M', + level=logging.DEBUG) + else: + logging.basicConfig( + format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', + datefmt='%Y-%m-%d %H:%M', + level=logging.INFO) monitor = get_monitor(args.type) start_monitor(monitor, args)