hub_server.py 8.0 KB
Newer Older
S
Steffy-zxf 已提交
1
#coding:utf-8
W
wuzewu 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
W
wuzewu 已提交
19

W
wuzewu 已提交
20
import os
21
import time
22
import re
B
BinLong 已提交
23 24 25
import requests
import json
import yaml
W
wuzewu 已提交
26

B
BinLong 已提交
27
from random import randint
W
wuzewu 已提交
28 29
from paddlehub.common import utils
from paddlehub.common.downloader import default_downloader
B
BinLong 已提交
30
from paddlehub.common.server_config import default_server_config
W
wuzewu 已提交
31
from paddlehub.io.parser import yaml_parser
W
wuzewu 已提交
32
import paddlehub as hub
W
wuzewu 已提交
33

W
wuzewu 已提交
34
# File name of the resource list: joined with hub.CACHE_HOME for the local
# cache and appended to the configured resource storage server URL.
RESOURCE_LIST_FILE = "resource_list_file.yml"
35
# Age in seconds (10 minutes) at which the cached resource list is discarded.
CACHE_TIME = 60 * 10
W
wuzewu 已提交
36 37


W
wuzewu 已提交
38
class HubServer(object):
B
BinLong 已提交
39 40
    def __init__(self, config_file_path=None):
        """Load the server configuration, creating a default file if needed.

        Args:
            config_file_path: Path of the JSON configuration file. When it is
                falsy, ``config.json`` inside ``hub.CONF_HOME`` is used.
        """
        config_path = config_file_path or os.path.join(hub.CONF_HOME,
                                                       'config.json')

        conf_home_missing = not os.path.exists(hub.CONF_HOME)
        if conf_home_missing:
            utils.mkdir(hub.CONF_HOME)

        # No config yet: seed the file with the default server config.
        config_missing = not os.path.exists(config_path)
        if config_missing:
            with open(config_path, 'w+') as config_out:
                config_out.write(json.dumps(default_server_config))

        with open(config_path) as config_in:
            self.config = json.load(config_in)

        configured_url = self.config['server_url']
        utils.check_url(configured_url)
        self.server_url = configured_url

        # Populate self.resource_list_file from the on-disk cache, if usable.
        self._load_resource_list_file_if_valid()
54

B
BinLong 已提交
55 56 57 58 59 60 61
    def get_server_url(self):
        HS_ENV = os.environ.get('HUB_SERVER')
        if HS_ENV:
            HUB_SERVERS = HS_ENV.split(';')
            return HUB_SERVERS[uniform(0, len(self.server_url))]
        return self.server_url[uniform(0, len(self.server_url))]

W
wuzewu 已提交
62 63
    def resource_list_file_path(self):
        """Return the path of the resource list file inside ``hub.CACHE_HOME``."""
        cache_dir = hub.CACHE_HOME
        return os.path.join(cache_dir, RESOURCE_LIST_FILE)
64

W
wuzewu 已提交
65 66 67
    def _load_resource_list_file_if_valid(self):
        self.resource_list_file = {}
        if not os.path.exists(self.resource_list_file_path()):
W
wuzewu 已提交
68
            return False
W
wuzewu 已提交
69
        file_create_time = os.path.getctime(self.resource_list_file_path())
70 71 72 73
        now_time = time.time()

        # if file is out of date, remove it
        if now_time - file_create_time >= CACHE_TIME:
W
wuzewu 已提交
74
            os.remove(self.resource_list_file_path())
75
            return False
W
wuzewu 已提交
76
        for resource in yaml_parser.parse(
W
wuzewu 已提交
77 78 79 80 81
                self.resource_list_file_path())['resource_list']:
            for key in resource:
                if key not in self.resource_list_file:
                    self.resource_list_file[key] = []
                self.resource_list_file[key].append(resource[key])
82

83
        # if file format is invalid, remove it
W
wuzewu 已提交
84 85 86
        if "version" not in self.resource_list_file or "name" not in self.resource_list_file:
            self.resource_list_file = {}
            os.remove(self.resource_list_file_path())
87 88
            return False
        return True
W
wuzewu 已提交
89

W
wuzewu 已提交
90
    def search_resource(self, resource_key, resource_type=None, update=False):
B
BinLong 已提交
91 92 93 94
        try:
            payload = {'word': resource_key}
            if resource_type:
                payload['type'] = resource_type
B
BinLong 已提交
95 96
            r = requests.get(
                self.get_server_url() + '/' + 'search', params=payload)
B
BinLong 已提交
97 98
            r = json.loads(r.text)
            if r['status'] == 0 and len(r['data']) > 0:
B
BinLong 已提交
99 100
                return [(item['name'], item['type'], item['version'],
                         item['summary']) for item in r['data']]
B
BinLong 已提交
101 102 103
        except:
            pass

W
wuzewu 已提交
104
        if update or not self.resource_list_file:
W
wuzewu 已提交
105 106
            self.request()

107 108 109
        if not self._load_resource_list_file_if_valid():
            return None

110 111 112
        match_resource_index_list = []
        for index, resource in enumerate(self.resource_list_file['name']):
            try:
W
wuzewu 已提交
113
                is_match = re.search(resource_key, resource)
114 115 116 117 118 119
                if is_match and (resource_type is None
                                 or self.resource_list_file['type'][index] ==
                                 resource_type):
                    match_resource_index_list.append(index)
            except:
                pass
W
wuzewu 已提交
120

W
wuzewu 已提交
121 122 123 124 125
        return [(self.resource_list_file['name'][index],
                 self.resource_list_file['type'][index],
                 self.resource_list_file['version'][index],
                 self.resource_list_file['summary'][index])
                for index in match_resource_index_list]
W
wuzewu 已提交
126

W
wuzewu 已提交
127 128 129 130 131 132 133 134 135 136 137 138 139
    def search_module(self, module_key, update=False):
        self.search_resource(
            resource_key=module_key, resource_type="Module", update=update)

    def search_model(self, module_key, update=False):
        self.search_resource(
            resource_key=module_key, resource_type="Model", update=update)

    def get_resource_url(self,
                         resource_name,
                         resource_type=None,
                         version=None,
                         update=False):
B
BinLong 已提交
140 141 142 143 144 145
        try:
            payload = {'word': resource_name}
            if resource_type:
                payload['type'] = resource_type
            if version:
                payload['version'] = version
B
BinLong 已提交
146 147
            r = requests.get(
                self.get_server_url() + '/' + 'search', params=payload)
B
BinLong 已提交
148 149 150 151 152 153
            r = json.loads(r.text)
            if r['status'] == 0 and len(r['data']) > 0:
                return r['data'][0]
        except:
            pass

W
wuzewu 已提交
154
        if update or not self.resource_list_file:
W
wuzewu 已提交
155 156
            self.request()

157
        if not self._load_resource_list_file_if_valid():
W
wuzewu 已提交
158
            return {}
159

W
wuzewu 已提交
160
        resource_index_list = [
W
wuzewu 已提交
161
            index
W
wuzewu 已提交
162 163 164 165
            for index, resource in enumerate(self.resource_list_file['name'])
            if resource == resource_name and (
                resource_type is None
                or self.resource_list_file['type'][index] == resource_type)
W
wuzewu 已提交
166
        ]
W
wuzewu 已提交
167 168 169
        resource_version_list = [
            self.resource_list_file['version'][index]
            for index in resource_index_list
W
wuzewu 已提交
170 171
        ]
        #TODO(wuzewu): version sort method
W
wuzewu 已提交
172
        resource_version_list = sorted(resource_version_list)
W
wuzewu 已提交
173
        if not version:
W
wuzewu 已提交
174
            if not resource_version_list:
W
wuzewu 已提交
175
                return {}
W
wuzewu 已提交
176
            version = resource_version_list[-1]
W
wuzewu 已提交
177

W
wuzewu 已提交
178 179
        for index in resource_index_list:
            if self.resource_list_file['version'][index] == version:
W
wuzewu 已提交
180 181
                return {
                    'url': self.resource_list_file['url'][index],
182 183
                    'md5': self.resource_list_file['md5'][index],
                    'version': version
W
wuzewu 已提交
184
                }
W
wuzewu 已提交
185

W
wuzewu 已提交
186
        return {}
W
wuzewu 已提交
187

W
wuzewu 已提交
188 189 190 191 192 193 194 195 196 197 198 199 200
    def get_module_url(self, module_name, version=None, update=False):
        return self.get_resource_url(
            resource_name=module_name,
            resource_type="Module",
            version=version,
            update=update)

    def get_model_url(self, module_name, version=None, update=False):
        return self.get_resource_url(
            resource_name=module_name,
            resource_type="Model",
            version=version,
            update=update)
W
wuzewu 已提交
201

W
wuzewu 已提交
202
    def request(self):
        """Refresh the resource list stored in ``hub.CACHE_HOME``.

        The list is first requested from the hub server's ``search`` endpoint
        and written to the cache file as YAML. If anything in that step
        fails, the resource list file is downloaded from the configured
        ``resource_storage_server_url`` instead.

        Returns:
            True when either source succeeded, False otherwise.
        """
        if not os.path.exists(hub.CACHE_HOME):
            utils.mkdir(hub.CACHE_HOME)
        try:
            response = requests.get(self.get_server_url() + '/' + 'search')
            # Extract the payload before opening the cache file, so that a
            # malformed response does not leave a truncated cache behind.
            resource_list = json.loads(response.text)['data']
            cache_path = os.path.join(hub.CACHE_HOME, RESOURCE_LIST_FILE)
            with open(cache_path, 'w+') as fp:
                yaml.safe_dump({'resource_list': resource_list}, fp)
            return True
        except Exception:
            # Best effort: fall through to the storage server download, but
            # let KeyboardInterrupt/SystemExit propagate.
            pass

        file_url = self.config[
            'resource_storage_server_url'] + RESOURCE_LIST_FILE
        # Only the success flag is needed. The remaining values are kept in
        # locals rather than stored in self.resource_list_file, which the
        # rest of the class treats as a dict of per-field lists.
        result, _tips, _downloaded = default_downloader.download_file(
            file_url, save_path=hub.CACHE_HOME)
        return bool(result)
W
wuzewu 已提交
222

W
wuzewu 已提交
223 224

# Module-level instance built at import time; with no argument the
# constructor uses config.json inside hub.CONF_HOME.
default_hub_server = HubServer()