Created by: barrierye
修改后耗时明显降低。下面是100次infer的profile,可以看到在这个例子中Total time从2.80s降到了1.79s,总耗时降低36%(虽然底层predict每次耗时从17191.5us增加到了17484.1us,但由于去掉了Python端的一次np.array拷贝转换,总时间下降)
修改前:
Total time: 2.80162 s
File: /home/users/wangjiawei04/paddle_release_home/python/lib64/python2.7/site-packages/paddle_serving_client/__init__.py
Function: predict at line 210
Line # Hits Time Per Hit % Time Line Contents
==============================================================
210 def predict(self, feed=None, fetch=None, need_variant_tag=False):
211 100 221.0 2.2 0.0 self.profile_.record('py_prepro_0')
212
213 100 164.0 1.6 0.0 if feed is None or fetch is None:
214 raise ValueError("You should specify feed and fetch for prediction")
215
216 100 139.0 1.4 0.0 fetch_list = []
217 100 279.0 2.8 0.0 if isinstance(fetch, str):
218 fetch_list = [fetch]
219 100 163.0 1.6 0.0 elif isinstance(fetch, list):
220 100 137.0 1.4 0.0 fetch_list = fetch
221 else:
222 raise ValueError("Fetch only accepts string and list of string")
223
224 100 146.0 1.5 0.0 feed_batch = []
225 100 158.0 1.6 0.0 if isinstance(feed, dict):
226 100 179.0 1.8 0.0 feed_batch.append(feed)
227 elif isinstance(feed, list):
228 feed_batch = feed
229 else:
230 raise ValueError("Feed only accepts dict and list of dict")
231
232 100 139.0 1.4 0.0 int_slot_batch = []
233 100 136.0 1.4 0.0 float_slot_batch = []
234 100 149.0 1.5 0.0 int_feed_names = []
235 100 137.0 1.4 0.0 float_feed_names = []
236 100 140.0 1.4 0.0 int_shape = []
237 100 131.0 1.3 0.0 float_shape = []
238 100 128.0 1.3 0.0 fetch_names = []
239 100 142.0 1.4 0.0 counter = 0
240 100 160.0 1.6 0.0 batch_size = len(feed_batch)
241
242 1200 1724.0 1.4 0.1 for key in fetch_list:
243 1100 4755.0 4.3 0.2 if key in self.fetch_names_:
244 1100 1718.0 1.6 0.1 fetch_names.append(key)
245
246 100 158.0 1.6 0.0 if len(fetch_names) == 0:
247 raise ValueError(
248 "Fetch names should not be empty or out of saved fetch list.")
249 return {}
250
251 200 401.0 2.0 0.0 for i, feed_i in enumerate(feed_batch):
252 100 147.0 1.5 0.0 int_slot = []
253 100 142.0 1.4 0.0 float_slot = []
254 300 493.0 1.6 0.0 for key in feed_i:
255 200 421.0 2.1 0.0 if key not in self.feed_names_:
256 raise ValueError("Wrong feed name: {}.".format(key))
257 200 372.0 1.9 0.0 if not isinstance(feed_i[key], np.ndarray):
258 self.shape_check(feed_i, key)
259 200 386.0 1.9 0.0 if self.feed_types_[key] == int_type:
260 200 290.0 1.4 0.0 if i == 0:
261 200 312.0 1.6 0.0 int_feed_names.append(key)
262 200 347.0 1.7 0.0 if isinstance(feed_i[key], np.ndarray):
263 200 308.0 1.5 0.0 if key in self.lod_tensor_set:
264 raise ValueError(
265 "LodTensor var can not be ndarray type.")
266 200 530.0 2.6 0.0 int_shape.append(list(feed_i[key].shape))
267 else:
268 int_shape.append(self.feed_shapes_[key])
269 200 342.0 1.7 0.0 if isinstance(feed_i[key], np.ndarray):
270 200 306.0 1.5 0.0 if key in self.lod_tensor_set:
271 raise ValueError(
272 "LodTensor var can not be ndarray type.")
273 #int_slot.append(np.reshape(feed_i[key], (-1)).tolist())
274 200 340.0 1.7 0.0 int_slot.append(feed_i[key])
275 200 353.0 1.8 0.0 self.has_numpy_input = True
276 else:
277 int_slot.append(feed_i[key])
278 self.all_numpy_input = False
279 elif self.feed_types_[key] == float_type:
280 if i == 0:
281 float_feed_names.append(key)
282 if isinstance(feed_i[key], np.ndarray):
283 if key in self.lod_tensor_set:
284 raise ValueError(
285 "LodTensor var can not be ndarray type.")
286 float_shape.append(list(feed_i[key].shape))
287 else:
288 float_shape.append(self.feed_shapes_[key])
289 if isinstance(feed_i[key], np.ndarray):
290 if key in self.lod_tensor_set:
291 raise ValueError(
292 "LodTensor var can not be ndarray type.")
293 #float_slot.append(np.reshape(feed_i[key], (-1)).tolist())
294 float_slot.append(feed_i[key])
295 self.has_numpy_input = True
296 else:
297 float_slot.append(feed_i[key])
298 self.all_numpy_input = False
299 100 173.0 1.7 0.0 int_slot_batch.append(int_slot)
300 100 162.0 1.6 0.0 float_slot_batch.append(float_slot)
301
302 100 209.0 2.1 0.0 self.profile_.record('py_prepro_1')
303 100 192.0 1.9 0.0 self.profile_.record('py_client_infer_0')
304
305 100 153.0 1.5 0.0 result_batch = self.result_handle_
306 100 164.0 1.6 0.0 if self.all_numpy_input:
307 100 242.0 2.4 0.0 res = self.client_handle_.numpy_predict(
308 100 152.0 1.5 0.0 float_slot_batch, float_feed_names, float_shape, int_slot_batch,
309 100 1719149.0 17191.5 61.4 int_feed_names, int_shape, fetch_names, result_batch, self.pid)
310 elif self.has_numpy_input == False:
311 res = self.client_handle_.batch_predict(
312 float_slot_batch, float_feed_names, float_shape, int_slot_batch,
313 int_feed_names, int_shape, fetch_names, result_batch, self.pid)
314 else:
315 raise SystemExit(
316 "Please make sure the inputs are all in list type or all in numpy.array type"
317 )
318
319 100 478.0 4.8 0.0 self.profile_.record('py_client_infer_1')
320 100 198.0 2.0 0.0 self.profile_.record('py_postpro_0')
321
322 100 219.0 2.2 0.0 if res == -1:
323 return None
324
325 100 171.0 1.7 0.0 multi_result_map = []
326 100 583.0 5.8 0.0 model_engine_names = result_batch.get_engine_names()
327 200 537.0 2.7 0.0 for mi, engine_name in enumerate(model_engine_names):
328 100 177.0 1.8 0.0 result_map = {}
329 # result map needs to be a numpy array
330 1200 2094.0 1.7 0.1 for i, name in enumerate(fetch_names):
331 1100 2577.0 2.3 0.1 if self.fetch_names_to_type_[name] == int_type:
332 result_map[name] = result_batch.get_int64_by_name(mi, name)
333 shape = result_batch.get_shape(mi, name)
334 result_map[name] = np.array(result_map[name], dtype='int64')
335 result_map[name].shape = shape
336 if name in self.lod_tensor_set:
337 result_map["{}.lod".format(name)] = np.array(
338 result_batch.get_lod(mi, name))
339 1100 2155.0 2.0 0.1 elif self.fetch_names_to_type_[name] == float_type:
340 1100 105221.0 95.7 3.8 result_map[name] = result_batch.get_float_by_name(mi, name)
341 1100 3665.0 3.3 0.1 shape = result_batch.get_shape(mi, name)
342 1100 1840.0 1.7 0.1 result_map[name] = np.array(
343 1100 937739.0 852.5 33.5 result_map[name], dtype='float32')
344 1100 3014.0 2.7 0.1 result_map[name].shape = shape
345 1100 2190.0 2.0 0.1 if name in self.lod_tensor_set:
346 result_map["{}.lod".format(name)] = np.array(
347 result_batch.get_lod(mi, name))
348 100 239.0 2.4 0.0 multi_result_map.append(result_map)
349 100 165.0 1.6 0.0 ret = None
350 100 207.0 2.1 0.0 if len(model_engine_names) == 1:
351 # If only one model result is returned, the format of ret is result_map
352 100 179.0 1.8 0.0 ret = multi_result_map[0]
353 else:
354 # If multiple model results are returned, the format of ret is {name: result_map}
355 ret = {
356 engine_name: multi_result_map[mi]
357 for mi, engine_name in enumerate(model_engine_names)
358 }
359
360 100 288.0 2.9 0.0 self.profile_.record('py_postpro_1')
361 100 244.0 2.4 0.0 self.profile_.print_profile()
362
363 # When using the A/B test, the tag of variant needs to be returned
364 100 150.0 1.5 0.0 return ret if not need_variant_tag else [
365 ret, self.result_handle_.variant_tag()
366 ]
修改后:
Total time: 1.78668 s
File: /home/users/wangjiawei04/paddle_release_home/python/lib64/python2.7/site-packages/paddle_serving_client/__init__.py
Function: predict at line 210
Line # Hits Time Per Hit % Time Line Contents
==============================================================
210 def predict(self, feed=None, fetch=None, need_variant_tag=False):
211 100 202.0 2.0 0.0 self.profile_.record('py_prepro_0')
212
213 100 157.0 1.6 0.0 if feed is None or fetch is None:
214 raise ValueError("You should specify feed and fetch for prediction")
215
216 100 141.0 1.4 0.0 fetch_list = []
217 100 253.0 2.5 0.0 if isinstance(fetch, str):
218 fetch_list = [fetch]
219 100 163.0 1.6 0.0 elif isinstance(fetch, list):
220 100 141.0 1.4 0.0 fetch_list = fetch
221 else:
222 raise ValueError("Fetch only accepts string and list of string")
223
224 100 133.0 1.3 0.0 feed_batch = []
225 100 151.0 1.5 0.0 if isinstance(feed, dict):
226 100 180.0 1.8 0.0 feed_batch.append(feed)
227 elif isinstance(feed, list):
228 feed_batch = feed
229 else:
230 raise ValueError("Feed only accepts dict and list of dict")
231
232 100 128.0 1.3 0.0 int_slot_batch = []
233 100 134.0 1.3 0.0 float_slot_batch = []
234 100 134.0 1.3 0.0 int_feed_names = []
235 100 129.0 1.3 0.0 float_feed_names = []
236 100 131.0 1.3 0.0 int_shape = []
237 100 132.0 1.3 0.0 float_shape = []
238 100 133.0 1.3 0.0 fetch_names = []
239 100 139.0 1.4 0.0 counter = 0
240 100 148.0 1.5 0.0 batch_size = len(feed_batch)
241
242 1200 1600.0 1.3 0.1 for key in fetch_list:
243 1100 4676.0 4.3 0.3 if key in self.fetch_names_:
244 1100 1591.0 1.4 0.1 fetch_names.append(key)
245
246 100 160.0 1.6 0.0 if len(fetch_names) == 0:
247 raise ValueError(
248 "Fetch names should not be empty or out of saved fetch list.")
249 return {}
250
251 200 390.0 1.9 0.0 for i, feed_i in enumerate(feed_batch):
252 100 148.0 1.5 0.0 int_slot = []
253 100 138.0 1.4 0.0 float_slot = []
254 300 472.0 1.6 0.0 for key in feed_i:
255 200 407.0 2.0 0.0 if key not in self.feed_names_:
256 raise ValueError("Wrong feed name: {}.".format(key))
257 200 348.0 1.7 0.0 if not isinstance(feed_i[key], np.ndarray):
258 self.shape_check(feed_i, key)
259 200 369.0 1.8 0.0 if self.feed_types_[key] == int_type:
260 200 273.0 1.4 0.0 if i == 0:
261 200 328.0 1.6 0.0 int_feed_names.append(key)
262 200 313.0 1.6 0.0 if isinstance(feed_i[key], np.ndarray):
263 200 286.0 1.4 0.0 if key in self.lod_tensor_set:
264 raise ValueError(
265 "LodTensor var can not be ndarray type.")
266 200 500.0 2.5 0.0 int_shape.append(list(feed_i[key].shape))
267 else:
268 int_shape.append(self.feed_shapes_[key])
269 200 322.0 1.6 0.0 if isinstance(feed_i[key], np.ndarray):
270 200 293.0 1.5 0.0 if key in self.lod_tensor_set:
271 raise ValueError(
272 "LodTensor var can not be ndarray type.")
273 #int_slot.append(np.reshape(feed_i[key], (-1)).tolist())
274 200 322.0 1.6 0.0 int_slot.append(feed_i[key])
275 200 334.0 1.7 0.0 self.has_numpy_input = True
276 else:
277 int_slot.append(feed_i[key])
278 self.all_numpy_input = False
279 elif self.feed_types_[key] == float_type:
280 if i == 0:
281 float_feed_names.append(key)
282 if isinstance(feed_i[key], np.ndarray):
283 if key in self.lod_tensor_set:
284 raise ValueError(
285 "LodTensor var can not be ndarray type.")
286 float_shape.append(list(feed_i[key].shape))
287 else:
288 float_shape.append(self.feed_shapes_[key])
289 if isinstance(feed_i[key], np.ndarray):
290 if key in self.lod_tensor_set:
291 raise ValueError(
292 "LodTensor var can not be ndarray type.")
293 #float_slot.append(np.reshape(feed_i[key], (-1)).tolist())
294 float_slot.append(feed_i[key])
295 self.has_numpy_input = True
296 else:
297 float_slot.append(feed_i[key])
298 self.all_numpy_input = False
299 100 163.0 1.6 0.0 int_slot_batch.append(int_slot)
300 100 150.0 1.5 0.0 float_slot_batch.append(float_slot)
301
302 100 192.0 1.9 0.0 self.profile_.record('py_prepro_1')
303 100 197.0 2.0 0.0 self.profile_.record('py_client_infer_0')
304
305 100 143.0 1.4 0.0 result_batch = self.result_handle_
306 100 155.0 1.6 0.0 if self.all_numpy_input:
307 100 200.0 2.0 0.0 res = self.client_handle_.numpy_predict(
308 100 144.0 1.4 0.0 float_slot_batch, float_feed_names, float_shape, int_slot_batch,
309 100 1748409.0 17484.1 97.9 int_feed_names, int_shape, fetch_names, result_batch, self.pid)
310 elif self.has_numpy_input == False:
311 res = self.client_handle_.batch_predict(
312 float_slot_batch, float_feed_names, float_shape, int_slot_batch,
313 int_feed_names, int_shape, fetch_names, result_batch, self.pid)
314 else:
315 raise SystemExit(
316 "Please make sure the inputs are all in list type or all in numpy.array type"
317 )
318
319 100 486.0 4.9 0.0 self.profile_.record('py_client_infer_1')
320 100 189.0 1.9 0.0 self.profile_.record('py_postpro_0')
321
322 100 217.0 2.2 0.0 if res == -1:
323 return None
324
325 100 183.0 1.8 0.0 multi_result_map = []
326 100 550.0 5.5 0.0 model_engine_names = result_batch.get_engine_names()
327 200 533.0 2.7 0.0 for mi, engine_name in enumerate(model_engine_names):
328 100 166.0 1.7 0.0 result_map = {}
329 # result map needs to be a numpy array
330 1200 1906.0 1.6 0.1 for i, name in enumerate(fetch_names):
331 1100 2250.0 2.0 0.1 if self.fetch_names_to_type_[name] == int_type:
332 result_map[name] = result_batch.get_int64_by_name(mi, name)
333 shape = result_batch.get_shape(mi, name)
334 # result_map[name] = np.array(result_map[name], dtype='int64')
335 result_map[name].shape = shape
336 if name in self.lod_tensor_set:
337 result_map["{}.lod".format(name)] = np.array(
338 result_batch.get_lod(mi, name))
339 1100 2024.0 1.8 0.1 elif self.fetch_names_to_type_[name] == float_type:
340 1100 3592.0 3.3 0.2 result_map[name] = result_batch.get_float_by_name(mi, name)
341 1100 3107.0 2.8 0.2 shape = result_batch.get_shape(mi, name)
342 # result_map[name] = np.array(
343 # result_map[name], dtype='float32')
344 1100 2440.0 2.2 0.1 result_map[name].shape = shape
345 1100 1996.0 1.8 0.1 if name in self.lod_tensor_set:
346 result_map["{}.lod".format(name)] = np.array(
347 result_batch.get_lod(mi, name))
348 100 219.0 2.2 0.0 multi_result_map.append(result_map)
349 100 145.0 1.4 0.0 ret = None
350 100 186.0 1.9 0.0 if len(model_engine_names) == 1:
351 # If only one model result is returned, the format of ret is result_map
352 100 170.0 1.7 0.0 ret = multi_result_map[0]
353 else:
354 # If multiple model results are returned, the format of ret is {name: result_map}
355 ret = {
356 engine_name: multi_result_map[mi]
357 for mi, engine_name in enumerate(model_engine_names)
358 }
359
360 100 215.0 2.1 0.0 self.profile_.record('py_postpro_1')
361 100 217.0 2.2 0.0 self.profile_.print_profile()
362
363 # When using the A/B test, the tag of variant needs to be returned
364 100 134.0 1.3 0.0 return ret if not need_variant_tag else [
365 ret, self.result_handle_.variant_tag()
366 ]