diff --git a/modules/image/classification/mobilenet_v2_animals/module.py b/modules/image/classification/mobilenet_v2_animals/module.py
index c691a41e90c60def9b42a8fc246e2a900e86ee01..b9d62aa7c5f9468deb00a407f75ccead0544e192 100644
--- a/modules/image/classification/mobilenet_v2_animals/module.py
+++ b/modules/image/classification/mobilenet_v2_animals/module.py
@@ -9,7 +9,10 @@ import os
 import numpy as np
 import paddle.fluid as fluid
 import paddlehub as hub
-from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+
+from paddle.inference import Config
+from paddle.inference import create_predictor
+
 from paddlehub.module.module import moduleinfo, runnable, serving
 from paddlehub.common.paddle_helper import add_vars_prefix
 
@@ -48,26 +51,53 @@ class MobileNetV2Animals(hub.Module):
         im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3)
         return im_std
 
+    def _get_device_id(self, places):
+        try:
+            places = os.environ[places]
+            id = int(places)
+        except:
+            id = -1
+        return id
+
     def _set_config(self):
         """
         predictor config setting
         """
-        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
+
+        # create default cpu predictor
+        cpu_config = Config(self.default_pretrained_model_path)
         cpu_config.disable_glog_info()
         cpu_config.disable_gpu()
-        self.cpu_predictor = create_paddle_predictor(cpu_config)
+        self.cpu_predictor = create_predictor(cpu_config)
 
-        try:
-            _places = os.environ["CUDA_VISIBLE_DEVICES"]
-            int(_places[0])
-            use_gpu = True
-        except:
-            use_gpu = False
-        if use_gpu:
-            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
+        # create predictors using various types of devices
+
+        # npu
+        npu_id = self._get_device_id("FLAGS_selected_npus")
+        if npu_id != -1:
+            # use npu
+            npu_config = Config(self.default_pretrained_model_path)
+            npu_config.disable_glog_info()
+            npu_config.enable_npu(device_id=npu_id)
+            self.npu_predictor = create_predictor(npu_config)
+
+        # gpu
+        gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES")
+        if gpu_id != -1:
+            # use gpu
+            gpu_config = Config(self.default_pretrained_model_path)
             gpu_config.disable_glog_info()
-            gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
-            self.gpu_predictor = create_paddle_predictor(gpu_config)
+            gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=gpu_id)
+            self.gpu_predictor = create_predictor(gpu_config)
+
+        # xpu
+        xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES")
+        if xpu_id != -1:
+            # use xpu
+            xpu_config = Config(self.default_pretrained_model_path)
+            xpu_config.disable_glog_info()
+            xpu_config.enable_xpu(100)
+            self.xpu_predictor = create_predictor(xpu_config)
 
     def context(self, trainable=True, pretrained=True):
         """context for transfer learning.
@@ -117,7 +147,7 @@ class MobileNetV2Animals(hub.Module):
                 param.trainable = trainable
         return inputs, outputs, context_prog
 
-    def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1):
+    def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1, use_device=None):
         """
         API for image classification.
 
@@ -127,18 +157,29 @@ class MobileNetV2Animals(hub.Module):
             batch_size (int): batch size.
             use_gpu (bool): Whether to use gpu.
             top_k (int): Return top k results.
+            use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag.
 
         Returns:
             res (list[dict]): The classfication results.
         """
-        if use_gpu:
-            try:
-                _places = os.environ["CUDA_VISIBLE_DEVICES"]
-                int(_places[0])
-            except:
-                raise RuntimeError(
-                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
-                )
+        # real predictor to use
+        if use_device is not None:
+            if use_device == "cpu":
+                predictor = self.cpu_predictor
+            elif use_device == "xpu":
+                predictor = self.xpu_predictor
+            elif use_device == "npu":
+                predictor = self.npu_predictor
+            elif use_device == "gpu":
+                predictor = self.gpu_predictor
+            else:
+                raise Exception("Unsupported device: " + use_device)
+        else:
+            # use_device is not set, therefore follow use_gpu
+            if use_gpu:
+                predictor = self.gpu_predictor
+            else:
+                predictor = self.cpu_predictor
 
         all_data = list()
         for yield_data in reader(images, paths):
@@ -158,10 +199,16 @@ class MobileNetV2Animals(hub.Module):
                 pass
             # feed batch image
            batch_image = np.array([data['image'] for data in batch_data])
-            batch_image = PaddleTensor(batch_image.copy())
-            predictor_output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run(
-                [batch_image])
-            out = postprocess(data_out=predictor_output[0].as_ndarray(), label_list=self.label_list, top_k=top_k)
+
+            input_names = predictor.get_input_names()
+            input_tensor = predictor.get_input_handle(input_names[0])
+            input_tensor.reshape(batch_image.shape)
+            input_tensor.copy_from_cpu(batch_image.copy())
+            predictor.run()
+            output_names = predictor.get_output_names()
+            output_handle = predictor.get_output_handle(output_names[0])
+            predictor_output = output_handle.copy_to_cpu()
+            out = postprocess(data_out=predictor_output, label_list=self.label_list, top_k=top_k)
             res += out
         return res
 
@@ -209,7 +256,12 @@ class MobileNetV2Animals(hub.Module):
         self.add_module_config_arg()
         self.add_module_input_arg()
         args = self.parser.parse_args(argvs)
-        results = self.classification(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu)
+        results = self.classification(
+            paths=[args.input_path],
+            batch_size=args.batch_size,
+            use_gpu=args.use_gpu,
+            top_k=args.top_k,
+            use_device=args.use_device)
         return results
 
     def add_module_config_arg(self):
@@ -220,6 +272,10 @@ class MobileNetV2Animals(hub.Module):
             '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not.")
         self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.")
         self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.")
+        self.arg_config_group.add_argument(
+            '--use_device',
+            choices=["cpu", "gpu", "xpu", "npu"],
+            help="use cpu, gpu, xpu or npu. overwrites use_gpu flag.")
 
     def add_module_input_arg(self):
         """
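Usage sketch for the new use_device argument (an editor's illustration, not part of the patch; assumes the module is installed via PaddleHub and "cat.jpg" is a hypothetical input file):

    import paddlehub as hub

    classifier = hub.Module(name="mobilenet_v2_animals")

    # use_device overrides use_gpu and must name a predictor that was actually
    # created in _set_config, i.e. the matching environment variable
    # (CUDA_VISIBLE_DEVICES / XPU_VISIBLE_DEVICES / FLAGS_selected_npus)
    # held a single device id when the module was loaded.
    results = classifier.classification(paths=["cat.jpg"], top_k=3, use_device="gpu")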
overwrites use_gpu flag.") def add_module_input_arg(self): """ diff --git a/modules/image/classification/resnet50_vd_dishes/module.py b/modules/image/classification/resnet50_vd_dishes/module.py index 43c072b7f81ab9e02749bcc12734c454b2da8a85..a276a344a65d049d0e57b6ed2bf756ed8c5f4f3b 100644 --- a/modules/image/classification/resnet50_vd_dishes/module.py +++ b/modules/image/classification/resnet50_vd_dishes/module.py @@ -9,7 +9,10 @@ import os import numpy as np import paddle.fluid as fluid import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor + +from paddle.inference import Config +from paddle.inference import create_predictor + from paddlehub.module.module import moduleinfo, runnable, serving from paddlehub.common.paddle_helper import add_vars_prefix @@ -47,26 +50,53 @@ class ResNet50vdDishes(hub.Module): im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) return im_std + def _get_device_id(self, places): + try: + places = os.environ[places] + id = int(places) + except: + id = -1 + return id + def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + + # create default cpu predictor + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + # create predictors using various types of devices + + # npu + npu_id = self._get_device_id("FLAGS_selected_npus") + if npu_id != -1: + # use npu + npu_config = Config(self.default_pretrained_model_path) + npu_config.disable_glog_info() + npu_config.enable_npu(device_id=npu_id) + self.npu_predictor = create_predictor(npu_config) + + # gpu + gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES") + if gpu_id != -1: + # use gpu + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=gpu_id) + self.gpu_predictor = create_predictor(gpu_config) + + # xpu + xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES") + if xpu_id != -1: + # use xpu + xpu_config = Config(self.default_pretrained_model_path) + xpu_config.disable_glog_info() + xpu_config.enable_xpu(100) + self.xpu_predictor = create_predictor(xpu_config) def context(self, trainable=True, pretrained=True): """context for transfer learning. @@ -116,7 +146,7 @@ class ResNet50vdDishes(hub.Module): param.trainable = trainable return inputs, outputs, context_prog - def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1, use_device=None): """ API for image classification. @@ -126,18 +156,29 @@ class ResNet50vdDishes(hub.Module): batch_size (int): batch size. use_gpu (bool): Whether to use gpu. top_k (int): Return top k results. + use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag. Returns: res (list[dict]): The classfication results. 
""" - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) + # real predictor to use + if use_device is not None: + if use_device == "cpu": + predictor = self.cpu_predictor + elif use_device == "xpu": + predictor = self.xpu_predictor + elif use_device == "npu": + predictor = self.npu_predictor + elif use_device == "gpu": + predictor = self.gpu_predictor + else: + raise Exception("Unsupported device: " + use_device) + else: + # use_device is not set, therefore follow use_gpu + if use_gpu: + predictor = self.gpu_predictor + else: + predictor = self.cpu_predictor all_data = list() for yield_data in reader(images, paths): @@ -157,10 +198,16 @@ class ResNet50vdDishes(hub.Module): pass # feed batch image batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - predictor_output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run( - [batch_image]) - out = postprocess(data_out=predictor_output[0].as_ndarray(), label_list=self.label_list, top_k=top_k) + + input_names = predictor.get_input_names() + input_tensor = predictor.get_input_handle(input_names[0]) + input_tensor.reshape(batch_image.shape) + input_tensor.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + predictor_output = output_handle.copy_to_cpu() + out = postprocess(data_out=predictor_output, label_list=self.label_list, top_k=top_k) res += out return res @@ -208,7 +255,12 @@ class ResNet50vdDishes(hub.Module): self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.classification(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + results = self.classification( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + top_k=args.top_k, + use_device=args.use_device) return results def add_module_config_arg(self): @@ -219,6 +271,10 @@ class ResNet50vdDishes(hub.Module): '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not.") self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + self.arg_config_group.add_argument( + '--use_device', + choices=["cpu", "gpu", "xpu", "npu"], + help="use cpu, gpu, xpu or npu. 
overwrites use_gpu flag.") def add_module_input_arg(self): """ diff --git a/modules/image/classification/resnet50_vd_wildanimals/module.py b/modules/image/classification/resnet50_vd_wildanimals/module.py index e3ab6e73b35da2c8ca6d955fd8a864a284018434..af408ac4fa78cda493bdb51163e1fa187d1135e7 100644 --- a/modules/image/classification/resnet50_vd_wildanimals/module.py +++ b/modules/image/classification/resnet50_vd_wildanimals/module.py @@ -9,7 +9,10 @@ import os import numpy as np import paddle.fluid as fluid import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor + +from paddle.inference import Config +from paddle.inference import create_predictor + from paddlehub.module.module import moduleinfo, runnable, serving from paddlehub.common.paddle_helper import add_vars_prefix @@ -48,26 +51,53 @@ class ResNet50vdWildAnimals(hub.Module): im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) return im_std + def _get_device_id(self, places): + try: + places = os.environ[places] + id = int(places) + except: + id = -1 + return id + def _set_config(self): """ predictor config setting. """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + + # create default cpu predictor + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + # create predictors using various types of devices + + # npu + npu_id = self._get_device_id("FLAGS_selected_npus") + if npu_id != -1: + # use npu + npu_config = Config(self.default_pretrained_model_path) + npu_config.disable_glog_info() + npu_config.enable_npu(device_id=npu_id) + self.npu_predictor = create_predictor(npu_config) + + # gpu + gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES") + if gpu_id != -1: + # use gpu + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=gpu_id) + self.gpu_predictor = create_predictor(gpu_config) + + # xpu + xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES") + if xpu_id != -1: + # use xpu + xpu_config = Config(self.default_pretrained_model_path) + xpu_config.disable_glog_info() + xpu_config.enable_xpu(100) + self.xpu_predictor = create_predictor(xpu_config) def context(self, trainable=True, pretrained=True): """context for transfer learning. @@ -117,7 +147,7 @@ class ResNet50vdWildAnimals(hub.Module): param.trainable = trainable return inputs, outputs, context_prog - def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1, use_device=None): """ API for image classification. @@ -127,18 +157,29 @@ class ResNet50vdWildAnimals(hub.Module): batch_size (int): batch size. use_gpu (bool): Whether to use gpu. top_k (int): Return top k results. + use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag. Returns: res (list[dict]): The classfication results. 
""" - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) + # real predictor to use + if use_device is not None: + if use_device == "cpu": + predictor = self.cpu_predictor + elif use_device == "xpu": + predictor = self.xpu_predictor + elif use_device == "npu": + predictor = self.npu_predictor + elif use_device == "gpu": + predictor = self.gpu_predictor + else: + raise Exception("Unsupported device: " + use_device) + else: + # use_device is not set, therefore follow use_gpu + if use_gpu: + predictor = self.gpu_predictor + else: + predictor = self.cpu_predictor all_data = list() for yield_data in reader(images, paths): @@ -158,10 +199,16 @@ class ResNet50vdWildAnimals(hub.Module): pass # feed batch image batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - predictor_output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run( - [batch_image]) - out = postprocess(data_out=predictor_output[0].as_ndarray(), label_list=self.label_list, top_k=top_k) + + input_names = predictor.get_input_names() + input_tensor = predictor.get_input_handle(input_names[0]) + input_tensor.reshape(batch_image.shape) + input_tensor.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + predictor_output = output_handle.copy_to_cpu() + out = postprocess(data_out=predictor_output, label_list=self.label_list, top_k=top_k) res += out return res @@ -209,7 +256,12 @@ class ResNet50vdWildAnimals(hub.Module): self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.classification(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + results = self.classification( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + top_k=args.top_k, + use_device=args.use_device) return results def add_module_config_arg(self): @@ -220,6 +272,10 @@ class ResNet50vdWildAnimals(hub.Module): '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not.") self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + self.arg_config_group.add_argument( + '--use_device', + choices=["cpu", "gpu", "xpu", "npu"], + help="use cpu, gpu, xpu or npu. 
overwrites use_gpu flag.") def add_module_input_arg(self): """ diff --git a/modules/text/sentiment_analysis/emotion_detection_textcnn/module.py b/modules/text/sentiment_analysis/emotion_detection_textcnn/module.py index bfe7d54f85963f6b054e3c89e564a8fdee511969..c125f39c72b9099e7ef2bafde117c0b4ee3463af 100644 --- a/modules/text/sentiment_analysis/emotion_detection_textcnn/module.py +++ b/modules/text/sentiment_analysis/emotion_detection_textcnn/module.py @@ -151,7 +151,7 @@ class EmotionDetectionTextCNN(hub.NLPPredictionModule): return inputs, outputs, main_program @serving - def emotion_classify(self, texts=[], data={}, use_gpu=False, batch_size=1): + def emotion_classify(self, texts=[], data={}, use_gpu=False, batch_size=1, use_device=None): """ Get the emotion prediction results results with the texts as input Args: @@ -161,15 +161,26 @@ class EmotionDetectionTextCNN(hub.NLPPredictionModule): batch_size(int): the program deals once with one batch Returns: results(list): the emotion prediction results + use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag. """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) + # real predictor to use + if use_device is not None: + if use_device == "cpu": + predictor = self.cpu_predictor + elif use_device == "xpu": + predictor = self.xpu_predictor + elif use_device == "npu": + predictor = self.npu_predictor + elif use_device == "gpu": + predictor = self.gpu_predictor + else: + raise Exception("Unsupported device: " + use_device) + else: + # use_device is not set, therefore follow use_gpu + if use_gpu: + predictor = self.gpu_predictor + else: + predictor = self.cpu_predictor if texts != [] and isinstance(texts, list) and data == {}: predicted_data = texts @@ -189,14 +200,10 @@ class EmotionDetectionTextCNN(hub.NLPPredictionModule): else: batch_data = predicted_data[start_idx:] start_idx = start_idx + batch_size - processed_results = preprocess(self.word_seg_module, batch_data, self.vocab, use_gpu, batch_size) - tensor_words = self.texts2tensor(processed_results) - - if use_gpu: - batch_out = self.gpu_predictor.run([tensor_words]) - else: - batch_out = self.cpu_predictor.run([tensor_words]) - batch_result = postprocess(batch_out[0], processed_results) + processed_results = preprocess(self.word_seg_module, batch_data, self.vocab, use_gpu, batch_size, + use_device) + predictor_output = self._internal_predict(predictor, processed_results) + batch_result = postprocess(predictor_output, processed_results) results += batch_result return results diff --git a/modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py b/modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py index 10bd655af5cac809b51b2e10ad8325f9272a90e5..9c0f777fe636677d5d8636018207794dbcb22371 100644 --- a/modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py +++ b/modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py @@ -34,10 +34,10 @@ def get_predict_label(probs): return label, key -def preprocess(lac, predicted_data, word_dict, use_gpu=False, batch_size=1): +def preprocess(lac, predicted_data, word_dict, use_gpu=False, batch_size=1, use_device=None): result = [] data_dict = {"text": predicted_data} - processed = lac.lexical_analysis(data=data_dict, use_gpu=use_gpu, batch_size=batch_size) + processed = 
diff --git a/modules/text/sentiment_analysis/senta_bilstm/module.py b/modules/text/sentiment_analysis/senta_bilstm/module.py
index 0ee5ca73e0e2e2134802096c856870a276f0a6e3..42d80f18a8367f36009759d8281cd52ab6325b3e 100644
--- a/modules/text/sentiment_analysis/senta_bilstm/module.py
+++ b/modules/text/sentiment_analysis/senta_bilstm/module.py
@@ -153,7 +153,7 @@ class SentaBiLSTM(hub.NLPPredictionModule):
         return inputs, outputs, main_program
 
     @serving
-    def sentiment_classify(self, texts=[], data={}, use_gpu=False, batch_size=1):
+    def sentiment_classify(self, texts=[], data={}, use_gpu=False, batch_size=1, use_device=None):
         """
         Get the sentiment prediction results results with the texts as input
 
@@ -162,18 +162,29 @@ class SentaBiLSTM(hub.NLPPredictionModule):
             data(dict): key must be 'text', value is the texts to be predicted, if data not texts
             use_gpu(bool): whether use gpu to predict or not
             batch_size(int): the program deals once with one batch
+            use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag.
 
         Returns:
             results(list): the word segmentation results
         """
-        if use_gpu:
-            try:
-                _places = os.environ["CUDA_VISIBLE_DEVICES"]
-                int(_places[0])
-            except:
-                raise RuntimeError(
-                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
-                )
+        # real predictor to use
+        if use_device is not None:
+            if use_device == "cpu":
+                predictor = self.cpu_predictor
+            elif use_device == "xpu":
+                predictor = self.xpu_predictor
+            elif use_device == "npu":
+                predictor = self.npu_predictor
+            elif use_device == "gpu":
+                predictor = self.gpu_predictor
+            else:
+                raise Exception("Unsupported device: " + use_device)
+        else:
+            # use_device is not set, therefore follow use_gpu
+            if use_gpu:
+                predictor = self.gpu_predictor
+            else:
+                predictor = self.cpu_predictor
 
         if texts != [] and isinstance(texts, list) and data == {}:
             predicted_data = texts
@@ -193,14 +204,10 @@ class SentaBiLSTM(hub.NLPPredictionModule):
                 batch_data = predicted_data[start_idx:]
                 start_idx = start_idx + batch_size
 
-            processed_results = preprocess(self.word_seg_module, batch_data, self.word_dict, use_gpu, batch_size)
-            tensor_words = self.texts2tensor(processed_results)
-
-            if use_gpu:
-                batch_out = self.gpu_predictor.run([tensor_words])
-            else:
-                batch_out = self.cpu_predictor.run([tensor_words])
-            batch_result = postprocess(batch_out[0], processed_results)
+            processed_results = preprocess(self.word_seg_module, batch_data, self.word_dict, use_gpu, batch_size,
+                                           use_device)
+            predictor_output = self._internal_predict(predictor, processed_results)
+            batch_result = postprocess(predictor_output, processed_results)
             results += batch_result
         return results
 
diff --git a/modules/text/sentiment_analysis/senta_bilstm/processor.py b/modules/text/sentiment_analysis/senta_bilstm/processor.py
index 39190cf3a7c02a5e7974f32329a584f40db81832..f181d0e08b14b7b63eb60f4d9ef01aa9001c0b99 100644
--- a/modules/text/sentiment_analysis/senta_bilstm/processor.py
+++ b/modules/text/sentiment_analysis/senta_bilstm/processor.py
@@ -17,14 +17,14 @@ def load_vocab(file_path):
     return vocab
 
 
-def preprocess(lac, texts, word_dict, use_gpu=False, batch_size=1):
+def preprocess(lac, texts, word_dict, use_gpu=False, batch_size=1, use_device=None):
     """
     firstly, the predicted texts are segmented by lac module
     then, the word segmention results input into senta
     """
     result = []
     input_dict = {'text': texts}
-    processed = lac.lexical_analysis(data=input_dict, use_gpu=use_gpu, batch_size=batch_size)
+    processed = lac.lexical_analysis(data=input_dict, use_gpu=use_gpu, batch_size=batch_size, use_device=use_device)
     unk_id = word_dict["<unk>"]
     for index, data in enumerate(processed):
         result_i = {'processed': []}
@@ -43,7 +43,7 @@ def postprocess(predict_out, texts):
     """
     Convert model's output tensor to sentiment label
     """
-    predict_out = predict_out.as_ndarray()
+    predict_out = predict_out.copy_to_cpu()
     batch_size = len(texts)
     result = []
     for index in range(batch_size):
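After this change postprocess() receives the predictor's output handle returned by _internal_predict, not a PaddleTensor, so it calls copy_to_cpu() instead of as_ndarray(). A sketch of the new contract using a stand-in handle (FakeOutputHandle is invented for illustration):

    import numpy as np

    class FakeOutputHandle:
        # stands in for the paddle.inference output handle given to postprocess
        def __init__(self, array):
            self._array = array

        def copy_to_cpu(self):
            return self._array

    handle = FakeOutputHandle(np.array([[0.2, 0.8]], dtype="float32"))
    probs = handle.copy_to_cpu()   # what the new postprocess does first
    print(probs.argmax(axis=1))    # -> [1], index of the predicted label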
diff --git a/modules/text/text_review/porn_detection_lstm/module.py b/modules/text/text_review/porn_detection_lstm/module.py
index e1b7778a5529a91b0531589b954241e92fc2f041..d6c2e2af47ae456a13b108b0cf9222cd66c19544 100644
--- a/modules/text/text_review/porn_detection_lstm/module.py
+++ b/modules/text/text_review/porn_detection_lstm/module.py
@@ -78,7 +78,7 @@ class PornDetectionLSTM(hub.NLPPredictionModule):
         return inputs, outputs, program
 
     @serving
-    def detection(self, texts=[], data={}, use_gpu=False, batch_size=1):
+    def detection(self, texts=[], data={}, use_gpu=False, batch_size=1, use_device=None):
         """
         Get the porn prediction results results with the texts as input
 
@@ -87,15 +87,29 @@ class PornDetectionLSTM(hub.NLPPredictionModule):
             texts(list): the input texts to be predicted, if texts not data
             data(dict): key must be 'text', value is the texts to be predicted, if data not texts
             use_gpu(bool): whether use gpu to predict or not
             batch_size(int): the program deals once with one batch
+            use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag.
         Returns:
             results(list): the porn prediction results
         """
-        try:
-            _places = os.environ["CUDA_VISIBLE_DEVICES"]
-            int(_places[0])
-        except:
-            use_gpu = False
+        # real predictor to use
+        if use_device is not None:
+            if use_device == "cpu":
+                predictor = self.cpu_predictor
+            elif use_device == "xpu":
+                predictor = self.xpu_predictor
+            elif use_device == "npu":
+                predictor = self.npu_predictor
+            elif use_device == "gpu":
+                predictor = self.gpu_predictor
+            else:
+                raise Exception("Unsupported device: " + use_device)
+        else:
+            # use_device is not set, therefore follow use_gpu
+            if use_gpu:
+                predictor = self.gpu_predictor
+            else:
+                predictor = self.cpu_predictor
 
         if texts != [] and isinstance(texts, list) and data == {}:
             predicted_data = texts
@@ -116,13 +130,8 @@ class PornDetectionLSTM(hub.NLPPredictionModule):
                 start_idx = start_idx + batch_size
 
             processed_results = preprocess(batch_data, self.tokenizer, self.vocab, self.sequence_max_len)
-            tensor_words = self.texts2tensor(processed_results)
-
-            if use_gpu:
-                batch_out = self.gpu_predictor.run([tensor_words])
-            else:
-                batch_out = self.cpu_predictor.run([tensor_words])
-            batch_result = postprocess(batch_out[0], processed_results)
+            predictor_output = self._internal_predict(predictor, processed_results)
+            batch_result = postprocess(predictor_output, processed_results)
             results += batch_result
         return results
 
diff --git a/modules/text/text_review/porn_detection_lstm/processor.py b/modules/text/text_review/porn_detection_lstm/processor.py
index 1f6c8b565f53708f27735e51d6631015095f2cf6..c691dd6b38f4760d74ae9ad6b32d962f2589fd1c 100644
--- a/modules/text/text_review/porn_detection_lstm/processor.py
+++ b/modules/text/text_review/porn_detection_lstm/processor.py
@@ -52,7 +52,7 @@ def postprocess(predict_out, texts):
     Convert model's output tensor to pornography label
     """
     result = []
-    predict_out = predict_out.as_ndarray()
+    predict_out = predict_out.copy_to_cpu()
     for index in range(len(texts)):
         result_i = {}
         result_i['text'] = texts[index]['origin']
diff --git a/paddlehub/commands/run.py b/paddlehub/commands/run.py
index d806ce167f38dd9b5e21eb512be4b5ceed81060b..5a49ff117b5d9f248d8dc8a0dc48cfbd10ef4db4 100644
--- a/paddlehub/commands/run.py
+++ b/paddlehub/commands/run.py
@@ -68,6 +68,10 @@ class RunCommand:
         arg_config_group.add_argument(
             '--use_gpu', type=ast.literal_eval, default=False, help='whether use GPU for prediction')
         arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size for prediction')
+        arg_config_group.add_argument(
+            '--use_device',
+            choices=["cpu", "gpu", "xpu", "npu"],
+            help="use cpu, gpu, xpu or npu. overwrites use_gpu flag.")
 
         module_type = module.type.lower()
         if module_type.startswith('cv'):
@@ -83,4 +87,8 @@ class RunCommand:
         input_data = {key: [args.input_path] if module_type.startswith('cv') else [args.input_text]}
 
         return module(
-            sign_name=module.default_signature, data=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size)
+            sign_name=module.default_signature,
+            data=input_data,
+            use_gpu=args.use_gpu,
+            batch_size=args.batch_size,
+            use_device=args.use_device)
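The flag also surfaces in the CLI, e.g. (an assumed invocation) hub run mobilenet_v2_animals --input_path cat.jpg --use_device npu. A standalone argparse sketch of the same pattern:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--use_device',
        choices=["cpu", "gpu", "xpu", "npu"],
        help="use cpu, gpu, xpu or npu. overwrites use_gpu flag.")

    args = parser.parse_args(["--use_device", "xpu"])
    assert args.use_device == "xpu"
    # when the flag is omitted, args.use_device is None and use_gpu decides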
overwrites use_gpu flag.") module_type = module.type.lower() if module_type.startswith('cv'): @@ -83,4 +87,8 @@ class RunCommand: input_data = {key: [args.input_path] if module_type.startswith('cv') else [args.input_text]} return module( - sign_name=module.default_signature, data=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) + sign_name=module.default_signature, + data=input_data, + use_gpu=args.use_gpu, + batch_size=args.batch_size, + use_device=args.use_device) diff --git a/paddlehub/compat/module/module_v1.py b/paddlehub/compat/module/module_v1.py index 2e9b72b92c307d2132036686905cff2e53443d18..99e2c12b0d5592dc605311a6a38a963611b54607 100644 --- a/paddlehub/compat/module/module_v1.py +++ b/paddlehub/compat/module/module_v1.py @@ -167,7 +167,13 @@ class ModuleV1(object): program.global_block().var(feed_dict[tensor_name].name).desc.set_shape(seq_tensor_shape) @paddle_utils.run_in_static_mode - def __call__(self, sign_name: str, data: dict, use_gpu: bool = False, batch_size: int = 1, **kwargs): + def __call__(self, + sign_name: str, + data: dict, + use_gpu: bool = False, + batch_size: int = 1, + use_device: str = None, + **kwargs): '''Call the specified signature function for prediction.''' def _get_reader_and_feeder(data_format, data, place): @@ -188,7 +194,18 @@ class ModuleV1(object): with paddle.static.program_guard(program): result = [] index = 0 - place = paddle.CUDAPlace(0) if use_gpu else paddle.CPUPlace() + + if use_device is not None: + if use_device == "xpu": + place = paddle.XPUPlace(0) + elif use_device == "npu": + place = paddle.NPUPlace(0) + elif use_device == "gpu": + place = paddle.CUDAPlace(0) + else: + place = paddle.CPUPlace() + else: + place = paddle.CUDAPlace(0) if use_gpu else paddle.CPUPlace() exe = paddle.static.Executor(place=place) data = self.processor.preprocess(sign_name=sign_name, data_dict=data) diff --git a/paddlehub/compat/module/nlp_module.py b/paddlehub/compat/module/nlp_module.py index d7209774fd94ff2e6592b6aba9f9a18d81750933..80c61efa0fdde97822e46b061de12e7627220e96 100644 --- a/paddlehub/compat/module/nlp_module.py +++ b/paddlehub/compat/module/nlp_module.py @@ -31,6 +31,9 @@ from paddlehub.module.module import runnable, RunModule from paddlehub.utils.parser import txt_parser from paddlehub.utils.utils import sys_stdin_encoding +from paddle.inference import Config +from paddle.inference import create_predictor + class DataFormatError(Exception): def __init__(self, *args): @@ -48,24 +51,53 @@ class NLPBaseModule(RunModule): class NLPPredictionModule(NLPBaseModule): + def _get_device_id(self, places): + try: + places = os.environ[places] + id = int(places) + except: + id = -1 + return id + def _set_config(self): - '''predictor config setting''' - cpu_config = paddle.fluid.core.AnalysisConfig(self.pretrained_model_path) + """ + predictor config setting + """ + + # create default cpu predictor + cpu_config = Config(self.pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = paddle.fluid.core.create_paddle_predictor(cpu_config) - - try: - _places = os.environ['CUDA_VISIBLE_DEVICES'] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = paddle.fluid.core.AnalysisConfig(self.pretrained_model_path) + self.cpu_predictor = create_predictor(cpu_config) + + # create predictors using various types of devices + + # npu + npu_id = self._get_device_id("FLAGS_selected_npus") + if npu_id != -1: + # use npu + npu_config = Config(self.pretrained_model_path) + 
diff --git a/paddlehub/compat/module/nlp_module.py b/paddlehub/compat/module/nlp_module.py
index d7209774fd94ff2e6592b6aba9f9a18d81750933..80c61efa0fdde97822e46b061de12e7627220e96 100644
--- a/paddlehub/compat/module/nlp_module.py
+++ b/paddlehub/compat/module/nlp_module.py
@@ -31,6 +31,9 @@ from paddlehub.module.module import runnable, RunModule
 from paddlehub.utils.parser import txt_parser
 from paddlehub.utils.utils import sys_stdin_encoding
 
+from paddle.inference import Config
+from paddle.inference import create_predictor
+
 
 class DataFormatError(Exception):
     def __init__(self, *args):
@@ -48,24 +51,53 @@ class NLPBaseModule(RunModule):
 
 
 class NLPPredictionModule(NLPBaseModule):
+    def _get_device_id(self, places):
+        try:
+            places = os.environ[places]
+            id = int(places)
+        except:
+            id = -1
+        return id
+
     def _set_config(self):
-        '''predictor config setting'''
-        cpu_config = paddle.fluid.core.AnalysisConfig(self.pretrained_model_path)
+        """
+        predictor config setting
+        """
+
+        # create default cpu predictor
+        cpu_config = Config(self.pretrained_model_path)
         cpu_config.disable_glog_info()
         cpu_config.disable_gpu()
-        self.cpu_predictor = paddle.fluid.core.create_paddle_predictor(cpu_config)
-
-        try:
-            _places = os.environ['CUDA_VISIBLE_DEVICES']
-            int(_places[0])
-            use_gpu = True
-        except:
-            use_gpu = False
-        if use_gpu:
-            gpu_config = paddle.fluid.core.AnalysisConfig(self.pretrained_model_path)
+        self.cpu_predictor = create_predictor(cpu_config)
+
+        # create predictors using various types of devices
+
+        # npu
+        npu_id = self._get_device_id("FLAGS_selected_npus")
+        if npu_id != -1:
+            # use npu
+            npu_config = Config(self.pretrained_model_path)
+            npu_config.disable_glog_info()
+            npu_config.enable_npu(device_id=npu_id)
+            self.npu_predictor = create_predictor(npu_config)
+
+        # gpu
+        gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES")
+        if gpu_id != -1:
+            # use gpu
+            gpu_config = Config(self.pretrained_model_path)
             gpu_config.disable_glog_info()
-            gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
-            self.gpu_predictor = paddle.fluid.core.create_paddle_predictor(gpu_config)
+            gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=gpu_id)
+            self.gpu_predictor = create_predictor(gpu_config)
+
+        # xpu
+        xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES")
+        if xpu_id != -1:
+            # use xpu
+            xpu_config = Config(self.pretrained_model_path)
+            xpu_config.disable_glog_info()
+            xpu_config.enable_xpu(100)
+            self.xpu_predictor = create_predictor(xpu_config)
 
     def texts2tensor(self, texts: List[dict]) -> paddle.Tensor:
         '''
@@ -87,6 +119,29 @@ class NLPPredictionModule(NLPBaseModule):
             tensor.shape = [lod[-1], 1]
         return tensor
 
+    def _internal_predict(self, predictor, texts):
+        lod = [0]
+        data = []
+        for i, text in enumerate(texts):
+            data += text['processed']
+            lod.append(len(text['processed']) + lod[i])
+
+        # get predictor tensor
+        input_names = predictor.get_input_names()
+        input_tensor = predictor.get_input_handle(input_names[0])
+
+        # set data, shape and lod
+        input_tensor.copy_from_cpu(np.array(data).astype('int64'))
+        input_tensor.reshape([lod[-1], 1])
+        input_tensor.set_lod([lod])
+
+        # real predict
+        predictor.run()
+        output_names = predictor.get_output_names()
+        output_handle = predictor.get_output_handle(output_names[0])
+
+        return output_handle
+
     def to_unicode(self, texts: str) -> Text:
         '''
         Convert each element's type(str) of texts(list) to unicode in python2.7
@@ -129,7 +184,8 @@ class NLPPredictionModule(NLPBaseModule):
             self.parser.print_help()
             return None
 
-        results = self.predict(texts=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size)
+        results = self.predict(
+            texts=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size, use_device=args.use_device)
 
         return results
 
@@ -139,6 +195,10 @@ class NLPPredictionModule(NLPBaseModule):
         self.arg_config_group.add_argument(
             '--use_gpu', type=ast.literal_eval, default=False, help='whether use GPU for prediction')
         self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size for prediction')
+        self.arg_config_group.add_argument(
+            '--use_device',
+            choices=["cpu", "gpu", "xpu", "npu"],
+            help="use cpu, gpu, xpu or npu. overwrites use_gpu flag.")
 
     def add_module_input_arg(self):
         '''Add the command input options'''
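The new _internal_predict flattens a batch of variable-length texts into one LoD tensor. A worked sketch of how the lod offsets are built (the token ids are made up):

    # texts as produced by preprocess(): each entry carries its token ids
    texts = [
        {'processed': [3, 8, 2]},   # 3 tokens
        {'processed': [5, 9]},      # 2 tokens
    ]

    lod = [0]
    data = []
    for i, text in enumerate(texts):
        data += text['processed']
        lod.append(len(text['processed']) + lod[i])

    assert data == [3, 8, 2, 5, 9]
    assert lod == [0, 3, 5]
    # the input handle is then reshaped to [lod[-1], 1] == [5, 1] and given
    # set_lod([lod]) so the model can split the flat batch back into texts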