std::vector<uint8_t> label_map(result.label_map.data.begin(),
                               result.label_map.data.end());
cv::Mat mask(result.label_map.shape[0],
             result.label_map.shape[1], CV_8UC1, label_map.data());
diff --git a/deploy/lite/android/demo/.gitignore b/deploy/lite/android/demo/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..2b75303ac58f551de0a327638a60b909c6d33ece
--- /dev/null
+++ b/deploy/lite/android/demo/.gitignore
@@ -0,0 +1,13 @@
+*.iml
+.gradle
+/local.properties
+/.idea/caches
+/.idea/libraries
+/.idea/modules.xml
+/.idea/workspace.xml
+/.idea/navEditor.xml
+/.idea/assetWizardSettings.xml
+.DS_Store
+/build
+/captures
+.externalNativeBuild
diff --git a/deploy/lite/android/demo/app/.gitignore b/deploy/lite/android/demo/app/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..796b96d1c402326528b4ba3c12ee9d92d0e212e9
--- /dev/null
+++ b/deploy/lite/android/demo/app/.gitignore
@@ -0,0 +1 @@
+/build
diff --git a/deploy/lite/android/demo/app/build.gradle b/deploy/lite/android/demo/app/build.gradle
new file mode 100644
index 0000000000000000000000000000000000000000..f743f1d23905566772c4e572e9700df5ad779ca0
--- /dev/null
+++ b/deploy/lite/android/demo/app/build.gradle
@@ -0,0 +1,119 @@
+import java.security.MessageDigest
+
+apply plugin: 'com.android.application'
+
+android {
+ compileSdkVersion 28
+ defaultConfig {
+ applicationId "com.baidu.paddlex.lite.demo"
+ minSdkVersion 15
+ targetSdkVersion 28
+ versionCode 1
+ versionName "1.0"
+ testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner"
+ }
+ buildTypes {
+ release {
+ minifyEnabled false
+ proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
+ }
+ }
+}
+
+dependencies {
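+ // paddlex.aar is placed into libs/ by the downloadAndExtractPaddleXAndroidSdk task defined below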
+ implementation fileTree(include: ['*.aar'], dir: 'libs')
+ implementation 'com.android.support:appcompat-v7:28.0.0'
+ implementation 'com.android.support.constraint:constraint-layout:1.1.3'
+ implementation 'com.android.support:design:28.0.0'
+ testImplementation 'junit:junit:4.12'
+ androidTestImplementation 'com.android.support.test:runner:1.0.2'
+ androidTestImplementation 'com.android.support.test.espresso:espresso-core:3.0.2'
+}
+
+
+def paddlexAndroidSdk = 'https://bj.bcebos.com/paddlex/deploy/lite/paddlex_lite_11cbd50e.tar.gz'
+
+task downloadAndExtractPaddleXAndroidSdk(type: DefaultTask) {
+ doFirst {
+ println "Downloading and extracting PaddleX Android SDK"
+ }
+ doLast {
+ // Prepare cache folder for sdk
+ if (!file("cache").exists()) {
+ mkdir "cache"
+ }
+ // Generate cache name for sdk
+ MessageDigest messageDigest = MessageDigest.getInstance('MD5')
+ messageDigest.update(paddlexAndroidSdk.bytes)
+ String cacheName = new BigInteger(1, messageDigest.digest()).toString(32)
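+ // The digest of the URL keys the cache entry, so pointing paddlexAndroidSdk at a new version forces a re-download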
+ // Download sdk
+ if (!file("cache/${cacheName}.tar.gz").exists()) {
+ ant.get(src: paddlexAndroidSdk, dest: file("cache/${cacheName}.tar.gz"))
+ }
+ // Unpack sdk
+ copy {
+ from tarTree("cache/${cacheName}.tar.gz")
+ into "cache/${cacheName}"
+ }
+ // Copy sdk
+ if (!file("libs/paddlex.aar").exists()) {
+ copy {
+ from "cache/${cacheName}/paddlex.aar"
+ into "libs"
+ }
+ }
+ }
+}
+
+preBuild.dependsOn downloadAndExtractPaddleXAndroidSdk
+
+def paddleXLiteModel = 'https://bj.bcebos.com/paddlex/deploy/lite/mobilenetv2_imagenet_lite2.6.1.tar.gz'
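+// The archive bundles model.nb, model.yml and test.jpg, which the task below unpacks into src/main/assets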
+task downloadAndExtractPaddleXLiteModel(type: DefaultTask) {
+ doFirst {
+ println "Downloading and extracting PaddleX Lite model"
+ }
+
+ doLast {
+ // Prepare cache folder for model
+ if (!file("cache").exists()) {
+ mkdir "cache"
+ }
+ // Generate cache name for model
+ MessageDigest messageDigest = MessageDigest.getInstance('MD5')
+ messageDigest.update(paddleXLiteModel.bytes)
+ String cacheName = new BigInteger(1, messageDigest.digest()).toString(32)
+ // Download model
+ if (!file("cache/${cacheName}.tar.gz").exists()) {
+ ant.get(src: paddleXLiteModel, dest: file("cache/${cacheName}.tar.gz"))
+ }
+
+ // Unpack model
+ copy {
+ from tarTree("cache/${cacheName}.tar.gz")
+ into "cache/${cacheName}"
+ }
+
+ // Copy model.nb
+ if (!file("src/main/assets/model/model.nb").exists()) {
+ copy {
+ from "cache/${cacheName}/model.nb"
+ into "src/main/assets/model/"
+ }
+ }
+ // Copy config file model.yml
+ if (!file("src/main/assets/config/model.yml").exists()) {
+ copy {
+ from "cache/${cacheName}/model.yml"
+ into "src/main/assets/config/"
+ }
+ }
+ // Copy test image test.jpg
+ if (!file("src/main/assets/images/test.jpg").exists()) {
+ copy {
+ from "cache/${cacheName}/test.jpg"
+ into "src/main/assets/images/"
+ }
+ }
+ }
+
+}
+
+preBuild.dependsOn downloadAndExtractPaddleXLiteModel
diff --git a/deploy/lite/android/demo/app/proguard-rules.pro b/deploy/lite/android/demo/app/proguard-rules.pro
new file mode 100644
index 0000000000000000000000000000000000000000..f1b424510da51fd82143bc74a0a801ae5a1e2fcd
--- /dev/null
+++ b/deploy/lite/android/demo/app/proguard-rules.pro
@@ -0,0 +1,21 @@
+# Add project specific ProGuard rules here.
+# You can control the set of applied configuration files using the
+# proguardFiles setting in build.gradle.
+#
+# For more details, see
+# http://developer.android.com/guide/developing/tools/proguard.html
+
+# If your project uses WebView with JS, uncomment the following
+# and specify the fully qualified class name to the JavaScript interface
+# class:
+#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
+# public *;
+#}
+
+# Uncomment this to preserve the line number information for
+# debugging stack traces.
+#-keepattributes SourceFile,LineNumberTable
+
+# If you keep the line number information, uncomment this to
+# hide the original source file name.
+#-renamesourcefileattribute SourceFile
diff --git a/deploy/lite/android/demo/app/src/androidTest/java/com/baidu/paddlex/lite/demo/ExampleInstrumentedTest.java b/deploy/lite/android/demo/app/src/androidTest/java/com/baidu/paddlex/lite/demo/ExampleInstrumentedTest.java
new file mode 100644
index 0000000000000000000000000000000000000000..4b58dec6f5dd8bfa083ec951d659dd0690f67221
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/androidTest/java/com/baidu/paddlex/lite/demo/ExampleInstrumentedTest.java
@@ -0,0 +1,32 @@
+package com.baidu.paddlex.lite.demo;
+
+import android.content.Context;
+import android.support.test.InstrumentationRegistry;
+import android.support.test.runner.AndroidJUnit4;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Instrumented test, which will execute on an Android device.
+ *
+ * @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
+ */
+@RunWith(AndroidJUnit4.class)
+public class ExampleInstrumentedTest {
+ @Test
+ public void useAppContext() throws IOException {
+ // Context of the app under test.
+ Context appContext = InstrumentationRegistry.getTargetContext();
+ assertEquals("com.baidu.paddlex.lite.demo", appContext.getPackageName());
+ }
+}
diff --git a/deploy/lite/android/demo/app/src/main/AndroidManifest.xml b/deploy/lite/android/demo/app/src/main/AndroidManifest.xml
new file mode 100644
index 0000000000000000000000000000000000000000..940c9692fcf6fdfe6b07e8f4641fe7e9a9e5ff5f
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/main/AndroidManifest.xml
@@ -0,0 +1,28 @@
+<!-- AndroidManifest.xml (28 lines) not preserved in this extract; it declares package
+     com.baidu.paddlex.lite.demo, the CAMERA and WRITE_EXTERNAL_STORAGE permissions used
+     by MainActivity, and the MainActivity (launcher) and SettingsActivity entries. -->
\ No newline at end of file
diff --git a/deploy/lite/android/demo/app/src/main/java/com/baidu/paddlex/lite/demo/AppCompatPreferenceActivity.java b/deploy/lite/android/demo/app/src/main/java/com/baidu/paddlex/lite/demo/AppCompatPreferenceActivity.java
new file mode 100644
index 0000000000000000000000000000000000000000..c6f4eff8e736278c71ef2c34783dd3e1b3659495
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/main/java/com/baidu/paddlex/lite/demo/AppCompatPreferenceActivity.java
@@ -0,0 +1,126 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.baidu.paddlex.lite.demo;
+
+import android.content.res.Configuration;
+import android.os.Bundle;
+import android.preference.PreferenceActivity;
+import android.support.annotation.LayoutRes;
+import android.support.annotation.Nullable;
+import android.support.v7.app.ActionBar;
+import android.support.v7.app.AppCompatDelegate;
+import android.support.v7.widget.Toolbar;
+import android.view.MenuInflater;
+import android.view.View;
+import android.view.ViewGroup;
+
+/**
+ * A {@link android.preference.PreferenceActivity} which implements and proxies the necessary calls
+ * to be used with AppCompat.
+ *
+ * This technique can be used with an {@link android.app.Activity} class, not just
+ * {@link android.preference.PreferenceActivity}.
+ */
+
+public abstract class AppCompatPreferenceActivity extends PreferenceActivity {
+ private AppCompatDelegate mDelegate;
+
+ @Override
+ protected void onCreate(Bundle savedInstanceState) {
+ getDelegate().installViewFactory();
+ getDelegate().onCreate(savedInstanceState);
+ super.onCreate(savedInstanceState);
+ }
+
+ @Override
+ protected void onPostCreate(Bundle savedInstanceState) {
+ super.onPostCreate(savedInstanceState);
+ getDelegate().onPostCreate(savedInstanceState);
+ }
+
+ public ActionBar getSupportActionBar() {
+ return getDelegate().getSupportActionBar();
+ }
+
+ public void setSupportActionBar(@Nullable Toolbar toolbar) {
+ getDelegate().setSupportActionBar(toolbar);
+ }
+
+ @Override
+ public MenuInflater getMenuInflater() {
+ return getDelegate().getMenuInflater();
+ }
+
+ @Override
+ public void setContentView(@LayoutRes int layoutResID) {
+ getDelegate().setContentView(layoutResID);
+ }
+
+ @Override
+ public void setContentView(View view) {
+ getDelegate().setContentView(view);
+ }
+
+ @Override
+ public void setContentView(View view, ViewGroup.LayoutParams params) {
+ getDelegate().setContentView(view, params);
+ }
+
+ @Override
+ public void addContentView(View view, ViewGroup.LayoutParams params) {
+ getDelegate().addContentView(view, params);
+ }
+
+ @Override
+ protected void onPostResume() {
+ super.onPostResume();
+ getDelegate().onPostResume();
+ }
+
+ @Override
+ protected void onTitleChanged(CharSequence title, int color) {
+ super.onTitleChanged(title, color);
+ getDelegate().setTitle(title);
+ }
+
+ @Override
+ public void onConfigurationChanged(Configuration newConfig) {
+ super.onConfigurationChanged(newConfig);
+ getDelegate().onConfigurationChanged(newConfig);
+ }
+
+ @Override
+ protected void onStop() {
+ super.onStop();
+ getDelegate().onStop();
+ }
+
+ @Override
+ protected void onDestroy() {
+ super.onDestroy();
+ getDelegate().onDestroy();
+ }
+
+ public void invalidateOptionsMenu() {
+ getDelegate().invalidateOptionsMenu();
+ }
+
+ private AppCompatDelegate getDelegate() {
+ if (mDelegate == null) {
+ mDelegate = AppCompatDelegate.create(this, null);
+ }
+ return mDelegate;
+ }
+}
diff --git a/deploy/lite/android/demo/app/src/main/java/com/baidu/paddlex/lite/demo/MainActivity.java b/deploy/lite/android/demo/app/src/main/java/com/baidu/paddlex/lite/demo/MainActivity.java
new file mode 100644
index 0000000000000000000000000000000000000000..62e47214fc80a40fbfa173967f61e490eab92e47
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/main/java/com/baidu/paddlex/lite/demo/MainActivity.java
@@ -0,0 +1,466 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.baidu.paddlex.lite.demo;
+
+import android.Manifest;
+import android.app.ProgressDialog;
+import android.content.ContentResolver;
+import android.content.Intent;
+import android.content.SharedPreferences;
+import android.content.pm.PackageManager;
+import android.database.Cursor;
+import android.graphics.Bitmap;
+import android.graphics.BitmapFactory;
+import android.net.Uri;
+import android.os.Bundle;
+import android.os.Handler;
+import android.os.HandlerThread;
+import android.os.Message;
+import android.preference.PreferenceManager;
+import android.provider.MediaStore;
+import android.support.annotation.NonNull;
+import android.support.v4.app.ActivityCompat;
+import android.support.v4.content.ContextCompat;
+import android.support.v7.app.AppCompatActivity;
+import android.text.method.ScrollingMovementMethod;
+import android.util.Log;
+import android.view.Menu;
+import android.view.MenuInflater;
+import android.view.MenuItem;
+import android.view.View;
+import android.widget.Button;
+import android.widget.ImageView;
+import android.widget.TextView;
+import android.widget.Toast;
+import com.baidu.paddlex.Predictor;
+import com.baidu.paddlex.Utils;
+import com.baidu.paddlex.config.ConfigParser;
+import com.baidu.paddlex.postprocess.ClsResult;
+import com.baidu.paddlex.postprocess.DetResult;
+import com.baidu.paddlex.postprocess.SegResult;
+import com.baidu.paddlex.visual.Visualize;
+import org.opencv.core.Mat;
+import org.opencv.imgcodecs.Imgcodecs;
+import org.opencv.imgproc.Imgproc;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+public class MainActivity extends AppCompatActivity {
+ public static final int OPEN_GALLERY_REQUEST_CODE = 0;
+ public static final int TAKE_PHOTO_REQUEST_CODE = 1;
+ public static final int REQUEST_LOAD_MODEL = 0;
+ public static final int REQUEST_RUN_MODEL = 1;
+ public static final int RESPONSE_LOAD_MODEL_SUCCESSED = 0;
+ public static final int RESPONSE_LOAD_MODEL_FAILED = 1;
+ public static final int RESPONSE_RUN_MODEL_SUCCESSED = 2;
+ public static final int RESPONSE_RUN_MODEL_FAILED = 3;
+ private static final String TAG = MainActivity.class.getSimpleName();
+ protected ProgressDialog pbLoadModel = null;
+ protected ProgressDialog pbRunModel = null;
+
+ protected Handler receiver = null; // receive messages from worker thread
+ protected Handler sender = null; // send command to worker thread
+ protected HandlerThread worker = null; // worker thread to load&run model
+
+ protected TextView tvInputSetting;
+ protected ImageView ivInputImage;
+ protected TextView tvOutputResult;
+ protected TextView tvInferenceTime;
+ private Button predictButton;
+ protected String testImagePathFromAsset;
+ protected String testYamlPathFromAsset;
+ protected String testModelPathFromAsset;
+
+ // Predictor
+ protected Predictor predictor = new Predictor();
+ // model config
+ protected ConfigParser configParser = new ConfigParser();
+ // Visualize
+ protected Visualize visualize = new Visualize();
+ // Input Mat (OpenCV) used for prediction
+ protected Mat predictMat;
+
+ @Override
+ protected void onCreate(Bundle savedInstanceState) {
+ super.onCreate(savedInstanceState);
+ setContentView(R.layout.activity_main);
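+ // receiver is created on the UI thread, so results posted from the worker can safely update dialogs and views here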
+ receiver = new Handler() {
+ @Override
+ public void handleMessage(Message msg) {
+ switch (msg.what) {
+ case RESPONSE_LOAD_MODEL_SUCCESSED:
+ pbLoadModel.dismiss();
+ Toast.makeText(MainActivity.this, "Load model successfully!", Toast.LENGTH_SHORT).show();
+ break;
+ case RESPONSE_LOAD_MODEL_FAILED:
+ pbLoadModel.dismiss();
+ Toast.makeText(MainActivity.this, "Load model failed!", Toast.LENGTH_SHORT).show();
+ break;
+ case RESPONSE_RUN_MODEL_SUCCESSED:
+ pbRunModel.dismiss();
+ onRunModelSuccessed();
+ break;
+ case RESPONSE_RUN_MODEL_FAILED:
+ pbRunModel.dismiss();
+ Toast.makeText(MainActivity.this, "Run model failed!", Toast.LENGTH_SHORT).show();
+ onRunModelFailed();
+ break;
+ default:
+ break;
+ }
+ }
+ };
+ worker = new HandlerThread("Predictor Worker");
+ worker.start();
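+ // sender runs on the worker thread's looper, keeping model loading and inference off the UI thread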
+ sender = new Handler(worker.getLooper()) {
+ public void handleMessage(Message msg) {
+ switch (msg.what) {
+ case REQUEST_LOAD_MODEL:
+ // load model
+ if (onLoadModel()) {
+ receiver.sendEmptyMessage(RESPONSE_LOAD_MODEL_SUCCESSED);
+ } else {
+ receiver.sendEmptyMessage(RESPONSE_LOAD_MODEL_FAILED);
+ }
+ break;
+ case REQUEST_RUN_MODEL:
+ // run model if model is loaded
+ if (onRunModel()) {
+ receiver.sendEmptyMessage(RESPONSE_RUN_MODEL_SUCCESSED);
+ } else {
+ receiver.sendEmptyMessage(RESPONSE_RUN_MODEL_FAILED);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ };
+
+ tvInputSetting = findViewById(R.id.tv_input_setting);
+ ivInputImage = findViewById(R.id.iv_input_image);
+ predictButton = findViewById(R.id.iv_predict_button);
+ tvInferenceTime = findViewById(R.id.tv_inference_time);
+ tvOutputResult = findViewById(R.id.tv_output_result);
+ tvInputSetting.setMovementMethod(ScrollingMovementMethod.getInstance());
+ tvOutputResult.setMovementMethod(ScrollingMovementMethod.getInstance());
+ SharedPreferences sharedPreferences = PreferenceManager.getDefaultSharedPreferences(this);
+ String imagePath = sharedPreferences.getString(getString(R.string.IMAGE_PATH_KEY),
+ getString(R.string.IMAGE_PATH_DEFAULT));
+ Utils.initialOpencv();
+ loadTestImageFromAsset(imagePath);
+ predictButton.setOnClickListener(new View.OnClickListener() {
+ @Override
+ public void onClick(View v) {
+ if (predictor.isLoaded()) {
+ onLoadModelSuccessed();
+ }
+ }
+ });
+
+ }
+
+ public boolean onLoadModel() {
+ return predictor.init(configParser);
+ }
+
+ public boolean onRunModel() {
+ return predictor.isLoaded() && predictor.predict();
+ }
+
+ public void onRunModelFailed() {
+ }
+
+ public void loadModel() {
+ pbLoadModel = ProgressDialog.show(this, "", "Loading model...", false, false);
+ sender.sendEmptyMessage(REQUEST_LOAD_MODEL);
+ }
+
+ public void runModel() {
+ pbRunModel = ProgressDialog.show(this, "", "Running model...", false, false);
+ sender.sendEmptyMessage(REQUEST_RUN_MODEL);
+ }
+
+ public void onLoadModelSuccessed() {
+ if (predictMat != null && predictor.isLoaded()) {
+ predictor.setInputMat(predictMat);
+ runModel();
+ }
+ }
+
+ public void onRunModelSuccessed() {
+ // obtain results and update UI
+ tvInferenceTime.setText("Inference time: " + predictor.getInferenceTime() + " ms");
+
+ if (configParser.getModelType().equalsIgnoreCase("segmenter")) {
+ SegResult segResult = predictor.getSegResult();
+ Mat maskMat = visualize.draw(segResult, predictMat.clone(), predictor.getImageBlob(), 1);
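+ // OpenCV Mats are BGR(A) while Android Bitmaps expect RGBA, hence the color conversion before matToBitmap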
+ Imgproc.cvtColor(maskMat, maskMat, Imgproc.COLOR_BGRA2RGBA);
+ Bitmap outputImage = Bitmap.createBitmap(maskMat.width(), maskMat.height(), Bitmap.Config.ARGB_8888);
+ org.opencv.android.Utils.matToBitmap(maskMat, outputImage);
+ if (outputImage != null) {
+ ivInputImage.setImageBitmap(outputImage);
+ }
+ } else if (configParser.getModelType().equalsIgnoreCase("detector")) {
+ DetResult detResult = predictor.getDetResult();
+ Mat roiMat = visualize.draw(detResult, predictMat.clone());
+ Imgproc.cvtColor(roiMat, roiMat, Imgproc.COLOR_BGR2RGB);
+ Bitmap outputImage = Bitmap.createBitmap(roiMat.width(),roiMat.height(), Bitmap.Config.ARGB_8888);
+ org.opencv.android.Utils.matToBitmap(roiMat,outputImage);
+ if (outputImage != null) {
+ ivInputImage.setImageBitmap(outputImage);
+ }
+ } else if (configParser.getModelType().equalsIgnoreCase("classifier")) {
+ ClsResult clsResult = predictor.getClsResult();
+ if (configParser.getLabeList().size() > 0) {
+ String outputResult = "Top1: " + clsResult.getCategory() + " - " + String.format("%.3f", clsResult.getScore());
+ tvOutputResult.setText(outputResult);
+ tvOutputResult.scrollTo(0, 0);
+ }
+ }
+ }
+
+ public void onMatChanged(Mat mat) {
+ this.predictMat = mat.clone();
+ }
+
+ public void onImageChanged(Bitmap image) {
+ ivInputImage.setImageBitmap(image);
+ tvOutputResult.setText("");
+ tvInferenceTime.setText("Inference time: -- ms");
+ }
+
+ public void onSettingsClicked() {
+ startActivity(new Intent(MainActivity.this, SettingsActivity.class));
+ }
+
+ @Override
+ public boolean onCreateOptionsMenu(Menu menu) {
+ MenuInflater inflater = getMenuInflater();
+ inflater.inflate(R.menu.menu_action_options, menu);
+ return true;
+ }
+
+ @Override
+ public boolean onOptionsItemSelected(MenuItem item) {
+ switch (item.getItemId()) {
+ case android.R.id.home:
+ finish();
+ break;
+ case R.id.open_gallery:
+ if (requestAllPermissions()) {
+ openGallery();
+ }
+ break;
+ case R.id.take_photo:
+ if (requestAllPermissions()) {
+ takePhoto();
+ }
+ break;
+ case R.id.settings:
+ if (requestAllPermissions()) {
+ // make sure we have SDCard r&w permissions to load model from SDCard
+ onSettingsClicked();
+ }
+ break;
+ }
+ return super.onOptionsItemSelected(item);
+ }
+
+ @Override
+ public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions,
+ @NonNull int[] grantResults) {
+ super.onRequestPermissionsResult(requestCode, permissions, grantResults);
+ if (grantResults.length < 2 || grantResults[0] != PackageManager.PERMISSION_GRANTED || grantResults[1] != PackageManager.PERMISSION_GRANTED) {
+ Toast.makeText(this, "Permission Denied", Toast.LENGTH_SHORT).show();
+ }
+ }
+
+ @Override
+ protected void onActivityResult(int requestCode, int resultCode, Intent data) {
+ super.onActivityResult(requestCode, resultCode, data);
+ if (resultCode == RESULT_OK && data != null) {
+ switch (requestCode) {
+ case OPEN_GALLERY_REQUEST_CODE:
+ try {
+ ContentResolver resolver = getContentResolver();
+ Uri uri = data.getData();
+ Bitmap image = MediaStore.Images.Media.getBitmap(resolver, uri);
+ String[] proj = {MediaStore.Images.Media.DATA};
+ Cursor cursor = managedQuery(uri, proj, null, null, null);
+ cursor.moveToFirst();
+ int columnIndex = cursor.getColumnIndex(proj[0]);
+ String imgDecodableString = cursor.getString(columnIndex);
+ File file = new File(imgDecodableString);
+ Mat mat = Imgcodecs.imread(file.getAbsolutePath(),Imgcodecs.IMREAD_COLOR);
+ onImageChanged(image);
+ onMatChanged(mat);
+ } catch (IOException e) {
+ Log.e(TAG, e.toString());
+ }
+ break;
+ case TAKE_PHOTO_REQUEST_CODE:
+ Bitmap image = (Bitmap) data.getParcelableExtra("data");
+ Mat mat = new Mat();
+ org.opencv.android.Utils.bitmapToMat(image, mat);
+ Imgproc.cvtColor(mat, mat, Imgproc.COLOR_RGBA2BGR);
+ onImageChanged(image);
+ onMatChanged(mat);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ private boolean requestAllPermissions() {
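+ // The permission request is asynchronous: return false for now and let the user retry the action after granting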
+ if (ContextCompat.checkSelfPermission(this, Manifest.permission.WRITE_EXTERNAL_STORAGE)
+ != PackageManager.PERMISSION_GRANTED || ContextCompat.checkSelfPermission(this,
+ Manifest.permission.CAMERA)
+ != PackageManager.PERMISSION_GRANTED) {
+ ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.WRITE_EXTERNAL_STORAGE,
+ Manifest.permission.CAMERA},
+ 0);
+ return false;
+ }
+ return true;
+ }
+
+ private void openGallery() {
+ Intent intent = new Intent(Intent.ACTION_PICK, null);
+ intent.setDataAndType(MediaStore.Images.Media.EXTERNAL_CONTENT_URI, "image/*");
+ startActivityForResult(intent, OPEN_GALLERY_REQUEST_CODE);
+ }
+
+ private void takePhoto() {
+ Intent takePhotoIntent = new Intent(MediaStore.ACTION_IMAGE_CAPTURE);
+ if (takePhotoIntent.resolveActivity(getPackageManager()) != null) {
+ startActivityForResult(takePhotoIntent, TAKE_PHOTO_REQUEST_CODE);
+ }
+ }
+
+ @Override
+ public boolean onPrepareOptionsMenu(Menu menu) {
+ boolean isLoaded = predictor.isLoaded();
+ menu.findItem(R.id.open_gallery).setEnabled(isLoaded);
+ menu.findItem(R.id.take_photo).setEnabled(isLoaded);
+ return super.onPrepareOptionsMenu(menu);
+ }
+
+ @Override
+ protected void onResume() {
+ Log.i(TAG, "begin onResume");
+ super.onResume();
+ SharedPreferences sharedPreferences = PreferenceManager.getDefaultSharedPreferences(this);
+
+ boolean settingsChanged = false;
+ boolean testImageChanged = false;
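+ // OR each preference into the changed flags, so any single difference triggers a reload below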
+ String modelPath = sharedPreferences.getString(getString(R.string.MODEL_PATH_KEY),
+ getString(R.string.MODEL_PATH_DEFAULT));
+ settingsChanged |= !modelPath.equalsIgnoreCase(testModelPathFromAsset);
+ String yamlPath = sharedPreferences.getString(getString(R.string.YAML_PATH_KEY),
+ getString(R.string.YAML_PATH_DEFAULT));
+ settingsChanged |= !yamlPath.equalsIgnoreCase(testYamlPathFromAsset);
+ int cpuThreadNum = Integer.parseInt(sharedPreferences.getString(getString(R.string.CPU_THREAD_NUM_KEY),
+ getString(R.string.CPU_THREAD_NUM_DEFAULT)));
+ settingsChanged |= cpuThreadNum != configParser.getCpuThreadNum();
+ String cpuPowerMode = sharedPreferences.getString(getString(R.string.CPU_POWER_MODE_KEY),
+ getString(R.string.CPU_POWER_MODE_DEFAULT));
+ settingsChanged |= !cpuPowerMode.equalsIgnoreCase(configParser.getCpuPowerMode());
+ String imagePath = sharedPreferences.getString(getString(R.string.IMAGE_PATH_KEY),
+ getString(R.string.IMAGE_PATH_DEFAULT));
+ testImageChanged |= !imagePath.equalsIgnoreCase(testImagePathFromAsset);
+
+ testYamlPathFromAsset = yamlPath;
+ testModelPathFromAsset = modelPath;
+ if (settingsChanged) {
+ try {
+ String realModelPath = modelPath;
+ if (!modelPath.startsWith("/")) {
+ String modelFileName = Utils.getFileNameFromString(modelPath);
+ realModelPath = this.getCacheDir() + File.separator + modelFileName;
+ Utils.copyFileFromAssets(this, modelPath, realModelPath);
+ }
+ String realYamlPath = yamlPath;
+ if (!yamlPath.startsWith("/")) {
+ String yamlFileName = Utils.getFileNameFromString(yamlPath);
+ realYamlPath = this.getCacheDir() + File.separator + yamlFileName;
+ Utils.copyFileFromAssets(this, yamlPath, realYamlPath);
+ }
+ configParser.init(realModelPath, realYamlPath, cpuThreadNum, cpuPowerMode);
+ visualize.init(configParser.getNumClasses());
+ } catch (IOException e) {
+ e.printStackTrace();
+ Toast.makeText(MainActivity.this, "Load config failed!", Toast.LENGTH_SHORT).show();
+ }
+ // update UI
+ tvInputSetting.setText("Model: " + configParser.getModel() + "\n" +
+ "CPU Thread Num: " + configParser.getCpuThreadNum() + "\n" + "CPU Power Mode: " + configParser.getCpuPowerMode());
+ tvInputSetting.scrollTo(0, 0);
+ // reload the model if the configuration has changed
+ loadModel();
+ }
+
+ if (testImageChanged){
+ loadTestImageFromAsset(imagePath);
+ }
+ }
+
+ public void loadTestImageFromAsset(String imagePath){
+ if (imagePath.isEmpty()) {
+ return;
+ }
+ // If the image path starts with '/', read the test image from that file path;
+ // otherwise read it from the app assets.
+ testImagePathFromAsset = imagePath;
+ if (!imagePath.startsWith("/")) {
+ InputStream imageStream = null;
+ try {
+ imageStream = getAssets().open(imagePath);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ onImageChanged(BitmapFactory.decodeStream(imageStream));
+ String realPath;
+ String imageFileName = Utils.getFileNameFromString(imagePath);
+ realPath = this.getCacheDir() + File.separator + imageFileName;
+ Utils.copyFileFromAssets(this, imagePath, realPath);
+ onMatChanged(Imgcodecs.imread(realPath, Imgcodecs.IMREAD_COLOR));
+ } else {
+ if (!new File(imagePath).exists()) {
+ return;
+ }
+ onMatChanged(Imgcodecs.imread(imagePath, Imgcodecs.IMREAD_COLOR));
+ onImageChanged(BitmapFactory.decodeFile(imagePath));
+ }
+ }
+
+ @Override
+ protected void onDestroy() {
+ if (predictor != null) {
+ predictor.releaseModel();
+ }
+ worker.quit();
+ super.onDestroy();
+ }
+}
\ No newline at end of file
diff --git a/deploy/lite/android/demo/app/src/main/java/com/baidu/paddlex/lite/demo/SettingsActivity.java b/deploy/lite/android/demo/app/src/main/java/com/baidu/paddlex/lite/demo/SettingsActivity.java
new file mode 100644
index 0000000000000000000000000000000000000000..271343ff5a626ba5d8a224dfe832738ae4ede123
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/main/java/com/baidu/paddlex/lite/demo/SettingsActivity.java
@@ -0,0 +1,158 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.baidu.paddlex.lite.demo;
+
+import com.baidu.paddlex.Utils;
+
+import android.content.SharedPreferences;
+import android.os.Bundle;
+import android.preference.CheckBoxPreference;
+import android.preference.EditTextPreference;
+import android.preference.ListPreference;
+import android.support.v7.app.ActionBar;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class SettingsActivity extends AppCompatPreferenceActivity implements SharedPreferences.OnSharedPreferenceChangeListener {
+ ListPreference lpChoosePreInstalledModel = null;
+ CheckBoxPreference cbEnableCustomSettings = null;
+ EditTextPreference etModelPath = null;
+ EditTextPreference etYamlPath = null;
+ EditTextPreference etImagePath = null;
+ ListPreference lpCPUThreadNum = null;
+ ListPreference lpCPUPowerMode = null;
+
+ List<String> preInstalledModelPaths = null;
+ List<String> preInstalledYamlPaths = null;
+ List<String> preInstalledImagePaths = null;
+ List<String> preInstalledCPUThreadNums = null;
+ List<String> preInstalledCPUPowerModes = null;
+
+ @Override
+ public void onCreate(Bundle savedInstanceState) {
+ super.onCreate(savedInstanceState);
+ addPreferencesFromResource(R.xml.settings);
+ ActionBar supportActionBar = getSupportActionBar();
+ if (supportActionBar != null) {
+ supportActionBar.setDisplayHomeAsUpEnabled(true);
+ }
+
+ // initialize the pre-installed models
+ preInstalledModelPaths = new ArrayList<String>();
+ preInstalledYamlPaths = new ArrayList<String>();
+ preInstalledImagePaths = new ArrayList<String>();
+ preInstalledCPUThreadNums = new ArrayList<String>();
+ preInstalledCPUPowerModes = new ArrayList<String>();
+ preInstalledModelPaths.add(getString(R.string.MODEL_PATH_DEFAULT));
+ preInstalledYamlPaths.add(getString(R.string.YAML_PATH_DEFAULT));
+ preInstalledImagePaths.add(getString(R.string.IMAGE_PATH_DEFAULT));
+ preInstalledCPUThreadNums.add(getString(R.string.CPU_THREAD_NUM_DEFAULT));
+ preInstalledCPUPowerModes.add(getString(R.string.CPU_POWER_MODE_DEFAULT));
+ // initialize UI components
+ lpChoosePreInstalledModel =
+ (ListPreference) findPreference(getString(R.string.CHOOSE_PRE_INSTALLED_MODEL_KEY));
+ String[] preInstalledModelNames = new String[preInstalledModelPaths.size()];
+ for (int i = 0; i < preInstalledModelPaths.size(); i++) {
+ preInstalledModelNames[i] =
+ preInstalledModelPaths.get(i).substring(preInstalledModelPaths.get(i).lastIndexOf("/") + 1);
+ }
+ lpChoosePreInstalledModel.setEntries(preInstalledModelNames);
+ lpChoosePreInstalledModel.setEntryValues(preInstalledModelPaths.toArray(new String[preInstalledModelPaths.size()]));
+ cbEnableCustomSettings =
+ (CheckBoxPreference) findPreference(getString(R.string.ENABLE_CUSTOM_SETTINGS_KEY));
+ etModelPath = (EditTextPreference) findPreference(getString(R.string.MODEL_PATH_KEY));
+ etModelPath.setTitle("Model Path (SDCard: " + Utils.getSDCardDirectory() + ")");
+ etYamlPath = (EditTextPreference) findPreference(getString(R.string.YAML_PATH_KEY));
+ etImagePath = (EditTextPreference) findPreference(getString(R.string.IMAGE_PATH_KEY));
+ lpCPUThreadNum =
+ (ListPreference) findPreference(getString(R.string.CPU_THREAD_NUM_KEY));
+ lpCPUPowerMode =
+ (ListPreference) findPreference(getString(R.string.CPU_POWER_MODE_KEY));
+ }
+
+ private void reloadPreferenceAndUpdateUI() {
+ SharedPreferences sharedPreferences = getPreferenceScreen().getSharedPreferences();
+ boolean enableCustomSettings =
+ sharedPreferences.getBoolean(getString(R.string.ENABLE_CUSTOM_SETTINGS_KEY), false);
+ String modelPath = sharedPreferences.getString(getString(R.string.CHOOSE_PRE_INSTALLED_MODEL_KEY),
+ getString(R.string.MODEL_PATH_DEFAULT));
+ int modelIdx = lpChoosePreInstalledModel.findIndexOfValue(modelPath);
+ if (modelIdx >= 0 && modelIdx < preInstalledModelPaths.size()) {
+ if (!enableCustomSettings) {
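+ // custom settings disabled: mirror the selected pre-installed model's values into the editable preferences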
+ SharedPreferences.Editor editor = sharedPreferences.edit();
+ editor.putString(getString(R.string.MODEL_PATH_KEY), preInstalledModelPaths.get(modelIdx));
+ editor.putString(getString(R.string.YAML_PATH_KEY), preInstalledYamlPaths.get(modelIdx));
+ editor.putString(getString(R.string.IMAGE_PATH_KEY), preInstalledImagePaths.get(modelIdx));
+ editor.putString(getString(R.string.CPU_THREAD_NUM_KEY), preInstalledCPUThreadNums.get(modelIdx));
+ editor.putString(getString(R.string.CPU_POWER_MODE_KEY), preInstalledCPUPowerModes.get(modelIdx));
+ editor.commit();
+ }
+ lpChoosePreInstalledModel.setSummary(modelPath);
+ }
+
+ cbEnableCustomSettings.setChecked(enableCustomSettings);
+ etModelPath.setEnabled(enableCustomSettings);
+ etYamlPath.setEnabled(enableCustomSettings);
+ etImagePath.setEnabled(enableCustomSettings);
+ lpCPUThreadNum.setEnabled(enableCustomSettings);
+ lpCPUPowerMode.setEnabled(enableCustomSettings);
+ modelPath = sharedPreferences.getString(getString(R.string.MODEL_PATH_KEY),
+ getString(R.string.MODEL_PATH_DEFAULT));
+ String yamlPath = sharedPreferences.getString(getString(R.string.YAML_PATH_KEY),
+ getString(R.string.YAML_PATH_DEFAULT));
+ String imagePath = sharedPreferences.getString(getString(R.string.IMAGE_PATH_KEY),
+ getString(R.string.IMAGE_PATH_DEFAULT));
+ String cpuThreadNum = sharedPreferences.getString(getString(R.string.CPU_THREAD_NUM_KEY),
+ getString(R.string.CPU_THREAD_NUM_DEFAULT));
+ String cpuPowerMode = sharedPreferences.getString(getString(R.string.CPU_POWER_MODE_KEY),
+ getString(R.string.CPU_POWER_MODE_DEFAULT));
+
+ etModelPath.setSummary(modelPath);
+ etModelPath.setText(modelPath);
+ etYamlPath.setSummary(yamlPath);
+ etYamlPath.setText(yamlPath);
+ etImagePath.setSummary(imagePath);
+ etImagePath.setText(imagePath);
+ lpCPUThreadNum.setValue(cpuThreadNum);
+ lpCPUThreadNum.setSummary(cpuThreadNum);
+ lpCPUPowerMode.setValue(cpuPowerMode);
+ lpCPUPowerMode.setSummary(cpuPowerMode);
+ }
+
+ @Override
+ protected void onResume() {
+ super.onResume();
+ getPreferenceScreen().getSharedPreferences().registerOnSharedPreferenceChangeListener(this);
+ reloadPreferenceAndUpdateUI();
+ }
+
+ @Override
+ protected void onPause() {
+ super.onPause();
+ getPreferenceScreen().getSharedPreferences().unregisterOnSharedPreferenceChangeListener(this);
+ }
+
+ @Override
+ public void onSharedPreferenceChanged(SharedPreferences sharedPreferences, String key) {
+ if (key.equals(getString(R.string.CHOOSE_PRE_INSTALLED_MODEL_KEY))) {
+ SharedPreferences.Editor editor = sharedPreferences.edit();
+ editor.putBoolean(getString(R.string.ENABLE_CUSTOM_SETTINGS_KEY), false);
+ editor.commit();
+ }
+ reloadPreferenceAndUpdateUI();
+ }
+}
diff --git a/deploy/lite/android/demo/app/src/main/res/drawable-v24/ic_launcher_foreground.xml b/deploy/lite/android/demo/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
new file mode 100644
index 0000000000000000000000000000000000000000..1f6bb290603d7caa16c5fb6f61bbfdc750622f5c
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
@@ -0,0 +1,34 @@
+<!-- Stock Android Studio launcher-foreground vector drawable (34 lines); the vector
+     path data was not preserved in this extract. -->
diff --git a/deploy/lite/android/demo/app/src/main/res/drawable/face.jpg b/deploy/lite/android/demo/app/src/main/res/drawable/face.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8963ae3db05894cd4bf3ea17957297363db73171
Binary files /dev/null and b/deploy/lite/android/demo/app/src/main/res/drawable/face.jpg differ
diff --git a/deploy/lite/android/demo/app/src/main/res/drawable/ic_launcher_background.xml b/deploy/lite/android/demo/app/src/main/res/drawable/ic_launcher_background.xml
new file mode 100644
index 0000000000000000000000000000000000000000..0d025f9bf6b67c63044a36a9ff44fbc69e5c5822
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/main/res/drawable/ic_launcher_background.xml
@@ -0,0 +1,170 @@
+<!-- Stock Android Studio launcher-background vector drawable (170 lines); the vector
+     path data was not preserved in this extract. -->
diff --git a/deploy/lite/android/demo/app/src/main/res/layout/activity_main.xml b/deploy/lite/android/demo/app/src/main/res/layout/activity_main.xml
new file mode 100644
index 0000000000000000000000000000000000000000..97c79f86dbedee3b71ef4b787b05352f70a428fd
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/main/res/layout/activity_main.xml
@@ -0,0 +1,112 @@
+<!-- Main layout XML (112 lines) not preserved in this extract; it defines the views
+     referenced from MainActivity: tv_input_setting, iv_input_image, iv_predict_button,
+     tv_inference_time and tv_output_result. -->
\ No newline at end of file
diff --git a/deploy/lite/android/demo/app/src/main/res/menu/menu_action_options.xml b/deploy/lite/android/demo/app/src/main/res/menu/menu_action_options.xml
new file mode 100644
index 0000000000000000000000000000000000000000..34757f7d68cfae3b45cade0900dc507d205a018e
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/main/res/menu/menu_action_options.xml
@@ -0,0 +1,21 @@
+<!-- Options menu XML (21 lines) not preserved in this extract; it defines the
+     open_gallery, take_photo and settings items handled in MainActivity. -->
diff --git a/deploy/lite/android/demo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/deploy/lite/android/demo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
new file mode 100644
index 0000000000000000000000000000000000000000..eca70cfe52eac1ba66ba280a68ca7be8fcf88a16
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="utf-8"?>
+<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
+    <background android:drawable="@drawable/ic_launcher_background"/>
+    <foreground android:drawable="@drawable/ic_launcher_foreground"/>
+</adaptive-icon>
\ No newline at end of file
diff --git a/deploy/lite/android/demo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/deploy/lite/android/demo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
new file mode 100644
index 0000000000000000000000000000000000000000..eca70cfe52eac1ba66ba280a68ca7be8fcf88a16
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="utf-8"?>
+<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
+    <background android:drawable="@drawable/ic_launcher_background"/>
+    <foreground android:drawable="@drawable/ic_launcher_foreground"/>
+</adaptive-icon>
\ No newline at end of file
diff --git a/deploy/lite/android/demo/app/src/main/res/mipmap-hdpi/ic_launcher.png b/deploy/lite/android/demo/app/src/main/res/mipmap-hdpi/ic_launcher.png
new file mode 100644
index 0000000000000000000000000000000000000000..898f3ed59ac9f3248734a00e5902736c9367d455
Binary files /dev/null and b/deploy/lite/android/demo/app/src/main/res/mipmap-hdpi/ic_launcher.png differ
diff --git a/deploy/lite/android/demo/app/src/main/res/mipmap-hdpi/ic_launcher_round.png b/deploy/lite/android/demo/app/src/main/res/mipmap-hdpi/ic_launcher_round.png
new file mode 100644
index 0000000000000000000000000000000000000000..dffca3601eba7bf5f409bdd520820e2eb5122c75
Binary files /dev/null and b/deploy/lite/android/demo/app/src/main/res/mipmap-hdpi/ic_launcher_round.png differ
diff --git a/deploy/lite/android/demo/app/src/main/res/mipmap-mdpi/ic_launcher.png b/deploy/lite/android/demo/app/src/main/res/mipmap-mdpi/ic_launcher.png
new file mode 100644
index 0000000000000000000000000000000000000000..64ba76f75e9ce021aa3d95c213491f73bcacb597
Binary files /dev/null and b/deploy/lite/android/demo/app/src/main/res/mipmap-mdpi/ic_launcher.png differ
diff --git a/deploy/lite/android/demo/app/src/main/res/mipmap-mdpi/ic_launcher_round.png b/deploy/lite/android/demo/app/src/main/res/mipmap-mdpi/ic_launcher_round.png
new file mode 100644
index 0000000000000000000000000000000000000000..dae5e082342fcdeee5db8a6e0b27028e2d2808f5
Binary files /dev/null and b/deploy/lite/android/demo/app/src/main/res/mipmap-mdpi/ic_launcher_round.png differ
diff --git a/deploy/lite/android/demo/app/src/main/res/mipmap-xhdpi/ic_launcher.png b/deploy/lite/android/demo/app/src/main/res/mipmap-xhdpi/ic_launcher.png
new file mode 100644
index 0000000000000000000000000000000000000000..e5ed46597ea8447d91ab1786a34e30f1c26b18bd
Binary files /dev/null and b/deploy/lite/android/demo/app/src/main/res/mipmap-xhdpi/ic_launcher.png differ
diff --git a/deploy/lite/android/demo/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png b/deploy/lite/android/demo/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png
new file mode 100644
index 0000000000000000000000000000000000000000..14ed0af35023e4f1901cf03487b6c524257b8483
Binary files /dev/null and b/deploy/lite/android/demo/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png differ
diff --git a/deploy/lite/android/demo/app/src/main/res/mipmap-xxhdpi/ic_launcher.png b/deploy/lite/android/demo/app/src/main/res/mipmap-xxhdpi/ic_launcher.png
new file mode 100644
index 0000000000000000000000000000000000000000..b0907cac3bfd8fbfdc46e1108247f0a1055387ec
Binary files /dev/null and b/deploy/lite/android/demo/app/src/main/res/mipmap-xxhdpi/ic_launcher.png differ
diff --git a/deploy/lite/android/demo/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png b/deploy/lite/android/demo/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png
new file mode 100644
index 0000000000000000000000000000000000000000..d8ae03154975f397f8ed1b84f2d4bf9783ecfa26
Binary files /dev/null and b/deploy/lite/android/demo/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png differ
diff --git a/deploy/lite/android/demo/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png b/deploy/lite/android/demo/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png
new file mode 100644
index 0000000000000000000000000000000000000000..2c18de9e66108411737e910f5c1972476f03ddbf
Binary files /dev/null and b/deploy/lite/android/demo/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png differ
diff --git a/deploy/lite/android/demo/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png b/deploy/lite/android/demo/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png
new file mode 100644
index 0000000000000000000000000000000000000000..beed3cdd2c32af5114a7dc70b9ef5b698eb8797e
Binary files /dev/null and b/deploy/lite/android/demo/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png differ
diff --git a/deploy/lite/android/demo/app/src/main/res/values/arrays.xml b/deploy/lite/android/demo/app/src/main/res/values/arrays.xml
new file mode 100644
index 0000000000000000000000000000000000000000..8e08ad57ddaca4bb0cff1a3d61ec84b0442b1b0e
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/main/res/values/arrays.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+    <!-- string-array names assumed from the Paddle-Lite demo convention; only item values survive in this extract -->
+    <string-array name="cpu_thread_num_entries">
+        <item>1 threads</item>
+        <item>2 threads</item>
+        <item>4 threads</item>
+        <item>8 threads</item>
+    </string-array>
+    <string-array name="cpu_thread_num_values">
+        <item>1</item>
+        <item>2</item>
+        <item>4</item>
+        <item>8</item>
+    </string-array>
+    <string-array name="cpu_power_mode_entries">
+        <item>HIGH(only big cores)</item>
+        <item>LOW(only LITTLE cores)</item>
+        <item>FULL(all cores)</item>
+        <item>NO_BIND(depends on system)</item>
+        <item>RAND_HIGH</item>
+        <item>RAND_LOW</item>
+    </string-array>
+    <string-array name="cpu_power_mode_values">
+        <item>LITE_POWER_HIGH</item>
+        <item>LITE_POWER_LOW</item>
+        <item>LITE_POWER_FULL</item>
+        <item>LITE_POWER_NO_BIND</item>
+        <item>LITE_POWER_RAND_HIGH</item>
+        <item>LITE_POWER_RAND_LOW</item>
+    </string-array>
+    <string-array name="input_color_format_entries">
+        <item>BGR color format</item>
+        <item>RGB color format</item>
+    </string-array>
+    <string-array name="input_color_format_values">
+        <item>BGR</item>
+        <item>RGB</item>
+    </string-array>
+</resources>
\ No newline at end of file
diff --git a/deploy/lite/android/demo/app/src/main/res/values/colors.xml b/deploy/lite/android/demo/app/src/main/res/values/colors.xml
new file mode 100644
index 0000000000000000000000000000000000000000..69b22338c6510250df3b43672635120dbce2fa49
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/main/res/values/colors.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+    <color name="colorPrimary">#008577</color>
+    <color name="colorPrimaryDark">#00574B</color>
+    <color name="colorAccent">#D81B60</color>
+</resources>
diff --git a/deploy/lite/android/demo/app/src/main/res/values/strings.xml b/deploy/lite/android/demo/app/src/main/res/values/strings.xml
new file mode 100644
index 0000000000000000000000000000000000000000..1bd60b4a28cf4d54f5544a9ed9027d32faa574d0
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/main/res/values/strings.xml
@@ -0,0 +1,16 @@
+<resources>
+    <string name="app_name">PaddleX Demo</string>
+
+    <string name="CHOOSE_PRE_INSTALLED_MODEL_KEY">CHOOSE_PRE_INSTALLED_MODEL_KEY</string>
+    <string name="ENABLE_CUSTOM_SETTINGS_KEY">ENABLE_CUSTOM_SETTINGS_KEY</string>
+    <string name="MODEL_PATH_KEY">MODEL_PATH_KEY</string>
+    <string name="YAML_PATH_KEY">YAML_PATH_KEY</string>
+    <string name="IMAGE_PATH_KEY">IMAGE_PATH_KEY</string>
+    <string name="CPU_POWER_MODE_KEY">CPU_POWER_MODE_KEY</string>
+    <string name="CPU_THREAD_NUM_KEY">CPU_THREAD_NUM_KEY</string>
+    <string name="MODEL_PATH_DEFAULT">model/model.nb</string>
+    <string name="YAML_PATH_DEFAULT">config/model.yml</string>
+    <string name="IMAGE_PATH_DEFAULT">images/test.jpg</string>
+    <string name="CPU_THREAD_NUM_DEFAULT">1</string>
+    <string name="CPU_POWER_MODE_DEFAULT">LITE_POWER_HIGH</string>
+</resources>
\ No newline at end of file
diff --git a/deploy/lite/android/demo/app/src/main/res/values/styles.xml b/deploy/lite/android/demo/app/src/main/res/values/styles.xml
new file mode 100644
index 0000000000000000000000000000000000000000..5203f74f64d35d46d4451a4baa9350cf4f7770e8
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/main/res/values/styles.xml
@@ -0,0 +1,25 @@
+<!-- App theme styles (25 lines) were not preserved in this extract. -->
diff --git a/deploy/lite/android/demo/app/src/main/res/xml/settings.xml b/deploy/lite/android/demo/app/src/main/res/xml/settings.xml
new file mode 100644
index 0000000000000000000000000000000000000000..b26fdc1f8c2014485dad515f3604a48b79bbf6d1
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/main/res/xml/settings.xml
@@ -0,0 +1,49 @@
+<!-- PreferenceScreen XML (49 lines) not preserved in this extract. Per SettingsActivity
+     it contains a ListPreference CHOOSE_PRE_INSTALLED_MODEL_KEY, a CheckBoxPreference
+     ENABLE_CUSTOM_SETTINGS_KEY, EditTextPreferences MODEL_PATH_KEY / YAML_PATH_KEY /
+     IMAGE_PATH_KEY, and ListPreferences CPU_THREAD_NUM_KEY / CPU_POWER_MODE_KEY. -->
diff --git a/deploy/lite/android/demo/app/src/test/java/com/baidu/paddlex/lite/demo/ExampleUnitTest.java b/deploy/lite/android/demo/app/src/test/java/com/baidu/paddlex/lite/demo/ExampleUnitTest.java
new file mode 100644
index 0000000000000000000000000000000000000000..433c52cc67c4bceca7821441944e71e2bdd08503
--- /dev/null
+++ b/deploy/lite/android/demo/app/src/test/java/com/baidu/paddlex/lite/demo/ExampleUnitTest.java
@@ -0,0 +1,17 @@
+package com.baidu.paddlex.lite.demo;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Example local unit test, which will execute on the development machine (host).
+ *
+ * @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
+ */
+public class ExampleUnitTest {
+ @Test
+ public void addition_isCorrect() {
+ assertEquals(4, 2 + 2);
+ }
+}
\ No newline at end of file
diff --git a/deploy/lite/android/demo/build.gradle b/deploy/lite/android/demo/build.gradle
new file mode 100644
index 0000000000000000000000000000000000000000..fafc1b970be053f8a9ec61f55b94cb2e85b26a33
--- /dev/null
+++ b/deploy/lite/android/demo/build.gradle
@@ -0,0 +1,27 @@
+// Top-level build file where you can add configuration options common to all sub-projects/modules.
+
+buildscript {
+ repositories {
+ google()
+ jcenter()
+
+ }
+ dependencies {
+ classpath 'com.android.tools.build:gradle:3.4.0'
+
+ // NOTE: Do not place your application dependencies here; they belong
+ // in the individual module build.gradle files
+ }
+}
+
+allprojects {
+ repositories {
+ google()
+ jcenter()
+
+ }
+}
+
+task clean(type: Delete) {
+ delete rootProject.buildDir
+}
diff --git a/deploy/lite/android/demo/gradle.properties b/deploy/lite/android/demo/gradle.properties
new file mode 100644
index 0000000000000000000000000000000000000000..82618cecb4d1cf137df18eca8dbe88e1b3b2c2b8
--- /dev/null
+++ b/deploy/lite/android/demo/gradle.properties
@@ -0,0 +1,15 @@
+# Project-wide Gradle settings.
+# IDE (e.g. Android Studio) users:
+# Gradle settings configured through the IDE *will override*
+# any settings specified in this file.
+# For more details on how to configure your build environment visit
+# http://www.gradle.org/docs/current/userguide/build_environment.html
+# Specifies the JVM arguments used for the daemon process.
+# The setting is particularly useful for tweaking memory settings.
+org.gradle.jvmargs=-Xmx1536m
+# When configured, Gradle will run in incubating parallel mode.
+# This option should only be used with decoupled projects. More details, visit
+# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
+# org.gradle.parallel=true
+
+
diff --git a/deploy/lite/android/demo/gradle/wrapper/gradle-wrapper.jar b/deploy/lite/android/demo/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f6b961fd5a86aa5fbfe90f707c3138408be7c718
Binary files /dev/null and b/deploy/lite/android/demo/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/deploy/lite/android/demo/gradle/wrapper/gradle-wrapper.properties b/deploy/lite/android/demo/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000000000000000000000000000000000000..578b5482ad45045124272fa3e54d065a77c2eea2
--- /dev/null
+++ b/deploy/lite/android/demo/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
+#Thu Aug 22 15:05:37 CST 2019
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-5.1.1-all.zip
diff --git a/deploy/lite/android/demo/gradlew b/deploy/lite/android/demo/gradlew
new file mode 100644
index 0000000000000000000000000000000000000000..e69ae6eca7aa6d7565cb7f9621ee12a224e47081
--- /dev/null
+++ b/deploy/lite/android/demo/gradlew
@@ -0,0 +1,172 @@
+#!/usr/bin/env sh
+
+##############################################################################
+##
+## Gradle start up script for UN*X
+##
+##############################################################################
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG=`dirname "$PRG"`"/$link"
+ fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS=""
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn () {
+ echo "$*"
+}
+
+die () {
+ echo
+ echo "$*"
+ echo
+ exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "`uname`" in
+ CYGWIN* )
+ cygwin=true
+ ;;
+ Darwin* )
+ darwin=true
+ ;;
+ MINGW* )
+ msys=true
+ ;;
+ NONSTOP* )
+ nonstop=true
+ ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ if [ ! -x "$JAVACMD" ] ; then
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+else
+ JAVACMD="java"
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
+ MAX_LIMIT=`ulimit -H -n`
+ if [ $? -eq 0 ] ; then
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+ MAX_FD="$MAX_LIMIT"
+ fi
+ ulimit -n $MAX_FD
+ if [ $? -ne 0 ] ; then
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
+ fi
+ else
+ warn "Could not query maximum file descriptor limit: $MAX_LIMIT"
+ fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin ; then
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+ JAVACMD=`cygpath --unix "$JAVACMD"`
+
+ # We build the pattern for arguments to be converted via cygpath
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+ SEP=""
+ for dir in $ROOTDIRSRAW ; do
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
+ SEP="|"
+ done
+ OURCYGPATTERN="(^($ROOTDIRS))"
+ # Add a user-defined pattern to the cygpath arguments
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+ fi
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
+ i=0
+ for arg in "$@" ; do
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
+
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+ else
+ eval `echo args$i`="\"$arg\""
+ fi
+ i=$((i+1))
+ done
+ case $i in
+ (0) set -- ;;
+ (1) set -- "$args0" ;;
+ (2) set -- "$args0" "$args1" ;;
+ (3) set -- "$args0" "$args1" "$args2" ;;
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+ esac
+fi
+
+# Escape application args
+save () {
+ for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
+ echo " "
+}
+APP_ARGS=$(save "$@")
+
+# Collect all arguments for the java command, following the shell quoting and substitution rules
+eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
+
+# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
+if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
+ cd "$(dirname "$0")"
+fi
+
+exec "$JAVACMD" "$@"
diff --git a/deploy/lite/android/demo/gradlew.bat b/deploy/lite/android/demo/gradlew.bat
new file mode 100644
index 0000000000000000000000000000000000000000..f9553162f122c71b34635112e717c3e733b5b212
--- /dev/null
+++ b/deploy/lite/android/demo/gradlew.bat
@@ -0,0 +1,84 @@
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS=
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto init
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto init
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:init
+@rem Get command-line arguments, handling Windows variants
+
+if not "%OS%" == "Windows_NT" goto win9xME_args
+
+:win9xME_args
+@rem Slurp the command line arguments.
+set CMD_LINE_ARGS=
+set _SKIP=2
+
+:win9xME_args_slurp
+if "x%~1" == "x" goto execute
+
+set CMD_LINE_ARGS=%*
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/deploy/lite/android/demo/import-summary.txt b/deploy/lite/android/demo/import-summary.txt
new file mode 100644
index 0000000000000000000000000000000000000000..840e9d2aa7ddc8f33df8d513de711f48d199b51a
--- /dev/null
+++ b/deploy/lite/android/demo/import-summary.txt
@@ -0,0 +1,245 @@
+ECLIPSE ANDROID PROJECT IMPORT SUMMARY
+======================================
+
+Ignored Files:
+--------------
+The following files were *not* copied into the new Gradle project; you
+should evaluate whether these are still needed in your project and if
+so manually move them:
+
+* javadoc/
+* javadoc/allclasses-frame.html
+* javadoc/allclasses-noframe.html
+* javadoc/constant-values.html
+* javadoc/help-doc.html
+* javadoc/index-all.html
+* javadoc/index.html
+* javadoc/org/
+* javadoc/org/opencv/
+* javadoc/org/opencv/android/
+* javadoc/org/opencv/android/BaseLoaderCallback.html
+* javadoc/org/opencv/android/Camera2Renderer.html
+* javadoc/org/opencv/android/CameraBridgeViewBase.CvCameraViewFrame.html
+* javadoc/org/opencv/android/CameraBridgeViewBase.CvCameraViewListener.html
+* javadoc/org/opencv/android/CameraBridgeViewBase.CvCameraViewListener2.html
+* javadoc/org/opencv/android/CameraBridgeViewBase.ListItemAccessor.html
+* javadoc/org/opencv/android/CameraBridgeViewBase.html
+* javadoc/org/opencv/android/CameraGLRendererBase.html
+* javadoc/org/opencv/android/CameraGLSurfaceView.CameraTextureListener.html
+* javadoc/org/opencv/android/CameraGLSurfaceView.html
+* javadoc/org/opencv/android/CameraRenderer.html
+* javadoc/org/opencv/android/FpsMeter.html
+* javadoc/org/opencv/android/InstallCallbackInterface.html
+* javadoc/org/opencv/android/JavaCamera2View.html
+* javadoc/org/opencv/android/JavaCameraView.JavaCameraSizeAccessor.html
+* javadoc/org/opencv/android/JavaCameraView.html
+* javadoc/org/opencv/android/LoaderCallbackInterface.html
+* javadoc/org/opencv/android/OpenCVLoader.html
+* javadoc/org/opencv/android/Utils.html
+* javadoc/org/opencv/android/package-frame.html
+* javadoc/org/opencv/android/package-summary.html
+* javadoc/org/opencv/android/package-tree.html
+* javadoc/org/opencv/calib3d/
+* javadoc/org/opencv/calib3d/Calib3d.html
+* javadoc/org/opencv/calib3d/StereoBM.html
+* javadoc/org/opencv/calib3d/StereoMatcher.html
+* javadoc/org/opencv/calib3d/StereoSGBM.html
+* javadoc/org/opencv/calib3d/package-frame.html
+* javadoc/org/opencv/calib3d/package-summary.html
+* javadoc/org/opencv/calib3d/package-tree.html
+* javadoc/org/opencv/core/
+* javadoc/org/opencv/core/Algorithm.html
+* javadoc/org/opencv/core/Core.MinMaxLocResult.html
+* javadoc/org/opencv/core/Core.html
+* javadoc/org/opencv/core/CvException.html
+* javadoc/org/opencv/core/CvType.html
+* javadoc/org/opencv/core/DMatch.html
+* javadoc/org/opencv/core/KeyPoint.html
+* javadoc/org/opencv/core/Mat.html
+* javadoc/org/opencv/core/MatOfByte.html
+* javadoc/org/opencv/core/MatOfDMatch.html
+* javadoc/org/opencv/core/MatOfDouble.html
+* javadoc/org/opencv/core/MatOfFloat.html
+* javadoc/org/opencv/core/MatOfFloat4.html
+* javadoc/org/opencv/core/MatOfFloat6.html
+* javadoc/org/opencv/core/MatOfInt.html
+* javadoc/org/opencv/core/MatOfInt4.html
+* javadoc/org/opencv/core/MatOfKeyPoint.html
+* javadoc/org/opencv/core/MatOfPoint.html
+* javadoc/org/opencv/core/MatOfPoint2f.html
+* javadoc/org/opencv/core/MatOfPoint3.html
+* javadoc/org/opencv/core/MatOfPoint3f.html
+* javadoc/org/opencv/core/MatOfRect.html
+* javadoc/org/opencv/core/MatOfRect2d.html
+* javadoc/org/opencv/core/MatOfRotatedRect.html
+* javadoc/org/opencv/core/Point.html
+* javadoc/org/opencv/core/Point3.html
+* javadoc/org/opencv/core/Range.html
+* javadoc/org/opencv/core/Rect.html
+* javadoc/org/opencv/core/Rect2d.html
+* javadoc/org/opencv/core/RotatedRect.html
+* javadoc/org/opencv/core/Scalar.html
+* javadoc/org/opencv/core/Size.html
+* javadoc/org/opencv/core/TermCriteria.html
+* javadoc/org/opencv/core/TickMeter.html
+* javadoc/org/opencv/core/package-frame.html
+* javadoc/org/opencv/core/package-summary.html
+* javadoc/org/opencv/core/package-tree.html
+* javadoc/org/opencv/dnn/
+* javadoc/org/opencv/dnn/DictValue.html
+* javadoc/org/opencv/dnn/Dnn.html
+* javadoc/org/opencv/dnn/Layer.html
+* javadoc/org/opencv/dnn/Net.html
+* javadoc/org/opencv/dnn/package-frame.html
+* javadoc/org/opencv/dnn/package-summary.html
+* javadoc/org/opencv/dnn/package-tree.html
+* javadoc/org/opencv/features2d/
+* javadoc/org/opencv/features2d/AKAZE.html
+* javadoc/org/opencv/features2d/AgastFeatureDetector.html
+* javadoc/org/opencv/features2d/BFMatcher.html
+* javadoc/org/opencv/features2d/BOWImgDescriptorExtractor.html
+* javadoc/org/opencv/features2d/BOWKMeansTrainer.html
+* javadoc/org/opencv/features2d/BOWTrainer.html
+* javadoc/org/opencv/features2d/BRISK.html
+* javadoc/org/opencv/features2d/DescriptorMatcher.html
+* javadoc/org/opencv/features2d/FastFeatureDetector.html
+* javadoc/org/opencv/features2d/Feature2D.html
+* javadoc/org/opencv/features2d/Features2d.html
+* javadoc/org/opencv/features2d/FlannBasedMatcher.html
+* javadoc/org/opencv/features2d/GFTTDetector.html
+* javadoc/org/opencv/features2d/KAZE.html
+* javadoc/org/opencv/features2d/MSER.html
+* javadoc/org/opencv/features2d/ORB.html
+* javadoc/org/opencv/features2d/Params.html
+* javadoc/org/opencv/features2d/package-frame.html
+* javadoc/org/opencv/features2d/package-summary.html
+* javadoc/org/opencv/features2d/package-tree.html
+* javadoc/org/opencv/imgcodecs/
+* javadoc/org/opencv/imgcodecs/Imgcodecs.html
+* javadoc/org/opencv/imgcodecs/package-frame.html
+* javadoc/org/opencv/imgcodecs/package-summary.html
+* javadoc/org/opencv/imgcodecs/package-tree.html
+* javadoc/org/opencv/imgproc/
+* javadoc/org/opencv/imgproc/CLAHE.html
+* javadoc/org/opencv/imgproc/Imgproc.html
+* javadoc/org/opencv/imgproc/LineSegmentDetector.html
+* javadoc/org/opencv/imgproc/Moments.html
+* javadoc/org/opencv/imgproc/Subdiv2D.html
+* javadoc/org/opencv/imgproc/package-frame.html
+* javadoc/org/opencv/imgproc/package-summary.html
+* javadoc/org/opencv/imgproc/package-tree.html
+* javadoc/org/opencv/ml/
+* javadoc/org/opencv/ml/ANN_MLP.html
+* javadoc/org/opencv/ml/ANN_MLP_ANNEAL.html
+* javadoc/org/opencv/ml/Boost.html
+* javadoc/org/opencv/ml/DTrees.html
+* javadoc/org/opencv/ml/EM.html
+* javadoc/org/opencv/ml/KNearest.html
+* javadoc/org/opencv/ml/LogisticRegression.html
+* javadoc/org/opencv/ml/Ml.html
+* javadoc/org/opencv/ml/NormalBayesClassifier.html
+* javadoc/org/opencv/ml/ParamGrid.html
+* javadoc/org/opencv/ml/RTrees.html
+* javadoc/org/opencv/ml/SVM.html
+* javadoc/org/opencv/ml/SVMSGD.html
+* javadoc/org/opencv/ml/StatModel.html
+* javadoc/org/opencv/ml/TrainData.html
+* javadoc/org/opencv/ml/package-frame.html
+* javadoc/org/opencv/ml/package-summary.html
+* javadoc/org/opencv/ml/package-tree.html
+* javadoc/org/opencv/objdetect/
+* javadoc/org/opencv/objdetect/BaseCascadeClassifier.html
+* javadoc/org/opencv/objdetect/CascadeClassifier.html
+* javadoc/org/opencv/objdetect/HOGDescriptor.html
+* javadoc/org/opencv/objdetect/Objdetect.html
+* javadoc/org/opencv/objdetect/QRCodeDetector.html
+* javadoc/org/opencv/objdetect/package-frame.html
+* javadoc/org/opencv/objdetect/package-summary.html
+* javadoc/org/opencv/objdetect/package-tree.html
+* javadoc/org/opencv/osgi/
+* javadoc/org/opencv/osgi/OpenCVInterface.html
+* javadoc/org/opencv/osgi/OpenCVNativeLoader.html
+* javadoc/org/opencv/osgi/package-frame.html
+* javadoc/org/opencv/osgi/package-summary.html
+* javadoc/org/opencv/osgi/package-tree.html
+* javadoc/org/opencv/photo/
+* javadoc/org/opencv/photo/AlignExposures.html
+* javadoc/org/opencv/photo/AlignMTB.html
+* javadoc/org/opencv/photo/CalibrateCRF.html
+* javadoc/org/opencv/photo/CalibrateDebevec.html
+* javadoc/org/opencv/photo/CalibrateRobertson.html
+* javadoc/org/opencv/photo/MergeDebevec.html
+* javadoc/org/opencv/photo/MergeExposures.html
+* javadoc/org/opencv/photo/MergeMertens.html
+* javadoc/org/opencv/photo/MergeRobertson.html
+* javadoc/org/opencv/photo/Photo.html
+* javadoc/org/opencv/photo/Tonemap.html
+* javadoc/org/opencv/photo/TonemapDrago.html
+* javadoc/org/opencv/photo/TonemapMantiuk.html
+* javadoc/org/opencv/photo/TonemapReinhard.html
+* javadoc/org/opencv/photo/package-frame.html
+* javadoc/org/opencv/photo/package-summary.html
+* javadoc/org/opencv/photo/package-tree.html
+* javadoc/org/opencv/utils/
+* javadoc/org/opencv/utils/Converters.html
+* javadoc/org/opencv/utils/package-frame.html
+* javadoc/org/opencv/utils/package-summary.html
+* javadoc/org/opencv/utils/package-tree.html
+* javadoc/org/opencv/video/
+* javadoc/org/opencv/video/BackgroundSubtractor.html
+* javadoc/org/opencv/video/BackgroundSubtractorKNN.html
+* javadoc/org/opencv/video/BackgroundSubtractorMOG2.html
+* javadoc/org/opencv/video/DenseOpticalFlow.html
+* javadoc/org/opencv/video/DualTVL1OpticalFlow.html
+* javadoc/org/opencv/video/FarnebackOpticalFlow.html
+* javadoc/org/opencv/video/KalmanFilter.html
+* javadoc/org/opencv/video/SparseOpticalFlow.html
+* javadoc/org/opencv/video/SparsePyrLKOpticalFlow.html
+* javadoc/org/opencv/video/Video.html
+* javadoc/org/opencv/video/package-frame.html
+* javadoc/org/opencv/video/package-summary.html
+* javadoc/org/opencv/video/package-tree.html
+* javadoc/org/opencv/videoio/
+* javadoc/org/opencv/videoio/VideoCapture.html
+* javadoc/org/opencv/videoio/VideoWriter.html
+* javadoc/org/opencv/videoio/Videoio.html
+* javadoc/org/opencv/videoio/package-frame.html
+* javadoc/org/opencv/videoio/package-summary.html
+* javadoc/org/opencv/videoio/package-tree.html
+* javadoc/overview-frame.html
+* javadoc/overview-summary.html
+* javadoc/overview-tree.html
+* javadoc/package-list
+* javadoc/resources/
+* javadoc/resources/background.gif
+* javadoc/resources/tab.gif
+* javadoc/resources/titlebar.gif
+* javadoc/resources/titlebar_end.gif
+* javadoc/serialized-form.html
+* javadoc/stylesheet.css
+
+Moved Files:
+------------
+Android Gradle projects use a different directory structure than ADT
+Eclipse projects. Here's how the projects were restructured:
+
+* AndroidManifest.xml => openCVLibrary346/src/main/AndroidManifest.xml
+* lint.xml => openCVLibrary346/lint.xml
+* res/ => openCVLibrary346/src/main/res/
+* src/ => openCVLibrary346/src/main/java/
+* src/org/opencv/engine/OpenCVEngineInterface.aidl => openCVLibrary346/src/main/aidl/org/opencv/engine/OpenCVEngineInterface.aidl
+
+Next Steps:
+-----------
+You can now build the project. The Gradle project needs network
+connectivity to download dependencies.
+
+Bugs:
+-----
+If for some reason your project does not build, and you determine that
+it is due to a bug or limitation of the Eclipse to Gradle importer,
+please file a bug at http://b.android.com with category
+Component-Tools.
+
+(This import summary is for your information only, and can be deleted
+after import once you are satisfied with the results.)
diff --git a/deploy/lite/android/demo/settings.gradle b/deploy/lite/android/demo/settings.gradle
new file mode 100644
index 0000000000000000000000000000000000000000..9d495b34f861c6ed05009b95cf15aaf24f76ebc0
--- /dev/null
+++ b/deploy/lite/android/demo/settings.gradle
@@ -0,0 +1 @@
+include ':app'
\ No newline at end of file
diff --git a/deploy/lite/android/sdk/.gitignore b/deploy/lite/android/sdk/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..796b96d1c402326528b4ba3c12ee9d92d0e212e9
--- /dev/null
+++ b/deploy/lite/android/sdk/.gitignore
@@ -0,0 +1 @@
+/build
diff --git a/deploy/lite/android/sdk/build.gradle b/deploy/lite/android/sdk/build.gradle
new file mode 100644
index 0000000000000000000000000000000000000000..11acc92c4d8c1154901c477128ea5c0f58701de2
--- /dev/null
+++ b/deploy/lite/android/sdk/build.gradle
@@ -0,0 +1,163 @@
+import java.security.MessageDigest
+
+apply plugin: 'com.android.library'
+
+android {
+ compileSdkVersion 28
+ buildToolsVersion "29.0.2"
+ defaultConfig {
+ minSdkVersion 15
+ targetSdkVersion 28
+ versionCode 1
+ versionName "1.0"
+
+ testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
+ consumerProguardFiles 'consumer-rules.pro'
+ }
+ buildTypes {
+ release {
+ minifyEnabled false
+ proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
+ }
+ }
+}
+
+dependencies {
+ implementation fileTree(dir: 'libs', include: ['*.jar','*.aar'])
+ implementation 'com.android.support:appcompat-v7:28.0.0'
+ implementation 'com.android.support.constraint:constraint-layout:1.1.3'
+ implementation 'com.android.support:design:28.0.0'
+ testImplementation 'junit:junit:4.12'
+ androidTestImplementation 'androidx.test.ext:junit:1.1.1'
+ androidTestImplementation 'androidx.test.espresso:espresso-core:3.2.0'
+}
+
+
+def paddleLiteLibs = 'https://bj.bcebos.com/paddlex/deploy/lite/paddle_lite_version_11cbd50e.tar.gz'
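+// The archive is cached under cache/<MD5-of-URL>, so later builds reuse the
+// downloaded tarball instead of fetching it again.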
+task downloadAndExtractPaddleLiteLibs(type: DefaultTask) {
+ doFirst {
+ println "Downloading and extracting Paddle Lite libs"
+ }
+ doLast {
+ // Prepare cache folder for libs
+ if (!file("cache").exists()) {
+ mkdir "cache"
+ }
+ // Generate cache name for libs
+ MessageDigest messageDigest = MessageDigest.getInstance('MD5')
+ messageDigest.update(paddleLiteLibs.bytes)
+ String cacheName = new BigInteger(1, messageDigest.digest()).toString(32)
+ // Download libs
+ if (!file("cache/${cacheName}.tar.gz").exists()) {
+ ant.get(src: paddleLiteLibs, dest: file("cache/${cacheName}.tar.gz"))
+ }
+ // Unpack libs
+ copy {
+ from tarTree("cache/${cacheName}.tar.gz")
+ into "cache/${cacheName}"
+ }
+ // Copy PaddlePredictor.jar
+ if (!file("libs/PaddlePredictor.jar").exists()) {
+ copy {
+ from "cache/${cacheName}/PaddlePredictor.jar"
+ into "libs"
+ }
+ }
+ // Copy libpaddle_lite_jni.so for armeabi-v7a and arm64-v8a
+ if (!file("src/main/jniLibs/armeabi-v7a/libpaddle_lite_jni.so").exists()) {
+ copy {
+ from "cache/${cacheName}/libs/armeabi-v7a/"
+ into "src/main/jniLibs/armeabi-v7a"
+ }
+ }
+ if (!file("src/main/jniLibs/arm64-v8a/libpaddle_lite_jni.so").exists()) {
+ copy {
+ from "cache/${cacheName}/libs/arm64-v8a/"
+ into "src/main/jniLibs/arm64-v8a"
+ }
+ }
+ }
+}
+preBuild.dependsOn downloadAndExtractPaddleLiteLibs
+
+def snakeYamlLibs = 'https://bj.bcebos.com/paddlex/deploy/lite/snakeyaml-1.18-android.tar.gz'
+task downloadAndExtractSnakeYamlLibs(type: DefaultTask) {
+ doFirst {
+ println "Downloading and extracting snake yaml sdk"
+ }
+ doLast {
+ // Prepare cache folder for sdk
+ if (!file("cache").exists()) {
+ mkdir "cache"
+ }
+ // Generate cache name for sdk
+ MessageDigest messageDigest = MessageDigest.getInstance('MD5')
+ messageDigest.update(snakeYamlLibs.bytes)
+ String cacheName = new BigInteger(1, messageDigest.digest()).toString(32)
+ // Download libs
+ if (!file("cache/${cacheName}.tar.gz").exists()) {
+ ant.get(src: snakeYamlLibs, dest: file("cache/${cacheName}.tar.gz"))
+ }
+ // Unpack libs
+ copy {
+ from tarTree("cache/${cacheName}.tar.gz")
+ into "cache/${cacheName}"
+ }
+ // Copy .jar
+ if (!file("libs/snakeyaml-1.18-android.jar").exists()) {
+ copy {
+ from "cache/${cacheName}/snakeyaml-1.18-android.jar"
+ into "libs"
+ }
+ }
+ }
+}
+preBuild.dependsOn downloadAndExtractSnakeYamlLibs
+
+def opencvLibs = 'https://bj.bcebos.com/paddlex/deploy/lite/opencv-3.4.6-android.tar.gz'
+task downloadAndExtractOpencvLibs(type: DefaultTask) {
+ doFirst {
+ println "Downloading and extracting opencv sdk"
+ }
+ doLast {
+ // Prepare cache folder for sdk
+ if (!file("cache").exists()) {
+ mkdir "cache"
+ }
+ // Generate cache name for sdk
+ MessageDigest messageDigest = MessageDigest.getInstance('MD5')
+ messageDigest.update(opencvLibs.bytes)
+ String cacheName = new BigInteger(1, messageDigest.digest()).toString(32)
+ // Download libs
+ if (!file("cache/${cacheName}.tar.gz").exists()) {
+ ant.get(src: opencvLibs, dest: file("cache/${cacheName}.tar.gz"))
+ }
+ // Unpack libs
+ copy {
+ from tarTree("cache/${cacheName}.tar.gz")
+ into "cache/${cacheName}"
+ }
+ // Copy .jar
+ if (!file("libs/opencv346.jar").exists()) {
+ copy {
+ from "cache/${cacheName}/opencv346.jar"
+ into "libs"
+ }
+ }
+ // Copy .so for armeabi-v7a and arm64-v8a
+ if (!file("src/main/jniLibs/armeabi-v7a/libopencv_java3.so").exists()) {
+ copy {
+ from "cache/${cacheName}/libs/armeabi-v7a/"
+ into "src/main/jniLibs/armeabi-v7a"
+ }
+ }
+ if (!file("src/main/jniLibs/arm64-v8a/libopencv_java3.so").exists()) {
+ copy {
+ from "cache/${cacheName}/libs/arm64-v8a/"
+ into "src/main/jniLibs/arm64-v8a"
+ }
+ }
+ }
+}
+
+preBuild.dependsOn downloadAndExtractOpencvLibs
diff --git a/deploy/lite/android/sdk/consumer-rules.pro b/deploy/lite/android/sdk/consumer-rules.pro
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/deploy/lite/android/sdk/local.properties b/deploy/lite/android/sdk/local.properties
new file mode 100644
index 0000000000000000000000000000000000000000..5d4255d3a02904590fc7ed6606d4201444a6cc54
--- /dev/null
+++ b/deploy/lite/android/sdk/local.properties
@@ -0,0 +1,7 @@
+## This file must *NOT* be checked into Version Control Systems,
+# as it contains information specific to your local configuration.
+#
+# Location of the SDK. This is only used by Gradle.
+# For customization when using a Version Control System, please read the
+# header note.
+#Tue Jun 16 10:08:04 CST 2020
diff --git a/deploy/lite/android/sdk/proguard-rules.pro b/deploy/lite/android/sdk/proguard-rules.pro
new file mode 100644
index 0000000000000000000000000000000000000000..f1b424510da51fd82143bc74a0a801ae5a1e2fcd
--- /dev/null
+++ b/deploy/lite/android/sdk/proguard-rules.pro
@@ -0,0 +1,21 @@
+# Add project specific ProGuard rules here.
+# You can control the set of applied configuration files using the
+# proguardFiles setting in build.gradle.
+#
+# For more details, see
+# http://developer.android.com/guide/developing/tools/proguard.html
+
+# If your project uses WebView with JS, uncomment the following
+# and specify the fully qualified class name to the JavaScript interface
+# class:
+#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
+# public *;
+#}
+
+# Uncomment this to preserve the line number information for
+# debugging stack traces.
+#-keepattributes SourceFile,LineNumberTable
+
+# If you keep the line number information, uncomment this to
+# hide the original source file name.
+#-renamesourcefileattribute SourceFile
diff --git a/deploy/lite/android/sdk/src/androidTest/java/com/example/paddlex/ExampleInstrumentedTest.java b/deploy/lite/android/sdk/src/androidTest/java/com/example/paddlex/ExampleInstrumentedTest.java
new file mode 100644
index 0000000000000000000000000000000000000000..2a957581b8923cd297821059a8a265b9db3e9627
--- /dev/null
+++ b/deploy/lite/android/sdk/src/androidTest/java/com/example/paddlex/ExampleInstrumentedTest.java
@@ -0,0 +1,36 @@
+package com.example.paddlex;
+
+import android.content.Context;
+import android.content.res.AssetManager;
+import android.graphics.Bitmap;
+import android.graphics.BitmapFactory;
+
+
+import androidx.test.platform.app.InstrumentationRegistry;
+import androidx.test.ext.junit.runners.AndroidJUnit4;
+
+
+import com.baidu.paddlex.config.ConfigParser;
+
+import org.json.JSONException;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Instrumented test, which will execute on an Android device.
+ *
+ * @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
+ */
+@RunWith(AndroidJUnit4.class)
+public class ExampleInstrumentedTest {
+ @Test
+ public void useAppContext() throws IOException, JSONException {
+ // Context of the app under test.
+ Context appContext = InstrumentationRegistry.getInstrumentation().getTargetContext();
+ AssetManager assets = appContext.getAssets();
+ }
+}
diff --git a/deploy/lite/android/sdk/src/main/AndroidManifest.xml b/deploy/lite/android/sdk/src/main/AndroidManifest.xml
new file mode 100644
index 0000000000000000000000000000000000000000..252453d686d91aca4dfd05e407da03839615a5be
--- /dev/null
+++ b/deploy/lite/android/sdk/src/main/AndroidManifest.xml
@@ -0,0 +1 @@
+<manifest xmlns:android="http://schemas.android.com/apk/res/android" package="com.baidu.paddlex" />
diff --git a/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/Predictor.java b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/Predictor.java
new file mode 100644
index 0000000000000000000000000000000000000000..6d154492d26637656cd42aa41f58bb798e0675e6
--- /dev/null
+++ b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/Predictor.java
@@ -0,0 +1,409 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.baidu.paddlex;
+import android.util.Log;
+import com.baidu.paddle.lite.MobileConfig;
+import com.baidu.paddle.lite.PaddlePredictor;
+import com.baidu.paddle.lite.PowerMode;
+import com.baidu.paddle.lite.Tensor;
+import com.baidu.paddlex.config.ConfigParser;
+import com.baidu.paddlex.postprocess.ClsResult;
+import com.baidu.paddlex.postprocess.DetResult;
+import com.baidu.paddlex.postprocess.Result;
+import com.baidu.paddlex.postprocess.SegResult;
+import com.baidu.paddlex.preprocess.ImageBlob;
+import com.baidu.paddlex.preprocess.Transforms;
+import java.util.Date;
+import org.opencv.core.Mat;
+
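+// A minimal usage sketch (paths and the input Mat are illustrative, not part of
+// the SDK; assumes a model and model.yml exported by PaddleX):
+//
+//   ConfigParser configParser = new ConfigParser();
+//   configParser.init("/sdcard/model.nb", "/sdcard/model.yml", 1, "LITE_POWER_HIGH");
+//   Predictor predictor = new Predictor();
+//   predictor.init(configParser);
+//   predictor.setInputMat(inputMat); // an OpenCV Mat in BGR channel order
+//   predictor.predict();
+//   Result result = predictor.getResult();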
+public class Predictor {
+ private static final String TAG = Predictor.class.getSimpleName();
+ protected boolean isLoaded = false;
+ protected int warmupIterNum = 0;
+ protected int inferIterNum = 1;
+ protected int cpuThreadNum = 1;
+ protected String cpuPowerMode = "LITE_POWER_HIGH";
+ protected String modelPath = "";
+ protected String modelName = "";
+ protected float inferenceTime = 0;
+ protected float preprocessTime = 0;
+ protected float postprocessTime = 0;
+ protected PaddlePredictor paddlePredictor = null;
+ protected ImageBlob imageBlob = new ImageBlob();
+ protected Transforms transforms = new Transforms();
+ protected ConfigParser configParser = new ConfigParser();
+ protected Mat inputMat;
+ protected Result result;
+
+ public Predictor() {
+ super();
+ }
+
+ public boolean init(String modelPath, int cpuThreadNum, String cpuPowerMode) {
+ if (configParser.getModelType().equalsIgnoreCase("classifier")) {
+ result = new ClsResult();
+ } else if (configParser.getModelType().equalsIgnoreCase("detector")) {
+ result = new DetResult();
+ } else if (configParser.getModelType().equalsIgnoreCase("segmenter")) {
+ result = new SegResult();
+ } else {
+ Log.i(TAG, "model type: " + configParser.getModelType() + " is not supported! Supported types: 'classifier', 'detector', 'segmenter'.");
+ }
+ isLoaded = loadModel(modelPath, cpuThreadNum, cpuPowerMode);
+ return isLoaded;
+ }
+
+ public boolean init(ConfigParser configParser) {
+ this.configParser = configParser;
+ init(configParser.getModelPath(), configParser.getCpuThreadNum(), configParser.getCpuPowerMode());
+ transforms.loadConfig(configParser.getTransformsList(), configParser.getTransformsMode());
+ if (!isLoaded()) {
+ return false;
+ }
+ Log.i(TAG, configParser.toString());
+ return isLoaded;
+ }
+
+ public boolean predict() {
+ this.imageBlob.clear();
+ this.imageBlob = transforms.run(inputMat, imageBlob);
+ if (configParser.getModelType().equalsIgnoreCase("classifier")) {
+ runModel((ClsResult) result);
+ } else if (configParser.getModelType().equalsIgnoreCase("detector")) {
+ runModel((DetResult) result);
+ } else if (configParser.getModelType().equalsIgnoreCase("segmenter")) {
+ runModel((SegResult) result);
+ }
+ return true;
+ }
+
+ private boolean runModel(DetResult detResult) {
+ // set input shape & data
+ Tensor imTensor = getInput(0);
+ imTensor.resize(imageBlob.getNewImageSize());
+ imTensor.setData(imageBlob.getImageData());
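+ // Besides the image tensor, YOLOv3 also takes the original image size as input 1,
+ // while FasterRCNN takes im_info and im_shape as inputs 1 and 2.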
+ if (configParser.getModel().equalsIgnoreCase("YOLOv3")) {
+ Tensor imSizeTensor = getInput(1);
+ long[] imSize = {1, 2};
+ imSizeTensor.resize(imSize);
+ imSizeTensor.setData(new int[]{(int) imageBlob.getOriImageSize()[2], (int) imageBlob.getOriImageSize()[3]});
+ } else if (configParser.getModel().equalsIgnoreCase("FasterRCNN")) {
+ Tensor imInfoTensor = getInput(1);
+ long[] imInfo = {1, 3};
+ imInfoTensor.resize(imInfo);
+ imInfoTensor.setData(new float[]{imageBlob.getNewImageSize()[2], imageBlob.getNewImageSize()[3], imageBlob.getScale()});
+
+ Tensor imShapeTensor = getInput(2);
+ long[] imShape = {1, 3};
+ imShapeTensor.resize(imShape);
+ imShapeTensor.setData(new float[]{imageBlob.getOriImageSize()[2], imageBlob.getOriImageSize()[3], 1});
+ }
+ // run model
+ runModel();
+ // Fetch output tensor
+ Tensor outputTensor = getOutput(0);
+ float[] output = outputTensor.getFloatData();
+ long[] outputShape = outputTensor.shape();
+ long outputSize = 1;
+ for (long s : outputShape) {
+ outputSize *= s;
+ }
+ int num_boxes = (int) (outputSize / 6);
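+ // Each detected box is encoded as 6 floats: [category_id, score, xmin, ymin, xmax, ymax].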
+ for (int i = 0; i < num_boxes; i++) {
+ DetResult.Box box = detResult.new Box();
+ box.setCategoryId((int) output[i * 6]);
+ box.setCategory(configParser.getLabeList().get(box.getCategoryId()));
+ box.setScore(output[i * 6 + 1]);
+ float xmin = output[i * 6 + 2];
+ float ymin = output[i * 6 + 3];
+ float xmax = output[i * 6 + 4];
+ float ymax = output[i * 6 + 5];
+ box.setCoordinate(new float[]{xmin, ymin, xmax, ymax});
+ detResult.getBoxes().add(box);
+ }
+ return true;
+ }
+
+ private boolean runModel(SegResult segResult) {
+ // set input shape & data
+ Tensor imTensor = getInput(0);
+ imTensor.resize(imageBlob.getNewImageSize());
+ imTensor.setData(imageBlob.getImageData());
+ // run model
+ runModel();
+ Tensor labelTensor = getOutput(0);
+ // Fetch output tensor
+ long[] labelData = labelTensor.getLongData();
+ segResult.getMask().setLabelShape(labelTensor.shape());
+ long labelSize = 1;
+ for (long s : segResult.getMask().getLabelShape()) {
+ labelSize *= s;
+ }
+ segResult.getMask().setLabelData(labelData);
+
+ Tensor scoreTensor = getOutput(1);
+ float[] scoreData = scoreTensor.getFloatData();
+ segResult.getMask().setScoreShape(scoreTensor.shape());
+ segResult.getMask().setScoreData(scoreData);
+ return true;
+ }
+
+ private boolean runModel(ClsResult clsResult) {
+ // set input shape & data
+ Tensor imTensor = getInput(0);
+ imTensor.resize(imageBlob.getNewImageSize());
+ imTensor.setData(imageBlob.getImageData());
+ // run model
+ runModel();
+ // Fetch output tensor
+ Tensor outputTensor = getOutput(0);
+ long[] outputShape = outputTensor.shape();
+ long outputSize = 1;
+ for (long s : outputShape) {
+ outputSize *= s;
+ }
+ int max_index = 0; // index of the top-1 class
+ float max_score = 0; // score of the top-1 class
+ for (int i = 0; i < outputSize; i++) {
+ float tmp = outputTensor.getFloatData()[i];
+ if (tmp > max_score) {
+ max_index = i;
+ max_score = tmp;
+ }
+ }
+ clsResult.setCategoryId(max_index);
+ clsResult.setCategory(configParser.getLabeList().get(max_index));
+ clsResult.setScore(max_score);
+ return true;
+ }
+
+ private boolean loadModel(String modelPath, int cpuThreadNum, String cpuPowerMode) {
+ // release model if exists
+ releaseModel();
+ // load model
+ if (modelPath.isEmpty()) {
+ return false;
+ }
+ MobileConfig config = new MobileConfig();
+ config.setModelFromFile(modelPath);
+ config.setThreads(cpuThreadNum);
+ if (cpuPowerMode.equalsIgnoreCase("LITE_POWER_HIGH")) {
+ config.setPowerMode(PowerMode.LITE_POWER_HIGH);
+ } else if (cpuPowerMode.equalsIgnoreCase("LITE_POWER_LOW")) {
+ config.setPowerMode(PowerMode.LITE_POWER_LOW);
+ } else if (cpuPowerMode.equalsIgnoreCase("LITE_POWER_FULL")) {
+ config.setPowerMode(PowerMode.LITE_POWER_FULL);
+ } else if (cpuPowerMode.equalsIgnoreCase("LITE_POWER_NO_BIND")) {
+ config.setPowerMode(PowerMode.LITE_POWER_NO_BIND);
+ } else if (cpuPowerMode.equalsIgnoreCase("LITE_POWER_RAND_HIGH")) {
+ config.setPowerMode(PowerMode.LITE_POWER_RAND_HIGH);
+ } else if (cpuPowerMode.equalsIgnoreCase("LITE_POWER_RAND_LOW")) {
+ config.setPowerMode(PowerMode.LITE_POWER_RAND_LOW);
+ } else {
+ Log.e(TAG, "unknown cpu power mode!");
+ return false;
+ }
+ paddlePredictor = PaddlePredictor.createPaddlePredictor(config);
+ this.cpuThreadNum = cpuThreadNum;
+ this.cpuPowerMode = cpuPowerMode;
+ this.modelPath = modelPath;
+ this.modelName = configParser.getModel();
+ return true;
+ }
+
+ private boolean runModel() {
+ if (!isLoaded()) {
+ return false;
+ }
+ // warm up
+ for (int i = 0; i < warmupIterNum; i++) {
+ paddlePredictor.run();
+ }
+ Date start = new Date();
+ // inference
+ for (int i = 0; i < inferIterNum; i++) {
+ paddlePredictor.run();
+ }
+ Date end = new Date();
+ inferenceTime = (end.getTime() - start.getTime()) / (float) inferIterNum;
+ return true;
+ }
+
+ public void releaseModel() {
+ paddlePredictor = null;
+ isLoaded = false;
+ cpuThreadNum = 1;
+ cpuPowerMode = "LITE_POWER_HIGH";
+ modelPath = "";
+ modelName = "";
+ }
+
+ public boolean isLoaded() {
+ return paddlePredictor != null && isLoaded;
+ }
+
+ public void setLoaded(boolean loaded) {
+ isLoaded = loaded;
+ }
+
+ public int getWarmupIterNum() {
+ return warmupIterNum;
+ }
+
+ public void setWarmupIterNum(int warmupIterNum) {
+ this.warmupIterNum = warmupIterNum;
+ }
+
+ public int getInferIterNum() {
+ return inferIterNum;
+ }
+
+ public void setInferIterNum(int inferIterNum) {
+ this.inferIterNum = inferIterNum;
+ }
+
+ public float getInferenceTime() {
+ return inferenceTime;
+ }
+
+ public void setInferenceTime(float inferenceTime) {
+ this.inferenceTime = inferenceTime;
+ }
+
+ public int getCpuThreadNum() {
+ return cpuThreadNum;
+ }
+
+ public void setCpuThreadNum(int cpuThreadNum) {
+ this.cpuThreadNum = cpuThreadNum;
+ }
+
+ public String getCpuPowerMode() {
+ return cpuPowerMode;
+ }
+
+ public void setCpuPowerMode(String cpuPowerMode) {
+ this.cpuPowerMode = cpuPowerMode;
+ }
+
+ public String getModelPath() {
+ return modelPath;
+ }
+
+ public void setModelPath(String modelPath) {
+ this.modelPath = modelPath;
+ }
+
+ public String getModelName() {
+ return modelName;
+ }
+
+ public void setModelName(String modelName) {
+ this.modelName = modelName;
+ }
+
+ public Result getResult() {
+ return result;
+ }
+
+ public void setResult(Result result) {
+ this.result = result;
+ }
+
+ public PaddlePredictor getPaddlePredictor() {
+ return paddlePredictor;
+ }
+
+ public void setPaddlePredictor(PaddlePredictor paddlePredictor) {
+ this.paddlePredictor = paddlePredictor;
+ }
+
+ public float getPreprocessTime() {
+ return preprocessTime;
+ }
+
+ public void setPreprocessTime(float preprocessTime) {
+ this.preprocessTime = preprocessTime;
+ }
+
+ public float getPostprocessTime() {
+ return postprocessTime;
+ }
+
+ public void setPostprocessTime(float postprocessTime) {
+ this.postprocessTime = postprocessTime;
+ }
+
+ public void setConfigParser(ConfigParser configParser) {
+ this.configParser = configParser;
+ }
+
+ public Mat getInputMat() {
+ return inputMat;
+ }
+
+ public void setInputMat(Mat inputMat) {
+ Mat copyMat = new Mat();
+ inputMat.copyTo(copyMat);
+ this.inputMat = copyMat;
+ }
+
+ public DetResult getDetResult() {
+ if (!result.getType().equals("det")) {
+ Log.e(TAG, "this model_type is not detector");
+ return null;
+ }
+ return (DetResult) result;
+ }
+
+ public SegResult getSegResult() {
+ if (!result.getType().equals("seg")) {
+ Log.e(TAG, "this model_type is not segmenter");
+ return null;
+ }
+ return (SegResult) result;
+ }
+
+ public ClsResult getClsResult() {
+ if (!result.getType().equals("cls")) {
+ Log.e(TAG, "this model_type is not classifier");
+ return null;
+ }
+ return (ClsResult) result;
+ }
+
+ public ImageBlob getImageBlob() {
+ return imageBlob;
+ }
+
+ public void setImageBlob(ImageBlob imageBlob) {
+ this.imageBlob = imageBlob;
+ }
+
+ public Tensor getInput(int idx) {
+ if (!isLoaded()) {
+ return null;
+ }
+ return paddlePredictor.getInput(idx);
+ }
+
+ public Tensor getOutput(int idx) {
+ if (!isLoaded()) {
+ return null;
+ }
+ return paddlePredictor.getOutput(idx);
+ }
+
+}
diff --git a/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/Utils.java b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/Utils.java
new file mode 100644
index 0000000000000000000000000000000000000000..48ed6f031861ffaabeaea1265cdeb762a29fba6c
--- /dev/null
+++ b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/Utils.java
@@ -0,0 +1,132 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.baidu.paddlex;
+
+import android.content.Context;
+import android.os.Environment;
+
+import org.opencv.android.OpenCVLoader;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+public class Utils {
+ private static final String TAG = Utils.class.getSimpleName();
+
+ public static void copyFileFromAssets(Context appCtx, String srcPath, String dstPath) {
+ if (srcPath.isEmpty() || dstPath.isEmpty()) {
+ return;
+ }
+ InputStream is = null;
+ OutputStream os = null;
+ try {
+ is = new BufferedInputStream(appCtx.getAssets().open(srcPath));
+ os = new BufferedOutputStream(new FileOutputStream(new File(dstPath)));
+ byte[] buffer = new byte[1024];
+ int length = 0;
+ while ((length = is.read(buffer)) != -1) {
+ os.write(buffer, 0, length);
+ }
+ } catch (FileNotFoundException e) {
+ e.printStackTrace();
+ } catch (IOException e) {
+ e.printStackTrace();
+ } finally {
+ try {
+ // Guard against NPE when opening a stream failed before it was assigned.
+ if (os != null) os.close();
+ if (is != null) is.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+
+ public static void copyDirectoryFromAssets(Context appCtx, String srcDir, String dstDir) {
+ if (srcDir.isEmpty() || dstDir.isEmpty()) {
+ return;
+ }
+ try {
+ if (!new File(dstDir).exists()) {
+ new File(dstDir).mkdirs();
+ }
+ for (String fileName : appCtx.getAssets().list(srcDir)) {
+ String srcSubPath = srcDir + File.separator + fileName;
+ String dstSubPath = dstDir + File.separator + fileName;
+ // AssetManager has no isDirectory(); an asset path that lists children is a directory.
+ if (appCtx.getAssets().list(srcSubPath).length > 0) {
+ copyDirectoryFromAssets(appCtx, srcSubPath, dstSubPath);
+ } else {
+ copyFileFromAssets(appCtx, srcSubPath, dstSubPath);
+ }
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ public static String getFileNameFromString(String srcDir) {
+ if (srcDir.isEmpty()) {
+ return null;
+ }
+ try {
+ String fileName = srcDir.substring(srcDir.lastIndexOf("/") + 1);
+ return fileName;
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ return null;
+ }
+
+ public static float[] parseFloatsFromString(String string, String delimiter) {
+ String[] pieces = string.trim().toLowerCase().split(delimiter);
+ float[] floats = new float[pieces.length];
+ for (int i = 0; i < pieces.length; i++) {
+ floats[i] = Float.parseFloat(pieces[i].trim());
+ }
+ return floats;
+ }
+
+ public static long[] parseLongsFromString(String string, String delimiter) {
+ String[] pieces = string.trim().toLowerCase().split(delimiter);
+ long[] longs = new long[pieces.length];
+ for (int i = 0; i < pieces.length; i++) {
+ longs[i] = Long.parseLong(pieces[i].trim());
+ }
+ return longs;
+ }
+
+ public static String getSDCardDirectory() {
+ return Environment.getExternalStorageDirectory().getAbsolutePath();
+ }
+
+ public static boolean isSupportedNPU() {
+ String hardware = android.os.Build.HARDWARE;
+ return hardware.equalsIgnoreCase("kirin810") || hardware.equalsIgnoreCase("kirin990");
+ }
+
+ public static boolean initialOpencv() {
+ return OpenCVLoader.initDebug();
+ }
+
+}
diff --git a/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/config/ConfigParser.java b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/config/ConfigParser.java
new file mode 100644
index 0000000000000000000000000000000000000000..aa20f6a685bb6117fca135a5adf5039d42ad5ca6
--- /dev/null
+++ b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/config/ConfigParser.java
@@ -0,0 +1,162 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.baidu.paddlex.config;
+
+import android.content.Context;
+import android.content.res.AssetManager;
+
+import org.yaml.snakeyaml.Yaml;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+public class ConfigParser {
+ protected String model = "";
+ protected List<String> labeList = new ArrayList<>();
+ protected int numClasses = 0;
+ protected String modelType = "";
+ protected String transformsMode = "RGB";
+ protected List transformsList = new ArrayList();
+ protected String modelPath = "";
+ protected int cpuThreadNum = 1;
+ protected String cpuPowerMode = "";
+ protected String yamlPath = "";
+
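+ // init() parses the model.yml exported alongside a PaddleX model. A rough sketch
+ // of the fields read below (values are illustrative):
+ //
+ //   Model: YOLOv3
+ //   TransformsMode: RGB
+ //   Transforms:
+ //     - ResizeByShort: {short_size: 608, max_size: -1}
+ //   _Attributes:
+ //     model_type: detector
+ //     num_classes: 80
+ //     labels: [person, bicycle, ...]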
+ public void init(String modelPath, String yamlPath, int cpuThreadNum,
+ String cpuPowerMode) throws IOException {
+
+ this.modelPath = modelPath;
+ this.cpuThreadNum = cpuThreadNum;
+ this.cpuPowerMode = cpuPowerMode;
+ this.yamlPath = yamlPath;
+ InputStream ymlStream = new FileInputStream(new File(yamlPath));
+ Yaml yml = new Yaml();
+ HashMap yml_map = (HashMap) yml.load(ymlStream);
+ model = (String) yml_map.get("Model");
+ if (yml_map.containsKey("TransformsMode")) {
+ transformsMode = (String) yml_map.get("TransformsMode");
+ }
+ HashMap _Attributes = (HashMap) yml_map.get("_Attributes");
+ // parser label_list
+ labeList = (List<String>) _Attributes.get("labels");
+ numClasses = (int) _Attributes.get("num_classes");
+ // parser model_type(classifier, segmenter, detector)
+ modelType = (String) _Attributes.get("model_type");
+ // parser Transforms
+ transformsList = (List) yml_map.get("Transforms");
+
+ }
+
+ @Override
+ public String toString() {
+ return "ConfigParser{" +
+ "model='" + model + '\'' +
+ ", labeList=" + labeList +
+ ", numClasses=" + numClasses +
+ ", modelType='" + modelType + '\'' +
+ ", transformsMode='" + transformsMode + '\'' +
+ ", transformsList=" + transformsList +
+ ", modelPath='" + modelPath + '\'' +
+ ", cpuThreadNum=" + cpuThreadNum +
+ ", cpuPowerMode='" + cpuPowerMode + '\'' +
+ ", yamlPath='" + yamlPath + '\'' +
+ '}';
+ }
+
+ public int getNumClasses() {
+ return numClasses;
+ }
+
+ public void setNumClasses(int numClasses) {
+ this.numClasses = numClasses;
+ }
+
+ public List<String> getLabeList() {
+ return labeList;
+ }
+
+ public void setLabeList(List<String> labeList) {
+ this.labeList = labeList;
+ }
+
+ public String getModelType() {
+ return modelType;
+ }
+
+ public void setModelType(String modelType) {
+ this.modelType = modelType;
+ }
+
+ public List getTransformsList() {
+ return transformsList;
+ }
+
+ public void setTransformsList(List transformsList) {
+ this.transformsList = transformsList;
+ }
+
+ public String getModel() {
+ return model;
+ }
+
+ public void setModel(String model) {
+ this.model = model;
+ }
+
+ public String getTransformsMode() {
+ return transformsMode;
+ }
+
+ public void setTransformsMode(String transformsMode) {
+ this.transformsMode = transformsMode;
+ }
+
+ public String getModelPath() {
+ return modelPath;
+ }
+
+ public void setModelPath(String modelPath) {
+ this.modelPath = modelPath;
+ }
+
+ public int getCpuThreadNum() {
+ return cpuThreadNum;
+ }
+
+ public void setCpuThreadNum(int cpuThreadNum) {
+ this.cpuThreadNum = cpuThreadNum;
+ }
+
+ public String getCpuPowerMode() {
+ return cpuPowerMode;
+ }
+
+ public void setCpuPowerMode(String cpuPowerMode) {
+ this.cpuPowerMode = cpuPowerMode;
+ }
+
+ public String getYamlPath() {
+ return yamlPath;
+ }
+
+ public void setYamlPath(String yamlPath) {
+ this.yamlPath = yamlPath;
+ }
+}
diff --git a/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/postprocess/ClsResult.java b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/postprocess/ClsResult.java
new file mode 100644
index 0000000000000000000000000000000000000000..97ca2d92532bdfd826b2d67cc18a403f6e293cc0
--- /dev/null
+++ b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/postprocess/ClsResult.java
@@ -0,0 +1,52 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.baidu.paddlex.postprocess;
+
+public class ClsResult extends Result {
+ static String type = "cls";
+ protected int categoryId;
+ protected String category;
+ protected float score;
+
+ public int getCategoryId() {
+ return categoryId;
+ }
+
+ public void setCategoryId(int categoryId) {
+ this.categoryId = categoryId;
+ }
+
+ public String getCategory() {
+ return category;
+ }
+
+ public void setCategory(String category) {
+ this.category = category;
+ }
+
+ public float getScore() {
+ return score;
+ }
+
+ public void setScore(float score) {
+ this.score = score;
+ }
+
+ @Override
+ public String getType() {
+ return type;
+ }
+
+}
diff --git a/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/postprocess/DetResult.java b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/postprocess/DetResult.java
new file mode 100644
index 0000000000000000000000000000000000000000..422a275ebe5ab1d545d70bf783ba5684e211b2c6
--- /dev/null
+++ b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/postprocess/DetResult.java
@@ -0,0 +1,76 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.baidu.paddlex.postprocess;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class DetResult extends Result {
+ static String type = "det";
+ protected List<Box> boxes = new ArrayList<Box>();
+
+ public List<Box> getBoxes() {
+ return boxes;
+ }
+
+ public void setBoxes(List<Box> boxes) {
+ this.boxes = boxes;
+ }
+
+ @Override
+ public String getType() {
+ return type;
+ }
+
+ public class Box {
+ protected int categoryId;
+ protected String category;
+ protected float score;
+ protected float[] coordinate = new float[4];
+
+ public int getCategoryId() {
+ return categoryId;
+ }
+
+ public void setCategoryId(int category_id) {
+ this.categoryId = category_id;
+ }
+
+ public String getCategory() {
+ return category;
+ }
+
+ public void setCategory(String category) {
+ this.category = category;
+ }
+
+ public float getScore() {
+ return score;
+ }
+
+ public void setScore(float score) {
+ this.score = score;
+ }
+
+ public float[] getCoordinate() {
+ return coordinate;
+ }
+
+ public void setCoordinate(float[] coordinate) {
+ this.coordinate = coordinate;
+ }
+ }
+
+}
diff --git a/docs/test.cpp b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/postprocess/Result.java
similarity index 72%
rename from docs/test.cpp
rename to deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/postprocess/Result.java
index 3f9bc680b74ee4a33ba65236d944592a832f835d..ba57af086e6d50609bcb9b9ae286bea0761f2c5d 100644
--- a/docs/test.cpp
+++ b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/postprocess/Result.java
@@ -1,4 +1,4 @@
-// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -12,9 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include <iostream>
+package com.baidu.paddlex.postprocess;
-int main() {
- std::cout << "haha" << std::endl;
- return 0;
+public class Result {
+ static String type = "base";
+
+ public String getType() {
+ return type;
+ }
}
diff --git a/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/postprocess/SegResult.java b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/postprocess/SegResult.java
new file mode 100644
index 0000000000000000000000000000000000000000..e3a15f2dcc582300dd2ccc82509dbdd841dcd989
--- /dev/null
+++ b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/postprocess/SegResult.java
@@ -0,0 +1,72 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.baidu.paddlex.postprocess;
+
+public class SegResult extends Result {
+ static String type = "seg";
+ protected Mask mask = new Mask();
+
+ public Mask getMask() {
+ return mask;
+ }
+
+ public void setMask(Mask mask) {
+ this.mask = mask;
+ }
+
+ @Override
+ public String getType() {
+ return type;
+ }
+
+ public class Mask {
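+ // labelData holds the per-pixel class ids and scoreData the per-pixel confidences;
+ // both shapes mirror the NCHW output tensors they were read from.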
+ protected float[] scoreData;
+ protected long[] labelData;
+ protected long[] labelShape = new long[4];
+ protected long[] scoreShape = new long[4];
+
+ public float[] getScoreData() {
+ return scoreData;
+ }
+
+ public void setScoreData(float[] score_data) {
+ this.scoreData = score_data;
+ }
+
+ public long[] getLabelData() {
+ return labelData;
+ }
+
+ public void setLabelData(long[] label_data) {
+ this.labelData = label_data;
+ }
+
+ public long[] getLabelShape() {
+ return labelShape;
+ }
+
+ public void setLabelShape(long[] labelShape) {
+ this.labelShape = labelShape;
+ }
+
+ public long[] getScoreShape() {
+ return scoreShape;
+ }
+
+ public void setScoreShape(long[] scoreShape) {
+ this.scoreShape = scoreShape;
+ }
+ }
+}
diff --git a/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/preprocess/ImageBlob.java b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/preprocess/ImageBlob.java
new file mode 100644
index 0000000000000000000000000000000000000000..a9bd9a52e5d87792c5705b621a3fb275c68c1f7e
--- /dev/null
+++ b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/preprocess/ImageBlob.java
@@ -0,0 +1,86 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.baidu.paddlex.preprocess;
+
+import java.util.LinkedHashMap;
+
+public class ImageBlob {
+ // Original image size, stored as {1, 3, height, width}
+ private long[] oriImageSize = new long[]{1, 3, -1, -1};
+ // Image size after the most recent transform, stored as {1, 3, height, width}
+ private long[] newImageSize = new long[]{1, 3, -1, -1};
+ // Pre-transform {width, height} keyed by op name, in the order the reshaping ops ran
+ private LinkedHashMap<String, int[]> reshapeInfo = new LinkedHashMap<String, int[]>();
+ // Resize scale
+ private float scale = 1;
+ // Buffer for image data after preprocessing
+ private float[] imageData;
+
+ public void clear() {
+ oriImageSize = new long[]{1, 3, -1, -1};
+ newImageSize = new long[]{1, 3, -1, -1};
+ reshapeInfo.clear();
+ imageData = null;
+ }
+
+ public long[] getOriImageSize() {
+ return oriImageSize;
+ }
+
+ public void setOriImageSize(long[] oriImageSize) {
+ this.oriImageSize = oriImageSize;
+ }
+
+ public void setOriImageSize(long dim, int idx) {
+ this.oriImageSize[idx] = dim;
+ }
+
+ public long[] getNewImageSize() {
+ return newImageSize;
+ }
+
+ public void setNewImageSize(long[] newImageSize) {
+ this.newImageSize = newImageSize;
+ }
+
+ public void setNewImageSize(long dim, int idx) {
+ this.newImageSize[idx] = dim;
+ }
+
+
+ public LinkedHashMap<String, int[]> getReshapeInfo() {
+ return reshapeInfo;
+ }
+
+ public void setReshapeInfo(LinkedHashMap<String, int[]> reshapeInfo) {
+ this.reshapeInfo = reshapeInfo;
+ }
+
+ public float getScale() {
+ return scale;
+ }
+
+ public void setScale(float scale) {
+ this.scale = scale;
+ }
+
+ public float[] getImageData() {
+ return imageData;
+ }
+
+ public void setImageData(float[] imageData) {
+ this.imageData = imageData;
+ }
+}
\ No newline at end of file
diff --git a/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/preprocess/Transforms.java b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/preprocess/Transforms.java
new file mode 100644
index 0000000000000000000000000000000000000000..940ebaa234db2e34faa2daaf74dfacc0e9d131fe
--- /dev/null
+++ b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/preprocess/Transforms.java
@@ -0,0 +1,286 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.baidu.paddlex.preprocess;
+import android.util.Log;
+import org.opencv.android.OpenCVLoader;
+import org.opencv.core.Core;
+import org.opencv.core.CvType;
+import org.opencv.core.Mat;
+import org.opencv.core.Rect;
+import org.opencv.core.Scalar;
+import org.opencv.core.Size;
+import org.opencv.imgproc.Imgproc;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+public class Transforms {
+ private static final String TAG = Transforms.class.getSimpleName();
+ private List<transformOp> transformOps = new ArrayList<transformOp>();
+ private String transformsMode = "RGB";
+ private HashMap<String, Integer> interpMap = new HashMap<String, Integer>(){{
+ put("LINEAR", Imgproc.INTER_LINEAR);
+ put("NEAREST", Imgproc.INTER_NEAREST);
+ put("AREA", Imgproc.INTER_AREA);
+ put("CUBIC", Imgproc.INTER_CUBIC);
+ put("LANCZOS4", Imgproc.INTER_LANCZOS4);
+ }
+ };
+
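+ // Builds the op pipeline from the "Transforms" list in model.yml: each entry maps
+ // an op name (ResizeByShort, ResizeByLong, CenterCrop, Normalize, Resize, Padding)
+ // to its parameters; entries with unknown op names are silently skipped.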
+ public void loadConfig(List transforms_list, String transformsMode) {
+ if (!OpenCVLoader.initDebug()) {
+ Log.e(TAG, "OpenCV loading failed.");
+ }
+ this.transformsMode = transformsMode;
+ for (int i = 0; i < transforms_list.size(); i++) {
+ HashMap transform_op = (HashMap) (transforms_list.get(i));
+ if (transform_op.containsKey("ResizeByShort")) {
+ HashMap info = (HashMap) transform_op.get("ResizeByShort");
+ ResizeByShort resizeByShort = new ResizeByShort();
+ resizeByShort.max_size = (int)info.get("max_size");
+ resizeByShort.short_size = (int)info.get("short_size");
+ if (info.containsKey("interp")) {
+ resizeByShort.interp = (String) info.get("interp");
+ }
+ transformOps.add(resizeByShort);
+ } else if (transform_op.containsKey("ResizeByLong")) {
+ HashMap info = (HashMap) transform_op.get("ResizeByLong");
+ ResizeByLong resizeByLong = new ResizeByLong();
+ resizeByLong.long_size = (int)info.get("long_size");
+ if (info.containsKey("interp")) {
+ resizeByLong.interp = (String) info.get("interp");
+ }
+ transformOps.add(resizeByLong);
+
+ } else if (transform_op.containsKey("CenterCrop")) {
+ HashMap info = (HashMap) transform_op.get("CenterCrop");
+ CenterCrop centerCrop = new CenterCrop();
+ if (info.get("crop_size") instanceof Integer) {
+ centerCrop.cropHeight = (int) info.get("crop_size");
+ centerCrop.cropWidth = (int) info.get("crop_size");
+ } else {
+ centerCrop.cropWidth = ((List<Integer>) info.get("crop_size")).get(0);
+ centerCrop.cropHeight = ((List<Integer>) info.get("crop_size")).get(1);
+ }
+ transformOps.add(centerCrop);
+ } else if (transform_op.containsKey("Normalize")) {
+ HashMap<String, List<Double>> info = (HashMap<String, List<Double>>) transform_op.get("Normalize");
+ Normalize normalize = new Normalize();
+ normalize.mean = info.get("mean").toArray(new Double[info.get("mean").size()]);
+ normalize.std = info.get("std").toArray(new Double[info.get("std").size()]);
+ transformOps.add(normalize);
+ } else if (transform_op.containsKey("Resize")) {
+ HashMap info = (HashMap) transform_op.get("Resize");
+ Resize resize = new Resize();
+ if (info.get("target_size") instanceof Integer) {
+ resize.width = (int) info.get("target_size");
+ resize.height = (int) info.get("target_size");
+ } else {
+ resize.width = ((List<Integer>) info.get("target_size")).get(0);
+ resize.height = ((List<Integer>) info.get("target_size")).get(1);
+ }
+ if (info.containsKey("interp")) {
+ resize.interp = (String) info.get("interp");
+ }
+ transformOps.add(resize);
+ } else if (transform_op.containsKey("Padding")) {
+ HashMap info = (HashMap) transform_op.get("Padding");
+ Padding padding = new Padding();
+ if (info.containsKey("coarsest_stride")) {
+ padding.coarsest_stride = (int) info.get("coarsest_stride");
+ }
+ if (info.containsKey("target_size")) {
+ if (info.get("target_size") instanceof Integer) {
+ padding.width = (int) info.get("target_size");
+ padding.height = (int) info.get("target_size");
+ } else {
+ padding.width = ((List<Integer>) info.get("target_size")).get(0);
+ padding.height = ((List<Integer>) info.get("target_size")).get(1);
+ }
+ }
+ transformOps.add(padding);
+ }
+ }
+ }
+
+ public ImageBlob run(Mat inputMat, ImageBlob imageBlob) {
+ imageBlob.setOriImageSize(inputMat.height(),2);
+ imageBlob.setOriImageSize(inputMat.width(),3);
+ imageBlob.setNewImageSize(inputMat.height(),2);
+ imageBlob.setNewImageSize(inputMat.width(),3);
+
+ if(transformsMode.equalsIgnoreCase("RGB")){
+ Imgproc.cvtColor(inputMat, inputMat, Imgproc.COLOR_BGR2RGB);
+ }else if(!transformsMode.equalsIgnoreCase("BGR")){
+ Log.e(TAG, "transformsMode only support RGB or BGR");
+ }
+ inputMat.convertTo(inputMat, CvType.CV_32FC(3));
+
+ for (transformOp op : transformOps) {
+ inputMat = op.run(inputMat, imageBlob);
+ }
+
+ int w = inputMat.width();
+ int h = inputMat.height();
+ int c = inputMat.channels();
+ imageBlob.setImageData(new float[w * h * c]);
+ int[] channelStride = new int[]{w * h, w * h * 2};
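+ // Repack OpenCV's interleaved HWC pixels into the planar CHW float buffer that is
+ // fed to the input tensor.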
+ for (int y = 0; y < h; y++) {
+ for (int x = 0; x < w; x++) {
+ double[] color = inputMat.get(y, x);
+ imageBlob.getImageData()[y * w + x] = (float) (color[0]);
+ imageBlob.getImageData()[y * w + x + channelStride[0]] = (float) (color[1]);
+ imageBlob.getImageData()[y * w + x + channelStride[1]] = (float) (color[2]);
+ }
+ }
+ return imageBlob;
+ }
+
+ private class transformOp {
+ public Mat run(Mat inputMat, ImageBlob data) {
+ return inputMat;
+ }
+ }
+
+ private class ResizeByShort extends transformOp {
+ private int max_size;
+ private int short_size;
+ private String interp = "LINEAR";
+
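+ // Scales the image so its short side equals short_size; if max_size > 0 and the
+ // long side would then exceed it, the scale is capped so the long side equals max_size.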
+ public Mat run(Mat inputMat, ImageBlob imageBlob) {
+ int origin_w = inputMat.width();
+ int origin_h = inputMat.height();
+ imageBlob.getReshapeInfo().put("resize", new int[]{origin_w, origin_h});
+ int im_size_max = Math.max(origin_w, origin_h);
+ int im_size_min = Math.min(origin_w, origin_h);
+ float scale = (float) (short_size) / (float) (im_size_min);
+ if (max_size > 0) {
+ if (Math.round(scale * im_size_max) > max_size) {
+ scale = (float) (max_size) / (float) (im_size_max);
+ }
+ }
+ int width = Math.round(scale * origin_w);
+ int height = Math.round(scale * origin_h);
+ Size sz = new Size(width, height);
+ Imgproc.resize(inputMat, inputMat, sz,0,0, interpMap.get(interp));
+ imageBlob.setNewImageSize(inputMat.height(),2);
+ imageBlob.setNewImageSize(inputMat.width(),3);
+ imageBlob.setScale(scale);
+ return inputMat;
+ }
+ }
+
+ private class ResizeByLong extends transformOp {
+ private int long_size;
+ private String interp = "LINEAR";
+
+ public Mat run(Mat inputMat, ImageBlob imageBlob) {
+ int origin_w = inputMat.width();
+ int origin_h = inputMat.height();
+ imageBlob.getReshapeInfo().put("resize", new int[]{origin_w, origin_h});
+ int im_size_max = Math.max(origin_w, origin_h);
+ float scale = (float) (long_size) / (float) (im_size_max);
+ int width = Math.round(scale * origin_w);
+ int height = Math.round(scale * origin_h);
+ Size sz = new Size(width, height);
+ Imgproc.resize(inputMat, inputMat, sz,0,0, interpMap.get(interp));
+ imageBlob.setNewImageSize(inputMat.height(),2);
+ imageBlob.setNewImageSize(inputMat.width(),3);
+ imageBlob.setScale(scale);
+ return inputMat;
+ }
+ }
+
+ private class CenterCrop extends transformOp {
+ private int cropHeight;
+ private int cropWidth;
+
+ public Mat run(Mat inputMat, ImageBlob imageBlob) {
+ int origin_w = inputMat.width();
+ int origin_h = inputMat.height();
+ if (origin_h < cropHeight || origin_w < cropWidth) {
+ Log.e(TAG, "[CenterCrop] Image size less than crop size");
+ }
+ int offset_x, offset_y;
+ offset_x = (origin_w - cropWidth) / 2;
+ offset_y = (origin_h - cropHeight) / 2;
+ offset_x = Math.max(Math.min(offset_x, origin_w - cropWidth), 0);
+ offset_y = Math.max(Math.min(offset_y, origin_h - cropHeight), 0);
+ Rect crop_roi = new Rect(offset_x, offset_y, cropWidth, cropHeight);
+ inputMat = inputMat.submat(crop_roi);
+ imageBlob.setNewImageSize(inputMat.height(),2);
+ imageBlob.setNewImageSize(inputMat.width(),3);
+ return inputMat;
+ }
+ }
+
+ private class Resize extends transformOp {
+ private int height;
+ private int width;
+ private String interp = "LINEAR";
+
+ public Mat run(Mat inputMat, ImageBlob imageBlob) {
+ int origin_w = inputMat.width();
+ int origin_h = inputMat.height();
+ imageBlob.getReshapeInfo().put("resize", new int[]{origin_w, origin_h});
+ Size sz = new Size(width, height);
+ Imgproc.resize(inputMat, inputMat, sz,0,0, interpMap.get(interp));
+ imageBlob.setNewImageSize(inputMat.height(),2);
+ imageBlob.setNewImageSize(inputMat.width(),3);
+ return inputMat;
+ }
+ }
+
+ private class Padding extends transformOp {
+ private double width;
+ private double height;
+ private double coarsest_stride;
+
+ public Mat run(Mat inputMat, ImageBlob imageBlob) {
+ int origin_w = inputMat.width();
+ int origin_h = inputMat.height();
+ imageBlob.getReshapeInfo().put("padding", new int[]{origin_w, origin_h});
+ double padding_w = origin_w;
+ double padding_h = origin_h;
+ if (width > 1 && height > 1) {
+ padding_w = width;
+ padding_h = height;
+ } else if (coarsest_stride > 1) {
+ padding_h = Math.ceil(origin_h / coarsest_stride) * coarsest_stride;
+ padding_w = Math.ceil(origin_w / coarsest_stride) * coarsest_stride;
+ }
+ // pad the bottom/right so the image reaches the target (or stride-aligned) size
+ Core.copyMakeBorder(inputMat, inputMat, 0, (int)padding_h - origin_h, 0, (int)padding_w - origin_w, Core.BORDER_CONSTANT, new Scalar(0));
+ imageBlob.setNewImageSize(inputMat.height(),2);
+ imageBlob.setNewImageSize(inputMat.width(),3);
+ return inputMat;
+ }
+ }
+
+ private class Normalize extends transformOp {
+ private Double[] mean = new Double[3];
+ private Double[] std = new Double[3];
+
+ public Mat run(Mat inputMat, ImageBlob imageBlob) {
+ inputMat.convertTo(inputMat, CvType.CV_32FC(3), 1/255.0);
+ Scalar meanScalar = new Scalar(mean[0], mean[1], mean[2]);
+ Scalar stdScalar = new Scalar(std[0], std[1], std[2]);
+ Core.subtract(inputMat, meanScalar, inputMat);
+ Core.divide(inputMat, stdScalar, inputMat);
+ return inputMat;
+ }
+ }
+}
+
diff --git a/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/visual/Visualize.java b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/visual/Visualize.java
new file mode 100644
index 0000000000000000000000000000000000000000..c2cc7d280a7c7ed02f515a658840f9ad983478b0
--- /dev/null
+++ b/deploy/lite/android/sdk/src/main/java/com/baidu/paddlex/visual/Visualize.java
@@ -0,0 +1,148 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.baidu.paddlex.visual;
+
+import android.graphics.Canvas;
+import android.graphics.Paint;
+import android.util.Log;
+
+import com.baidu.paddlex.postprocess.DetResult;
+import com.baidu.paddlex.postprocess.SegResult;
+import com.baidu.paddlex.preprocess.ImageBlob;
+
+import org.opencv.core.Core;
+import org.opencv.core.CvType;
+import org.opencv.core.Mat;
+import org.opencv.core.Point;
+import org.opencv.core.Rect;
+import org.opencv.core.Scalar;
+import org.opencv.core.Size;
+import org.opencv.imgproc.Imgproc;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.Map;
+
+public class Visualize {
+ protected static final String TAG = Visualize.class.getSimpleName();
+ protected float detectConfidenceThreshold = (float) 0.5;
+ protected Scalar[] colormap = new Scalar[]{};
+
+ protected void generateColorMap(int num_class) {
+ this.colormap = new Scalar[num_class];
+ this.colormap[0] = new Scalar(0, 0, 0);
+ // PASCAL VOC style colormap: spread the low bits of the label id across R, G and B
+ for (int i = 1; i < num_class; i++) {
+ int r = 0;
+ int g = 0;
+ int b = 0;
+ int j = 0;
+ int lab = i;
+ while (lab > 0) {
+ r |= (((lab >> 0) & 1) << (7 - j));
+ g |= (((lab >> 1) & 1) << (7 - j));
+ b |= (((lab >> 2) & 1) << (7 - j));
+ ++j;
+ lab >>= 3;
+ }
+ this.colormap[i] = new Scalar(r, g, b);
+ }
+ }
+
+ public float getDetectConfidenceThreshold() {
+ return detectConfidenceThreshold;
+ }
+
+ public void setDetectConfidenceThreshold(float detectConfidenceThreshold) {
+ this.detectConfidenceThreshold = detectConfidenceThreshold;
+ }
+
+ public Scalar[] getColormap() {
+ return colormap;
+ }
+
+ public void setColormap(Scalar[] colormap) {
+ this.colormap = colormap;
+ }
+
+ public void init(int num_class) {
+ generateColorMap(num_class);
+ }
+
+ public Mat draw(DetResult result, Mat visualizeMat) {
+ Paint rectPaint = new Paint();
+ rectPaint.setStyle(Paint.Style.STROKE);
+ rectPaint.setStrokeWidth(2);
+ Paint txtPaint = new Paint();
+ txtPaint.setTextSize(15);
+ txtPaint.setAntiAlias(true);
+ for (DetResult.Box box : result.getBoxes()) {
+ if (box.getScore() < detectConfidenceThreshold) {
+ continue;
+ }
+
+ String text = box.getCategory() + ":" + String.format("%.2f", box.getScore());
+ Scalar roiColor = colormap[box.getCategoryId()];
+ double font_scale = 0.5;
+ int thickness = 1;
+ int font_face = Core.FONT_HERSHEY_SIMPLEX;
+
+ Point roiXyMin = new Point(box.getCoordinate()[0],box.getCoordinate()[1]);
+ Point roiXyMax = new Point(box.getCoordinate()[2],box.getCoordinate()[3]);
+ Size text_size = Imgproc.getTextSize(text, font_face,font_scale, thickness,null);
+ Imgproc.rectangle(visualizeMat, roiXyMin, roiXyMax, roiColor,2);
+
+ Point textXyMin = new Point(box.getCoordinate()[0],box.getCoordinate()[1]-text_size.height);
+ Point textXyMax = new Point(box.getCoordinate()[0]+text_size.width,box.getCoordinate()[1]);
+ Imgproc.rectangle(visualizeMat,textXyMin, textXyMax, roiColor,-1);
+ Imgproc.putText(visualizeMat,
+ text,
+ roiXyMin,
+ font_face,
+ font_scale,
+ new Scalar(255, 255, 255));
+ }
+ return visualizeMat;
+ }
+
+ public Mat draw(SegResult result, Mat visualizeMat, ImageBlob imageBlob, int cutoutClass) {
+ int new_h = (int)imageBlob.getNewImageSize()[2];
+ int new_w = (int)imageBlob.getNewImageSize()[3];
+ Mat mask = new Mat(new_h, new_w, CvType.CV_8UC(1));
+
+ for (int h = 0; h < new_h; h++) {
+ for (int w = 0; w < new_w; w++) {
+ // index assumes the score data is laid out class-major (CHW)
+ mask.put(h, w, (1 - result.getMask().getScoreData()[cutoutClass * new_h * new_w + h * new_w + w]) * 255);
+ }
+ }
+ ListIterator<Map.Entry<String, int[]>> reverseReshapeInfo = new ArrayList<Map.Entry<String, int[]>>(imageBlob.getReshapeInfo().entrySet()).listIterator(imageBlob.getReshapeInfo().size());
+ while (reverseReshapeInfo.hasPrevious()) {
+ Map.Entry<String, int[]> entry = reverseReshapeInfo.previous();
+ if (entry.getKey().equalsIgnoreCase("padding")) {
+ Rect crop_roi = new Rect(0, 0, entry.getValue()[0], entry.getValue()[1]);
+ mask = mask.submat(crop_roi);
+ } else if (entry.getKey().equalsIgnoreCase("resize")) {
+ Size sz = new Size(entry.getValue()[0], entry.getValue()[1]);
+ Imgproc.resize(mask, mask, sz,0,0,Imgproc.INTER_LINEAR);
+ }
+ Log.i(TAG, "postprocess operator: " + entry.getKey());
+ Log.i(TAG, "shape:: " + String.valueOf(mask.width()) + ","+ String.valueOf(mask.height()));
+ }
+
+ Mat dst = new Mat();
+ List<Mat> listMat = Arrays.asList(visualizeMat, mask);
+ Core.merge(listMat, dst);
+
+ return dst;
+ }
+}
diff --git a/deploy/lite/android/sdk/src/main/res/values/strings.xml b/deploy/lite/android/sdk/src/main/res/values/strings.xml
new file mode 100644
index 0000000000000000000000000000000000000000..1f207644c2963126a573a7e8e694ef198d7c4976
--- /dev/null
+++ b/deploy/lite/android/sdk/src/main/res/values/strings.xml
@@ -0,0 +1,3 @@
+<resources>
+    <string name="app_name">PaddleX</string>
+</resources>
diff --git a/deploy/lite/android/sdk/src/test/java/com/example/paddlex/ExampleUnitTest.java b/deploy/lite/android/sdk/src/test/java/com/example/paddlex/ExampleUnitTest.java
new file mode 100644
index 0000000000000000000000000000000000000000..7bff08d0b041cd9f4002debb918a602682d88fcc
--- /dev/null
+++ b/deploy/lite/android/sdk/src/test/java/com/example/paddlex/ExampleUnitTest.java
@@ -0,0 +1,17 @@
+package com.example.paddlex;
+
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+/**
+ * Example local unit test, which will execute on the development machine (host).
+ *
+ * @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
+ */
+public class ExampleUnitTest {
+ @Test
+ public void addition_isCorrect() {
+ assertEquals(4, 2 + 2);
+ }
+}
\ No newline at end of file
diff --git a/deploy/lite/export_lite.py b/deploy/lite/export_lite.py
index 0286d8733868dfbbaceadbfcf7d6728e367341df..85276c8b59b1994712fb66d061bbdfa10359e251 100644
--- a/deploy/lite/export_lite.py
+++ b/deploy/lite/export_lite.py
@@ -21,7 +21,8 @@ def export_lite():
opt = lite.Opt()
model_file = os.path.join(FLAGS.model_dir, '__model__')
params_file = os.path.join(FLAGS.model_dir, '__params__')
- opt.run_optimize("", model_file, params_file, FLAGS.place, FLAGS.save_file)
+ opt.run_optimize("", model_file, params_file, 'naive_buffer', FLAGS.place,
+ FLAGS.save_file)
if __name__ == '__main__':
diff --git a/docs/FAQ.md b/docs/FAQ.md
deleted file mode 100755
index e25faab5ad9e230f34f1790db0dcf24fba3328e6..0000000000000000000000000000000000000000
--- a/docs/FAQ.md
+++ /dev/null
@@ -1,68 +0,0 @@
-# FAQ
-
-## 1. How do I tune the training parameters?
-> See the [parameter tuning guide](appendix/parameters.md)
-
-## 2. Training fails because GPU memory runs out
-
-> Run `nvidia-smi` in a terminal to check whether the GPU is occupied by other tasks, and try to clear them;
-> Lower the training `batch_size` to reduce the memory requirement, scaling `learning_rate` and related parameters down proportionally;
-> Choose a smaller model or backbone.
-
-## 3. Are there smaller models that run on lower-end devices?
-> Yes, model pruning can be used; see the [model pruning tutorial](slim/prune.md). The pruning parameters control the size of the pruned model. In practice, e.g. on VOC detection data with yolov3-mobilenet, the original model of XXM shrinks to XX M after pruning with essentially unchanged accuracy
-
-## 4. How do I configure the number of GPUs used for training?
-> Export an environment variable in the terminal, or set it in Python code; see [CPU / multi-GPU training](appendix/gpu_configure.md)
-
-## 5. How do I continue training from previously trained model weights?
-> When calling the `train` interface, set `pretrain_weights` to the save path of the previous model
-
-
-## 6. PaddleX saves models during normal training, pruning training, deployment export and quantization. What are the differences and how do I tell them apart?
-
-**Functional differences**
-
->1. Saved during normal training
->
->>Model directories saved every n epochs during normal training. The weights can be used as pretrained parameters, and the model can be loaded by PaddleX for prediction or exported as a deployment model
-
->2. Saved during pruning training
->
->>Model directories saved every n epochs during pruning training. The weights cannot be used as pretrained parameters, but the model can be loaded by PaddleX for prediction or exported as a deployment model
-
->3. Exported deployment model
->
->>Model directory exported for server-side deployment. It cannot be used as pretrained parameters, but can be loaded by PaddleX for prediction
-
->4. Quantized model
->
->>Model directory whose weights were quantized to speed up prediction. It cannot be used as pretrained parameters, but can be loaded by PaddleX for prediction
-
-**How to tell them apart**
->> Check the `status` field in the model.yml file inside the model directory: 'Normal', 'Prune', 'Infer' and 'Quant' denote models saved during normal training, models saved during pruning training, exported deployment models and quantized models respectively
-
-
-## 7. Training takes too long, or is too slow; how do I speed it up?
-> 1. Training speed depends on the size of the chosen model and on the configured `batch_size`; model sizes can be compared via the metrics in the [model zoo](model_zoo.md). In general, the larger the model, the slower the training;
-
-> 2. Besides the per-step speed, the total training time also depends on the configured number of epochs `num_epochs`. You can watch the model's metrics on the validation set to decide whether to stop training early (set `save_interval_epochs` when training: every `save_interval_epochs` epochs the model is evaluated on the validation set and saved);
-
-## 8. How do I choose the number of epochs?
-> 1. If you are unsure how many epochs to train for, set the number high and also set `save_interval_epochs`, so that the model is evaluated on the validation set and saved every few epochs. From the validation metrics of the saved models you can judge whether the model has converged, and stop training yourself once it has
->
-## 9. I only have a CPU, no GPU; how do I speed up training?
-> Without a GPU, you can choose to train on multiple CPUs depending on your machine; see [multi-CPU/GPU training](appendix/gpu_configure.md) for the configuration
->
-## 10. My machine has no internet access and training fails when downloading the pretrained model. What can I do?
-> Prepare the pretrained model in advance by other means, then set `pretrain_weights` to it when training; see [training without internet access](how_to_offline_run.md)
-
-## 11. Every new training run re-downloads the pretrained model; how can I download it only once?
-> 1. Prepare the pretrained model yourself, following the approach in the previous question
-> 2. Set `paddlex.pretrain_dir` before each training run, e.g. `paddlex.pretrain_dir='/usrname/paddlex'`; downloaded pretrained models are then stored under `/usrname/paddlex`, and models already present in that directory will not be downloaded again
-
-## 12. PaddleX GUI fails on startup with "Failed to execute script PaddleX"; how do I fix it?
-> 1. Check whether the install path of PaddleX on the target machine contains Chinese characters. Chinese paths are currently not supported; try moving the program to an ASCII-only directory.
-> 2. On Windows 7 or Windows Server 2012, the cause is missing DLLs that OpenCV depends on, such as MFPlat.DLL/MF.dll/MFReadWrite.dll. Install the Desktop Experience feature as follows: open Server Manager via "My Computer" --> "Properties" --> "Manage", click "Manage" in the top-right corner and choose "Add Roles and Features". Go to "Server Selection" --> "Features", scroll to the bottom, expand "User Interfaces and Infrastructure", tick "Desktop Experience" and click "Install". After the installation finishes, try running PaddleX again.
-> 3. Check whether another PaddleX program or process is running on the target machine; if so, quit it or reboot the machine and see whether that fixes the problem
-> 4. Make sure the user running the program has administrator rights; if not, try running it as administrator
diff --git a/docs/README.md b/docs/README.md
old mode 100755
new mode 100644
diff --git a/docs/apis/datasets.md b/docs/apis/datasets.md
new file mode 100644
index 0000000000000000000000000000000000000000..3494aaa8a66840cecc16a79454b22bd252ab117b
--- /dev/null
+++ b/docs/apis/datasets.md
@@ -0,0 +1,142 @@
+# Dataset readers
+
+## paddlex.datasets.ImageNet
+> **For image classification models**
+```
+paddlex.datasets.ImageNet(data_dir, file_list, label_list, transforms=None, num_workers='auto', buffer_size=100, parallel_method='thread', shuffle=False)
+```
+Reads an image classification dataset in ImageNet format and applies the configured transforms to each sample. The ImageNet dataset format is described in the [dataset format documentation](../data/format/index.html)
+
+Example: [code file](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/image_classification/mobilenetv2.py)
+
+> **Parameters**
+
+> > * **data_dir** (str): directory the dataset lives in.
+> > * **file_list** (str): path of the file that lists the dataset's images and their class ids (each line holds a path relative to `data_dir`).
+> > * **label_list** (str): path of the file that lists the categories contained in the dataset.
+> > * **transforms** (paddlex.cls.transforms): preprocessing/augmentation operators applied to each sample; see [paddlex.cls.transforms](./transforms/cls_transforms.md).
+> > * **num_workers** (int|str): number of threads or processes used to preprocess samples. Defaults to 'auto': if half the number of CPU cores is greater than 8, `num_workers` is set to 8, otherwise to half the number of CPU cores.
+> > * **buffer_size** (int): length of the preprocessing queue, in samples. Defaults to 100.
+> > * **parallel_method** (str): how samples are preprocessed in parallel, either 'thread' or 'process'. Defaults to 'process' (on Windows and Mac, 'thread' is always used and this parameter is ignored).
+> > * **shuffle** (bool): whether to shuffle the samples. Defaults to False.
+
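+A minimal usage sketch is shown below; the dataset directory and file names are placeholders, assuming data prepared in ImageNet format:
+
+```python
+import paddlex as pdx
+from paddlex.cls import transforms
+
+# preprocessing/augmentation pipeline applied to every training sample
+train_transforms = transforms.Compose([
+    transforms.RandomCrop(crop_size=224),
+    transforms.Normalize()
+])
+train_dataset = pdx.datasets.ImageNet(
+    data_dir='my_dataset',
+    file_list='my_dataset/train_list.txt',
+    label_list='my_dataset/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+```
+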
+## paddlex.datasets.VOCDetection
+> **For object detection models**
+```
+paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None, num_workers='auto', buffer_size=100, parallel_method='thread', shuffle=False)
+```
+
+> Reads a detection dataset in PascalVOC format and applies the configured transforms to each sample. The PascalVOC dataset format is described in the [dataset format documentation](../data/format/index.html)
+
+> Example: [code file](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/object_detection/yolov3_darknet53.py)
+
+> **Parameters**
+
+> > * **data_dir** (str): directory the dataset lives in.
+> > * **file_list** (str): path of the file that lists the dataset's images and their annotation files (each line holds a path relative to `data_dir`).
+> > * **label_list** (str): path of the file that lists the categories contained in the dataset.
+> > * **transforms** (paddlex.det.transforms): preprocessing/augmentation operators applied to each sample; see [paddlex.det.transforms](./transforms/det_transforms.md).
+> > * **num_workers** (int|str): number of threads or processes used to preprocess samples. Defaults to 'auto': if half the number of CPU cores is greater than 8, `num_workers` is set to 8, otherwise to half the number of CPU cores.
+> > * **buffer_size** (int): length of the preprocessing queue, in samples. Defaults to 100.
+> > * **parallel_method** (str): how samples are preprocessed in parallel, either 'thread' or 'process'. Defaults to 'process' (on Windows and Mac, 'thread' is always used and this parameter is ignored).
+> > * **shuffle** (bool): whether to shuffle the samples. Defaults to False.
+
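+A minimal usage sketch is shown below; paths are placeholders, assuming data prepared in PascalVOC format:
+
+```python
+import paddlex as pdx
+from paddlex.det import transforms
+
+train_transforms = transforms.Compose([
+    transforms.RandomHorizontalFlip(),
+    transforms.Normalize(),
+    transforms.ResizeByShort(short_size=800, max_size=1333),
+    transforms.Padding(coarsest_stride=32)
+])
+train_dataset = pdx.datasets.VOCDetection(
+    data_dir='my_dataset',
+    file_list='my_dataset/train_list.txt',
+    label_list='my_dataset/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+```
+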
+## paddlex.datasets.CocoDetection
+> **For instance segmentation / object detection models**
+```
+paddlex.datasets.CocoDetection(data_dir, ann_file, transforms=None, num_workers='auto', buffer_size=100, parallel_method='thread', shuffle=False)
+```
+
+> Reads a detection dataset in MSCOCO format and applies the configured transforms to each sample; datasets in this format can also be used to train instance segmentation models. The MSCOCO dataset format is described in the [dataset format documentation](../data/format/index.html)
+
+> Example: [code file](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/instance_segmentation/mask_rcnn_r50_fpn.py)
+
+> **Parameters**
+
+> > * **data_dir** (str): directory the dataset lives in.
+> > * **ann_file** (str): annotation file of the dataset, a standalone json file.
+> > * **transforms** (paddlex.det.transforms): preprocessing/augmentation operators applied to each sample; see [paddlex.det.transforms](./transforms/det_transforms.md).
+> > * **num_workers** (int|str): number of threads or processes used to preprocess samples. Defaults to 'auto': if half the number of CPU cores is greater than 8, `num_workers` is set to 8, otherwise to half the number of CPU cores.
+> > * **buffer_size** (int): length of the preprocessing queue, in samples. Defaults to 100.
+> > * **parallel_method** (str): how samples are preprocessed in parallel, either 'thread' or 'process'. Defaults to 'process' (on Windows and Mac, 'thread' is always used and this parameter is ignored).
+> > * **shuffle** (bool): whether to shuffle the samples. Defaults to False.
+
+## paddlex.datasets.SegDataset
+> **For semantic segmentation models**
+```
+paddlex.datasets.SegDataset(data_dir, file_list, label_list, transforms=None, num_workers='auto', buffer_size=100, parallel_method='thread', shuffle=False)
+```
+
+> Reads a semantic segmentation dataset and applies the configured transforms to each sample. The dataset format is described in the [dataset format documentation](../data/format/index.html)
+
+> Example: [code file](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/semantic_segmentation/unet.py)
+
+> **Parameters**
+
+> > * **data_dir** (str): directory the dataset lives in.
+> > * **file_list** (str): path of the file that lists the dataset's images and their annotation files (each line holds a path relative to `data_dir`).
+> > * **label_list** (str): path of the file that lists the categories contained in the dataset.
+> > * **transforms** (paddlex.seg.transforms): preprocessing/augmentation operators applied to each sample; see [paddlex.seg.transforms](./transforms/seg_transforms.md).
+> > * **num_workers** (int|str): number of threads or processes used to preprocess samples. Defaults to 'auto': if half the number of CPU cores is greater than 8, `num_workers` is set to 8, otherwise to half the number of CPU cores.
+> > * **buffer_size** (int): length of the preprocessing queue, in samples. Defaults to 100.
+> > * **parallel_method** (str): how samples are preprocessed in parallel, either 'thread' or 'process'. Defaults to 'process' (on Windows and Mac, 'thread' is always used and this parameter is ignored).
+> > * **shuffle** (bool): whether to shuffle the samples. Defaults to False.
+
+## paddlex.datasets.EasyDataCls
+> **For image classification models**
+```
+paddlex.datasets.EasyDataCls(data_dir, file_list, label_list, transforms=None, num_workers='auto', buffer_size=100, parallel_method='thread', shuffle=False)
+```
+
+> Reads an image classification dataset annotated on the EasyData platform and applies the configured transforms to each sample.
+
+> **Parameters**
+
+> > * **data_dir** (str): directory the dataset lives in.
+> > * **file_list** (str): path of the file that lists the dataset's images and their annotation files (each line holds a path relative to `data_dir`).
+> > * **label_list** (str): path of the file that lists the categories contained in the dataset.
+> > * **transforms** (paddlex.cls.transforms): preprocessing/augmentation operators applied to each sample; see [paddlex.cls.transforms](./transforms/cls_transforms.md).
+> > * **num_workers** (int|str): number of threads or processes used to preprocess samples. Defaults to 'auto': if half the number of CPU cores is greater than 8, `num_workers` is set to 8, otherwise to half the number of CPU cores.
+> > * **buffer_size** (int): length of the preprocessing queue, in samples. Defaults to 100.
+> > * **parallel_method** (str): how samples are preprocessed in parallel, either 'thread' or 'process'. Defaults to 'process' (on Windows and Mac, 'thread' is always used and this parameter is ignored).
+> > * **shuffle** (bool): whether to shuffle the samples. Defaults to False.
+
+## paddlex.datasets.EasyDataDet
+> For **object detection / instance segmentation models**
+```
+paddlex.datasets.EasyDataDet(data_dir, file_list, label_list, transforms=None, num_workers='auto', buffer_size=100, parallel_method='thread', shuffle=False)
+```
+
+> Reads an object detection / instance segmentation dataset in EasyData format and applies the configured transforms to each sample.
+
+
+> **Parameters**
+
+> > * **data_dir** (str): directory the dataset lives in.
+> > * **file_list** (str): path of the file that lists the dataset's images and their annotation files (each line holds a path relative to `data_dir`).
+> > * **label_list** (str): path of the file that lists the categories contained in the dataset.
+> > * **transforms** (paddlex.det.transforms): preprocessing/augmentation operators applied to each sample; see [paddlex.det.transforms](./transforms/det_transforms.md).
+> > * **num_workers** (int|str): number of threads or processes used to preprocess samples. Defaults to 'auto': if half the number of CPU cores is greater than 8, `num_workers` is set to 8, otherwise to half the number of CPU cores.
+> > * **buffer_size** (int): length of the preprocessing queue, in samples. Defaults to 100.
+> > * **parallel_method** (str): how samples are preprocessed in parallel, either 'thread' or 'process'. Defaults to 'process' (on Windows and Mac, 'thread' is always used and this parameter is ignored).
+> > * **shuffle** (bool): whether to shuffle the samples. Defaults to False.
+
+## paddlex.datasets.EasyDataSeg
+> **For semantic segmentation models**
+```
+paddlex.datasets.EasyDataSeg(data_dir, file_list, label_list, transforms=None, num_workers='auto', buffer_size=100, parallel_method='thread', shuffle=False)
+```
+
+> Reads a semantic segmentation dataset annotated on the EasyData platform and applies the configured transforms to each sample.
+
+
+> **Parameters**
+
+> > * **data_dir** (str): directory the dataset lives in.
+> > * **file_list** (str): path of the file that lists the dataset's images and their annotation files (each line holds a path relative to `data_dir`).
+> > * **label_list** (str): path of the file that lists the categories contained in the dataset.
+> > * **transforms** (paddlex.seg.transforms): preprocessing/augmentation operators applied to each sample; see [paddlex.seg.transforms](./transforms/seg_transforms.md).
+> > * **num_workers** (int|str): number of threads or processes used to preprocess samples. Defaults to 'auto': if half the number of CPU cores is greater than 8, `num_workers` is set to 8, otherwise to half the number of CPU cores.
+> > * **buffer_size** (int): length of the preprocessing queue, in samples. Defaults to 100.
+> > * **parallel_method** (str): how samples are preprocessed in parallel, either 'thread' or 'process'. Defaults to 'process' (on Windows and Mac, 'thread' is always used and this parameter is ignored).
+> > * **shuffle** (bool): whether to shuffle the samples. Defaults to False.
diff --git a/docs/apis/datasets/classification.md b/docs/apis/datasets/classification.md
deleted file mode 100755
index 104bdf2dab80acfa8f1de1ef8ee522a126ddb7cc..0000000000000000000000000000000000000000
--- a/docs/apis/datasets/classification.md
+++ /dev/null
@@ -1,38 +0,0 @@
-# Image classification datasets
-
-## The ImageNet class
-```
-paddlex.datasets.ImageNet(data_dir, file_list, label_list, transforms=None, num_workers='auto', buffer_size=100, parallel_method='thread', shuffle=False)
-```
-Reads an image classification dataset in ImageNet format and applies the configured transforms to each sample. The ImageNet dataset format is described in the [dataset format documentation](../datasets.md)
-
-Example: [code file](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/classification/mobilenetv2.py#L25)
-
-> **Parameters**
-
-> > * **data_dir** (str): directory the dataset lives in.
-> > * **file_list** (str): path of the file that lists the dataset's images and their class ids (each line holds a path relative to `data_dir`).
-> > * **label_list** (str): path of the file that lists the categories contained in the dataset.
-> > * **transforms** (paddlex.cls.transforms): preprocessing/augmentation operators applied to each sample; see [paddlex.cls.transforms](./transforms/cls_transforms.md).
-> > * **num_workers** (int|str): number of threads or processes used to preprocess samples. Defaults to 'auto': if half the number of CPU cores is greater than 8, `num_workers` is set to 8, otherwise to half the number of CPU cores.
-> > * **buffer_size** (int): length of the preprocessing queue, in samples. Defaults to 100.
-> > * **parallel_method** (str): how samples are preprocessed in parallel, either 'thread' or 'process'. Defaults to 'process' (on Windows and Mac, 'thread' is always used and this parameter is ignored).
-> > * **shuffle** (bool): whether to shuffle the samples. Defaults to False.
-
-## The EasyDataCls class
-```
-paddlex.datasets.EasyDatasetCls(data_dir, file_list, label_list, transforms=None, num_workers='auto', buffer_size=100, parallel_method='thread', shuffle=False)
-```
-
-> Reads an image classification dataset annotated on the EasyData platform and applies the configured transforms to each sample. The EasyData image classification dataset format is described in the [dataset format documentation](../datasets.md).
-
-> **Parameters**
-
-> > * **data_dir** (str): directory the dataset lives in.
-> > * **file_list** (str): path of the file that lists the dataset's images and their annotation files (each line holds a path relative to `data_dir`).
-> > * **label_list** (str): path of the file that lists the categories contained in the dataset.
-> > * **transforms** (paddlex.cls.transforms): preprocessing/augmentation operators applied to each sample; see [paddlex.cls.transforms](./transforms/cls_transforms.md).
-> > * **num_workers** (int|str): number of threads or processes used to preprocess samples. Defaults to 'auto': if half the number of CPU cores is greater than 8, `num_workers` is set to 8, otherwise to half the number of CPU cores.
-> > * **buffer_size** (int): length of the preprocessing queue, in samples. Defaults to 100.
-> > * **parallel_method** (str): how samples are preprocessed in parallel, either 'thread' or 'process'. Defaults to 'process' (on Windows and Mac, 'thread' is always used and this parameter is ignored).
-> > * **shuffle** (bool): whether to shuffle the samples. Defaults to False.
diff --git a/docs/apis/datasets/dataset_convert.md b/docs/apis/datasets/dataset_convert.md
deleted file mode 100644
index 2a04c8013bb5ea32b761fa28dad7fa9a6e09db99..0000000000000000000000000000000000000000
--- a/docs/apis/datasets/dataset_convert.md
+++ /dev/null
@@ -1,42 +0,0 @@
-# Dataset conversion
-## labelme2voc
-```python
-pdx.tools.labelme2voc(image_dir, json_dir, dataset_save_dir)
-```
-Converts a dataset annotated with LabelMe into a VOC dataset.
-
-> **Parameters**
-> > * **image_dir** (str): directory that holds the images.
-> > * **json_dir** (str): directory that holds the json file corresponding to each image.
-> > * **dataset_save_dir** (str): directory the converted dataset is saved to.
-
-## Other dataset conversions
-### easydata2imagenet
-```python
-pdx.tools.easydata2imagenet(image_dir, json_dir, dataset_save_dir)
-```
-### easydata2voc
-```python
-pdx.tools.easydata2voc(image_dir, json_dir, dataset_save_dir)
-```
-### easydata2coco
-```python
-pdx.tools.easydata2coco(image_dir, json_dir, dataset_save_dir)
-```
-### easydata2seg
-```python
-pdx.tools.easydata2seg(image_dir, json_dir, dataset_save_dir)
-```
-### labelme2coco
-```python
-pdx.tools.labelme2coco(image_dir, json_dir, dataset_save_dir)
-```
-### labelme2seg
-```python
-pdx.tools.labelme2seg(image_dir, json_dir, dataset_save_dir)
-```
-### jingling2seg
-```python
-pdx.tools.jingling2seg(image_dir, json_dir, dataset_save_dir)
-```
-
diff --git a/docs/apis/datasets/detection.md b/docs/apis/datasets/detection.md
deleted file mode 100755
index a32b6be5de6246ef6e28ebe376ded7e3faf82ff7..0000000000000000000000000000000000000000
--- a/docs/apis/datasets/detection.md
+++ /dev/null
@@ -1,93 +0,0 @@
-# Detection and instance segmentation datasets
-
-## The VOCDetection class
-
-```
-paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None, num_workers='auto', buffer_size=100, parallel_method='thread', shuffle=False)
-```
-
-> For **object detection** only. Reads a detection dataset in PascalVOC format and applies the configured transforms to each sample. The PascalVOC dataset format is described in the [dataset format documentation](../datasets.md)
-
-> Example: [code file](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_darknet53.py#L29)
-
-> **Parameters**
-
-> > * **data_dir** (str): directory the dataset lives in.
-> > * **file_list** (str): path of the file that lists the dataset's images and their annotation files (each line holds a path relative to `data_dir`).
-> > * **label_list** (str): path of the file that lists the categories contained in the dataset.
-> > * **transforms** (paddlex.det.transforms): preprocessing/augmentation operators applied to each sample; see [paddlex.det.transforms](./transforms/det_transforms.md).
-> > * **num_workers** (int|str): number of threads or processes used to preprocess samples. Defaults to 'auto': if half the number of CPU cores is greater than 8, `num_workers` is set to 8, otherwise to half the number of CPU cores.
-> > * **buffer_size** (int): length of the preprocessing queue, in samples. Defaults to 100.
-> > * **parallel_method** (str): how samples are preprocessed in parallel, either 'thread' or 'process'. Defaults to 'process' (on Windows and Mac, 'thread' is always used and this parameter is ignored).
-> > * **shuffle** (bool): whether to shuffle the samples. Defaults to False.
-
-> [Optional] Background images without any ground-truth objects can be added during training to reduce false detections on background. After creating the VOCDetection instance, call its member function `add_negative_samples`:
-> ```
-> add_negative_samples(image_dir)
-> ```
-> > Example: [code](../../tuning_strategy/detection/negatives_training.html#id4)
-
-> > **Parameters**
-
-> > > * **image_dir** (str): directory that holds the background images.
-
-## The CocoDetection class
-
-```
-paddlex.datasets.CocoDetection(data_dir, ann_file, transforms=None, num_workers='auto', buffer_size=100, parallel_method='thread', shuffle=False)
-```
-
-> For **object detection or instance segmentation**. Reads a detection dataset in MSCOCO format and applies the configured transforms to each sample; datasets in this format can also be used to train instance segmentation models. The MSCOCO dataset format is described in the [dataset format documentation](../datasets.md)
-
-> Example: [code file](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/mask_rcnn_r50_fpn.py#L27)
-
-> **Parameters**
-
-> > * **data_dir** (str): directory the dataset lives in.
-> > * **ann_file** (str): annotation file of the dataset, a standalone json file.
-> > * **transforms** (paddlex.det.transforms): preprocessing/augmentation operators applied to each sample; see [paddlex.det.transforms](./transforms/det_transforms.md).
-> > * **num_workers** (int|str): number of threads or processes used to preprocess samples. Defaults to 'auto': if half the number of CPU cores is greater than 8, `num_workers` is set to 8, otherwise to half the number of CPU cores.
-> > * **buffer_size** (int): length of the preprocessing queue, in samples. Defaults to 100.
-> > * **parallel_method** (str): how samples are preprocessed in parallel, either 'thread' or 'process'. Defaults to 'process' (on Windows and Mac, 'thread' is always used and this parameter is ignored).
-> > * **shuffle** (bool): whether to shuffle the samples. Defaults to False.
-
-> [Optional] Background images without any ground-truth objects can be added during training to reduce false detections on background. After creating the CocoDetection instance, call its member function `add_negative_samples`:
-> ```
-> add_negative_samples(image_dir)
-> ```
-> > Example: [code](../../tuning_strategy/detection/negatives_training.html#id4)
-
-> > **Parameters**
-
-> > > * **image_dir** (str): directory that holds the background images.
-
-## The EasyDataDet class
-
-```
-paddlex.datasets.EasyDataDet(data_dir, file_list, label_list, transforms=None, num_workers='auto', buffer_size=100, parallel_method='thread', shuffle=False)
-```
-
-> For **object detection or instance segmentation**. Reads a detection dataset in EasyData format and applies the configured transforms to each sample; datasets in this format can also be used to train instance segmentation models. The EasyData detection / instance segmentation dataset format is described in the [dataset format documentation](../datasets.md)
-
-
-> **Parameters**
-
-> > * **data_dir** (str): directory the dataset lives in.
-> > * **file_list** (str): path of the file that lists the dataset's images and their annotation files (each line holds a path relative to `data_dir`).
-> > * **label_list** (str): path of the file that lists the categories contained in the dataset.
-> > * **transforms** (paddlex.det.transforms): preprocessing/augmentation operators applied to each sample; see [paddlex.det.transforms](./transforms/det_transforms.md).
-> > * **num_workers** (int|str): number of threads or processes used to preprocess samples. Defaults to 'auto': if half the number of CPU cores is greater than 8, `num_workers` is set to 8, otherwise to half the number of CPU cores.
-> > * **buffer_size** (int): length of the preprocessing queue, in samples. Defaults to 100.
-> > * **parallel_method** (str): how samples are preprocessed in parallel, either 'thread' or 'process'. Defaults to 'process' (on Windows and Mac, 'thread' is always used and this parameter is ignored).
-> > * **shuffle** (bool): whether to shuffle the samples. Defaults to False.
-
-
-> [Optional] Background images without any ground-truth objects can be added during training to reduce false detections on background. After creating the EasyDataDet instance, call its member function `add_negative_samples`:
-> ```
-> add_negative_samples(image_dir)
-> ```
-> > Example: [code](../../tuning_strategy/detection/negatives_training.html#id4)
-
-> > **Parameters**
-
-> > > * **image_dir** (str): directory that holds the background images.
diff --git a/docs/apis/datasets/index.rst b/docs/apis/datasets/index.rst
deleted file mode 100755
index 06326e441d1573521d1030eaaeaca8364899b2c6..0000000000000000000000000000000000000000
--- a/docs/apis/datasets/index.rst
+++ /dev/null
@@ -1,32 +0,0 @@
-Datasets
-============================
-
-PaddleX supports the mainstream CV dataset formats as well as the annotation format of the `EasyData `_ data annotation platform. PaddleX also provides dataset format conversion APIs covering the LabelMe, Jingling annotation assistant and EasyData formats; see the PaddleX tools API documentation.
-
-The table below maps each dataset format to the tasks it supports:
-
-+------------------------+----------------------+--------------------+-----------------------+-----------------------+
-| Dataset format         | Image classification | Object detection   | Instance segmentation | Semantic segmentation |
-+========================+======================+====================+=======================+=======================+
-| ImageNet               | √                    | -                  | -                     | -                     |
-+------------------------+----------------------+--------------------+-----------------------+-----------------------+
-| VOCDetection           | -                    | √                  | -                     | -                     |
-+------------------------+----------------------+--------------------+-----------------------+-----------------------+
-| CocoDetection          | -                    | √                  | √                     | -                     |
-+------------------------+----------------------+--------------------+-----------------------+-----------------------+
-| SegDataset             | -                    | -                  | -                     | √                     |
-+------------------------+----------------------+--------------------+-----------------------+-----------------------+
-| EasyDataCls            | √                    | -                  | -                     | -                     |
-+------------------------+----------------------+--------------------+-----------------------+-----------------------+
-| EasyDataDet            | -                    | √                  | √                     | -                     |
-+------------------------+----------------------+--------------------+-----------------------+-----------------------+
-| EasyDataSeg            | -                    | -                  | -                     | √                     |
-+------------------------+----------------------+--------------------+-----------------------+-----------------------+
-
-.. toctree::
- :maxdepth: 2
-
- classification.md
- detection.md
- semantic_segmentation.md
- dataset_convert.md
diff --git a/docs/apis/datasets/semantic_segmentation.md b/docs/apis/datasets/semantic_segmentation.md
deleted file mode 100755
index 7aa4c21af7e1ebee850b185dea4f5d052abad167..0000000000000000000000000000000000000000
--- a/docs/apis/datasets/semantic_segmentation.md
+++ /dev/null
@@ -1,42 +0,0 @@
-# Semantic segmentation datasets
-
-## The SegDataset class
-
-```
-paddlex.datasets.SegDataset(data_dir, file_list, label_list, transforms=None, num_workers='auto', buffer_size=100, parallel_method='thread', shuffle=False)
-```
-
-> Reads a semantic segmentation dataset and applies the configured transforms to each sample. The dataset format is described in the [dataset format documentation](../datasets.md)
-
-> Example: [code file](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/segmentation/unet.py#L27)
-
-> **Parameters**
-
-> > * **data_dir** (str): directory the dataset lives in.
-> > * **file_list** (str): path of the file that lists the dataset's images and their annotation files (each line holds a path relative to `data_dir`).
-> > * **label_list** (str): path of the file that lists the categories contained in the dataset.
-> > * **transforms** (paddlex.seg.transforms): preprocessing/augmentation operators applied to each sample; see [paddlex.seg.transforms](./transforms/seg_transforms.md).
-> > * **num_workers** (int|str): number of threads or processes used to preprocess samples. Defaults to 'auto': if half the number of CPU cores is greater than 8, `num_workers` is set to 8, otherwise to half the number of CPU cores.
-> > * **buffer_size** (int): length of the preprocessing queue, in samples. Defaults to 100.
-> > * **parallel_method** (str): how samples are preprocessed in parallel, either 'thread' or 'process'. Defaults to 'process' (on Windows and Mac, 'thread' is always used and this parameter is ignored).
-> > * **shuffle** (bool): whether to shuffle the samples. Defaults to False.
-
-## The EasyDataSeg class
-
-```
-paddlex.datasets.EasyDataSeg(data_dir, file_list, label_list, transforms=None, num_workers='auto', buffer_size=100, parallel_method='thread', shuffle=False)
-```
-
-> Reads a semantic segmentation dataset annotated on the EasyData platform and applies the configured transforms to each sample. The EasyData semantic segmentation dataset format is described in the [dataset format documentation](../datasets.md)
-
-
-> **Parameters**
-
-> > * **data_dir** (str): directory the dataset lives in.
-> > * **file_list** (str): path of the file that lists the dataset's images and their annotation files (each line holds a path relative to `data_dir`).
-> > * **label_list** (str): path of the file that lists the categories contained in the dataset.
-> > * **transforms** (paddlex.seg.transforms): preprocessing/augmentation operators applied to each sample; see [paddlex.seg.transforms](./transforms/seg_transforms.md).
-> > * **num_workers** (int|str): number of threads or processes used to preprocess samples. Defaults to 'auto': if half the number of CPU cores is greater than 8, `num_workers` is set to 8, otherwise to half the number of CPU cores.
-> > * **buffer_size** (int): length of the preprocessing queue, in samples. Defaults to 100.
-> > * **parallel_method** (str): how samples are preprocessed in parallel, either 'thread' or 'process'. Defaults to 'process' (on Windows and Mac, 'thread' is always used and this parameter is ignored).
-> > * **shuffle** (bool): whether to shuffle the samples. Defaults to False.
diff --git a/docs/apis/deploy.md b/docs/apis/deploy.md
old mode 100755
new mode 100644
index 4ddc0d90a5a2ac17ab9c9154bddf2421489fd8a7..dd6812452b09b54fd2cf8def2f0085d3dff603d4
--- a/docs/apis/deploy.md
+++ b/docs/apis/deploy.md
@@ -4,35 +4,52 @@
## The Predictor class
+A unified predictor for image classification, object detection, instance segmentation and semantic segmentation that delivers high-performance inference.
+
```
paddlex.deploy.Predictor(model_dir, use_gpu=False, gpu_id=0, use_mkl=False, use_trt=False, use_glog=False, memory_optimize=True)
```
-> **Parameters**
+**Parameters**
-> > * **model_dir**: path of the model saved during training; note that an exported inference model is required
-> > * **use_gpu**: whether to predict on GPU
-> > * **gpu_id**: id of the GPU to use
-> > * **use_mkl**: whether to use the MKL-DNN acceleration library
-> > * **use_trt**: whether to use the TensorRT inference engine
-> > * **use_glog**: whether to print intermediate logs
-> > * **memory_optimize**: whether to optimize memory usage
-
-> > ### 示例
-> >
-> > ```
-> > import paddlex
-> >
-> > model = paddlex.deploy.Predictor(model_dir, use_gpu=True)
-> > result = model.predict(image_file)
-> > ```
+> * **model_dir** (str): path of the model exported in inference format.
+> * **use_gpu** (bool): whether to predict on GPU.
+> * **gpu_id** (int): id of the GPU to use.
+> * **use_mkl** (bool): whether to use the MKL-DNN acceleration library.
+> * **use_trt** (bool): whether to use the TensorRT inference engine.
+> * **use_glog** (bool): whether to print intermediate logs.
+> * **memory_optimize** (bool): whether to optimize memory usage.
-### The predict interface
+> ### Example
+>
> ```
-> predict(image, topk=1)
+> import paddlex
+>
+> model = paddlex.deploy.Predictor(model_dir, use_gpu=True)
+> result = model.predict(image_file)
> ```
-> **Parameters**
+### predict
+
+```
+predict(image, topk=1)
+```
+
+Single-image prediction interface.
+
+> **Parameters**
+>
+> > * **image** (str|np.ndarray): path of the image to predict, or a numpy array (HWC layout, BGR channel order).
+> > * **topk** (int): used for image classification; the top k most likely classes are predicted.
-* **image(str|np.ndarray)**: path of the image to predict, or np.ndarray; note the latter must be in BGR channel order
-* **topk(int)**: used for image classification; the top k most likely classes are predicted
+### batch_predict
+```
+batch_predict(image_list, topk=1, thread_num=2)
+```
+Batch prediction interface.
+
+> **Parameters**
+>
+> > * **image_list** (list|tuple): predicts all images in the list (or tuple); elements may be image paths or numpy arrays (HWC layout, BGR channel order).
+> > * **topk** (int): used for image classification; the top k most likely classes are predicted.
+> > * **thread_num** (int): number of threads used to preprocess the images concurrently.
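+
+A minimal usage sketch of batch prediction is shown below; the model directory and image paths are placeholders:
+
+```python
+import paddlex as pdx
+
+predictor = pdx.deploy.Predictor('./inference_model', use_gpu=True)
+# each element of the returned list is the prediction result of one input image
+results = predictor.batch_predict(['test1.jpg', 'test2.jpg'], thread_num=2)
+```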
diff --git a/docs/apis/index.rst b/docs/apis/index.rst
index d58e90d93fd0e5ae78476f9ce0841ca190675f11..57a035122717982bb4ce77d1073eacf51d5e380a 100755
--- a/docs/apis/index.rst
+++ b/docs/apis/index.rst
@@ -1,13 +1,12 @@
-PaddleX API reference
+API reference
============================
.. toctree::
:maxdepth: 2
transforms/index.rst
- datasets/index.rst
+ datasets.md
models/index.rst
slim.md
- load_model.md
visualize.md
- deploy.md
+ interpret.md
diff --git a/docs/apis/interpret.md b/docs/apis/interpret.md
new file mode 100644
index 0000000000000000000000000000000000000000..24ab99a1a17b2ebee2dc4388843e5d81176038ac
--- /dev/null
+++ b/docs/apis/interpret.md
@@ -0,0 +1,58 @@
+# Model interpretability
+
+PaddleX can currently visualize explanations of image classification results, supporting two interpretability algorithms: LIME and NormLIME.
+
+## paddlex.interpret.lime
+> **Visualize LIME interpretability results**
+```
+paddlex.interpret.lime(img_file,
+ model,
+ num_samples=3000,
+ batch_size=50,
+ save_dir='./')
+```
+Visualizes the interpretability of a model's prediction with the LIME algorithm.
+LIME (Local Interpretable Model-agnostic Explanations) can explain any model. Its idea is to sample randomly in the neighborhood of the input, run each sample through the original model to obtain an output, and fit the resulting input-output pairs with a simple, interpretable model such as linear regression. The learned weight of each input dimension then serves as the explanation.
+
+**Note:** interpretability visualization currently supports classification models only.
+
+### Parameters
+>* **img_file** (str): path of the image to predict.
+>* **model** (paddlex.cv.models): a PaddleX model.
+>* **num_samples** (int): number of samples LIME uses to fit the linear model. Defaults to 3000.
+>* **batch_size** (int): batch size used for prediction. Defaults to 50.
+>* **save_dir** (str): directory where the visualization (saved as a png file) and intermediate files are stored.
+
+
+### Example
+> See the [code](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/interpret/lime.py) for a walkthrough of visualizing prediction explanations.
+
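+A minimal sketch of the call is shown below; the model path and image are placeholders, assuming a classification model loaded with `paddlex.load_model`:
+
+```python
+import paddlex as pdx
+
+model = pdx.load_model('output/mobilenetv2/best_model')
+# writes the LIME visualization as a png under save_dir
+pdx.interpret.lime('test.jpg', model, num_samples=3000, batch_size=50, save_dir='./')
+```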
+
+## paddlex.interpret.normlime
+> **Visualize NormLIME interpretability results**
+```
+paddlex.interpret.normlime(img_file,
+ model,
+ dataset=None,
+ num_samples=3000,
+ batch_size=50,
+ save_dir='./',
+ normlime_weights_file=None)
+```
+Visualizes the interpretability of a model's prediction with the NormLIME algorithm.
+NormLIME derives a global explanation from a number of samples. Because NormLIME is computationally expensive, a simplified scheme is used here: features are extracted from a number of test samples (currently all test samples by default) and mapped into a common feature space; a linear regression is then fit with these features as inputs and the model outputs as targets, yielding a global input-output relationship. When a single test sample is explained, this global NormLIME explanation is used to filter the LIME result, which makes the final visualization more stable.
+
+**Note:** interpretability visualization currently supports classification models only.
+
+### Parameters
+>* **img_file** (str): path of the image to predict.
+>* **model** (paddlex.cv.models): a PaddleX model.
+>* **dataset** (paddlex.datasets): a dataset reader. Defaults to None.
+>* **num_samples** (int): number of samples LIME uses to fit the linear model. Defaults to 3000.
+>* **batch_size** (int): batch size used for prediction. Defaults to 50.
+>* **save_dir** (str): directory where the visualization (saved as a png file) and intermediate files are stored.
+>* **normlime_weights_file** (str): NormLIME initialization file. If it does not exist, the weights are computed once and saved to this path; if it exists, it is loaded directly.
+
+**Note:** `dataset` is a dataset reader; it should not be too large, otherwise the computation takes a long time, but it should contain samples of every category. NormLIME interpretability visualization currently supports classification models only.
+### Example
+> See the [code](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/interpret/normlime.py) for a walkthrough of visualizing prediction explanations.
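+
+A minimal sketch is shown below; the paths are placeholders, and the dataset reuses the transforms saved with the model:
+
+```python
+import paddlex as pdx
+
+model = pdx.load_model('output/mobilenetv2/best_model')
+test_dataset = pdx.datasets.ImageNet(
+    data_dir='my_dataset',
+    file_list='my_dataset/test_list.txt',
+    label_list='my_dataset/labels.txt',
+    transforms=model.test_transforms)
+# computes (or loads) the NormLIME weights, then writes the visualization under save_dir
+pdx.interpret.normlime('test.jpg', model, dataset=test_dataset, save_dir='./')
+```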
diff --git a/docs/apis/load_model.md b/docs/apis/load_model.md
index 0e460ee583c8bb4030d202372dae40882b7b34dd..e325279a3f3db5c9e832d18fec78dcf4fac6e167 100755
--- a/docs/apis/load_model.md
+++ b/docs/apis/load_model.md
@@ -1,8 +1,9 @@
-# Model loading-load_model
+# Model loading
PaddleX provides a unified model-loading interface that loads models saved by PaddleX and can evaluate them on a validation set or predict on test images
-## The function interface
+## paddlex.load_model
+> **Load a model saved by PaddleX**
```
paddlex.load_model(model_dir)
diff --git a/docs/apis/models/classification.md b/docs/apis/models/classification.md
index b70b555a7007b77851af22ddd4a775a4b3a8f93b..0890b334e7f13efc2b17ce49d79eefe59ad589d3 100755
--- a/docs/apis/models/classification.md
+++ b/docs/apis/models/classification.md
@@ -1,6 +1,6 @@
-# Image classification
+# Image Classification
-## The ResNet50 class
+## paddlex.cls.ResNet50
```python
paddlex.cls.ResNet50(num_classes=1000)
@@ -12,7 +12,7 @@ paddlex.cls.ResNet50(num_classes=1000)
> - **num_classes** (int): number of classes. Defaults to 1000.
-### The train interface
+### train
```python
train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.025, warmup_steps=0, warmup_start_lr=0.0, lr_decay_epochs=[30, 60, 90], lr_decay_gamma=0.1, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None)
@@ -41,7 +41,7 @@ train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, s
> > - **early_stop_patience** (int): when early stopping is enabled, training stops if the validation accuracy drops or stays flat for `early_stop_patience` consecutive epochs. Defaults to 5.
> > - **resume_checkpoint** (str): path of a previously saved model to resume training from. If None, training is not resumed. Defaults to None.
-### The evaluate interface
+### evaluate
```python
evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False)
@@ -59,7 +59,7 @@ evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False)
> > - **dict**: when return_details is False, returns a dict with keys 'acc1' and 'acc5', the accuracy of the top prediction and of the top-5 predictions respectively.
> > - **tuple** (metrics, eval_details): when `return_details` is True, an additional dict is returned with keys 'true_labels' and 'pred_scores', the ground-truth class ids and the per-class prediction scores.
-### The predict interface
+### predict
```python
predict(self, img_file, transforms=None, topk=5)
@@ -69,7 +69,7 @@ predict(self, img_file, transforms=None, topk=5)
> **Parameters**
>
-> > - **img_file** (str): path of the image to predict.
+> > - **img_file** (str|np.ndarray): path of the image to predict, or a numpy array (HWC layout, BGR channel order).
> > - **transforms** (paddlex.cls.transforms): preprocessing operators.
> > - **topk** (int): number of top predictions to return.
@@ -78,117 +78,52 @@ predict(self, img_file, transforms=None, topk=5)
> > - **list**: a list of dicts with keys 'category_id', 'category' and 'score',
> > the predicted class id, class label and prediction score respectively.
-## 其它分类器类
+### batch_predict
-PaddleX provides 22 classifiers in total. They all expose the same training `train`, evaluation `evaluate` and prediction `predict` interfaces as `ResNet50`; see the [model zoo](https://paddlex.readthedocs.io/zh_CN/latest/appendix/model_zoo.html) for their accuracy.
-
-### ResNet18
-```python
-paddlex.cls.ResNet18(num_classes=1000)
-```
-
-### ResNet34
-```python
-paddlex.cls.ResNet34(num_classes=1000)
-```
-
-
-### ResNet50
-```python
-paddlex.cls.ResNet50(num_classes=1000)
-```
-
-### ResNet50_vd
-```python
-paddlex.cls.ResNet50_vd(num_classes=1000)
-```
-
-### ResNet50_vd_ssld
-```python
-paddlex.cls.ResNet50_vd_ssld(num_classes=1000)
-```
-
-### ResNet101
```python
-paddlex.cls.ResNet101(num_classes=1000)
+batch_predict(self, img_file_list, transforms=None, topk=5, thread_num=2)
```
-### ResNet101_vd
-```python
-paddlex.cls.ResNet101_vdnum_classes=1000)
-```
-
-### ResNet101_vd_ssld
-```python
-paddlex.cls.ResNet101_vd_ssld(num_classes=1000)
-```
-
-### DarkNet53
-```python
-paddlex.cls.DarkNet53(num_classes=1000)
-```
-
-### MobileNetV1
-```python
-paddlex.cls.MobileNetV1(num_classes=1000)
-```
-
-### MobileNetV2
-```python
-paddlex.cls.MobileNetV2(num_classes=1000)
-```
-
-### MobileNetV3_small
-```python
-paddlex.cls.MobileNetV3_small(num_classes=1000)
-```
+> Batch prediction interface for classification models. Note that the preprocessing pipeline used at prediction time is saved into `ResNet50.test_transforms` and `ResNet50.eval_transforms` only if eval_dataset was defined during training. If it was not, you need to define test_transforms yourself and pass it to the `batch_predict` interface.
-### MobileNetV3_small_ssld
-```python
-paddlex.cls.MobileNetV3_small_ssld(num_classes=1000)
-```
-
-### MobileNetV3_large
-```python
-paddlex.cls.MobileNetV3_large(num_classes=1000)
-```
-
-### MobileNetV3_large_ssld
-```python
-paddlex.cls.MobileNetV3_large_ssld(num_classes=1000)
-```
-
-### Xception65
-```python
-paddlex.cls.Xception65(num_classes=1000)
-```
-
-### Xception71
-```python
-paddlex.cls.Xception71(num_classes=1000)
-```
-
-### ShuffleNetV2
-```python
-paddlex.cls.ShuffleNetV2(num_classes=1000)
-```
-
-### DenseNet121
-```python
-paddlex.cls.DenseNet121(num_classes=1000)
-```
-
-### DenseNet161
-```python
-paddlex.cls.DenseNet161(num_classes=1000)
-```
-
-### DenseNet201
-```python
-paddlex.cls.DenseNet201(num_classes=1000)
-```
+> **Parameters**
+>
+> > - **img_file_list** (list|tuple): predicts all images in the list (or tuple); elements may be image paths or numpy arrays (HWC layout, BGR channel order).
+> > - **transforms** (paddlex.cls.transforms): preprocessing operators.
+> > - **topk** (int): number of top predictions to return.
+> > - **thread_num** (int): number of threads used to preprocess the images concurrently.
-### HRNet_W18
-```python
-paddlex.cls.HRNet_W18(num_classes=1000)
-```
+> **Return value**
+>
+> > - **list**: one element per input image, each a list of dicts with keys 'category_id', 'category' and 'score', the predicted class id, class label and prediction score respectively.
+
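+A minimal sketch of single and batch prediction is shown below; the model path and images are placeholders:
+
+```python
+import paddlex as pdx
+
+model = pdx.load_model('output/resnet50/best_model')
+# single image: a list of {'category_id', 'category', 'score'} dicts
+result = model.predict('test.jpg', topk=5)
+# batch: one such list per input image
+results = model.batch_predict(['test1.jpg', 'test2.jpg'], topk=5)
+```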
+
+## Other classification models
+
+PaddleX provides 22 classification models in total. They all expose the same training `train`, evaluation `evaluate` and prediction `predict` interfaces as `ResNet50`; see the [model zoo](https://paddlex.readthedocs.io/zh_CN/latest/appendix/model_zoo.html) for their accuracy.
+
+| Model | Interface |
+| :---------------- | :---------------------- |
+| ResNet18 | paddlex.cls.ResNet18(num_classes=1000) |
+| ResNet34 | paddlex.cls.ResNet34(num_classes=1000) |
+| ResNet50 | paddlex.cls.ResNet50(num_classes=1000) |
+| ResNet50_vd | paddlex.cls.ResNet50_vd(num_classes=1000) |
+| ResNet50_vd_ssld | paddlex.cls.ResNet50_vd_ssld(num_classes=1000) |
+| ResNet101 | paddlex.cls.ResNet101(num_classes=1000) |
+| ResNet101_vd | paddlex.cls.ResNet101_vd(num_classes=1000) |
+| ResNet101_vd_ssld | paddlex.cls.ResNet101_vd_ssld(num_classes=1000) |
+| DarkNet53 | paddlex.cls.DarkNet53(num_classes=1000) |
+| MobileNetV1 | paddlex.cls.MobileNetV1(num_classes=1000) |
+| MobileNetV2 | paddlex.cls.MobileNetV2(num_classes=1000) |
+| MobileNetV3_small | paddlex.cls.MobileNetV3_small(num_classes=1000) |
+| MobileNetV3_small_ssld | paddlex.cls.MobileNetV3_small_ssld(num_classes=1000) |
+| MobileNetV3_large | paddlex.cls.MobileNetV3_large(num_classes=1000) |
+| MobileNetV3_large_ssld | paddlex.cls.MobileNetV3_large_ssld(num_classes=1000) |
+| Xception65 | paddlex.cls.Xception65(num_classes=1000) |
+| Xception71 | paddlex.cls.Xception71(num_classes=1000) |
+| ShuffleNetV2 | paddlex.cls.ShuffleNetV2(num_classes=1000) |
+| DenseNet121 | paddlex.cls.DenseNet121(num_classes=1000) |
+| DenseNet161 | paddlex.cls.DenseNet161(num_classes=1000) |
+| DenseNet201 | paddlex.cls.DenseNet201(num_classes=1000) |
+| HRNet_W18 | paddlex.cls.HRNet_W18(num_classes=1000) |
+| AlexNet | paddlex.cls.AlexNet(num_classes=1000) |
diff --git a/docs/apis/models/detection.md b/docs/apis/models/detection.md
index f76e5598636f6c8ac94b90acca7fe1c846708077..1afe5ff3b12d2f6286bc7f858ad914a7fade1f51 100755
--- a/docs/apis/models/detection.md
+++ b/docs/apis/models/detection.md
@@ -1,6 +1,6 @@
-# Object detection
+# Object Detection
-## The YOLOv3 class
+## paddlex.det.YOLOv3
```python
paddlex.det.YOLOv3(num_classes=80, backbone='MobileNetV1', anchors=None, anchor_masks=None, ignore_threshold=0.7, nms_score_threshold=0.01, nms_topk=1000, nms_keep_topk=100, nms_iou_threshold=0.45, label_smooth=False, train_random_shapes=[320, 352, 384, 416, 448, 480, 512, 544, 576, 608])
@@ -25,7 +25,7 @@ paddlex.det.YOLOv3(num_classes=80, backbone='MobileNetV1', anchors=None, anchor_
> > - **label_smooth** (bool): whether to use label smoothing. Defaults to False.
> > - **train_random_shapes** (list|tuple): image sizes randomly chosen from this list during training. Defaults to [320, 352, 384, 416, 448, 480, 512, 544, 576, 608].
-### The train interface
+### train
```python
train(self, num_epochs, train_dataset, train_batch_size=8, eval_dataset=None, save_interval_epochs=20, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=1.0/8000, warmup_steps=1000, warmup_start_lr=0.0, lr_decay_epochs=[213, 240], lr_decay_gamma=0.1, metric=None, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None)
@@ -57,7 +57,7 @@ train(self, num_epochs, train_dataset, train_batch_size=8, eval_dataset=None, sa
> > - **early_stop_patience** (int): when early stopping is enabled, training stops if the validation accuracy drops or stays flat for `early_stop_patience` consecutive epochs. Defaults to 5.
> > - **resume_checkpoint** (str): path of a previously saved model to resume training from. If None, training is not resumed. Defaults to None.
-### The evaluate interface
+### evaluate
```python
evaluate(self, eval_dataset, batch_size=1, epoch_id=None, metric=None, return_details=False)
@@ -77,7 +77,7 @@ evaluate(self, eval_dataset, batch_size=1, epoch_id=None, metric=None, return_de
>
> > - **tuple** (metrics, eval_details) | **dict** (metrics): when `return_details` is True, returns (metrics, eval_details); otherwise returns metrics. metrics is a dict with key 'bbox_mmap' or 'bbox_map', the mean of mAP over the thresholds (mmAP) or the mean average precision (mAP) respectively. eval_details is a dict with key 'bbox', a list of predictions each made up of image id, predicted box class id, box coordinates and box score, and key 'gt', the ground-truth annotations.
-### The predict interface
+### predict
```python
predict(self, img_file, transforms=None)
@@ -87,7 +87,7 @@ predict(self, img_file, transforms=None)
> **Parameters**
>
-> > - **img_file** (str): path of the image to predict.
+> > - **img_file** (str|np.ndarray): path of the image to predict, or a numpy array (HWC layout, BGR channel order).
> > - **transforms** (paddlex.det.transforms): preprocessing operators.
>
> **Return value**
@@ -95,7 +95,27 @@ predict(self, img_file, transforms=None)
> > - **list**: a list of predictions. Each element is a dict with keys 'bbox', 'category', 'category_id' and 'score': the box coordinates, class, class id and confidence of one predicted object. Box coordinates are [xmin, ymin, w, h], i.e. the top-left x, y coordinates and the box width and height.
-## The FasterRCNN class
+### batch_predict
+
+```python
+batch_predict(self, img_file_list, transforms=None, thread_num=2)
+```
+
+> Batch prediction interface for the YOLOv3 model. Note that the preprocessing pipeline used at prediction time is saved into `YOLOv3.test_transforms` and `YOLOv3.eval_transforms` only if eval_dataset was defined during training. If it was not, you need to define `test_transforms` yourself and pass it to the `batch_predict` interface.
+
+> **Parameters**
+>
+> > - **img_file_list** (list|tuple): predicts all images in the list (or tuple); elements are image paths or numpy arrays (HWC layout, BGR channel order).
+> > - **transforms** (paddlex.det.transforms): preprocessing operators.
+> > - **thread_num** (int): number of threads used to preprocess the images concurrently.
+>
+> **Return value**
+>
+> > - **list**: one element per input image, each a list of predictions. Each prediction is a dict with keys 'bbox', 'category', 'category_id' and 'score': the box coordinates, class, class id and confidence of one predicted object. Box coordinates are [xmin, ymin, w, h], i.e. the top-left x, y coordinates and the box width and height.
+
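+A minimal sketch is shown below; the model path and images are placeholders:
+
+```python
+import paddlex as pdx
+
+model = pdx.load_model('output/yolov3_darknet53/best_model')
+# one prediction list per image; each element has 'bbox', 'category', 'category_id', 'score'
+results = model.batch_predict(['test1.jpg', 'test2.jpg'], thread_num=2)
+```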
+
+
+## paddlex.det.FasterRCNN
```python
paddlex.det.FasterRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspect_ratios=[0.5, 1.0, 2.0], anchor_sizes=[32, 64, 128, 256, 512])
@@ -112,7 +132,7 @@ paddlex.det.FasterRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspec
> > - **aspect_ratios** (list): candidate aspect ratios for the generated anchors. Defaults to [0.5, 1.0, 2.0].
> > - **anchor_sizes** (list): candidate sizes for the generated anchors. Defaults to [32, 64, 128, 256, 512].
-### The train interface
+### train
```python
train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2,save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.0025, warmup_steps=500, warmup_start_lr=1.0/1200, lr_decay_epochs=[8, 11], lr_decay_gamma=0.1, metric=None, use_vdl=False, early_stop=False, early_stop_patience=5, resume_checkpoint=None)
@@ -142,7 +162,7 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, sa
> > - **early_stop_patience** (int): when early stopping is enabled, training stops if the validation accuracy drops or stays flat for `early_stop_patience` consecutive epochs. Defaults to 5.
> > - **resume_checkpoint** (str): path of a previously saved model to resume training from. If None, training is not resumed. Defaults to None.
-### The evaluate interface
+### evaluate
```python
evaluate(self, eval_dataset, batch_size=1, epoch_id=None, metric=None, return_details=False)
@@ -162,7 +182,7 @@ evaluate(self, eval_dataset, batch_size=1, epoch_id=None, metric=None, return_de
>
> > - **tuple** (metrics, eval_details) | **dict** (metrics): when `return_details` is True, returns (metrics, eval_details); otherwise returns metrics. metrics is a dict with key 'bbox_mmap' or 'bbox_map', the mean of mAP over the IoU thresholds (mmAP) or the mean average precision (mAP) respectively. eval_details is a dict with key 'bbox', a list of predictions each made up of image id, predicted box class id, box coordinates and box score, and key 'gt', the ground-truth annotations.
-### The predict interface
+### predict
```python
predict(self, img_file, transforms=None)
@@ -172,9 +192,28 @@ predict(self, img_file, transforms=None)
> **Parameters**
>
-> > - **img_file** (str): path of the image to predict.
+> > - **img_file** (str|np.ndarray): path of the image to predict, or a numpy array (HWC layout, BGR channel order).
> > - **transforms** (paddlex.det.transforms): preprocessing operators.
>
> **Return value**
>
> > - **list**: a list of predictions. Each element is a dict with keys 'bbox', 'category', 'category_id' and 'score': the box coordinates, class, class id and confidence of one predicted object. Box coordinates are [xmin, ymin, w, h], i.e. the top-left x, y coordinates and the box width and height.
+
+
+### batch_predict
+
+```python
+batch_predict(self, img_file_list, transforms=None, thread_num=2)
+```
+
+> Batch prediction interface for the FasterRCNN model. Note that the preprocessing pipeline used at prediction time is saved into `FasterRCNN.test_transforms` and `FasterRCNN.eval_transforms` only if eval_dataset was defined during training. If it was not, you need to define test_transforms yourself and pass it to the `batch_predict` interface.
+
+> **Parameters**
+>
+> > - **img_file_list** (list|tuple): predicts all images in the list (or tuple); elements are image paths or numpy arrays (HWC layout, BGR channel order).
+> > - **transforms** (paddlex.det.transforms): preprocessing operators.
+> > - **thread_num** (int): number of threads used to preprocess the images concurrently.
+>
+> **Return value**
+>
+> > - **list**: one element per input image, each a list of predictions. Each prediction is a dict with keys 'bbox', 'category', 'category_id' and 'score': the box coordinates, class, class id and confidence of one predicted object. Box coordinates are [xmin, ymin, w, h], i.e. the top-left x, y coordinates and the box width and height.
diff --git a/docs/apis/models/index.rst b/docs/apis/models/index.rst
index 5e533189cd44759cb2002e64bf1a0a9b066cfc6e..2cf02d9f011c95ab0f0325dab33b7e9025f4f533 100755
--- a/docs/apis/models/index.rst
+++ b/docs/apis/models/index.rst
@@ -1,10 +1,10 @@
-模型集-models
+视觉模型集
============================
PaddleX目前支持 `四种视觉任务解决方案 <../../cv_solutions.html>`_ ,包括图像分类、目标检测、实例分割和语义分割。对于每种视觉任务,PaddleX又提供了1种或多种模型,用户可根据需求及应用场景选取。
.. toctree::
- :maxdepth: 2
+ :maxdepth: 3
classification.md
detection.md
diff --git a/docs/apis/models/instance_segmentation.md b/docs/apis/models/instance_segmentation.md
index 72d008b2252a0df73648941d8dbee9d6f8a8764a..3ab5a9b81c5808600efc1d1e63bc4480237ab5f7 100755
--- a/docs/apis/models/instance_segmentation.md
+++ b/docs/apis/models/instance_segmentation.md
@@ -1,6 +1,6 @@
-# Instance segmentation
+# Instance Segmentation
-## The MaskRCNN class
+## MaskRCNN
```python
paddlex.det.MaskRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspect_ratios=[0.5, 1.0, 2.0], anchor_sizes=[32, 64, 128, 256, 512])
@@ -17,7 +17,7 @@ paddlex.det.MaskRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspect_
> > - **aspect_ratios** (list): candidate aspect ratios for the generated anchors. Defaults to [0.5, 1.0, 2.0].
> > - **anchor_sizes** (list): candidate sizes for the generated anchors. Defaults to [32, 64, 128, 256, 512].
-#### The train interface
+#### train
```python
train(self, num_epochs, train_dataset, train_batch_size=1, eval_dataset=None, save_interval_epochs=1, log_interval_steps=20, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=1.0/800, warmup_steps=500, warmup_start_lr=1.0 / 2400, lr_decay_epochs=[8, 11], lr_decay_gamma=0.1, metric=None, use_vdl=False, early_stop=False, early_stop_patience=5, resume_checkpoint=None)
@@ -47,7 +47,7 @@ train(self, num_epochs, train_dataset, train_batch_size=1, eval_dataset=None, sa
> > - **early_stop_patience** (int): when early stopping is enabled, training stops if the validation accuracy drops or stays flat for `early_stop_patience` consecutive epochs. Defaults to 5.
> > - **resume_checkpoint** (str): path of a previously saved model to resume training from. If None, training is not resumed. Defaults to None.
-#### The evaluate interface
+#### evaluate
```python
evaluate(self, eval_dataset, batch_size=1, epoch_id=None, metric=None, return_details=False)
@@ -67,7 +67,7 @@ evaluate(self, eval_dataset, batch_size=1, epoch_id=None, metric=None, return_de
>
> > - **tuple** (metrics, eval_details) | **dict** (metrics): when `return_details` is True, returns (metrics, eval_details); otherwise returns metrics. metrics is a dict with keys 'bbox_mmap' and 'segm_mmap', or 'bbox_map' and 'segm_map': the mean over IoU thresholds (mmAP) or the mean average precision (mAP) of the predicted boxes and segmentation masks respectively. eval_details is a dict with key 'bbox', a list of box predictions each made up of image id, box class id, box coordinates and box score; key 'mask', a list of mask predictions each made up of image id, mask class id, mask coordinates and mask score; and key 'gt', the ground-truth boxes and masks.
-#### The predict interface
+#### predict
```python
predict(self, img_file, transforms=None)
@@ -77,9 +77,28 @@ predict(self, img_file, transforms=None)
> **Parameters**
>
-> > - **img_file** (str): path of the image to predict.
+> > - **img_file** (str|np.ndarray): path of the image to predict, or a numpy array (HWC layout, BGR channel order).
> > - **transforms** (paddlex.det.transforms): preprocessing operators.
>
> **Return value**
>
> > - **list**: a list of predictions. Each element is a dict with keys 'bbox', 'mask', 'category', 'category_id' and 'score': the box coordinates, mask, class, class id and confidence of one predicted object. Box coordinates are [xmin, ymin, w, h], i.e. the top-left x, y coordinates and the box width and height. The mask is a binary image of the original size where 1 marks pixels of the predicted class and 0 marks background.
+
+
+#### batch_predict
+
+```python
+batch_predict(self, img_file_list, transforms=None, thread_num=2)
+```
+
+> Batch prediction interface for the MaskRCNN model. Note that the preprocessing pipeline used at prediction time is saved into MaskRCNN.test_transforms and MaskRCNN.eval_transforms only if eval_dataset was defined during training. If it was not, you need to define test_transforms yourself and pass it to the batch_predict interface.
+
+> **Parameters**
+>
+> > - **img_file_list** (list|tuple): predicts all images in the list (or tuple); elements may be image paths or numpy arrays (HWC layout, BGR channel order).
+> > - **transforms** (paddlex.det.transforms): preprocessing operators.
+> > - **thread_num** (int): number of threads used to preprocess the images concurrently.
+>
+> **Return value**
+>
+> > - **list**: one element per input image, each a list of predictions. Each prediction is a dict with keys 'bbox', 'mask', 'category', 'category_id' and 'score': the box coordinates, mask, class, class id and confidence of one predicted object. Box coordinates are [xmin, ymin, w, h], i.e. the top-left x, y coordinates and the box width and height. The mask is a binary image of the original size where 1 marks pixels of the predicted class and 0 marks background.
diff --git a/docs/apis/models/semantic_segmentation.md b/docs/apis/models/semantic_segmentation.md
index 3ff66337fe64b35f29a2a7985cea040fcb233d82..d0ccd54ce2e21eed6adc2db252e788a955566cfe 100755
--- a/docs/apis/models/semantic_segmentation.md
+++ b/docs/apis/models/semantic_segmentation.md
@@ -1,6 +1,6 @@
-# Semantic segmentation
+# Semantic Segmentation
-## The DeepLabv3p class
+## paddlex.seg.DeepLabv3p
```python
paddlex.seg.DeepLabv3p(num_classes=2, backbone='MobileNetV2_x1.0', output_stride=16, aspp_with_sep_conv=True, decoder_use_sep_conv=True, encoder_with_aspp=True, enable_decoder=True, use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255)
@@ -23,7 +23,7 @@ paddlex.seg.DeepLabv3p(num_classes=2, backbone='MobileNetV2_x1.0', output_stride
> > - **class_weight** (list|str): per-class weights of the cross-entropy loss. When `class_weight` is a list, its length must be `num_classes`. When `class_weight` is a str, weight.lower() must be 'dynamic': the weights are then recomputed each round from the pixel share of each class, as class share * num_classes. With the default None, every class has weight 1, i.e. the usual cross-entropy loss.
> > - **ignore_index** (int): label value to ignore; pixels labeled `ignore_index` do not contribute to the loss. Defaults to 255.
-### The train interface
+### train
```python
train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None):
@@ -51,7 +51,7 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev
> > - **early_stop_patience** (int): when early stopping is enabled, training stops if the validation accuracy drops or stays flat for `early_stop_patience` consecutive epochs. Defaults to 5.
> > - **resume_checkpoint** (str): path of a previously saved model to resume training from. If None, training is not resumed. Defaults to None.
-### The evaluate interface
+### evaluate
```python
evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False):
@@ -73,109 +73,66 @@ evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False):
> > - **tuple** (metrics, eval_details):当`return_details`为True时,增加返回dict (eval_details),
> > 包含关键字:'confusion_matrix',表示评估的混淆矩阵。
-### predict 预测接口
+### predict
```
-predict(self, im_file, transforms=None):
+predict(self, img_file, transforms=None):
```
> DeepLabv3p模型预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`DeepLabv3p.test_transforms`和`DeepLabv3p.eval_transforms`中。如未在训练时定义eval_dataset,那么在调用预测`predict`接口时,用户需要再重新定义test_transforms传入给`predict`接口。
> **参数**
> >
-> > - **img_file** (str): 预测图像路径。
+> > - **img_file** (str|np.ndarray): 预测图像路径或numpy数组(HWC排列,BGR格式)。
> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。
-
> **返回值**
> >
> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
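+
+> **使用示例**(以下为示意代码,模型路径与图像文件名均为假设值):
+
+```python
+import paddlex as pdx
+
+# 加载训练过程中保存的DeepLabv3p模型(路径为示例)
+model = pdx.load_model('output/deeplabv3p/best_model')
+result = model.predict('test.jpg')
+# result['label_map']为预测类别灰度图,result['score_map']为各类别概率
+```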
-## UNet类
-
-```python
-paddlex.seg.UNet(num_classes=2, upsample_mode='bilinear', use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255)
-```
-
-> 构建UNet分割器。
-
-> **参数**
-
-> > - **num_classes** (int): 类别数。
-> > - **upsample_mode** (str): UNet decode时采用的上采样方式,取值为'bilinear'时利用双线行差值进行上菜样,当输入其他选项时则利用反卷积进行上菜样,默认为'bilinear'。
-> > - **use_bce_loss** (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。
-> > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。
-> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1,即平时使用的交叉熵损失函数。
-> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。
-### train 训练接口
+### batch_predict
-```python
-train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='COCO', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None):
```
-
-> UNet模型训练接口。
-
-> **参数**
-> >
-> > - **num_epochs** (int): 训练迭代轮数。
-> > - **train_dataset** (paddlex.datasets): 训练数据读取器。
-> > - **train_batch_size** (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。
-> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
-> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
-> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
-> > - **save_dir** (str): 模型保存路径。默认'output'
-> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'COCO',则自动下载在COCO图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'COCO'。
-> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
-> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
-> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
-> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
-> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
-> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
-> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。
-> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
-> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
-
-#### evaluate 评估接口
-
-```
-evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False):
+batch_predict(self, img_file_list, transforms=None, thread_num=2):
```
-> UNet模型评估接口。
+> DeepLabv3p模型批量预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`DeepLabv3p.test_transforms`和`DeepLabv3p.eval_transforms`中。如未在训练时定义eval_dataset,那么在调用批量预测`batch_predict`接口时,用户需要再重新定义test_transforms传入给`batch_predict`接口。
> **参数**
> >
-> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
-> > - **batch_size** (int): 评估时的batch大小。默认1。
-> > - **epoch_id** (int): 当前评估模型所在的训练轮数。
-> > - **return_details** (bool): 是否返回详细信息。默认False。
+> > - **img_file_list** (list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是预测图像路径或numpy数组(HWC排列,BGR格式)。
+> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。
+> > - **thread_num** (int): 并发执行各图像预处理时的线程数。
> **返回值**
> >
-> > - **dict**: 当return_details为False时,返回dict。包含关键字:'miou'、'category_iou'、'macc'、
-> > 'category_acc'和'kappa',分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。
-> > - **tuple** (metrics, eval_details):当return_details为True时,增加返回dict (eval_details),
-> > 包含关键字:'confusion_matrix',表示评估的混淆矩阵。
+> > - **list**: 每个元素均为dict,表示对应图像的预测结果。每个dict包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
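+
+> **使用示例**(以下为示意代码,模型路径与图像文件名均为假设值):
+
+```python
+import paddlex as pdx
+
+model = pdx.load_model('output/deeplabv3p/best_model')
+# 返回list,每个元素为对应图像的预测结果dict
+results = model.batch_predict(['test1.jpg', 'test2.jpg'])
+```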
-#### predict 预测接口
-```
-predict(self, im_file, transforms=None):
+
+## paddlex.seg.UNet
+
+```python
+paddlex.seg.UNet(num_classes=2, upsample_mode='bilinear', use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255)
```
-> UNet模型预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`UNet.test_transforms`和`UNet.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`predict`接口时,用户需要再重新定义test_transforms传入给`predict`接口。
+> 构建UNet分割器。
> **参数**
-> >
-> > - **img_file** (str): 预测图像路径。
-> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。
-> **返回值**
-> >
-> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
+> > - **num_classes** (int): 类别数。
+> > - **upsample_mode** (str): UNet decode时采用的上采样方式,取值为'bilinear'时利用双线性插值进行上采样,当输入其他选项时则利用反卷积进行上采样,默认为'bilinear'。
+> > - **use_bce_loss** (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。
+> > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。
+> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None时,各类的权重为1,即平时使用的交叉熵损失函数。
+> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。
+> - train 训练接口说明同 [DeepLabv3p模型train接口](#train)
+> - evaluate 评估接口说明同 [DeepLabv3p模型evaluate接口](#evaluate)
+> - predict 预测接口说明同 [DeepLabv3p模型predict接口](#predict)
+> - batch_predict 批量预测接口说明同 [DeepLabv3p模型batch_predict接口](#batch-predict)
-## HRNet类
+## paddlex.seg.HRNet
```python
paddlex.seg.HRNet(num_classes=2, width=18, use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255)
@@ -192,75 +149,12 @@ paddlex.seg.HRNet(num_classes=2, width=18, use_bce_loss=False, use_dice_loss=Fal
> > - **class_weight** (list|str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None时,各类的权重为1,即平时使用的交叉熵损失函数。
> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。
-### train 训练接口
-
-```python
-train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None):
-```
-
-> HRNet模型训练接口。
-
-> **参数**
-> >
-> > - **num_epochs** (int): 训练迭代轮数。
-> > - **train_dataset** (paddlex.datasets): 训练数据读取器。
-> > - **train_batch_size** (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。
-> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
-> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
-> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
-> > - **save_dir** (str): 模型保存路径。默认'output'
-> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet数据集上预训练的模型权重;若为字符串'CITYSCAPES',则自动下载在CITYSCAPES图片数据上预训练的模型权重(注意:目前仅提供`width`取值为18的CITYSCAPES预训练模型);若为None,则不使用预训练模型。默认'IMAGENET'。
-> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
-> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
-> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
-> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
-> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
-> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
-> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。
-> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
-> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
-
-#### evaluate 评估接口
-
-```
-evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False):
-```
-
-> HRNet模型评估接口。
-
-> **参数**
-> >
-> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
-> > - **batch_size** (int): 评估时的batch大小。默认1。
-> > - **epoch_id** (int): 当前评估模型所在的训练轮数。
-> > - **return_details** (bool): 是否返回详细信息。默认False。
-
-> **返回值**
-> >
-> > - **dict**: 当return_details为False时,返回dict。包含关键字:'miou'、'category_iou'、'macc'、
-> > 'category_acc'和'kappa',分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。
-> > - **tuple** (metrics, eval_details):当return_details为True时,增加返回dict (eval_details),
-> > 包含关键字:'confusion_matrix',表示评估的混淆矩阵。
-
-#### predict 预测接口
+> - train 训练接口说明同 [DeepLabv3p模型train接口](#train)
+> - evaluate 评估接口说明同 [DeepLabv3p模型evaluate接口](#evaluate)
+> - predict 预测接口说明同 [DeepLabv3p模型predict接口](#predict)
+> - batch_predict 批量预测接口说明同 [DeepLabv3p模型batch_predict接口](#batch-predict)
-```
-predict(self, im_file, transforms=None):
-```
-
-> HRNet模型预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`UNet.test_transforms`和`UNet.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`predict`接口时,用户需要再重新定义test_transforms传入给`predict`接口。
-
-> **参数**
-> >
-> > - **img_file** (str): 预测图像路径。
-> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。
-
-> **返回值**
-> >
-> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
-
-
-## FastSCNN类
+## paddlex.seg.FastSCNN
```python
paddlex.seg.FastSCNN(num_classes=2, use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255, multi_loss_weight=[1.0])
@@ -277,69 +171,7 @@ paddlex.seg.FastSCNN(num_classes=2, use_bce_loss=False, use_dice_loss=False, cla
> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。
> > - **multi_loss_weight** (list): 多分支上的loss权重。默认计算一个分支上的loss,即默认值为[1.0]。也支持计算两个分支或三个分支上的loss,权重按[fusion_branch_weight, higher_branch_weight, lower_branch_weight]排列,fusion_branch_weight为空间细节分支和全局上下文分支融合后的分支上的loss权重,higher_branch_weight为空间细节分支上的loss权重,lower_branch_weight为全局上下文分支上的loss权重,若higher_branch_weight和lower_branch_weight未设置则不会计算这两个分支上的loss。
-### train 训练接口
-
-```python
-train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='CITYSCAPES', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None):
-```
-
-> FastSCNN模型训练接口。
-
-> **参数**
-> >
-> > - **num_epochs** (int): 训练迭代轮数。
-> > - **train_dataset** (paddlex.datasets): 训练数据读取器。
-> > - **train_batch_size** (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。
-> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
-> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
-> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
-> > - **save_dir** (str): 模型保存路径。默认'output'
-> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'CITYSCAPES',则自动下载在CITYSCAPES图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'CITYSCAPES'。
-> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
-> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
-> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
-> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
-> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
-> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
-> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。
-> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
-> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
-
-#### evaluate 评估接口
-
-```
-evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False):
-```
-
-> FastSCNN模型评估接口。
-
-> **参数**
-> >
-> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
-> > - **batch_size** (int): 评估时的batch大小。默认1。
-> > - **epoch_id** (int): 当前评估模型所在的训练轮数。
-> > - **return_details** (bool): 是否返回详细信息。默认False。
-
-> **返回值**
-> >
-> > - **dict**: 当return_details为False时,返回dict。包含关键字:'miou'、'category_iou'、'macc'、
-> > 'category_acc'和'kappa',分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。
-> > - **tuple** (metrics, eval_details):当return_details为True时,增加返回dict (eval_details),
-> > 包含关键字:'confusion_matrix',表示评估的混淆矩阵。
-
-#### predict 预测接口
-
-```
-predict(self, im_file, transforms=None):
-```
-
-> FastSCNN模型预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`UNet.test_transforms`和`UNet.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`predict`接口时,用户需要再重新定义test_transforms传入给`predict`接口。
-
-> **参数**
-> >
-> > - **img_file** (str): 预测图像路径。
-> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。
-
-> **返回值**
-> >
-> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
+> - train 训练接口说明同 [DeepLabv3p模型train接口](#train)
+> - evaluate 评估接口说明同 [DeepLabv3p模型evaluate接口](#evaluate)
+> - predict 预测接口说明同 [DeepLabv3p模型predict接口](#predict)
+> - batch_predict 批量预测接口说明同 [DeepLabv3p模型batch_predict接口](#batch-predict)
diff --git a/docs/apis/slim.md b/docs/apis/slim.md
index 39557f531f391eb3ff4e3050c2829e5a2ff95f5f..23c2d5ee7026320bfe0e1da33028c833be11dfc5 100755
--- a/docs/apis/slim.md
+++ b/docs/apis/slim.md
@@ -1,6 +1,7 @@
-# 模型压缩-slim
+# 模型压缩
-## 计算参数敏感度
+## paddlex.slim.cal_params_sensitivities
+> **计算参数敏感度**
```
paddlex.slim.cal_params_sensitivities(model, save_file, eval_dataset, batch_size=8)
```
@@ -8,7 +9,7 @@ paddlex.slim.cal_params_sensitivities(model, save_file, eval_dataset, batch_size
1. 获取模型中可裁剪卷积Kernel的名称。
2. 计算每个可裁剪卷积Kernel不同裁剪率下的敏感度。
【注意】卷积的敏感度是指在不同裁剪率下评估数据集预测精度的损失,通过得到的敏感度,可以决定最终模型需要裁剪的参数列表和各裁剪参数对应的裁剪率。
-[查看使用示例](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/compress/classification/cal_sensitivities_file.py#L33) [查看裁剪教程](../tutorials/compress/classification.md)
+[查看使用示例](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/compress/classification/cal_sensitivities_file.py#L33)
### 参数
@@ -18,7 +19,8 @@ paddlex.slim.cal_params_sensitivities(model, save_file, eval_dataset, batch_size
* **batch_size** (int): 评估时的batch_size大小。
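+
+### 使用示例
+> 以下为示意代码,模型与数据集路径均为假设值,请替换为实际路径:
+```
+import paddlex as pdx
+
+model = pdx.load_model('output/mobilenetv2/best_model')
+eval_dataset = pdx.datasets.ImageNet(
+    data_dir='vegetables_cls',
+    file_list='vegetables_cls/val_list.txt',
+    label_list='vegetables_cls/labels.txt',
+    transforms=model.eval_transforms)
+pdx.slim.cal_params_sensitivities(
+    model, 'mobilenetv2.sensitivities', eval_dataset, batch_size=8)
+```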
-## 导出量化模型
+## paddlex.slim.export_quant_model
+> **导出量化模型**
```
paddlex.slim.export_quant_model(model, test_dataset, batch_size=2, batch_num=10, save_dir='./quant_model', cache_dir='./temp')
```
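+
+### 使用示例
+> 以下为示意代码,模型与数据集路径均为假设值:
+```
+import paddlex as pdx
+
+model = pdx.load_model('output/mobilenetv2/best_model')
+test_dataset = pdx.datasets.ImageNet(
+    data_dir='vegetables_cls',
+    file_list='vegetables_cls/val_list.txt',
+    label_list='vegetables_cls/labels.txt',
+    transforms=model.test_transforms)
+pdx.slim.export_quant_model(model, test_dataset, save_dir='./quant_model')
+```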
diff --git a/docs/apis/transforms/cls_transforms.md b/docs/apis/transforms/cls_transforms.md
index 7d124b9bed4445eb7a216587cde8a35532f54a48..e0507cf017f5272782e549e1ea42eb9f4790dc09 100755
--- a/docs/apis/transforms/cls_transforms.md
+++ b/docs/apis/transforms/cls_transforms.md
@@ -1,56 +1,18 @@
-# 图像分类-cls.transforms
+# paddlex.cls.transforms
对图像分类任务的数据进行操作。可以利用[Compose](#compose)类将图像预处理/增强操作进行组合。
-## Compose类
+## Compose
```python
paddlex.cls.transforms.Compose(transforms)
```
根据数据预处理/增强算子对输入数据进行操作。 [使用示例](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/classification/mobilenetv2.py#L13)
-### 参数
-* **transforms** (list): 数据预处理/数据增强列表。
-
-
-## RandomCrop类
-```python
-paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.08, lower_ratio=3. / 4, upper_ratio=4. / 3)
-```
-
-对图像进行随机剪裁,模型训练时的数据增强操作。
-1. 根据lower_scale、lower_ratio、upper_ratio计算随机剪裁的高、宽。
-2. 根据随机剪裁的高、宽随机选取剪裁的起始点。
-3. 剪裁图像。
-4. 调整剪裁后的图像的大小到crop_size*crop_size。
-
-### 参数
-* **crop_size** (int): 随机裁剪后重新调整的目标边长。默认为224。
-* **lower_scale** (float): 裁剪面积相对原面积比例的最小限制。默认为0.08。
-* **lower_ratio** (float): 宽变换比例的最小限制。默认为3. / 4。
-* **upper_ratio** (float): 宽变换比例的最小限制。默认为4. / 3。
-
-## RandomHorizontalFlip类
-```python
-paddlex.cls.transforms.RandomHorizontalFlip(prob=0.5)
-```
-
-以一定的概率对图像进行随机水平翻转,模型训练时的数据增强操作。
-
-### 参数
-* **prob** (float): 随机水平翻转的概率。默认为0.5。
-
-## RandomVerticalFlip类
-```python
-paddlex.cls.transforms.RandomVerticalFlip(prob=0.5)
-```
-
-以一定的概率对图像进行随机垂直翻转,模型训练时的数据增强操作。
-
-### 参数
-* **prob** (float): 随机垂直翻转的概率。默认为0.5。
+> **参数**
+> * **transforms** (list): 数据预处理/数据增强列表。
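+
+> **使用示例**(示意代码,组合本页的预处理/增强算子):
+```python
+from paddlex.cls import transforms
+train_transforms = transforms.Compose([
+    transforms.RandomCrop(crop_size=224),
+    transforms.RandomHorizontalFlip(),
+    transforms.Normalize()
+])
+```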
-## Normalize类
+## Normalize
```python
paddlex.cls.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
```
@@ -63,7 +25,7 @@ paddlex.cls.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224,
* **mean** (list): 图像数据集的均值。默认为[0.485, 0.456, 0.406]。
* **std** (list): 图像数据集的标准差。默认为[0.229, 0.224, 0.225]。
-## ResizeByShort类
+## ResizeByShort
```python
paddlex.cls.transforms.ResizeByShort(short_size=256, max_size=-1)
```
@@ -79,7 +41,7 @@ paddlex.cls.transforms.ResizeByShort(short_size=256, max_size=-1)
* **short_size** (int): 调整大小后的图像目标短边长度。默认为256。
* **max_size** (int): 长边目标长度的最大限制。默认为-1。
-## CenterCrop类
+## CenterCrop
```python
paddlex.cls.transforms.CenterCrop(crop_size=224)
```
@@ -91,7 +53,44 @@ paddlex.cls.transforms.CenterCrop(crop_size=224)
### 参数
* **crop_size** (int): 裁剪的目标边长。默认为224。
-## RandomRotate类
+## RandomCrop
+```python
+paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.08, lower_ratio=3. / 4, upper_ratio=4. / 3)
+```
+
+对图像进行随机剪裁,模型训练时的数据增强操作。
+1. 根据lower_scale、lower_ratio、upper_ratio计算随机剪裁的高、宽。
+2. 根据随机剪裁的高、宽随机选取剪裁的起始点。
+3. 剪裁图像。
+4. 调整剪裁后的图像的大小到crop_size*crop_size。
+
+### 参数
+* **crop_size** (int): 随机裁剪后重新调整的目标边长。默认为224。
+* **lower_scale** (float): 裁剪面积相对原面积比例的最小限制。默认为0.08。
+* **lower_ratio** (float): 宽变换比例的最小限制。默认为3. / 4。
+* **upper_ratio** (float): 宽变换比例的最大限制。默认为4. / 3。
+
+## RandomHorizontalFlip
+```python
+paddlex.cls.transforms.RandomHorizontalFlip(prob=0.5)
+```
+
+以一定的概率对图像进行随机水平翻转,模型训练时的数据增强操作。
+
+### 参数
+* **prob** (float): 随机水平翻转的概率。默认为0.5。
+
+## RandomVerticalFlip
+```python
+paddlex.cls.transforms.RandomVerticalFlip(prob=0.5)
+```
+
+以一定的概率对图像进行随机垂直翻转,模型训练时的数据增强操作。
+
+### 参数
+* **prob** (float): 随机垂直翻转的概率。默认为0.5。
+
+## RandomRotate
```python
paddlex.cls.transforms.RandomRotate(rotate_range=30, prob=0.5)
```
@@ -102,7 +101,7 @@ paddlex.cls.transforms.RandomRotate(rotate_range=30, prob=0.5)
* **rotate_range** (int): 旋转度数的范围。默认为30。
* **prob** (float): 随机旋转的概率。默认为0.5。
-## RandomDistort类
+## RandomDistort
```python
paddlex.cls.transforms.RandomDistort(brightness_range=0.9, brightness_prob=0.5, contrast_range=0.9, contrast_prob=0.5, saturation_range=0.9, saturation_prob=0.5, hue_range=18, hue_prob=0.5)
```
@@ -123,15 +122,16 @@ paddlex.cls.transforms.RandomDistort(brightness_range=0.9, brightness_prob=0.5,
* **hue_range** (int): 色调因子的范围。默认为18。
* **hue_prob** (float): 随机调整色调的概率。默认为0.5。
-## ComposedClsTransforms类
+
diff --git a/docs/apis/transforms/det_transforms.md b/docs/apis/transforms/det_transforms.md
index 3b182a1e4eeb7fdbe2d40c7530989d54919d8ec2..9388c01c544649d254d38fb37850a17fb65e8e06 100755
--- a/docs/apis/transforms/det_transforms.md
+++ b/docs/apis/transforms/det_transforms.md
@@ -1,8 +1,8 @@
-# 检测和实例分割-det.transforms
+# paddlex.det.transforms
-对目标检测任务的数据进行操作。可以利用[Compose](#compose)类将图像预处理/增强操作进行组合。
+对目标检测/实例分割任务的数据进行操作。可以利用[Compose](#compose)类将图像预处理/增强操作进行组合。
-## Compose类
+## Compose
```python
paddlex.det.transforms.Compose(transforms)
```
@@ -12,7 +12,20 @@ paddlex.det.transforms.Compose(transforms)
### 参数
* **transforms** (list): 数据预处理/数据增强列表。
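+
+### 使用示例
+示意代码,组合本页的预处理/增强算子(其中coarsest_stride=32为针对含FPN结构模型的示例取值):
+```python
+from paddlex.det import transforms
+train_transforms = transforms.Compose([
+    transforms.RandomHorizontalFlip(),
+    transforms.Normalize(),
+    transforms.ResizeByShort(short_size=800, max_size=1333),
+    transforms.Padding(coarsest_stride=32)
+])
+```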
-## ResizeByShort类
+## Normalize
+```python
+paddlex.det.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+```
+
+对图像进行标准化。
+1. 归一化图像到区间[0.0, 1.0]。
+2. 对图像进行减均值除以标准差操作。
+
+### 参数
+* **mean** (list): 图像数据集的均值。默认为[0.485, 0.456, 0.406]。
+* **std** (list): 图像数据集的标准差。默认为[0.229, 0.224, 0.225]。
+
+## ResizeByShort
```python
paddlex.det.transforms.ResizeByShort(short_size=800, max_size=1333)
```
@@ -28,7 +41,7 @@ paddlex.det.transforms.ResizeByShort(short_size=800, max_size=1333)
* **short_size** (int): 短边目标长度。默认为800。
* **max_size** (int): 长边目标长度的最大限制。默认为1333。
-## Padding类
+## Padding
```python
paddlex.det.transforms.Padding(coarsest_stride=1)
```
@@ -41,7 +54,7 @@ paddlex.det.transforms.Padding(coarsest_stride=1)
### 参数
* **coarsest_stride** (int): 填充后的图像长、宽为该参数的倍数,默认为1。
-## Resize类
+## Resize
```python
paddlex.det.transforms.Resize(target_size=608, interp='LINEAR')
```
@@ -55,7 +68,7 @@ paddlex.det.transforms.Resize(target_size=608, interp='LINEAR')
* **target_size** (int/list/tuple): 图像目标大小。默认为608。
* **interp** (str): resize的插值方式,与opencv的插值方式对应,取值范围为['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM']。默认为"LINEAR"。
-## RandomHorizontalFlip类
+## RandomHorizontalFlip
```python
paddlex.det.transforms.RandomHorizontalFlip(prob=0.5)
```
@@ -65,20 +78,7 @@ paddlex.det.transforms.RandomHorizontalFlip(prob=0.5)
### 参数
* **prob** (float): 随机水平翻转的概率。默认为0.5。
-## Normalize类
-```python
-paddlex.det.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-```
-
-对图像进行标准化。
-1. 归一化图像到到区间[0.0, 1.0]。
-2. 对图像进行减均值除以标准差操作。
-
-### 参数
-* **mean** (list): 图像数据集的均值。默认为[0.485, 0.456, 0.406]。
-* **std** (list): 图像数据集的标准差。默认为[0.229, 0.224, 0.225]。
-
-## RandomDistort类
+## RandomDistort
```python
paddlex.det.transforms.RandomDistort(brightness_range=0.5, brightness_prob=0.5, contrast_range=0.5, contrast_prob=0.5, saturation_range=0.5, saturation_prob=0.5, hue_range=18, hue_prob=0.5)
```
@@ -99,7 +99,7 @@ paddlex.det.transforms.RandomDistort(brightness_range=0.5, brightness_prob=0.5,
* **hue_range** (int): 色调因子的范围。默认为18。
* **hue_prob** (float): 随机调整色调的概率。默认为0.5。
-## MixupImage类
+## MixupImage
```python
paddlex.det.transforms.MixupImage(alpha=1.5, beta=1.5, mixup_epoch=-1)
```
@@ -141,7 +141,7 @@ paddlex.det.transforms.RandomExpand(ratio=4., prob=0.5, fill_value=[123.675, 116
【注意】该数据增强必须在数据增强Resize、ResizeByShort之前使用。
-## RandomCrop类
+## RandomCrop
```python
paddlex.det.transforms.RandomCrop(aspect_ratio=[.5, 2.], thresholds=[.0, .1, .3, .5, .7, .9], scaling=[.3, 1.], num_attempts=50, allow_no_crop=True, cover_all_box=False)
```
@@ -168,14 +168,15 @@ paddlex.det.transforms.RandomCrop(aspect_ratio=[.5, 2.], thresholds=[.0, .1, .3,
* **allow_no_crop** (bool): 是否允许未进行裁剪。默认值为True。
* **cover_all_box** (bool): 是否要求所有的真实标注框都必须在裁剪区域内。默认值为False。
-## ComposedRCNNTransforms类
+
diff --git a/docs/apis/transforms/index.rst b/docs/apis/transforms/index.rst
index 0a2be9860a32e56b6e1e6b31aa12ab22332e6785..2264fb610a03aee0631986912eaa7ce2e82e7478 100755
--- a/docs/apis/transforms/index.rst
+++ b/docs/apis/transforms/index.rst
@@ -1,4 +1,4 @@
-数据处理-transforms
+数据处理与增强
============================
transforms为PaddleX的模型训练提供了数据的预处理和数据增强接口。
diff --git a/docs/apis/transforms/seg_transforms.md b/docs/apis/transforms/seg_transforms.md
index 264af5c472cb824865188a5386a513e5a00fe0ba..0d49fbf8421bc458314569d185e47dec732f1f33 100755
--- a/docs/apis/transforms/seg_transforms.md
+++ b/docs/apis/transforms/seg_transforms.md
@@ -1,9 +1,9 @@
-# 语义分割-seg.transforms
+# paddlex.seg.transforms
对用于分割任务的数据进行操作。可以利用[Compose](#compose)类将图像预处理/增强操作进行组合。
-## Compose类
+## Compose
```python
paddlex.seg.transforms.Compose(transforms)
```
@@ -12,7 +12,7 @@ paddlex.seg.transforms.Compose(transforms)
* **transforms** (list): 数据预处理/数据增强列表。
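+
+使用示例(示意代码,组合本页的预处理/增强算子):
+```python
+from paddlex.seg import transforms
+train_transforms = transforms.Compose([
+    transforms.RandomHorizontalFlip(),
+    transforms.ResizeRangeScaling(),
+    transforms.RandomPaddingCrop(crop_size=512),
+    transforms.Normalize()
+])
+```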
-## RandomHorizontalFlip类
+## RandomHorizontalFlip
```python
paddlex.seg.transforms.RandomHorizontalFlip(prob=0.5)
```
@@ -21,7 +21,7 @@ paddlex.seg.transforms.RandomHorizontalFlip(prob=0.5)
* **prob** (float): 随机水平翻转的概率。默认值为0.5。
-## RandomVerticalFlip类
+## RandomVerticalFlip
```python
paddlex.seg.transforms.RandomVerticalFlip(prob=0.1)
```
@@ -30,7 +30,7 @@ paddlex.seg.transforms.RandomVerticalFlip(prob=0.1)
* **prob** (float): 随机垂直翻转的概率。默认值为0.1。
-## Resize类
+## Resize
```python
paddlex.seg.transforms.Resize(target_size, interp='LINEAR')
```
@@ -46,7 +46,7 @@ paddlex.seg.transforms.Resize(target_size, interp='LINEAR')
可选的值为['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4'],默认为"LINEAR"。
-## ResizeByLong类
+## ResizeByLong
```python
paddlex.seg.transforms.ResizeByLong(long_size)
```
@@ -55,7 +55,7 @@ paddlex.seg.transforms.ResizeByLong(long_size)
* **long_size** (int): resize后图像的长边大小。
-## ResizeRangeScaling类
+## ResizeRangeScaling
```python
paddlex.seg.transforms.ResizeRangeScaling(min_value=400, max_value=600)
```
@@ -65,7 +65,7 @@ paddlex.seg.transforms.ResizeRangeScaling(min_value=400, max_value=600)
* **max_value** (int): 图像长边resize后的最大值。默认值600。
-## ResizeStepScaling类
+## ResizeStepScaling
```python
paddlex.seg.transforms.ResizeStepScaling(min_scale_factor=0.75, max_scale_factor=1.25, scale_step_size=0.25)
```
@@ -76,7 +76,7 @@ paddlex.seg.transforms.ResizeStepScaling(min_scale_factor=0.75, max_scale_factor
* **scale_step_size** (float): resize尺度范围间隔。默认值0.25。
-## Normalize类
+## Normalize
```python
paddlex.seg.transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
```
@@ -89,7 +89,7 @@ paddlex.seg.transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
* **std** (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]。
-## Padding类
+## Padding
```python
paddlex.seg.transforms.Padding(target_size, im_padding_value=[127.5, 127.5, 127.5], label_padding_value=255)
```
@@ -100,7 +100,7 @@ paddlex.seg.transforms.Padding(target_size, im_padding_value=[127.5, 127.5, 127.
* **label_padding_value** (int): 标注图像padding的值。默认值为255(仅在训练时需要设定该参数)。
-## RandomPaddingCrop类
+## RandomPaddingCrop
```python
paddlex.seg.transforms.RandomPaddingCrop(crop_size=512, im_padding_value=[127.5, 127.5, 127.5], label_padding_value=255)
```
@@ -111,7 +111,7 @@ paddlex.seg.transforms.RandomPaddingCrop(crop_size=512, im_padding_value=[127.5,
* **label_padding_value** (int): 标注图像padding的值。默认值为255。
-## RandomBlur类
+## RandomBlur
```python
paddlex.seg.transforms.RandomBlur(prob=0.1)
```
@@ -120,7 +120,7 @@ paddlex.seg.transforms.RandomBlur(prob=0.1)
* **prob** (float): 图像模糊概率。默认为0.1。
-## RandomRotate类
+## RandomRotate
```python
paddlex.seg.transforms.RandomRotate(rotate_range=15, im_padding_value=[127.5, 127.5, 127.5], label_padding_value=255)
```
@@ -134,7 +134,7 @@ paddlex.seg.transforms.RandomRotate(rotate_range=15, im_padding_value=[127.5, 12
* **label_padding_value** (int): 标注图像padding的值。默认为255。
-## RandomScaleAspect类
+## RandomScaleAspect
```python
paddlex.seg.transforms.RandomScaleAspect(min_scale=0.5, aspect_ratio=0.33)
```
@@ -146,7 +146,7 @@ paddlex.seg.transforms.RandomScaleAspect(min_scale=0.5, aspect_ratio=0.33)
* **aspect_ratio** (float): 裁取图像的宽高比范围,非负值,为0时返回原图。默认为0.33。
-## RandomDistort类
+## RandomDistort
```python
paddlex.seg.transforms.RandomDistort(brightness_range=0.5, brightness_prob=0.5, contrast_range=0.5, contrast_prob=0.5, saturation_range=0.5, saturation_prob=0.5, hue_range=18, hue_prob=0.5)
```
@@ -167,26 +167,29 @@ paddlex.seg.transforms.RandomDistort(brightness_range=0.5, brightness_prob=0.5,
* **hue_range** (int): 色调因子的范围。默认为18。
* **hue_prob** (float): 随机调整色调的概率。默认为0.5。
-## ComposedSegTransforms类
+
diff --git a/docs/apis/visualize.md b/docs/apis/visualize.md
index 2cdc96844758128545ffe3a1ebf815476cae1090..f04e86acc8ec1cb82355bac508952438a714d8b0 100755
--- a/docs/apis/visualize.md
+++ b/docs/apis/visualize.md
@@ -1,33 +1,59 @@
-# 可视化-visualize
+# 预测结果可视化
+
PaddleX提供了一系列模型预测和结果分析的可视化函数。
-## 目标检测/实例分割预测结果可视化
+## paddlex.det.visualize
+> **目标检测/实例分割预测结果可视化**
```
paddlex.det.visualize(image, result, threshold=0.5, save_dir='./')
```
将目标检测/实例分割模型预测得到的Box框和Mask在原图上进行可视化。
### 参数
-> * **image** (str): 原图文件路径。
+> * **image** (str|np.ndarray): 原图文件路径或numpy数组(HWC排列,BGR格式)。
> * **result** (list): 模型预测结果(`predict`接口的返回值)。
> * **threshold** (float): score阈值,将Box置信度低于该阈值的框过滤,不进行可视化。默认0.5。
> * **save_dir** (str): 可视化结果保存路径。若为None,则表示不保存,该函数将可视化的结果以np.ndarray的形式返回;若设为目录路径,则将可视化结果保存至该目录下。默认值为'./'。
### 使用示例
-> 点击下载如下示例中的[模型](https://bj.bcebos.com/paddlex/models/xiaoduxiong_epoch_12.tar.gz)和[测试图片](https://bj.bcebos.com/paddlex/datasets/xiaoduxiong.jpeg)
+> 点击下载如下示例中的[模型](https://bj.bcebos.com/paddlex/models/xiaoduxiong_epoch_12.tar.gz)
```
import paddlex as pdx
model = pdx.load_model('xiaoduxiong_epoch_12')
-result = model.predict('xiaoduxiong.jpeg')
-pdx.det.visualize('xiaoduxiong.jpeg', result, save_dir='./')
+result = model.predict('./xiaoduxiong_epoch_12/xiaoduxiong.jpeg')
+pdx.det.visualize('./xiaoduxiong_epoch_12/xiaoduxiong.jpeg', result, save_dir='./')
# 预测结果保存在./visualize_xiaoduxiong.jpeg
```
+## paddlex.seg.visualize
+> **语义分割模型预测结果可视化**
+```
+paddlex.seg.visualize(image, result, weight=0.6, save_dir='./')
+```
+将语义分割模型预测得到的Mask在原图上进行可视化。
-## 目标检测/实例分割准确率-召回率可视化
+### 参数
+> * **image** (str|np.ndarray): 原图文件路径或numpy数组(HWC排列,BGR格式)。
+> * **result** (dict): 模型预测结果(`predict`接口的返回值)。
+> * **weight** (float): mask可视化结果与原图权重因子,weight表示原图的权重。默认0.6。
+> * **save_dir** (str): 可视化结果保存路径。若为None,则表示不保存,该函数将可视化的结果以np.ndarray的形式返回;若设为目录路径,则将可视化结果保存至该目录下。默认值为'./'。
+
+### 使用示例
+> 点击下载如下示例中的[模型](https://bj.bcebos.com/paddlex/models/cityscape_deeplab.tar.gz)和[测试图片](https://bj.bcebos.com/paddlex/datasets/city.png)
+```
+import paddlex as pdx
+model = pdx.load_model('cityscape_deeplab')
+result = model.predict('city.png')
+pdx.seg.visualize('city.png', result, save_dir='./')
+# 预测结果保存在./visualize_city.png
+```
+
+## paddlex.det.draw_pr_curve
+> **目标检测/实例分割准确率-召回率可视化**
```
paddlex.det.draw_pr_curve(eval_details_file=None, gt=None, pred_bbox=None, pred_mask=None, iou_thresh=0.5, save_dir='./')
```
将目标检测/实例分割模型评估结果中各个类别的准确率和召回率的对应关系进行可视化,同时可视化召回率和置信度阈值的对应关系。
+> 注:PaddleX在训练过程中保存的模型目录中,均包含`eval_result.json`文件,可将此文件路径传给`eval_details_file`参数,设定`iou_thresh`即可得到对应模型在验证集上的PR曲线图。
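+
+> 例如(示意代码,其中文件路径为假设值):
+```
+import paddlex as pdx
+pdx.det.draw_pr_curve(
+    eval_details_file='output/faster_rcnn_r50_fpn/best_model/eval_result.json',
+    iou_thresh=0.5,
+    save_dir='./pr_curve')
+```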
### 参数
> * **eval_details_file** (str): 模型评估结果的保存路径,包含真值信息和预测结果。默认值为None。
@@ -73,29 +99,9 @@ pdx.det.draw_pr_curve(gt=gt, pred_bbox=bbox, save_dir='./insect')
预测框的各个类别的准确率和召回率的对应关系、召回率和置信度阈值的对应关系可视化如下:
-## 语义分割预测结果可视化
-```
-paddlex.seg.visualize(image, result, weight=0.6, save_dir='./')
-```
-将语义分割模型预测得到的Mask在原图上进行可视化。
-
-### 参数
-> * **image** (str): 原图文件路径。
-> * **result** (str): 模型预测结果。
-> * **weight**(float): mask可视化结果与原图权重因子,weight表示原图的权重。默认0.6。
-> * **save_dir**(str): 可视化结果保存路径。若为None,则表示不保存,该函数将可视化的结果以np.ndarray的形式返回;若设为目录路径,则将可视化结果保存至该目录下。默认值为'./'。
-### 使用示例
-> 点击下载如下示例中的[模型](https://bj.bcebos.com/paddlex/models/cityscape_deeplab.tar.gz)和[测试图片](https://bj.bcebos.com/paddlex/datasets/city.png)
-```
-import paddlex as pdx
-model = pdx.load_model('cityscape_deeplab')
-result = model.predict('city.png')
-pdx.det.visualize('city.png', result, save_dir='./')
-# 预测结果保存在./visualize_city.png
-```
-
-## 模型裁剪比例可视化分析
+## paddlex.slim.visualize
+> **模型裁剪比例可视化分析**
```
paddlex.slim.visualize(model, sensitivities_file, save_dir='./')
```
@@ -114,64 +120,11 @@ pdx.slim.visualize(model, 'mobilenetv2.sensitivities', save_dir='./')
# 可视化结果保存在./sensitivities.png
```
-## LIME可解释性结果可视化
-```
-paddlex.interpret.lime(img_file,
- model,
- num_samples=3000,
- batch_size=50,
- save_dir='./')
-```
-使用LIME算法将模型预测结果的可解释性可视化。
-LIME表示与模型无关的局部可解释性,可以解释任何模型。LIME的思想是以输入样本为中心,在其附近的空间中进行随机采样,每个采样通过原模型得到新的输出,这样得到一系列的输入和对应的输出,LIME用一个简单的、可解释的模型(比如线性回归模型)来拟合这个映射关系,得到每个输入维度的权重,以此来解释模型。
-
-**注意:** 可解释性结果可视化目前只支持分类模型。
-
-### 参数
->* **img_file** (str): 预测图像路径。
->* **model** (paddlex.cv.models): paddlex中的模型。
->* **num_samples** (int): LIME用于学习线性模型的采样数,默认为3000。
->* **batch_size** (int): 预测数据batch大小,默认为50。
->* **save_dir** (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。
-
-
-### 使用示例
-> 对预测可解释性结果可视化的过程可参见[代码](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/interpret/lime.py)。
-
-
-## NormLIME可解释性结果可视化
-```
-paddlex.interpret.normlime(img_file,
- model,
- dataset=None,
- num_samples=3000,
- batch_size=50,
- save_dir='./',
- normlime_weights_file=None)
-```
-使用NormLIME算法将模型预测结果的可解释性可视化。
-NormLIME是利用一定数量的样本来出一个全局的解释。由于NormLIME计算量较大,此处采用一种简化的方式:使用一定数量的测试样本(目前默认使用所有测试样本),对每个样本进行特征提取,映射到同一个特征空间;然后以此特征做为输入,以模型输出做为输出,使用线性回归对其进行拟合,得到一个全局的输入和输出的关系。之后,对一测试样本进行解释时,使用NormLIME全局的解释,来对LIME的结果进行滤波,使最终的可视化结果更加稳定。
-
-**注意:** 可解释性结果可视化目前只支持分类模型。
-
-### 参数
->* **img_file** (str): 预测图像路径。
->* **model** (paddlex.cv.models): paddlex中的模型。
->* **dataset** (paddlex.datasets): 数据集读取器,默认为None。
->* **num_samples** (int): LIME用于学习线性模型的采样数,默认为3000。
->* **batch_size** (int): 预测数据batch大小,默认为50。
->* **save_dir** (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。
->* **normlime_weights_file** (str): NormLIME初始化文件名,若不存在,则计算一次,保存于该路径;若存在,则直接载入。
-
-**注意:** dataset`读取的是一个数据集,该数据集不宜过大,否则计算时间会较长,但应包含所有类别的数据。NormLIME可解释性结果可视化目前只支持分类模型。
-### 使用示例
-> 对预测可解释性结果可视化的过程可参见[代码](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/interpret/normlime.py)。
-
-
-## 数据预处理/增强过程可视化
+## paddlex.transforms.visualize
+> **数据预处理/增强过程可视化**
```
-paddlex.transforms.visualize(dataset,
- img_count=3,
+paddlex.transforms.visualize(dataset,
+ img_count=3,
save_dir='vdl_output')
```
对数据预处理/增强中间结果进行可视化。
@@ -183,4 +136,4 @@ paddlex.transforms.visualize(dataset,
### 参数
>* **dataset** (paddlex.datasets): 数据集读取器。
>* **img_count** (int): 需要进行数据预处理/增强的图像数目。默认为3。
->* **save_dir** (str): 日志保存的路径。默认为'vdl_output'。
\ No newline at end of file
+>* **save_dir** (str): 日志保存的路径。默认为'vdl_output'。
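+
+### 使用示例
+> 以下为示意代码,其中数据集路径为假设值(可替换为自己的数据集):
+```
+import paddlex as pdx
+from paddlex.seg import transforms
+
+train_transforms = transforms.Compose(
+    [transforms.RandomHorizontalFlip(), transforms.Normalize()])
+train_dataset = pdx.datasets.SegDataset(
+    data_dir='optic_disc_seg',
+    file_list='optic_disc_seg/train_list.txt',
+    label_list='optic_disc_seg/labels.txt',
+    transforms=train_transforms)
+pdx.transforms.visualize(train_dataset, img_count=3, save_dir='vdl_output')
+```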
diff --git a/docs/appendix/anaconda_install.md b/docs/appendix/anaconda_install.md
index 154be30928496da632ac5f67f3e7ee27fe05bc48..0484ea720bd944351efe1de63bce06d87d6429db 100755
--- a/docs/appendix/anaconda_install.md
+++ b/docs/appendix/anaconda_install.md
@@ -8,7 +8,7 @@ Anaconda是一个开源的Python发行版本,其包含了conda、Python等180
### 第二步 安装
运行下载的安装包(以.exe为后缀),根据引导完成安装,用户可自行修改安装目录(如下图)
-
+
### 第三步 使用
- 点击Windows系统左下角的Windows图标,打开:所有程序->Anaconda3/2(64-bit)->Anaconda Prompt
diff --git a/docs/appendix/datasets.md b/docs/appendix/datasets.md
deleted file mode 100644
index e966205c1c39eb8e68d9366db324c984a8a42134..0000000000000000000000000000000000000000
--- a/docs/appendix/datasets.md
+++ /dev/null
@@ -1,367 +0,0 @@
-# 数据集格式说明
-
----
-## 图像分类ImageNet
-
-图像分类ImageNet数据集包含对应多个标签的图像文件夹、标签文件及图像列表文件。
-参考数据文件结构如下:
-```
-./dataset/ # 数据集根目录
-|--labelA # 标签为labelA的图像目录
-| |--a1.jpg
-| |--...
-| └--...
-|
-|--...
-|
-|--labelZ # 标签为labelZ的图像目录
-| |--z1.jpg
-| |--...
-| └--...
-|
-|--train_list.txt # 训练文件列表文件
-|
-|--val_list.txt # 验证文件列表文件
-|
-└--labels.txt # 标签列表文件
-
-```
-其中,相应的文件名可根据需要自行定义。
-
-`train_list.txt`和`val_list.txt`文本以空格为分割符分为两列,第一列为图像文件相对于dataset的相对路径,第二列为图像文件对应的标签id(从0开始)。如下所示:
-```
-labelA/a1.jpg 0
-labelZ/z1.jpg 25
-...
-```
-
-`labels.txt`: 每一行为一个单独的类别,相应的行号即为类别对应的id(行号从0开始),如下所示:
-```
-labelA
-labelB
-...
-```
-[点击这里](https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz),下载蔬菜分类分类数据集。
-在PaddleX中,使用`paddlex.cv.datasets.ImageNet`([API说明](../apis/datasets/classification.html#imagenet))加载分类数据集。
-
-## 目标检测VOC
-目标检测VOC数据集包含图像文件夹、标注信息文件夹、标签文件及图像列表文件。
-参考数据文件结构如下:
-```
-./dataset/ # 数据集根目录
-|--JPEGImages # 图像目录
-| |--xxx1.jpg
-| |--...
-| └--...
-|
-|--Annotations # 标注信息目录
-| |--xxx1.xml
-| |--...
-| └--...
-|
-|--train_list.txt # 训练文件列表文件
-|
-|--val_list.txt # 验证文件列表文件
-|
-└--labels.txt # 标签列表文件
-
-```
-其中,相应的文件名可根据需要自行定义。
-
-`train_list.txt`和`val_list.txt`文本以空格为分割符分为两列,第一列为图像文件相对于dataset的相对路径,第二列为标注文件相对于dataset的相对路径。如下所示:
-```
-JPEGImages/xxx1.jpg Annotations/xxx1.xml
-JPEGImages/xxx2.jpg Annotations/xxx2.xml
-...
-```
-
-`labels.txt`: 每一行为一个单独的类别,相应的行号即为类别对应的id(行号从0开始),如下所示:
-```
-labelA
-labelB
-...
-```
-[点击这里](https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz),下载昆虫检测数据集。
-在PaddleX中,使用`paddlex.cv.datasets.VOCDetection`([API说明](../apis/datasets/detection.html#vocdetection))加载目标检测VOC数据集。
-
-## 目标检测和实例分割COCO
-目标检测和实例分割COCO数据集包含图像文件夹及图像标注信息文件。
-参考数据文件结构如下:
-```
-./dataset/ # 数据集根目录
-|--JPEGImages # 图像目录
-| |--xxx1.jpg
-| |--...
-| └--...
-|
-|--train.json # 训练相关信息文件
-|
-└--val.json # 验证相关信息文件
-
-```
-其中,相应的文件名可根据需要自行定义。
-
-`train.json`和`val.json`存储与标注信息、图像文件相关的信息。如下所示:
-
-```
-{
- "annotations": [
- {
- "iscrowd": 0,
- "category_id": 1,
- "id": 1,
- "area": 33672.0,
- "image_id": 1,
- "bbox": [232, 32, 138, 244],
- "segmentation": [[32, 168, 365, 117, ...]]
- },
- ...
- ],
- "images": [
- {
- "file_name": "xxx1.jpg",
- "height": 512,
- "id": 267,
- "width": 612
- },
- ...
- ]
- "categories": [
- {
- "name": "labelA",
- "id": 1,
- "supercategory": "component"
- }
- ]
-}
-```
-其中,每个字段的含义如下所示:
-
-| 域名 | 字段名 | 含义 | 数据类型 | 备注 |
-|:-----|:--------|:------------|------|:-----|
-| annotations | id | 标注信息id | int | 从1开始 |
-| annotations | iscrowd | 标注框是否为一组对象 | int | 只有0、1两种取值 |
-| annotations | category_id | 标注框类别id | int | |
-| annotations | area | 标注框的面积 | float | |
-| annotations | image_id | 当前标注信息所在图像的id | int | |
-| annotations | bbox | 标注框坐标 | list | 长度为4,分别代表x,y,w,h |
-| annotations | segmentation | 标注区域坐标 | list | list中有至少1个list,每个list由每个小区域坐标点的横纵坐标(x,y)组成 |
-| images | id | 图像id | int | 从1开始 |
-| images | file_name | 图像文件名 | str | |
-| images | height | 图像高度 | int | |
-| images | width | 图像宽度 | int | |
-| categories | id | 类别id | int | 从1开始 |
-| categories | name | 类别标签名 | str | |
-| categories | supercategory | 类别父类的标签名 | str | |
-
-
-[点击这里](https://bj.bcebos.com/paddlex/datasets/garbage_ins_det.tar.gz),下载垃圾实例分割数据集。
-在PaddleX中,使用`paddlex.cv.datasets.COCODetection`([API说明](../apis/datasets/detection.html#cocodetection))加载COCO格式数据集。
-
-## 语义分割数据
-语义分割数据集包含原图、标注图及相应的文件列表文件。
-参考数据文件结构如下:
-```
-./dataset/ # 数据集根目录
-|--images # 原图目录
-| |--xxx1.png
-| |--...
-| └--...
-|
-|--annotations # 标注图目录
-| |--xxx1.png
-| |--...
-| └--...
-|
-|--train_list.txt # 训练文件列表文件
-|
-|--val_list.txt # 验证文件列表文件
-|
-└--labels.txt # 标签列表
-
-```
-其中,相应的文件名可根据需要自行定义。
-
-`train_list.txt`和`val_list.txt`文本以空格为分割符分为两列,第一列为图像文件相对于dataset的相对路径,第二列为标注图像文件相对于dataset的相对路径。如下所示:
-```
-images/xxx1.png annotations/xxx1.png
-images/xxx2.png annotations/xxx2.png
-...
-```
-
-`labels.txt`: 每一行为一个单独的类别,相应的行号即为类别对应的id(行号从0开始),如下所示:
-```
-background
-labelA
-labelB
-...
-```
-
-标注图像为单通道图像,像素值即为对应的类别,像素标注类别需要从0开始递增(一般第一个类别为`background`),
-例如0,1,2,3表示有4种类别,标注类别最多为256类。其中可以指定特定的像素值用于表示该值的像素不参与训练和评估(默认为255)。
-
-[点击这里](https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz),下载视盘语义分割数据集。
-在PaddleX中,使用`paddlex.cv.datasets.SegReader`([API说明](../apis/datasets/semantic_segmentation.html#segdataset))加载语义分割数据集。
-
-
-## 图像分类EasyDataCls
-
-图像分类EasyDataCls数据集包含存放图像和json文件的文件夹、标签文件及图像列表文件。
-参考数据文件结构如下:
-```
-./dataset/ # 数据集根目录
-|--easydata # 存放图像和json文件的文件夹
-| |--0001.jpg
-| |--0001.json
-| |--0002.jpg
-| |--0002.json
-| └--...
-|
-|--train_list.txt # 训练文件列表文件
-|
-|--val_list.txt # 验证文件列表文件
-|
-└--labels.txt # 标签列表文件
-
-```
-其中,图像文件名应与json文件名一一对应。
-
-每个json文件存储于`labels`相关的信息。如下所示:
-```
-{"labels": [{"name": "labelA"}]}
-```
-其中,`name`字段代表对应图像的类别。
-
-`train_list.txt`和`val_list.txt`文本以空格为分割符分为两列,第一列为图像文件相对于dataset的相对路径,第二列为json文件相对于dataset的相对路径。如下所示:
-```
-easydata/0001.jpg easydata/0001.json
-easydata/0002.jpg easydata/0002.json
-...
-```
-
-`labels.txt`: 每一行为一个单独的类别,相应的行号即为类别对应的id(行号从0开始),如下所示:
-```
-labelA
-labelB
-...
-```
-[点击这里](https://ai.baidu.com/easydata/),可以标注图像分类EasyDataCls数据集。
-在PaddleX中,使用`paddlex.cv.datasets.EasyDataCls`([API说明](../apis/datasets/classification.html#easydatacls))加载分类数据集。
-
-
-## 目标检测和实例分割EasyDataDet
-
-目标检测和实例分割EasyDataDet数据集包含存放图像和json文件的文件夹、标签文件及图像列表文件。
-参考数据文件结构如下:
-```
-./dataset/ # 数据集根目录ß
-|--easydata # 存放图像和json文件的文件夹
-| |--0001.jpg
-| |--0001.json
-| |--0002.jpg
-| |--0002.json
-| └--...
-|
-|--train_list.txt # 训练文件列表文件
-|
-|--val_list.txt # 验证文件列表文件
-|
-└--labels.txt # 标签列表文件
-
-```
-其中,图像文件名应与json文件名一一对应。
-
-每个json文件存储于`labels`相关的信息。如下所示:
-```
-"labels": [{"y1": 18, "x2": 883, "x1": 371, "y2": 404, "name": "labelA",
- "mask": "kVfc0`0Zg0 注: acck准确率是针对一张图片进行计算的:把模型在各个类别上的预测得分按从高往低进行排序,取出前k个预测类别,若这k个预测类别包含了真值类,则认为该图片分类正确。
-
+
上图中第1行中的`acc1`表示参与当前迭代步数的训练样本的平均top1准确率,值越高代表模型越优;`acc5`表示参与当前迭代步数的训练样本的平均top5(若类别数n少于5,则为topn)准确率,值越高代表模型越优。第4行中的`loss`表示整个训练集的平均损失函数值,`acc1`表示整个训练集的平均top1准确率,`acc5`表示整个训练集的平均top5准确率。
@@ -46,7 +46,7 @@ PaddleX所有模型在训练过程中会根据用户设定的`save_interval_epoc
### 评估日志字段
-
+
上图中第3行中的`acc1`表示整个验证集的平均top1准确率,`acc5`表示整个验证集的平均top5准确率。
@@ -59,7 +59,7 @@ PaddleX所有模型在训练过程中会根据用户设定的`save_interval_epoc
YOLOv3的训练日志只包括训练通用统计信息(见上文训练通用统计信息)。
-
+
上图中第5行`loss`表示整个训练集的平均损失函数loss值。
@@ -75,7 +75,7 @@ FasterRCNN的训练日志除了通用统计信息外,还包括`loss_cls`、`lo
| loss_rpn_bbox | RPN子网络中检测框回归损失函数值 |
| loss | 所有子网络损失函数值之和 |
-
+
上图中第1行`loss`, `loss_cls`、`loss_bbox`、`loss_rpn_cls`、`loss_rpn_bbox`都是参与当前迭代步数的训练样本的损失值,而第7行是针对整个训练集的损失函数值。
@@ -93,7 +93,7 @@ MaskRCNN的训练日志除了通用统计信息外,还包括`loss_cls`、`loss
| loss_rpn_bbox | RPN子网络中检测框回归损失函数值 |
| loss | 所有子网络损失函数值之和 |
-
+
上图中第1行`loss`, `loss_cls`、`loss_bbox`、`loss_mask`、`loss_rpn_cls`、`loss_rpn_bbox`都是参与当前迭代步数的训练样本的损失值,而第7行是针对整个训练集的损失函数值。
@@ -103,7 +103,7 @@ MaskRCNN的训练日志除了通用统计信息外,还包括`loss_cls`、`loss
#### VOC评估标准
-
+
> 注:`map`为平均准确率的平均值,即IoU(Intersection Over Union)取0.5时各个类别的准确率-召回率曲线下面积的平均值。
@@ -115,11 +115,11 @@ MaskRCNN的训练日志除了通用统计信息外,还包括`loss_cls`、`loss
COCO格式的数据集不仅可以用于训练目标检测模型,也可以用于训练实例分割模型。在目标检测中,PaddleX主要反馈针对检测框的`bbox_mmAP`指标;在实例分割中,还包括针对Mask的`seg_mmAP`指标。如下所示,第一张日志截图为目标检测的评估结果,第二张日志截图为实例分割的评估结果。
-
+
上图中红框标注的`bbox_mmap`表示整个验证集的检测框平均准确率平均值。
-
+
上图中红框标注的`bbox_mmap`和`seg_mmap`分别表示整个验证集的检测框平均准确率平均值、Mask平均准确率平均值。
## 分割特有统计信息
@@ -128,7 +128,7 @@ COCO格式的数据集不仅可以用于训练目标检测模型,也可以用
语义分割的训练日志只包括训练通用统计信息(见上文训练通用统计信息)。
-
+
### 评估日志字段
@@ -142,4 +142,4 @@ COCO格式的数据集不仅可以用于训练目标检测模型,也可以用
| category_acc | 各类别的准确率,即各类别预测正确的像素数/预测为该类别的总像素数 |
| kappa | kappa系数,用于一致性检验 |
-
+
diff --git a/docs/appendix/model_zoo.md b/docs/appendix/model_zoo.md
index f866b39173ead1c162e9e3ee722ae2ea2cb2afb3..811c6f745fba0de095a84f7a1b5ae0b1d526b6ec 100644
--- a/docs/appendix/model_zoo.md
+++ b/docs/appendix/model_zoo.md
@@ -1,7 +1,7 @@
# PaddleX模型库
## 图像分类模型
-> 表中模型相关指标均为在ImageNet数据集上使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla P40),预测速度为每张图片预测用时(不包括预处理和后处理),表中符号`-`表示相关指标暂未测试。
+> 表中模型相关指标均为在ImageNet数据集上使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla P40),预测速度为每张图片预测用时(不包括预处理和后处理),表中符号`-`表示相关指标暂未测试。
| 模型 | 模型大小 | 预测速度(毫秒) | Top1准确率(%) | Top5准确率(%) |
@@ -28,13 +28,15 @@
| [DenseNet201](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet201_pretrained.tar)| 84.1MB | 25.26089 | 77.6 | 93.7 |
| [ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 10.2MB | 15.40138 | 68.8 | 88.5 |
| [HRNet_W18](https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar) | 21.29MB |45.25514 | 76.9 | 93.4 |
+| [AlexNet](https://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar) | 244.4MB | - | 56.7 | 79.2 |
## 目标检测模型
-> 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla V100测试得到),表中符号`-`表示相关指标暂未测试。
+> 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla V100测试得到),表中符号`-`表示相关指标暂未测试。
| 模型 | 模型大小 | 预测时间(毫秒) | BoxAP(%) |
|:-------|:-----------|:-------------|:----------|
+|[FasterRCNN-ResNet18-FPN](https://bj.bcebos.com/paddlex/pretrained_weights/faster_rcnn_r18_fpn_1x.tar) | 173.2MB | - | 32.6 |
|[FasterRCNN-ResNet50](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar)|136.0MB| 197.715 | 35.2 |
|[FasterRCNN-ResNet50_vd](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_1x.tar)| 136.1MB | 475.700 | 36.4 |
|[FasterRCNN-ResNet101](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_1x.tar)| 212.5MB | 582.911 | 38.3 |
@@ -50,12 +52,35 @@
## 实例分割模型
-> 表中模型相关指标均为在MSCOCO数据集上测试得到。
+> 预测时间是在一张Nvidia Tesla V100的GPU上通过`evaluate()`接口测试MSCOCO验证集得到,包括数据加载、网络前向执行和后处理, batch size是1,表中符号`-`表示相关指标暂未测试。
+
+| 模型 | 模型大小 | 预测时间(毫秒) | BoxAP (%) | MaskAP (%) |
+|:-------|:-----------|:-------------|:----------|:----------|
+|[MaskRCNN-ResNet18-FPN](https://bj.bcebos.com/paddlex/pretrained_weights/mask_rcnn_r18_fpn_1x.tar) | 189.1MB | - | 33.6 | 30.5 |
+|[MaskRCNN-ResNet50](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_2x.tar) | 143.9MB | 87 | 38.2 | 33.4 |
+|[MaskRCNN-ResNet50-FPN](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_fpn_2x.tar)| 177.7MB | 63.9 | 38.7 | 34.7 |
+|[MaskRCNN-ResNet50_vd-FPN](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_vd_fpn_2x.tar) | 177.7MB | 63.1 | 39.8 | 35.4 |
+|[MaskRCNN-ResNet101-FPN](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_fpn_1x.tar) | 253.6MB | 77 | 39.5 | 35.2 |
+|[MaskRCNN-ResNet101_vd-FPN](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_vd_fpn_1x.tar) | 253.7MB | 76.4 | 41.4 | 36.8 |
+|[MaskRCNN-HRNet_W18-FPN](https://bj.bcebos.com/paddlex/pretrained_weights/mask_rcnn_hrnetv2p_w18_2x.tar) | 120.7MB | - | 38.7 | 34.7 |
+
+
+## 语义分割模型
+
+> 以下指标均在MSCOCO验证集上测试得到,表中符号`-`表示相关指标暂未测试。
+
+| 模型 | 模型大小 | 预测时间(毫秒) | mIoU(%) |
+|:-------|:-----------|:-------------|:----------|
+|[DeepLabv3_MobileNetV2_x1.0](https://bj.bcebos.com/v1/paddleseg/deeplab_mobilenet_x1_0_coco.tgz)| 14.7MB | - | - |
+|[DeepLabv3_Xception65](https://paddleseg.bj.bcebos.com/models/xception65_coco.tgz)| 329.3MB | - | - |
+|[UNet](https://paddleseg.bj.bcebos.com/models/unet_coco_v3.tgz) | 107.3MB | - | - |
+
+
+> 以下指标均在Cityscapes验证集上测试得到,表中符号`-`表示相关指标暂未测试。
| 模型 | 模型大小 | 预测时间(毫秒) | mIoU(%) |
|:-------|:-----------|:-------------|:----------|
-|DeepLabv3+-MobileNetV2_x1.0|-| - | - |
-|DeepLabv3+-Xception41|-| - | - |
-|DeepLabv3+-Xception65|-| - | - |
-|UNet|-| - | - |
-|HRNet_w18|-| - | - |
+| [DeepLabv3_MobileNetV2_x1.0](https://paddleseg.bj.bcebos.com/models/mobilenet_cityscapes.tgz) | 14.7MB | - | 69.8 |
+| [DeepLabv3_Xception65](https://paddleseg.bj.bcebos.com/models/xception65_bn_cityscapes.tgz) | 329.3MB | - | 79.3 |
+| [HRNet_W18](https://paddleseg.bj.bcebos.com/models/hrnet_w18_bn_cityscapes.tgz) | 77.3MB | - | 79.36 |
+| [Fast-SCNN](https://paddleseg.bj.bcebos.com/models/fast_scnn_cityscape.tar) | 9.8MB | - | 69.64 |
diff --git a/docs/appendix/parameters.md b/docs/appendix/parameters.md
index 716cdac92aa6504f1543cb91997f2f1fd89a3e13..d0f913600aa3a402be95ae9b998b3dfc2dd45901 100644
--- a/docs/appendix/parameters.md
+++ b/docs/appendix/parameters.md
@@ -5,53 +5,69 @@ PaddleX所有训练接口中,内置的参数均为根据单GPU卡相应batch_s
## 1.Epoch数的调整
Epoch数是模型训练过程中迭代的轮数,用户可以设置较大的数值,根据模型迭代过程中在验证集上的指标表现,来判断模型是否收敛,进而提前终止训练。此外也可以使用`train`接口中的`early_stop`策略,模型在训练过程中会自动判断模型是否收敛并自动中止。
-## 2.Batch Size的调整
-Batch Size指模型在训练过程中,一次性处理的样本数量, 如若使用多卡训练, batch_size会均分到各张卡上(因此需要让batch size整除卡数)。这个参数跟机器的显存/内存高度相关,`batch_size`越高,所消耗的显存/内存就越高。PaddleX在各个`train`接口中均配置了默认的batch size,如若用户调整batch size,则也注意需要对应调整其它参数,如下表所示展示YOLOv3在训练时的参数配置
+## 2.batch_size和learning_rate
-| 参数 | 默认值 | 调整比例 | 示例 |
-|:---------------- | :------------ | :------------------ | :------------ |
-| train_batch_size | 8 | 调整为 8*alpha | 16 |
-| learning_rate | 1.0/8000 | 调整为 alpha/8000 | 2.0/8000 |
-| warmup_steps | 1000 | 调整为 1000/alpha
(该参数也可以自行根据数据情况调整) | 500 |
-| lr_decay_epochs | [213, 240] | 不变 | [213, 240] |
+> - Batch Size指模型在训练过程中,一次性处理的样本数量
+> - 如若使用多卡训练, batch_size会均分到各张卡上(因此需要让batch size整除卡数)
+> - Batch Size跟机器的显存/内存高度相关,`batch_size`越高,所消耗的显存/内存就越高
+> - PaddleX在各个`train`接口中均配置了默认的batch size(默认针对单GPU卡),如若训练时提示GPU显存不足,则相应调低batch size;如若GPU显存充足或使用多张GPU卡,可相应调高batch size。
+> - **如若用户调整batch size,则也注意需要对应调整其它参数,特别是train接口中默认的learning_rate值**。如在YOLOv3模型中,默认`train_batch_size`为8,`learning_rate`为0.000125,当用户将模型在2卡机器上训练时,可以将`train_batch_size`调整为16, 那么同时`learning_rate`也可以对应调整为0.000125 * 2 = 0.00025
+## 3.warmup_steps和warmup_start_lr
-更多训练接口可以参考
-- [分类模型-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#train)
-- [目标检测检测FasterRCNN-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#id2)
-- [目标检测YOLOv3-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#train)
-- [实例分割MaskRCNN-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#train)
-- [语义分割DeepLabv3p-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#train)
-- [语义分割UNet](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#id2)
+在训练模型时,一般都会使用预训练模型,例如检测模型在训练时使用backbone在ImageNet数据集上的预训练权重。但由于在自行训练时,自己的数据与ImageNet数据集存在较大的差异,一开始可能会由于梯度过大使得训练出现问题,因此可以在刚开始训练时,让学习率从一个较小的值开始,慢慢增长到设定的学习率。`warmup_steps`和`warmup_start_lr`就是起这个作用的:模型开始训练时,学习率会从`warmup_start_lr`开始,在`warmup_steps`个step内线性增长到设定的学习率。
-## 关于lr_decay_epoch, warmup_steps等参数的说明
+> 例如YOLOv3的train接口,默认`train_batch_size`为8,`learning_rate`为0.000125, `warmup_steps`为1000, `warmup_start_lr`为0.0;在此参数配置下表示,模型在启动训练后,在前1000个step(每个step表示一个batch的数据,也就是8个样本)内,学习率会从0.0开始线性增长到设定的0.000125。
-在PaddleX或其它深度学习模型的训练过程中,经常见到lr_decay_epoch, warmup_steps, warmup_start_lr等参数设置,下面介绍一些这些参数的作用。
+## 4.lr_decay_epochs和lr_decay_gamma
-首先这些参数都是用于控制模型训练过程中学习率的变化方式,例如我们在训练时将learning_rate设为0.1, 通常情况,在模型的训练过程中,学习率一直以0.1不变训练下去, 但为了调出更好的模型效果,我们往往不希望学习率一直保持不变。
+`lr_decay_epochs`用于让学习率在模型训练后期逐步衰减,它一般是一个list,如[6, 8, 10],表示学习率在第6个epoch时衰减一次,第8个epoch时再衰减一次,第10个epoch时再衰减一次。每次学习率衰减为之前的学习率*lr_decay_gamma。
-### warmup_steps和warmup_start_lr
+> 例如YOLOv3的train接口,默认`num_epochs`为270,`learning_rate`为0.000125, `lr_decay_epochs`为[213, 240],`lr_decay_gamma`为0.1;在此参数配置下表示,模型在启动训练后,在前213个epoch中,训练时使用的学习率为0.000125,在第213至240个epoch之间,训练使用的学习率为0.000125x0.1=0.0000125,在240个epoch之后,使用的学习率为0.000125x0.1x0.1=0.00000125
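+
+> 上述默认参数对应到YOLOv3的train接口调用示意如下(示意代码,train_dataset等数据读取器需事先构建,num_classes为示例取值):
+```
+import paddlex as pdx
+model = pdx.det.YOLOv3(num_classes=6)
+model.train(
+    num_epochs=270,
+    train_dataset=train_dataset,
+    train_batch_size=8,
+    learning_rate=0.000125,
+    warmup_steps=1000,
+    warmup_start_lr=0.0,
+    lr_decay_epochs=[213, 240],
+    lr_decay_gamma=0.1,
+    save_dir='output/yolov3')
+```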
-我们在训练模型时,一般都会使用预训练模型,例如检测模型在训练时使用backbone在ImageNet数据集上的预训练权重。但由于在自行训练时,自己的数据与ImageNet数据集存在较大的差异,可能会一开始由于梯度过大使得训练出现问题,因此可以在刚开始训练时,让学习率以一个较小的值,慢慢增长到设定的学习率。因此`warmup_steps`和`warmup_start_lr`就是这个作用,模型开始训练时,学习率会从`warmup_start_lr`开始,在`warmup_steps`内线性增长到设定的学习率。
+## 5.参数设定时的约束
+根据上述几个参数,可以了解到学习率的变化分为Warmup热身阶段和Decay衰减阶段:
+> - Warmup热身阶段:随着训练迭代,学习率从较低的值逐渐线性增长至设定的值,以step为单位
+> - Decay衰减阶段:随着训练迭代,学习率逐步衰减,如每次衰减为之前的0.1倍,以epoch为单位
+> step与epoch的关系:1个epoch由多个step组成,例如训练样本有800张图像,`train_batch_size`为8, 那么每个epoch都要完整用这800张图片训一次模型,而每个epoch总共包含800//8即100个step
-### lr_decay_epochs和lr_decay_gamma
+在PaddleX中,约束warmup必须在Decay之前结束,因此各参数设置需要满足下面条件
+```
+warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch
+```
+其中`num_steps_each_epoch`计算方式如下,
+```
+num_steps_each_epoch = num_samples_in_train_dataset // train_batch_size
+```
+
+因此,如若你在启动训练时被提示`warmup_steps should be less than...`,即表示参数不满足上述约束条件,可以相应调大`lr_decay_epochs`或者调小`warmup_steps`。
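+
+> 以训练集有800张图像、`train_batch_size`为8、`lr_decay_epochs`为[213, 240]为例,可按如下方式检查该约束(示意计算):
+```
+num_steps_each_epoch = 800 // 8                    # 每个epoch包含100个step
+warmup_steps = 1000
+# warmup必须在第一次学习率衰减(第213个epoch)开始前结束
+assert warmup_steps <= 213 * num_steps_each_epoch  # 1000 <= 21300,满足约束
+```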
-`lr_decay_epochs`用于让学习率在模型训练后期逐步衰减,它一般是一个list,如[6, 8, 10],表示学习率在第6个epoch时衰减一次,第8个epoch时再衰减一次,第10个epoch时再衰减一次。每次学习率衰减为之前的学习率*lr_decay_gamma
+## 6.如何使用多GPU卡进行训练
+在`import paddlex`前配置环境变量,代码如下
+```
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0' # 使用第1张GPU卡进行训练
+# 注意paddle或paddlex都需要在设置环境变量后再import
+import paddlex as pdx
+```
-### Notice
+```
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '' # 不使用GPU,使用CPU进行训练
+import paddlex as pdx
+```
-在PaddleX中,限制warmup需要在第一个学习率decay衰减前结束,因此要满足下面的公式
```
-warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,3' # 使用第1、2、4张GPU卡进行训练
+import paddlex as pdx
```
-其中公式中`num_steps_each_epoch = num_samples_in_train_dataset // train_batch_size`。
-> 因此如若在训练时PaddleX提示`warmup_steps should be less than xxx`时,即可根据上述公式来调整你的`lr_decay_epochs`或者是`warmup_steps`使得两个参数满足上面的条件
-> - 图像分类模型 [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#train)
-> - FasterRCNN [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn)
-> - YOLOv3 [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3)
-> - MaskRCNN [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn)
-> - DeepLab [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p)
-> - UNet [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet)
-> - HRNet [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet)
+## 相关模型接口
+
+- 图像分类模型 [train接口](../apis/models/classification.html#train)
+- FasterRCNN [train接口](../apis/models/detection.html#id1)
+- YOLOv3 [train接口](../apis/models/detection.html#train)
+- MaskRCNN [train接口](../apis/models/instance_segmentation.html#train)
+- DeepLabv3p [train接口](../apis/models/semantic_segmentation.html#train)
diff --git a/docs/appendix/slim_model_zoo.md b/docs/appendix/slim_model_zoo.md
index a594d53dd7a777288571ccae6fad5ec21415de36..b43f415fad0b15584b2c7903a83996e3d9330312 100644
--- a/docs/appendix/slim_model_zoo.md
+++ b/docs/appendix/slim_model_zoo.md
@@ -40,14 +40,14 @@
### 剪裁
-PaddleLite推理耗时说明:
+Paddle Lite推理耗时说明:
环境:Qualcomm SnapDragon 845 + armv8
速度指标:Thread1/Thread2/Thread4耗时
-| 模型 | 压缩策略 | Top-1 | 存储体积 |PaddleLite推理耗时|TensorRT推理速度(FPS)|
+| 模型 | 压缩策略 | Top-1 | 存储体积 |Paddle Lite推理耗时|TensorRT推理速度(FPS)|
|:--:|:---:|:--:|:--:|:--:|:--:|
| MobileNetV1 | 无 | 70.99% | 17MB | 66.052\35.8014\19.5762|-|
| MobileNetV1 | 剪裁 -30% | 70.4% (-0.59%) | 12MB | 46.5958\25.3098\13.6982|-|
@@ -70,13 +70,13 @@ PaddleLite推理耗时说明:
数据集:Pasacl VOC & COCO2017
-PaddleLite推理耗时说明:
+Paddle Lite推理耗时说明:
环境:Qualcomm SnapDragon 845 + armv8
速度指标:Thread1/Thread2/Thread4耗时
-| 模型 | 压缩策略 | 数据集 | Image/GPU | 输入608 Box mmAP | 存储体积 | PaddleLite推理耗时(ms)(608*608) | TensorRT推理速度(FPS)(608*608) |
+| 模型 | 压缩策略 | 数据集 | Image/GPU | 输入608 Box mmAP | 存储体积 | Paddle Lite推理耗时(ms)(608*608) | TensorRT推理速度(FPS)(608*608) |
| :----------------------------: | :---------------: | :--------: | :-------: | :------------: | :----------: | :--------------: | :--------------: |
| MobileNet-V1-YOLOv3 | 无 | Pascal VOC | 8 | 76.2 | 94MB | 1238\796.943\520.101|60.04|
| MobileNet-V1-YOLOv3 | 剪裁 -52.88% | Pascal VOC | 8 | 77.6 (+1.4) | 31MB | 602.497\353.759\222.427 |99.36|
@@ -108,14 +108,14 @@ PaddleLite推理耗时说明:
### 剪裁
-PaddleLite推理耗时说明:
+Paddle Lite推理耗时说明:
环境:Qualcomm SnapDragon 845 + armv8
速度指标:Thread1/Thread2/Thread4耗时
-| 模型 | 压缩方法 | mIoU | 存储体积 | PaddleLite推理耗时 | TensorRT推理速度(FPS) |
+| 模型 | 压缩方法 | mIoU | 存储体积 | Paddle Lite推理耗时 | TensorRT推理速度(FPS) |
| :-------: | :---------------: | :-----------: | :------: | :------------: | :----: |
| FastSCNN | 无 | 69.64 | 11MB | 1226.36\682.96\415.664 |39.53|
| FastSCNN | 剪裁 -47.60% | 66.68 (-2.96) | 5.7MB | 866.693\494.467\291.748 |51.48|
diff --git a/docs/change_log.md b/docs/change_log.md
new file mode 100644
index 0000000000000000000000000000000000000000..08e95f7d5f2463fe60e1012d59dbe8a07b3173b5
--- /dev/null
+++ b/docs/change_log.md
@@ -0,0 +1,34 @@
+# 更新日志
+
+
+**v1.1.0** 2020.07.12
+
+- 模型更新
+> - 新增语义分割模型HRNet、FastSCNN
+> - 目标检测FasterRCNN、实例分割MaskRCNN新增backbone HRNet
+> - 目标检测/实例分割模型新增COCO数据集预训练模型
+> - 集成X2Paddle,PaddleX所有分类模型和语义分割模型支持导出为ONNX协议
+- 模型部署更新
+> - 模型加密增加支持Windows平台
+> - 新增Jetson、Paddle Lite模型部署预测方案
+> - C++部署代码新增batch批预测,并采用OpenMP对预处理进行并行加速
+- 新增2个PaddleX产业案例
+> - [人像分割案例]()
+> - [工业表计读数案例]()
+- 新增数据格式转换功能,LabelMe、精灵标注助手和EasyData平台标注的数据转为PaddleX支持加载的数据格式
+- PaddleX文档更新,优化文档结构
+
+
+**v1.0.0** 2020.05.20
+
+- 增加模型C++部署和Python部署代码
+- 增加模型加密部署方案
+- 增加分类模型的OpenVINO部署方案
+- 增加模型可解释性的接口
+
+
+**v0.1.8** 2020.05.17
+
+- 修复部分代码Bug
+- 新增EasyData平台数据标注格式支持
+- 支持imgaug数据增强库的pixel-level算子
diff --git a/docs/conf.py b/docs/conf.py
index 6313188c25f83092e7d8f69019936c3d7223cd54..1f62b6bf6340a2ddd61d4744dcd75590d4ec805c 100755
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -76,5 +76,4 @@ if not on_rtd: # only import and set the theme if we're building docs locally
html_static_path = ['_static']
-html_logo = 'images/paddlex.jpg'
-
+html_logo = 'paddlex.png'
diff --git a/docs/data/annotation.md b/docs/data/annotation.md
new file mode 100755
index 0000000000000000000000000000000000000000..0220ca8ec765635d31a2f7f93d12ce3cb8c9bc5e
--- /dev/null
+++ b/docs/data/annotation.md
@@ -0,0 +1,39 @@
+# 数据标注工具
+
+PaddleX支持图像分类、目标检测、实例分割和语义分割四大视觉领域常见的任务,对于每类视觉任务,都支持了特定的数据格式。PaddleX目前支持了图像分类的ImageNet格式,目标检测的PascalVOC格式,实例分割的MSCOCO格式(MSCOCO也可以用于目标检测)以及语义分割数据格式。
+
+## 常见标注工具
+
+ 图像分类无需标注工具,用户只需以txt文件记录每张图片的类别标签即可。对于目标检测、实例分割和语义分割,PaddleX已经与主流的标注工具进行了适配,用户可根据自己的需求,选择以下标注工具进行数据标注。
+
+| 标注工具 | 图像分类 | 目标检测 | 实例分割 | 语义分割 | 安装 |
+| :--------- | :------- | :------ | :------ | :------- | :----------------------------------------------- |
+| Labelme | - | √ | √ | √ | pip install labelme (本地数据标注) |
+| 精灵标注 | √ | √* | √ | √ | [官网下载](http://www.jinglingbiaozhu.com/) (本地数据标注) |
+| EasyData | √ | √ | √ | √ | [Web页面标注](https://ai.baidu.com/easydata/) (需上传数据进行标注) |
+
+数据标注完成后,参照如下流程,将标注数据转为可用PaddleX模型训练的数据组织格式。
+
+**注意:** 上表中目标检测列的`√*`表示精灵标注的目标检测数据可以在工具内部导出为PascalVOC格式,因此PaddleX未提供精灵标注数据到PascalVOC格式的转换。
+
+
+## 标注数据格式转换
+
+目前所有标注工具生成的标注文件,均为与原图同名的json格式文件,如`1.jpg`在标注完成后,则会在标注文件保存的目录中生成`1.json`文件。转换时参照以下步骤:
+
+1. 将所有的原图文件放在同一个目录下,如`pics`目录
+2. 将所有的标注json文件放在同一个目录下,如`annotations`目录
+3. 使用如下命令进行转换:
+
+```
+paddlex --data_conversion --source labelme --to PascalVOC --pics ./pics --annotations ./annotations --save_dir ./converted_dataset_dir
+```
+
+| 参数 | 说明 |
+| ---- | ---- |
+| --source | 表示数据标注来源,支持`labelme`、`jingling`和`easydata`(分别表示数据来源于LabelMe,精灵标注助手和EasyData)|
+| --to | 表示数据需要转换成为的格式,支持`ImageNet`(图像分类)、`PascalVOC`(目标检测),`MSCOCO`(实例分割,也可用于目标检测)和`SEG`(语义分割) |
+| --pics | 指定原图所在的目录路径 |
+| --annotations | 指定标注文件所在的目录路径 |
+| --save_dir | 指定转换后的数据集保存目录路径 |
+
+**注意**:精灵标注的目标检测数据可以在工具内部导出为PascalVOC格式,因此paddlex未提供精灵标注数据到PascalVOC格式的转换
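+
+Before converting, it can help to confirm that every image has a matching annotation file. Below is a minimal sketch (not part of PaddleX; the `./pics` and `./annotations` paths follow the steps above):
+
+```
+import os
+
+pics_dir, ann_dir = './pics', './annotations'
+# stems of all annotation json files
+ann_stems = {os.path.splitext(f)[0] for f in os.listdir(ann_dir) if f.endswith('.json')}
+# images that have no matching json annotation
+missing = [f for f in os.listdir(pics_dir) if os.path.splitext(f)[0] not in ann_stems]
+print('images without an annotation file:', missing)
+```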
diff --git a/docs/data/format/classification.md b/docs/data/format/classification.md
new file mode 100644
index 0000000000000000000000000000000000000000..131e283b256ec99b53cb14b30ed504739395972e
--- /dev/null
+++ b/docs/data/format/classification.md
@@ -0,0 +1,84 @@
+# Image Classification: ImageNet
+
+## Directory Structure
+
+For image classification PaddleX uses the ImageNet dataset format: the dataset root `data_dir` contains one folder per class, every image inside a folder belongs to that class, and the folder name is the class name (paths must not contain Chinese characters or spaces).
+An example layout:
+```
+MyDataset/ # image classification dataset root
+|--dog/ # all images in this folder belong to class dog
+| |--d1.jpg
+| |--d2.jpg
+| |--...
+| |--...
+|
+|--...
+|
+|--snake/ # all images in this folder belong to class snake
+| |--s1.jpg
+| |--s2.jpg
+| |--...
+| |--...
+```
+
+## Splitting Training and Validation Sets
+
+**For training, prepare the three files `train_list.txt`, `val_list.txt` and `labels.txt` under `MyDataset`**, holding the training list, the validation list and the class labels respectively. [Download the example image classification dataset](https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz)
+
+
+
+**labels.txt**
+
+labels.txt lists all classes; a class's line number is its id during training (counting from 0). For example, with the following labels.txt
+```
+dog
+cat
+snake
+```
+the dataset has 3 classes, `dog`, `cat` and `snake`; during training `dog` has class id 0, `cat` has 1, and so on.
+
+**train_list.txt**
+
+train_list.txt lists the images used for training together with their class ids, e.g.
+```
+dog/d1.jpg 0
+dog/d2.jpg 0
+cat/c1.jpg 1
+... ...
+snake/s1.jpg 2
+```
+where the first column is the path relative to `MyDataset` and the second column is the class id of the image.
+
+**val_list.txt**
+
+val_list.txt lists the images used for validation together with their class ids, in the same format as train_list.txt.
+
+## Loading the Dataset in PaddleX
+Example code:
+```
+import paddlex as pdx
+from paddlex.cls import transforms
+train_transforms = transforms.Compose([
+ transforms.RandomCrop(crop_size=224), transforms.RandomHorizontalFlip(),
+ transforms.Normalize()
+])
+eval_transforms = transforms.Compose([
+ transforms.ResizeByShort(short_size=256),
+ transforms.CenterCrop(crop_size=224), transforms.Normalize()
+])
+train_dataset = pdx.datasets.ImageNet(
+ data_dir='./MyDataset',
+ file_list='./MyDataset/train_list.txt',
+ label_list='./MyDataset/labels.txt',
+ transforms=train_transforms)
+eval_dataset = pdx.datasets.ImageNet(
+ data_dir='./MyDataset',
+    file_list='./MyDataset/val_list.txt',
+ label_list='./MyDataset/labels.txt',
+ transforms=eval_transforms)
+```
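+
+With the datasets defined, training can follow directly. Below is a rough sketch continuing the snippet above; the hyperparameters are illustrative rather than values from this document:
+
+```
+# assumes train_dataset / eval_dataset from the snippet above
+model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels))
+model.train(
+    num_epochs=10,
+    train_dataset=train_dataset,
+    train_batch_size=32,
+    eval_dataset=eval_dataset,
+    learning_rate=0.025,
+    save_dir='output/mobilenetv2')
+```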
diff --git a/docs/data/format/detection.md b/docs/data/format/detection.md
new file mode 100644
index 0000000000000000000000000000000000000000..82c3110043b39e5a0d008f4cd1c9b4a7fe1aa040
--- /dev/null
+++ b/docs/data/format/detection.md
@@ -0,0 +1,86 @@
+# Object Detection: PascalVOC
+
+## Directory Structure
+
+For object detection PaddleX uses the PascalVOC dataset format. We recommend organizing the dataset as follows: all original images in one directory, e.g. `JPEGImages`, and all same-named xml annotation files in another, e.g. `Annotations`:
+```
+MyDataset/ # object detection dataset root
+|--JPEGImages/ # original images
+| |--1.jpg
+| |--2.jpg
+| |--...
+| |--...
+|
+|--Annotations/ # annotation files
+| |--1.xml
+| |--2.xml
+| |--...
+| |--...
+```
+
+## Splitting Training and Validation Sets
+
+**For training, prepare the three files `train_list.txt`, `val_list.txt` and `labels.txt` under `MyDataset`**, holding the training list, the validation list and the class labels respectively. [Download the example object detection dataset](https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz)
+
+
+
+**labels.txt**
+
+labels.txt lists all classes; a class's line number is its id during training (counting from 0). For example, with the following labels.txt
+```
+dog
+cat
+snake
+```
+the dataset has 3 object classes, `dog`, `cat` and `snake`; during training `dog` has class id 0, `cat` has 1, and so on.
+
+**train_list.txt**
+
+train_list.txt lists the images used for training together with their annotation files, e.g.
+```
+JPEGImages/1.jpg Annotations/1.xml
+JPEGImages/2.jpg Annotations/2.xml
+... ...
+```
+where the first column is the image path relative to `MyDataset` and the second column is the annotation file path relative to `MyDataset`.
+
+**val_list.txt**
+
+val_list.txt lists the images used for validation together with their annotation files, in the same format as train_list.txt.
+
+## Loading the Dataset in PaddleX
+Example code:
+```
+import paddlex as pdx
+from paddlex.det import transforms
+
+train_transforms = transforms.Compose([
+ transforms.RandomHorizontalFlip(),
+ transforms.Normalize(),
+ transforms.ResizeByShort(short_size=800, max_size=1333),
+ transforms.Padding(coarsest_stride=32)
+])
+
+eval_transforms = transforms.Compose([
+ transforms.Normalize(),
+ transforms.ResizeByShort(short_size=800, max_size=1333),
+ transforms.Padding(coarsest_stride=32),
+])
+
+train_dataset = pdx.datasets.VOCDetection(
+ data_dir='./MyDataset',
+ file_list='./MyDataset/train_list.txt',
+ label_list='./MyDataset/labels.txt',
+ transforms=train_transforms)
+eval_dataset = pdx.datasets.VOCDetection(
+ data_dir='./MyDataset',
+ file_list='./MyDataset/val_list.txt',
+    label_list='./MyDataset/labels.txt',
+ transforms=eval_transforms)
+
+```
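+
+Before training it can be worth verifying that every image/xml pair listed in train_list.txt exists on disk. A small standalone sketch (not a PaddleX API):
+
+```
+import os
+
+data_dir = './MyDataset'
+with open(os.path.join(data_dir, 'train_list.txt')) as f:
+    for line in f:
+        # each line holds an image path and an xml path, both relative to data_dir
+        for rel_path in line.split():
+            full_path = os.path.join(data_dir, rel_path)
+            assert os.path.exists(full_path), full_path + ' not found'
+```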
diff --git a/docs/data/format/index.rst b/docs/data/format/index.rst
new file mode 100755
index 0000000000000000000000000000000000000000..98157ad2b3450eb10be43dc4e91fa159dbfdd4a4
--- /dev/null
+++ b/docs/data/format/index.rst
@@ -0,0 +1,12 @@
+Data Format Description
+=======================================
+
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Table of Contents:
+
+ classification.md
+ detection.md
+ instance_segmentation.md
+ segmentation.md
diff --git a/docs/data/format/instance_segmentation.md b/docs/data/format/instance_segmentation.md
new file mode 100644
index 0000000000000000000000000000000000000000..c4f4e424e93745b7c5f2be2aed52905c47b8f574
--- /dev/null
+++ b/docs/data/format/instance_segmentation.md
@@ -0,0 +1,57 @@
+# Instance Segmentation: MSCOCO
+
+## Directory Structure
+
+For instance segmentation PaddleX uses the MSCOCO dataset format (MSCOCO can also be used for object detection). We recommend organizing the dataset as follows: all original images in one directory, e.g. JPEGImages, and the annotation file (e.g. annotations.json) next to that directory:
+```
+MyDataset/ # instance segmentation dataset root
+|--JPEGImages/ # original images
+| |--1.jpg
+| |--2.jpg
+| |--...
+| |--...
+|
+|--annotations.json # annotation file
+```
+
+## Splitting Training and Validation Sets
+
+In PaddleX the train/validation split is expressed with separate json files placed under `MyDataset`, e.g. `train.json` and `val.json`. [Download the example instance segmentation dataset](https://bj.bcebos.com/paddlex/datasets/garbage_ins_det.tar.gz).
+
+
+
+MSCOCO annotation files are in json format and can be produced with Labelme, Jingling (精灵标注助手) or EasyData; see [Data Annotation Tools](../annotation.md)
+
+## Loading the Dataset in PaddleX
+Example code:
+```
+import paddlex as pdx
+from paddlex.det import transforms
+
+train_transforms = transforms.Compose([
+ transforms.RandomHorizontalFlip(),
+ transforms.Normalize(),
+ transforms.ResizeByShort(short_size=800, max_size=1333),
+ transforms.Padding(coarsest_stride=32)
+])
+
+eval_transforms = transforms.Compose([
+ transforms.Normalize(),
+ transforms.ResizeByShort(short_size=800, max_size=1333),
+ transforms.Padding(coarsest_stride=32),
+])
+
+train_dataset = pdx.datasets.CocoDetection(
+ data_dir='./MyDataset/JPEGImages',
+ ann_file='./MyDataset/train.json',
+ transforms=train_transforms)
+eval_dataset = pdx.datasets.CocoDetection(
+ data_dir='./MyDataset/JPEGImages',
+ ann_file='./MyDataset/val.json',
+ transforms=eval_transforms)
+```
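+
+Because the MSCOCO annotation is a single json file, its contents are easy to inspect with the standard library alone. A minimal sketch:
+
+```
+import json
+
+with open('./MyDataset/train.json') as f:
+    coco = json.load(f)
+# the top-level keys of a COCO-format file
+print('images:', len(coco['images']))
+print('annotations:', len(coco['annotations']))
+print('categories:', [c['name'] for c in coco['categories']])
+```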
diff --git a/docs/data/format/segmentation.md b/docs/data/format/segmentation.md
new file mode 100644
index 0000000000000000000000000000000000000000..87590befe9b3d2277b135d9a4ddf3f40cc029502
--- /dev/null
+++ b/docs/data/format/segmentation.md
@@ -0,0 +1,87 @@
+# Semantic Segmentation: Seg
+
+## Directory Structure
+
+For semantic segmentation in PaddleX, **annotations are png files**. We recommend organizing the dataset as follows: all original images in one directory, e.g. `JPEGImages`, and all same-named png annotation files in another, e.g. `Annotations`:
+```
+MyDataset/ # semantic segmentation dataset root
+|--JPEGImages/ # original images
+| |--1.jpg
+| |--2.jpg
+| |--...
+| |--...
+|
+|--Annotations/ # annotation files
+| |--1.png
+| |--2.png
+| |--...
+| |--...
+```
+An annotation image such as 1.png is single-channel; pixel values are class ids counted from 0 (0 usually means background), e.g. 0, 1, 2, 3 for 4 classes. Up to 255 classes are supported, and the pixel value 255 is excluded from training and evaluation.
+
+## Splitting Training and Validation Sets
+
+**For training, prepare the three files `train_list.txt`, `val_list.txt` and `labels.txt` under `MyDataset`**, holding the training list, the validation list and the class labels respectively. [Download the example semantic segmentation dataset](https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz)
+
+
+
+**labels.txt**
+
+labels.txt lists all classes; a class's line number is its id during training (counting from 0). For example, with the following labels.txt
+```
+background
+human
+car
+```
+the dataset has 3 segmentation classes, `background`, `human` and `car`; during training `background` has class id 0, `human` has 1, and so on. If the concrete class names are unknown, labels.txt can simply contain 0, 1, 2, ... one per line.
+
+**train_list.txt**
+
+train_list.txt lists the images used for training together with their annotation files, e.g.
+```
+JPEGImages/1.jpg Annotations/1.png
+JPEGImages/2.jpg Annotations/2.png
+... ...
+```
+where the first column is the image path relative to `MyDataset` and the second column is the annotation file path relative to `MyDataset`.
+
+**val_list.txt**
+
+val_list.txt lists the images used for validation together with their annotation files, in the same format as train_list.txt.
+
+## Loading the Dataset in PaddleX
+
+Example code:
+```
+import paddlex as pdx
+from paddlex.seg import transforms
+
+train_transforms = transforms.Compose([
+ transforms.RandomHorizontalFlip(),
+ transforms.ResizeRangeScaling(),
+ transforms.RandomPaddingCrop(crop_size=512),
+ transforms.Normalize()
+])
+
+eval_transforms = transforms.Compose([
+ transforms.ResizeByLong(long_size=512),
+ transforms.Padding(target_size=512),
+ transforms.Normalize()
+])
+
+train_dataset = pdx.datasets.SegDataset(
+ data_dir='./MyDataset',
+ file_list='./MyDataset/train_list.txt',
+ label_list='./MyDataset/labels.txt',
+ transforms=train_transforms)
+eval_dataset = pdx.datasets.SegDataset(
+ data_dir='./MyDataset',
+ file_list='./MyDataset/val_list.txt',
+    label_list='./MyDataset/labels.txt',
+ transforms=eval_transforms)
+```
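+
+The mask rules above (single channel, ids counted from 0, 255 reserved as the ignore value) are easy to verify per file. A minimal sketch, assuming 3 classes as in the labels.txt example:
+
+```
+import numpy as np
+from PIL import Image
+
+num_classes = 3  # from labels.txt
+mask = np.asarray(Image.open('./MyDataset/Annotations/1.png'))
+assert mask.ndim == 2, 'annotation must be single-channel'
+values = np.unique(mask)
+print('label values found:', values)
+# every value must be a valid class id or the ignore value 255
+assert ((values < num_classes) | (values == 255)).all(), 'unexpected label value'
+```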
diff --git a/docs/data/index.rst b/docs/data/index.rst
new file mode 100755
index 0000000000000000000000000000000000000000..419ed81f528fe786dca3e6a0d777c1412859dfb7
--- /dev/null
+++ b/docs/data/index.rst
@@ -0,0 +1,10 @@
+Data Preparation
+=======================================
+
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Table of Contents:
+
+ annotation.md
+ format/index
diff --git a/docs/datasets.md b/docs/datasets.md
deleted file mode 100644
index 060756fd062332c49cc7adcc6958926a555ed895..0000000000000000000000000000000000000000
--- a/docs/datasets.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# Dataset Format Description
-This content has been moved to the [appendix](./appendix/datasets.md)
diff --git a/docs/deploy/export_model.md b/docs/deploy/export_model.md
new file mode 100644
index 0000000000000000000000000000000000000000..2094421aacf49ec35b6e74fd72dba369a88938a4
--- /dev/null
+++ b/docs/deploy/export_model.md
@@ -0,0 +1,29 @@
+# Exporting Models for Deployment
+
+For server-side deployment, models saved during training must be exported to the inference format, which consists of three files: `__model__` (the network structure), `__params__` (the weights) and `model.yml` (the configuration, including preprocessing parameters).
+
+> **Check your model folder**: if it contains the 3 files `model.pdparams`, `model.pdmodel` and `model.yml`, the model still needs to be exported as described below.
+
+After installing PaddleX, run the following command in a terminal to export the model. To walk through this document you can download the xiaoduxiong sorting model [xiaoduxiong_epoch_12.tar.gz](https://bj.bcebos.com/paddlex/models/xiaoduxiong_epoch_12.tar.gz).
+
+```
+paddlex --export_inference --model_dir=./xiaoduxiong_epoch_12 --save_dir=./inference_model
+```
+
+| Parameter | Description |
+| ---- | ---- |
+| --export_inference | Export the model to the inference format for deployment; passing the flag means True |
+| --model_dir | Path of the model to export |
+| --save_dir | Where to store the exported model |
+| --fixed_input_shape | Fix the input size of the exported model; default is None |
+
+
+When predicting with TensorRT, the model's input size must be fixed; specify it as [w,h] via `--fixed_input_shape`.
+
+**Note**:
+- For classification models, keep the fixed input size identical to the input size used in training;
+- When specifying [w,h], separate w and h with a comma only, with no spaces or other characters.
+
+```
+paddlex --export_inference --model_dir=./xiaoduxiong_epoch_12 --save_dir=./inference_model --fixed_input_shape=[640,960]
+```
diff --git a/docs/tutorials/deploy/images/encrypt.png b/docs/deploy/images/encrypt.png
similarity index 100%
rename from docs/tutorials/deploy/images/encrypt.png
rename to docs/deploy/images/encrypt.png
diff --git a/docs/tutorials/deploy/images/encryption_process.png b/docs/deploy/images/encryption_process.png
similarity index 100%
rename from docs/tutorials/deploy/images/encryption_process.png
rename to docs/deploy/images/encryption_process.png
diff --git a/docs/deploy/images/paddlex_android_sdk_framework.jpg b/docs/deploy/images/paddlex_android_sdk_framework.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2e2be7f5d2cd7cf115fd6cdd71371760850d612e
Binary files /dev/null and b/docs/deploy/images/paddlex_android_sdk_framework.jpg differ
diff --git a/docs/tutorials/deploy/images/vs2019_step1.png b/docs/deploy/images/vs2019_step1.png
similarity index 100%
rename from docs/tutorials/deploy/images/vs2019_step1.png
rename to docs/deploy/images/vs2019_step1.png
diff --git a/docs/tutorials/deploy/images/vs2019_step2.png b/docs/deploy/images/vs2019_step2.png
similarity index 100%
rename from docs/tutorials/deploy/images/vs2019_step2.png
rename to docs/deploy/images/vs2019_step2.png
diff --git a/docs/tutorials/deploy/images/vs2019_step3.png b/docs/deploy/images/vs2019_step3.png
similarity index 100%
rename from docs/tutorials/deploy/images/vs2019_step3.png
rename to docs/deploy/images/vs2019_step3.png
diff --git a/docs/tutorials/deploy/images/vs2019_step4.png b/docs/deploy/images/vs2019_step4.png
similarity index 100%
rename from docs/tutorials/deploy/images/vs2019_step4.png
rename to docs/deploy/images/vs2019_step4.png
diff --git a/docs/tutorials/deploy/images/vs2019_step5.png b/docs/deploy/images/vs2019_step5.png
similarity index 100%
rename from docs/tutorials/deploy/images/vs2019_step5.png
rename to docs/deploy/images/vs2019_step5.png
diff --git a/docs/tutorials/deploy/images/vs2019_step6.png b/docs/deploy/images/vs2019_step6.png
similarity index 100%
rename from docs/tutorials/deploy/images/vs2019_step6.png
rename to docs/deploy/images/vs2019_step6.png
diff --git a/docs/tutorials/deploy/images/vs2019_step7.png b/docs/deploy/images/vs2019_step7.png
similarity index 100%
rename from docs/tutorials/deploy/images/vs2019_step7.png
rename to docs/deploy/images/vs2019_step7.png
diff --git a/docs/tutorials/deploy/images/vs2019_step_encryption.png b/docs/deploy/images/vs2019_step_encryption.png
similarity index 100%
rename from docs/tutorials/deploy/images/vs2019_step_encryption.png
rename to docs/deploy/images/vs2019_step_encryption.png
diff --git a/docs/deploy/index.rst b/docs/deploy/index.rst
new file mode 100755
index 0000000000000000000000000000000000000000..13aa36073b9b8385dcfc1a52bcd8be23a18f2e5e
--- /dev/null
+++ b/docs/deploy/index.rst
@@ -0,0 +1,12 @@
+Model Deployment
+=======================================
+
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Table of Contents:
+
+ export_model.md
+ server/index
+ nvidia-jetson.md
+ paddlelite/index
diff --git a/docs/deploy/nvidia-jetson.md b/docs/deploy/nvidia-jetson.md
new file mode 100644
index 0000000000000000000000000000000000000000..7b249a5ec981c318067ee455760898bbb87b040d
--- /dev/null
+++ b/docs/deploy/nvidia-jetson.md
@@ -0,0 +1,145 @@
+# Nvidia Jetson Boards
+
+## Overview
+This document has been tested on `Linux` with `GCC 4.8.5` and `GCC 4.9.4`. To build with a newer G++ you must recompile the Paddle inference library; see [building the inference library for Nvidia Jetson from source](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id12).
+
+## Prerequisites
+* G++ 4.8.2 ~ 4.9.4
+* CUDA 9.0 / CUDA 10.0, cuDNN 7+ (only needed with the GPU inference library)
+* CMake 3.0+
+
+Make sure the software above is installed. **All examples below use `/root/projects/` as the working directory**.
+
+### Step1: Download the code
+
+ `git clone https://github.com/PaddlePaddle/PaddleX.git`
+
+**Note**: the `C++` inference code lives in the `/root/projects/PaddleX/deploy/cpp` directory and does not depend on any other directory of `PaddleX`.
+
+
+### Step2: Download the PaddlePaddle C++ inference library paddle_inference
+
+PaddlePaddle currently provides a C++ inference library for Nvidia Jetson based on version 1.6.2.
+
+| Version | Inference library (v1.6.2) |
+| ---- | ---- |
+| nv-jetson-cuda10-cudnn7.5-trt5 | [paddle_inference](https://paddle-inference-lib.bj.bcebos.com/1.7.1-nv-jetson-cuda10-cudnn7.5-trt5/fluid_inference.tar.gz) |
+
+After downloading and unpacking, `/root/projects/fluid_inference` contains:
+```
+fluid_inference
+├── paddle # core Paddle library and headers
+|
+├── third_party # third-party dependencies and headers
+|
+└── version.txt # version and build info
+```
+
+### Step3: Build
+
+The `cmake` build command lives in `scripts/jetson_build.sh`; adjust the main parameters to your setup. Its key contents are:
+```
+# Whether to use the GPU (i.e. whether to use CUDA)
+WITH_GPU=OFF
+# Use MKL or OpenBLAS
+WITH_MKL=OFF
+# Whether to integrate TensorRT (only effective with WITH_GPU=ON)
+WITH_TENSORRT=OFF
+# TensorRT path; if integrating TensorRT, change to your actual install path
+TENSORRT_DIR=/root/projects/TensorRT/
+# Paddle inference library path; change to your actual install path
+PADDLE_DIR=/root/projects/fluid_inference
+# Whether to link the Paddle inference library statically
+# With TensorRT, the Paddle inference library is usually dynamic
+WITH_STATIC_LIB=OFF
+# CUDA lib path
+CUDA_LIB=/usr/local/cuda/lib64
+# cuDNN lib path
+CUDNN_LIB=/usr/local/cuda/lib64
+
+# Whether to load encrypted models
+WITH_ENCRYPTION=OFF
+
+# OpenCV path; no change needed if using the bundled prebuilt version
+sh $(pwd)/scripts/jetson_bootstrap.sh # download the prebuilt OpenCV
+OPENCV_DIR=$(pwd)/deps/opencv3/
+
+# No changes needed below this line
+rm -rf build
+mkdir -p build
+cd build
+cmake .. \
+ -DWITH_GPU=${WITH_GPU} \
+ -DWITH_MKL=${WITH_MKL} \
+ -DWITH_TENSORRT=${WITH_TENSORRT} \
+ -DWITH_ENCRYPTION=${WITH_ENCRYPTION} \
+ -DTENSORRT_DIR=${TENSORRT_DIR} \
+ -DPADDLE_DIR=${PADDLE_DIR} \
+ -DWITH_STATIC_LIB=${WITH_STATIC_LIB} \
+ -DCUDA_LIB=${CUDA_LIB} \
+ -DCUDNN_LIB=${CUDNN_LIB} \
+ -DENCRYPTION_DIR=${ENCRYPTION_DIR} \
+ -DOPENCV_DIR=${OPENCV_DIR}
+make
+```
+**Note:** on Linux the build automatically downloads OpenCV and YAML. If the build machine has no internet access, download them manually:
+
+- [opencv3_aarch.tgz](https://bj.bcebos.com/paddlex/deploy/tools/opencv3_aarch.tgz)
+- [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip)
+
+Unpack opencv3_aarch.tgz, then set `OPENCV_DIR` in the build script (scripts/jetson_build.sh) to the unpacked path.
+
+yaml-cpp.zip needs no unpacking; in cmake/yaml.cmake, replace the address in `URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` with the path of the downloaded file.
+
+After setting the main parameters, run the `build` script:
+ ```shell
+ sh ./scripts/jetson_build.sh
+ ```
+
+### Step4: Prediction and Visualization
+
+**Before loading a model, check that the model directory contains the three files `model.yml`, `__model__` and `__params__`. If not, export the model to the deployment format first; see [exporting an inference model](export_model.md).**
+
+After a successful build, the prediction demo executables are `build/demo/detector`, `build/demo/classifier` and `build/demo/segmenter`; pick the one matching your model type. Their main command-line parameters:
+
+| Parameter | Description |
+| ---- | ---- |
+| model_dir | Path of the exported inference model |
+| image | Path of the image to predict |
+| image_list | A .txt file with one image path per line |
+| use_gpu | Whether to use the GPU, 0 or 1 (default 0) |
+| use_trt | Whether to use TensorRT, 0 or 1 (default 0) |
+| gpu_id | GPU device id (default 0) |
+| save_dir | Where to store visualized results, default "output"; **classifier has no such parameter** |
+| key | Key generated during encryption; the default "" means an unencrypted model is loaded |
+| batch_size | Prediction batch size, default 1 |
+| thread_num | Number of prediction threads, defaults to the number of CPU cores |
+| use_ir_optim | Whether to enable graph optimization, 0 or 1 (default 1; for image segmentation the default is 0) |
+
+## Examples
+
+You can test with the `inference_model` exported in [exporting models for deployment](export_model.md) and the test images; below the model is exported to /root/projects, so the model path is /root/projects/inference_model.
+
+`Example 1`:
+
+Predict a single image `/root/projects/images/xiaoduxiong.jpeg` without the `GPU`:
+
+```shell
+./build/demo/detector --model_dir=/root/projects/inference_model --image=/root/projects/images/xiaoduxiong.jpeg --save_dir=output
+```
+The `visualized prediction results` are saved under the directory given by `save_dir`.
+
+
+`Example 2`:
+
+Predict multiple images `/root/projects/image_list.txt` with the `GPU`, where image_list.txt has the following format:
+```
+/root/projects/images/xiaoduxiong1.jpeg
+/root/projects/images/xiaoduxiong2.jpeg
+...
+/root/projects/images/xiaoduxiongn.jpeg
+```
+```shell
+./build/demo/detector --model_dir=/root/projects/inference_model --image_list=/root/projects/image_list.txt --use_gpu=1 --save_dir=output --batch_size=2 --thread_num=2
+```
+The `visualized prediction results` are saved under the directory given by `save_dir`.
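+
+A list file like this can be generated with a few lines of Python (the paths are illustrative):
+
+```
+import glob
+
+paths = sorted(glob.glob('/root/projects/images/*.jpeg'))
+with open('/root/projects/image_list.txt', 'w') as f:
+    f.write('\n'.join(paths))
+```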
diff --git a/docs/deploy/openvino/index.rst b/docs/deploy/openvino/index.rst
new file mode 100755
index 0000000000000000000000000000000000000000..3f3ee83919b3c7e7e7c2d03cbd3d451803042a14
--- /dev/null
+++ b/docs/deploy/openvino/index.rst
@@ -0,0 +1,11 @@
+OpenVINO Deployment
+=======================================
+
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Table of Contents:
+
+ windows.md
+ linux.md
+ intel_movidius.md
diff --git a/docs/deploy/openvino/intel_movidius.md b/docs/deploy/openvino/intel_movidius.md
new file mode 100644
index 0000000000000000000000000000000000000000..ec514562b7738abe86654e08af044b22ec450f6c
--- /dev/null
+++ b/docs/deploy/openvino/intel_movidius.md
@@ -0,0 +1 @@
+# Intel Neural Compute Stick
diff --git a/docs/deploy/openvino/linux.md b/docs/deploy/openvino/linux.md
new file mode 100644
index 0000000000000000000000000000000000000000..4825da6366435f098145e42ca7b88fc6d4f84255
--- /dev/null
+++ b/docs/deploy/openvino/linux.md
@@ -0,0 +1 @@
+# Linux Platform
diff --git a/docs/deploy/openvino/windows.md b/docs/deploy/openvino/windows.md
new file mode 100644
index 0000000000000000000000000000000000000000..30cdf17ded910bfda9286e5f700525c9b1bb777b
--- /dev/null
+++ b/docs/deploy/openvino/windows.md
@@ -0,0 +1 @@
+# Windows Platform
diff --git a/docs/deploy/paddlelite/android.md b/docs/deploy/paddlelite/android.md
new file mode 100644
index 0000000000000000000000000000000000000000..88c0e04bfab6de691b311df691dd387b2696607d
--- /dev/null
+++ b/docs/deploy/paddlelite/android.md
@@ -0,0 +1,212 @@
+# Android Platform
+
+PaddleX models are deployed on Android via Paddle Lite. The flow is: export the trained model as an inference model, optimize the model, then run it with the Paddle Lite inference library. For a detailed introduction to Paddle Lite see the [Paddle Lite docs](https://paddle-lite.readthedocs.io/zh/latest/)
+
+> PaddleX --> Inference Model --> Paddle Lite Opt --> Paddle Lite Inference
+
+This document covers:
+- 1. Exporting a PaddleX model as an inference model
+- 2. Optimizing the model with Paddle Lite's OPT module
+- 3. The Android demo built on the PaddleX Android SDK, and how to quickly deploy a trained model with it
+- 4. The PaddleX Android SDK and secondary development
+
+## 1. Export the PaddleX model as an inference model
+
+Export the model to the inference format as described in [exporting an inference model](../export_model.md).
+
+## 2. Optimize the inference model into a Paddle Lite model
+
+There are currently two ways to optimize a Paddle model into a Paddle Lite model:
+
+- 1. A python script: the easiest route, currently supporting the latest Paddle Lite 2.6.1
+- 2. The opt binary (Linux): supports the develop build (commit id: 11cbd50e), for users deploying `DeepLab` and `Unet` models.
+
+### 2.1 Optimizing with the python script
+
+```bash
+pip install paddlelite
+python export_lite.py --model_dir /path/to/inference_model --save_file /path/to/lite_model_name --place place/to/run
+```
+> Download the `export_lite.py` script from github: https://github.com/PaddlePaddle/PaddleX/blob/develop/deploy/lite/export_lite.py
+
+| Parameter | Description |
+| ---- | ---- |
+| --model_dir | Path of the inference model, containing the "\_\_model\_\_", "\_\_params\_\_" and "model.yml" files |
+| --save_file | Output name: with /path/to/lite_model_name the output is written to /path/to/lite_model_name.nb |
+| --place | Target platform, one of: arm\|opencl\|x86\|npu\|xpu\|rknpu\|apu; choose `arm` for Android |
+
+### 2.2 Optimizing with the opt binary (Linux)
+
+First download and unpack the [model optimization tool opt](https://bj.bcebos.com/paddlex/deploy/lite/model_optimize_tool_11cbd50e.tar.gz)
+
+``` bash
+./opt --model_file= \
+ --param_file= \
+ --valid_targets=arm \
+ --optimize_out_type=naive_buffer \
+ --optimize_out=model_output_name
+```
+
+| Parameter | Description |
+| ---- | ---- |
+| --model_file | Path of the network structure file `__model__` inside the exported inference model |
+| --param_file | Path of the parameter file `__params__` inside the exported inference model |
+| --valid_targets | Backend the model runs on; specify `arm` here |
+| --optimize_out_type | Output model type, protobuf or naive_buffer; naive_buffer is the more lightweight serialization, specify `naive_buffer` here |
+| --optimize_out | Name of the output model |
+
+For detailed usage and parameter semantics see: [converting models with opt](https://paddle-lite.readthedocs.io/zh/latest/user_guides/opt/opt_bin.html)
+
+## 3. Mobile (Android) Demo
+
+PaddleX ships an Android demo based on a MobileNetV2 model and the PaddleX Android SDK. It lives in `/PaddleX/deploy/lite/android/demo`, can be imported into Android Studio and run directly, and lets you swap in other detection or segmentation models exported from PaddleX.
+
+### 3.1 Requirements
+
+- Android Studio 3.4
+- An Android phone or dev board
+
+### 3.2 Classification Demo
+
+#### 3.2.1 Import and run the project
+
+- Open Android Studio; in the "Welcome to Android Studio" window click "Open an existing Android Studio project", browse to `/PaddleX/deploy/lite/android/demo` in the path selection window, then click the "Open" button in the lower right corner to import the project;
+- Connect an Android phone or dev board via USB;
+- Once the project is loaded, click Run->Run 'App' in the menu bar, pick the connected Android device in the "Select Deployment Target" window, then click "OK";
+- On success the Android device runs an app named PaddleX Demo. It loads a test image by default and also supports taking a photo or picking one from the gallery for prediction;
+
+**Note**: during the project build the MobileNetV2 model, the yml configuration file, the test image and the PaddleX Android SDK are downloaded remotely.
+
+### 3.3 Deploying a custom model
+
+The demo can also run your own model, which helps to quickly validate a freshly trained one. With the Lite model (.nb file) and the yml configuration file prepared per steps 1~2 (note: pass --place=arm when exporting the Lite model), in Android Studio's project view:
+
+- Copy the .nb file into `/src/main/assets/model/` and set `MODEL_PATH_DEFAULT` in `/src/main/res/values/strings.xml` to its name;
+- Copy the .yml file into `/src/main/assets/config/` and set `YAML_PATH_DEFAULT` in `/src/main/res/values/strings.xml` to its name;
+- Optionally replace the test image: copy it into `/src/main/assets/images/` and set `IMAGE_PATH_DEFAULT` in `/src/main/res/values/strings.xml` to its name;
+- Click Run->Run 'App' in the menu bar, pick the connected Android device in the "Select Deployment Target" window, then click "OK";
+## 4. The PaddleX Android SDK and Secondary Development
+
+The PaddleX Android SDK is an Android AI inference tool that PaddleX builds on top of Paddle Lite. Taking the yaml configuration file exported by PaddleX as its interface, it implements per-model image preprocessing, postprocessing and visualization, and can be integrated by developers into their own apps.
+The SDK consists, bottom up, of the Paddle Lite inference engine layer, the Paddle Lite interface layer and the PaddleX business layer.
+
+- The Paddle Lite inference engine layer is a binary package prebuilt for Android. It only executes kernels and can be deployed on its own for a minimal-footprint deployment.
+- The Paddle Lite interface layer wraps the underlying C++ inference library with a Java interface.
+- The PaddleX business layer wraps preprocessing, inference, postprocessing and visualization for models exported by PaddleX, supporting PaddleX detection, segmentation and classification models.
+
+
+
+### 4.1 Installing the SDK
+
+First download and unpack the [PaddleX Android SDK](https://bj.bcebos.com/paddlex/deploy/lite/paddlex_lite_11cbd50e.tar.gz) to obtain paddlex.aar, copy it into the app/libs/ directory of your Android project, then add the dependency to the app's build.gradle:
+
+```
+dependencies {
+    implementation fileTree(include: ['*.jar', '*.aar'], dir: 'libs')
+}
+
+```
+
+### 4.2 SDK Usage Example
+```
+import com.baidu.paddlex.Predictor;
+import com.baidu.paddlex.config.ConfigParser;
+import com.baidu.paddlex.postprocess.DetResult;
+import com.baidu.paddlex.postprocess.SegResult;
+import com.baidu.paddlex.postprocess.ClsResult;
+import com.baidu.paddlex.visual.Visualize;
+
+// Predictor
+Predictor predictor = new Predictor();
+// model config
+ConfigParser configParser = new ConfigParser();
+// Visualize
+Visualize visualize = new Visualize();
+// image to predict
+Mat predictMat;
+
+// initialize
+configParser.init(context, model_path, yaml_path, cpu_thread_num, cpu_power_mode);
+visualize.init(configParser.getNumClasses());
+predictor.init(context, configParser);
+
+// run model
+if (predictMat != null && predictor.isLoaded()) {
+    predictor.setInputMat(predictMat);
+    predictor.runModel();
+}
+
+// get result & visualize
+if (configParser.getModelType().equalsIgnoreCase("segmenter")) {
+ SegResult segResult = predictor.getSegResult();
+ Mat visualizeMat = visualize.draw(segResult, predictMat, predictor.getImageBlob());
+} else if (configParser.getModelType().equalsIgnoreCase("detector")) {
+ DetResult detResult = predictor.getDetResult();
+ Mat visualizeMat = visualize.draw(detResult, predictMat);
+} else if (configParser.getModelType().equalsIgnoreCase("classifier")) {
+ ClsResult clsResult = predictor.getClsResult();
+}
+```
+### 4.3 Result Fields
+
+**Note**: all Result fields are accessed java-bean style.
+
+```java
+com.baidu.paddlex.postprocess.ClsResult
+```
+
+##### Fields
+> * **type** (String|static): the value "cls".
+> * **categoryId** (int): class id.
+> * **category** (String): class name.
+> * **score** (float): prediction confidence.
+
+```java
+com.baidu.paddlex.postprocess.DetResult
+```
+##### Nested classes
+> * **DetResult.Box** box results predicted by the model.
+
+##### Fields
+> * **type** (String|static): the value "det".
+> * **boxes** (List): box results predicted by the model.
+
+```java
+com.baidu.paddlex.postprocess.DetResult.Box
+```
+##### Fields
+> * **categoryId** (int): class id.
+> * **category** (String): class name.
+> * **score** (float): prediction confidence.
+> * **coordinate** (float[4]): box coordinates {xmin, ymin, xmax, ymax}.
+
+```java
+com.baidu.paddlex.postprocess.SegResult
+```
+##### Nested classes
+> * **SegResult.Mask**: mask results predicted by the model.
+
+##### Fields
+> * **type** (String|static): the value "Seg".
+> * **mask** (SegResult.Mask): mask results predicted by the model.
+
+```java
+com.baidu.paddlex.postprocess.SegResult.Mask
+```
+##### Fields
+> * **scoreData** (float[]): per-class confidences predicted by the model, length 1 * numClass * H * W
+> * **scoreShape** (long[4]): shape of scoreData, [1, numClass, H, W]
+> * **labelData** (long[]): the highest-confidence label predicted per pixel, length 1 * H * W * 1
+> * **labelShape** (long[4]): shape of labelData, [1, H, W, 1]
+
+### 4.4 Secondary development on the SDK
+
+- Open Android Studio and create a new project (or open an existing one). Click File->New->Import Module and import `/PaddleX/deploy/lite/android/sdk`; a module named sdk appears in the Project view
+- Add the dependency to the app's build.gradle:
+  ```
+  dependencies {
+      implementation project(':sdk')
+  }
+  ```
+
+- The sources live under sdk/main/java/. After modifying them for your needs, click Build->Run 'sdk' in the menu bar to build the aar, which is written to sdk/build/outputs/aar/.
diff --git a/docs/deploy/paddlelite/index.rst b/docs/deploy/paddlelite/index.rst
new file mode 100755
index 0000000000000000000000000000000000000000..f5b08811134dd66602db7122ec15f6f8a54353d3
--- /dev/null
+++ b/docs/deploy/paddlelite/index.rst
@@ -0,0 +1,10 @@
+Paddle Lite Mobile Deployment
+=======================================
+
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Table of Contents:
+
+ slim/index
+ android.md
diff --git a/docs/deploy/paddlelite/slim/index.rst b/docs/deploy/paddlelite/slim/index.rst
new file mode 100755
index 0000000000000000000000000000000000000000..ccc8d5080d84c90c2590306c3bb58022d6bbe97b
--- /dev/null
+++ b/docs/deploy/paddlelite/slim/index.rst
@@ -0,0 +1,11 @@
+Model Compression
+=======================================
+
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Table of Contents:
+
+ quant.md
+ prune.md
+ tutorials/index
diff --git a/docs/slim/prune.md b/docs/deploy/paddlelite/slim/prune.md
similarity index 73%
rename from docs/slim/prune.md
rename to docs/deploy/paddlelite/slim/prune.md
index c1ff51e5e08c2ce8da5e2042d0a1c359a9e64dff..acf89ad31df59946f819a6e7eedfac717d914bf1 100644
--- a/docs/slim/prune.md
+++ b/docs/deploy/paddlelite/slim/prune.md
@@ -1,8 +1,10 @@
# Model Pruning
+To better meet edge-deployment demands for low memory bandwidth, low power consumption, low compute usage and small model storage, PaddleX integrates PaddleSlim to implement `model pruning`, which improves Paddle Lite deployment performance on the edge.
+
## How It Works
-Model pruning is a common way to shrink a model and lower its computational complexity, reducing compute cost and size and speeding up prediction after deployment. It works by pruning the output channels of convolution kernels together with the parameters of the associated layers; see the [PaddleSlim docs](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#id16) for how the associated pruning works. **In general, at equal model accuracy, the lower the data complexity, the larger the fraction of the model that can be pruned**.
+Model pruning shrinks a model and lowers its computational complexity by pruning the output channels of convolution kernels together with the parameters of the associated layers, which speeds up prediction after deployment; see the [PaddleSlim docs](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#id16) for how the associated pruning works. **In general, at equal model accuracy, the lower the data complexity, the larger the fraction of the model that can be pruned**.
## Pruning Methods
PaddleX provides two approaches:
@@ -23,10 +25,10 @@ PaddleX提供了两种方式:
> Note: the built-in pruning schemes were derived on these datasets: image classification — ImageNet; object detection — PascalVOC; semantic segmentation — Cityscapes
## Pruning Experiments
-Based on the two approaches above, we ran experiments in PaddleX on sample data; the metrics on a Tesla P40 are as follows,
+Based on the two approaches above, we ran experiments in PaddleX on sample data; the metrics on a Tesla P40 are as follows:
### Image Classification
-Experiment setup: MobileNetV2 on the vegetable classification sample data, see [tutorial - model compression - image classification](../tutorials/compress/classification.md)
+Experiment setup: MobileNetV2 on the vegetable classification sample data; the pruning training code is at [tutorials/compress/classification](https://github.com/PaddlePaddle/PaddleX/tree/develop/tutorials/compress/classification)
| Model | Pruning | Model size | Top-1 accuracy (%) | GPU latency | CPU latency |
| :-----| :--------| :-------- | :---------- |:---------- |:----------|
@@ -35,8 +37,7 @@ PaddleX提供了两种方式:
|MobileNetV2 | Scheme 2 (eval_metric_loss=0.10) | 6.0M | 99.58 | 5.42ms | 29.06ms |
### Object Detection
-Experiment setup: YOLOv3-MobileNetV1 on the insect detection sample data, see [tutorial - model compression - object detection](../tutorials/compress/detection.md)
-
+Experiment setup: YOLOv3-MobileNetV1 on the insect detection sample data; the pruning training code is at [tutorials/compress/detection](https://github.com/PaddlePaddle/PaddleX/tree/develop/tutorials/compress/detection)
| Model | Pruning | Model size | mAP (%) | GPU latency | CPU latency |
| :-----| :--------| :-------- | :---------- |:---------- | :---------|
@@ -45,7 +46,7 @@ PaddleX提供了两种方式:
|YOLOv3-MobileNetV1 | Scheme 2 (eval_metric_loss=0.05) | 29M | 50.27 | 9.43ms | 360.46ms |
### Semantic Segmentation
-Experiment setup: UNet on the optic disc segmentation sample data, see [tutorial - model compression - semantic segmentation](../tutorials/compress/segmentation.md)
+Experiment setup: UNet on the optic disc segmentation sample data; the pruning training code is at [tutorials/compress/segmentation](https://github.com/PaddlePaddle/PaddleX/tree/develop/tutorials/compress/segmentation)
| Model | Pruning | Model size | mIoU (%) | GPU latency | CPU latency |
| :-----| :--------| :-------- | :---------- |:---------- | :---------|
diff --git a/docs/deploy/paddlelite/slim/quant.md b/docs/deploy/paddlelite/slim/quant.md
new file mode 100644
index 0000000000000000000000000000000000000000..cb0558c0d1fbf2e321ca434313349c7f4b5c3c3f
--- /dev/null
+++ b/docs/deploy/paddlelite/slim/quant.md
@@ -0,0 +1,12 @@
+# Model Quantization
+
+To better meet edge-deployment demands for low memory bandwidth, low power consumption, low compute usage and small model storage, PaddleX integrates PaddleSlim to implement `model quantization`, which improves Paddle Lite deployment performance on the edge.
+
+## How It Works
+Fixed-point quantization represents a network's weights and activations with fewer bits (e.g. 8-bit, 3-bit, 2-bit) to speed up model inference. PaddleX provides post-training quantization; see [how post-training quantization works](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#id14). It uses KL divergence to determine the quantization scale factors and converts an FP32 model into an INT8 one without retraining, so a quantized model can be obtained quickly.
+
+## Quantizing a Model with PaddleX
+PaddleX provides the `export_quant_model` interface, which quantizes a model via post_quantization and exports it through a single API call. See the [quantization API docs](../../../apis/slim.html).
+
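+As a rough sketch of that flow (parameter names follow the API doc linked above; check it for the exact signature of your PaddleX version, and note the paths here are illustrative), quantizing a trained classification model could look like:
+
+```
+import paddlex as pdx
+from paddlex.cls import transforms
+
+# the calibration dataset is typically the validation set
+eval_transforms = transforms.Compose([
+    transforms.ResizeByShort(short_size=256),
+    transforms.CenterCrop(crop_size=224), transforms.Normalize()
+])
+dataset = pdx.datasets.ImageNet(
+    data_dir='./MyDataset',
+    file_list='./MyDataset/val_list.txt',
+    label_list='./MyDataset/labels.txt',
+    transforms=eval_transforms)
+
+model = pdx.load_model('./output/best_model')  # path is illustrative
+pdx.slim.export_quant_model(model, dataset, save_dir='./quant_model')
+```
+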
+## Quantization Performance
+For performance metrics of quantized models see the [PaddleSlim model zoo](https://paddlepaddle.github.io/PaddleSlim/model_zoo.html)
diff --git a/docs/deploy/server/cpp/index.rst b/docs/deploy/server/cpp/index.rst
new file mode 100755
index 0000000000000000000000000000000000000000..a65cb3501c8b392362e8c871898fca7360097496
--- /dev/null
+++ b/docs/deploy/server/cpp/index.rst
@@ -0,0 +1,10 @@
+C++ Deployment
+=======================================
+
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Table of Contents:
+
+ windows.md
+ linux.md
diff --git a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md b/docs/deploy/server/cpp/linux.md
old mode 100755
new mode 100644
similarity index 79%
rename from docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md
rename to docs/deploy/server/cpp/linux.md
index b4309ba896f1ae7c0d6f413e537343b608c5fa9f..ee95a24d7726cf764de18ddb3a3a8bb3c80ac8d7
--- a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md
+++ b/docs/deploy/server/cpp/linux.md
@@ -17,18 +17,18 @@
**Note**: the `C++` inference code lives in the `/root/projects/PaddleX/deploy/cpp` directory and does not depend on any other directory of `PaddleX`.
-### Step2: Download the PaddlePaddle C++ inference library fluid_inference
+### Step2: Download the PaddlePaddle C++ inference library paddle_inference
The PaddlePaddle C++ inference library ships prebuilt variants per `CPU`, `CUDA` version and TensorRT support. PaddleX currently depends on Paddle 1.8; several builds of the Paddle inference library are listed below:
| Version | Inference library (v1.8.2) |
| ---- | ---- |
-| ubuntu14.04_cpu_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-mkl/fluid_inference.tgz) |
-| ubuntu14.04_cpu_avx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-openblas/fluid_inference.tgz) |
-| ubuntu14.04_cpu_noavx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-noavx-openblas/fluid_inference.tgz) |
-| ubuntu14.04_cuda9.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda9-cudnn7-avx-mkl/fluid_inference.tgz) |
-| ubuntu14.04_cuda10.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda10-cudnn7-avx-mkl/fluid_inference.tgz ) |
-| ubuntu14.04_cuda10.1_cudnn7.6_avx_mkl_trt6 | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6%2Ffluid_inference.tgz) |
+| ubuntu14.04_cpu_avx_mkl | [paddle_inference](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-mkl/fluid_inference.tgz) |
+| ubuntu14.04_cpu_avx_openblas | [paddle_inference](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-openblas/fluid_inference.tgz) |
+| ubuntu14.04_cpu_noavx_openblas | [paddle_inference](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-noavx-openblas/fluid_inference.tgz) |
+| ubuntu14.04_cuda9.0_cudnn7_avx_mkl | [paddle_inference](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda9-cudnn7-avx-mkl/fluid_inference.tgz) |
+| ubuntu14.04_cuda10.0_cudnn7_avx_mkl | [paddle_inference](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda10-cudnn7-avx-mkl/fluid_inference.tgz ) |
+| ubuntu14.04_cuda10.1_cudnn7.6_avx_mkl_trt6 | [paddle_inference](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6%2Ffluid_inference.tgz) |
For more and newer builds, download as appropriate from the [C++ inference library list](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)
@@ -45,7 +45,7 @@ fluid_inference
**Note:** except for `nv-jetson-cuda10-cudnn7.5-trt5`, all prebuilt packages are compiled with `GCC 4.8.5`; newer `GCC` versions may hit `ABI` compatibility issues, so consider downgrading or [building the inference library yourself](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id12).
-### Step4: 编译
+### Step3: 编译
The `cmake` build command lives in `scripts/build.sh`; adjust the main parameters to your setup. Its key contents are:
```
@@ -112,9 +112,9 @@ yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://
sh ./scripts/build.sh
```
-### Step5: 预测及可视化
+### Step4: 预测及可视化
-**Before loading a model, check that the model directory contains the three files `model.yml`, `__model__` and `__params__`. If not, export the model to the deployment format first; see [exporting an inference model](../deploy_python.html#inference).**
+**Before loading a model, check that the model directory contains the three files `model.yml`, `__model__` and `__params__`. If not, export the model to the deployment format first; see [exporting an inference model](../../export_model.md).**
After a successful build, the prediction demo executables are `build/demo/detector`, `build/demo/classifier` and `build/demo/segmenter`; pick the one matching your model type. Their main command-line parameters:
@@ -130,10 +130,13 @@ yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://
| key | Key generated during encryption; the default "" means an unencrypted model is loaded |
| batch_size | Prediction batch size, default 1 |
| thread_num | Number of prediction threads, defaults to the number of CPU cores |
+| use_ir_optim | Whether to enable graph optimization, 0 or 1 (default 1; for image segmentation the default is 0) |
## Examples
-You can test with the `inference_model` exported from [the xiaoduxiong detection model](../deploy_python.html#inference) and the test images; below the model is exported to /root/projects, so the model path is /root/projects/inference_model.
+You can test with the `inference_model` exported from [the xiaoduxiong detection model](../../export_model.md) and the test images; below the model is exported to /root/projects, so the model path is /root/projects/inference_model.
+
+> A note on prediction speed: because memory/GPU-memory initialization happens at load time, Paddle predicts slowly right after the model is loaded; speed typically stabilizes after the model has run 20~50 times (i.e. after predicting 20~30 images).
`Example 1`:
diff --git a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md b/docs/deploy/server/cpp/windows.md
old mode 100755
new mode 100644
similarity index 82%
rename from docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md
rename to docs/deploy/server/cpp/windows.md
index 48d936fd8a9e75e668b44db08352eebe1c20b338..aab06a9748a806c96b224ad70fce9b571ddbe134
--- a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md
+++ b/docs/deploy/server/cpp/windows.md
@@ -24,17 +24,17 @@ git clone https://github.com/PaddlePaddle/PaddleX.git
**Note**: the `C++` inference code lives in the `PaddleX\deploy\cpp` directory and does not depend on any other directory of `PaddleX`.
-### Step2: Download the PaddlePaddle C++ inference library fluid_inference
+### Step2: Download the PaddlePaddle C++ inference library paddle_inference
The PaddlePaddle C++ inference library ships prebuilt variants by GPU use, TensorRT support and CUDA version. PaddleX currently depends on Paddle 1.8; download links for the Paddle-1.8-based inference libraries are listed below:
| Version | Inference library (v1.8.2) | Compiler | Build tool | cuDNN | CUDA |
| ---- | ---- | ---- | ---- | ---- | ---- |
-| cpu_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
-| cpu_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/open/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
-| cuda9.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
-| cuda9.0_cudnn7_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/open/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
-| cuda10.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/post107/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.5.0 | 9.0 |
+| cpu_avx_mkl | [paddle_inference](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
+| cpu_avx_openblas | [paddle_inference](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/open/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
+| cuda9.0_cudnn7_avx_mkl | [paddle_inference](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
+| cuda9.0_cudnn7_avx_openblas | [paddle_inference](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/open/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
+| cuda10.0_cudnn7_avx_mkl | [paddle_inference](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/post107/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.5.0 | 9.0 |
Download whichever build fits your setup; if none of the above does, pick a matching one from the [C++ inference library list](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/windows_cpp_inference.html).
@@ -101,7 +101,7 @@ yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://
### Step5: Prediction and Visualization
-**Before loading a model, check that the model directory contains the three files `model.yml`, `__model__` and `__params__`. If not, export the model to the deployment format first; see [exporting an inference model](../deploy_python.html#inference).**
+**Before loading a model, check that the model directory contains the three files `model.yml`, `__model__` and `__params__`. If not, export the model to the deployment format first; see [exporting an inference model](../../export_model.md).**
The executables produced by the `Visual Studio 2019` build above are under `out\build\x64-Release`; open `cmd` and switch to that directory:
@@ -123,10 +123,13 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release
| key | Key generated during encryption; the default "" means an unencrypted model is loaded |
| batch_size | Prediction batch size, default 1 |
| thread_num | Number of prediction threads, defaults to the number of CPU cores |
+| use_ir_optim | Whether to enable graph optimization, 0 or 1 (default 1; for image segmentation the default is 0) |
## Examples
-You can test with the `inference_model` exported from [the xiaoduxiong detection model](../deploy_python.md) and the test images, e.g. exported to D:\projects, so the model path is D:\projects\inference_model.
+You can test with the `inference_model` exported from [the xiaoduxiong detection model](../../export_model.md) and the test images, e.g. exported to D:\projects, so the model path is D:\projects\inference_model.
+
+> A note on prediction speed: because memory/GPU-memory initialization happens at load time, Paddle predicts slowly right after the model is loaded; speed typically stabilizes after the model has run 20~50 times (i.e. after predicting 20~30 images).
### Example 1: (predict a single image with an unencrypted model)
diff --git a/docs/tutorials/deploy/deploy_server/encryption.md b/docs/deploy/server/encryption.md
similarity index 83%
rename from docs/tutorials/deploy/deploy_server/encryption.md
rename to docs/deploy/server/encryption.md
index 89eee6b8f1089964834bc0d88d1306f8ac3961ba..fa9c65efae1ef135381d4c9df7696f40aec3e204 100644
--- a/docs/tutorials/deploy/deploy_server/encryption.md
+++ b/docs/deploy/server/encryption.md
@@ -26,9 +26,9 @@ PaddleX提供一个轻量级的模型加密部署方案,通过PaddleX内置的
>
> 2) The project integrates OpenSSL as a static library.
>
-> 3) Implement the AES interface via OpenSSL's EVP API: the algorithm type is specified in EVP, using the symmetric cipher AES in AES-GCM mode with a 256-bit key. An AES-GCM implementation can be wrapped following the official example: https://wiki.openssl.org/index.php/EVP_Authenticated_Encryption_and_Decryption.
+> 3) Implement the AES interface via OpenSSL's EVP API: the algorithm type is specified in EVP, using the symmetric cipher AES in AES-GCM mode with a 256-bit key. An AES-GCM implementation can be wrapped following the official example: [AES-GCM implementation](https://wiki.openssl.org/index.php/EVP_Authenticated_Encryption_and_Decryption).
>
-> 4) Implement the SHA256 digest with OpenSSL (optional, used below). For SHA256 hashing see the OpenSSL example: https://wiki.openssl.org/index.php/EVP_Message_Digests
+> 4) Implement the SHA256 digest with OpenSSL (optional, used below). For SHA256 hashing see the OpenSSL example: [OpenSSL message digest example](https://wiki.openssl.org/index.php/EVP_Message_Digests).
>
> 5) During encryption, the contents of the model and params files are encrypted and written to new files. To make these files identifiable and future-proof, a header is prepended to the ciphertext: a fixed magic number at the start to recognize the file type, a version number for future iterations, and the SHA256 of the encryption key so that decryption can verify the same key is used. An encrypted file thus consists of the header plus the ciphertext.
>
@@ -80,14 +80,14 @@ Windows平台:
.\paddlex-encryption\tool\paddlex_encrypt_tool.exe -model_dir D:\projects\paddlex_inference_model -save_dir D:\projects\paddlex_encrypted_model
```
-`-model_dir` specifies the inference model path (see [exporting an inference model](deploy_python.html#inference) to export one); you can use the `inference_model` from [exporting the xiaoduxiong detection model](deploy_python.html#inference). After encryption the model is saved under the specified `-save_dir`, containing the three files `__model__.encrypted`, `__params__.encrypted` and `model.yml`, and a key is generated. The command output looks like the figure below, with the key `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`
+`-model_dir` specifies the inference model path (see [exporting an inference model](../export_model.md) to export one); you can use the `inference_model` from [exporting the xiaoduxiong detection model](../export_model.md). After encryption the model is saved under the specified `-save_dir`, containing the three files `__model__.encrypted`, `__params__.encrypted` and `model.yml`, and a key is generated. The command output looks like the figure below, with the key `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`

## 2. PaddleX C++ Encrypted Deployment
### 2.1 Linux
-Build the C++ deployment code per the [Linux build guide](deploy_cpp/deploy_cpp_linux.md). After a successful build, the prediction demo executables are `build/demo/detector`, `build/demo/classifier` and `build/demo/segmenter`; pick the one matching your model type. Their main command-line parameters:
+Build the C++ deployment code per the [Linux build guide](cpp/linux.md). After a successful build, the prediction demo executables are `build/demo/detector`, `build/demo/classifier` and `build/demo/segmenter`; pick the one matching your model type. Their main command-line parameters:
| Parameter | Description |
| ---- | ---- |
@@ -101,11 +101,12 @@ Windows平台:
| key | Key generated during encryption; the default "" means an unencrypted model is loaded |
| batch_size | Prediction batch size, default 1 |
| thread_num | Number of prediction threads, defaults to the number of CPU cores |
+| use_ir_optim | Whether to enable graph optimization, 0 or 1 (default 1; for image segmentation the default is 0) |
### Examples
-You can use the test images from [exporting the xiaoduxiong detection model](deploy_python.md#inference) for prediction.
+You can use the test images from [exporting the xiaoduxiong detection model](../export_model.md) for prediction.
#### Example 1:
@@ -132,11 +133,11 @@ Windows平台:
`--key` takes the key printed by the encryption tool, e.g. `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`; the `visualized prediction results` are saved under the directory given by `save_dir`.
### 2.2 Windows
-Follow the [Windows build guide](deploy_cpp/deploy_cpp_win_vs2019.md). Download and unpack the Windows PaddleX encryption toolkit yourself; on top of the guide's build flow, tick WITH_ENCRYPTION in the CMake settings and set ENCRYPTION_DIR to the unpacked toolkit directory, then build. The parameters match the Linux deployment. The prediction demo entry points are paddlex_inference\detector.exe, paddlex_inference\classifier.exe and paddlex_inference\segmenter.exe.
+Follow the [Windows build guide](cpp/windows.md). Download and unpack the Windows PaddleX encryption toolkit yourself; on top of the guide's build flow, tick WITH_ENCRYPTION in the CMake settings and set ENCRYPTION_DIR to the unpacked toolkit directory, then build. The parameters match the Linux deployment. The prediction demo entry points are paddlex_inference\detector.exe, paddlex_inference\classifier.exe and paddlex_inference\segmenter.exe.
### Examples
-You can use the test images from [exporting the xiaoduxiong detection model](deploy_python.md#inference) for prediction.
+You can use the test images from [exporting the xiaoduxiong detection model](../export_model.md) for prediction.
#### Example 1:
diff --git a/docs/deploy/server/index.rst b/docs/deploy/server/index.rst
new file mode 100755
index 0000000000000000000000000000000000000000..f362afa915f85a832ed1aa152d845333f3764f45
--- /dev/null
+++ b/docs/deploy/server/index.rst
@@ -0,0 +1,11 @@
+Server-side Deployment
+=======================================
+
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Table of Contents:
+
+ python.md
+ cpp/index
+ encryption.md
diff --git a/docs/deploy/server/python.md b/docs/deploy/server/python.md
new file mode 100644
index 0000000000000000000000000000000000000000..e30e2f41e40b46dc3c24ec3ea924f2ce66f19095
--- /dev/null
+++ b/docs/deploy/server/python.md
@@ -0,0 +1,59 @@
+# Python Deployment
+
+PaddleX ships an integrated high-performance Python prediction interface. After installing PaddleX, run prediction following the code examples below.
+
+## Export the prediction model
+
+Export the model to the inference format as described in [model export](../export_model.md).
+
+## Prediction
+
+The prediction interface is documented in [paddlex.deploy](../../apis/deploy.md)
+
+Download the test images: [xiaoduxiong_test_image.tar.gz](https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_test_image.tar.gz)
+
+* Single-image prediction
+
+```
+import paddlex as pdx
+predictor = pdx.deploy.Predictor('./inference_model')
+result = predictor.predict(image='xiaoduxiong_test_image/JPEGImages/WeChatIMG110.jpeg')
+```
+
+* Batch prediction
+
+```
+import paddlex as pdx
+predictor = pdx.deploy.Predictor('./inference_model')
+image_list = ['xiaoduxiong_test_image/JPEGImages/WeChatIMG110.jpeg',
+ 'xiaoduxiong_test_image/JPEGImages/WeChatIMG111.jpeg']
+result = predictor.predict(image_list=image_list)
+```
+
+**A note on prediction speed**: the first few images predict slowly because memory/GPU-memory initialization happens at startup; prediction speed typically stabilizes after predicting 20-30 images.
+
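+The warm-up effect is easy to observe by timing successive calls to the predictor shown above. A minimal sketch:
+
+```
+import time
+import paddlex as pdx
+
+predictor = pdx.deploy.Predictor('./inference_model')
+image = 'xiaoduxiong_test_image/JPEGImages/WeChatIMG110.jpeg'
+for i in range(30):
+    start = time.time()
+    predictor.predict(image=image)
+    # the first iterations are noticeably slower than the steady state
+    print('iter %d: %.1f ms' % (i, (time.time() - start) * 1000))
+```
+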
+## Prediction Performance
+### Test environment
+
+- CUDA 9.0
+- cuDNN 7.5
+- PaddlePaddle 1.7.1
+- GPU: Tesla P40
+- AnalysisPredictor is the high-performance Python prediction mode
+- Executor is the plain PaddlePaddle Python prediction mode
+- Batch size is 1 and latency is in ms/image, measuring model execution only (no pre/postprocessing)
+
+### Results
+
+
+| Model | AnalysisPredictor latency | Executor latency | Input size |
+| :---- | :--------------------- | :------------ | :------------ |
+| resnet50 | 4.84 | 7.57 | 224*224 |
+| mobilenet_v2 | 3.27 | 5.76 | 224*224 |
+| unet | 22.51 | 34.60 |513*513 |
+| deeplab_mobile | 63.44 | 358.31 |1025*2049 |
+| yolo_mobilenetv2 | 15.20 | 19.54 | 608*608 |
+| faster_rcnn_r50_fpn_1x | 50.05 | 69.58 |800*1088 |
+| faster_rcnn_r50_1x | 326.11 | 347.22 | 800*1067 |
+| mask_rcnn_r50_fpn_1x | 67.49 | 91.02 | 800*1088 |
+| mask_rcnn_r50_1x | 326.11 | 350.94 | 800*1067 |
diff --git a/docs/tutorials/deploy/upgrade_version.md b/docs/deploy/upgrade_version.md
similarity index 95%
rename from docs/tutorials/deploy/upgrade_version.md
rename to docs/deploy/upgrade_version.md
index 3fbe92026593b0f0deb39f0e5b6cd2baa4b953b2..f4affdaadcf5d5d53a1c9ed241507b611e802352 100644
--- a/docs/tutorials/deploy/upgrade_version.md
+++ b/docs/deploy/upgrade_version.md
@@ -9,6 +9,6 @@
## Version Conversion
```
-paddlex --export_inference --model_dir=/path/to/low_version_model --save_dir=SSpath/to/high_version_model
+paddlex --export_inference --model_dir=/path/to/low_version_model --save_dir=/path/to/high_version_model
```
`--model_dir` is the path of a model with version below 1.0.0 — either a model saved during PaddleX training or one already exported to the inference format; `--save_dir` receives the model converted to the newer version, ready for multi-platform deployment.
diff --git a/docs/examples/human_segmentation.md b/docs/examples/human_segmentation.md
new file mode 100644
index 0000000000000000000000000000000000000000..b4c707709c9ea0304a44daec085ea4fa1ca2678c
--- /dev/null
+++ b/docs/examples/human_segmentation.md
@@ -0,0 +1,305 @@
+# Human Segmentation
+
+This tutorial implements human (portrait) segmentation with PaddleX's core segmentation models. It provides pretrained models and test data, supports human segmentation on video streams, and walks through the whole pipeline from model fine-tuning to Paddle Lite mobile deployment.
+
+## Pretrained Models and Test Data
+
+#### Pretrained models
+
+This case provides two models trained on large-scale human segmentation datasets, covering the server-side and mobile scenarios. They can be used to quickly try video-stream human segmentation, be deployed to mobile devices for real-time segmentation, or serve as the starting point for fine-tuning.
+
+| Model type | Checkpoint Parameter | Inference Model | Quant Inference Model | Notes |
+| --- | --- | --- | ---| --- |
+| HumanSeg-server | [humanseg_server_params](https://bj.bcebos.com/paddlex/examples/human_seg/models/humanseg_server_params.tar) | [humanseg_server_inference](https://bj.bcebos.com/paddlex/examples/human_seg/models/humanseg_server_inference.tar) | -- | High-accuracy model for server-side GPUs and scenes with complex backgrounds; architecture DeepLabv3+/Xception65, input size (512, 512) |
+| HumanSeg-mobile | [humanseg_mobile_params](https://bj.bcebos.com/paddlex/examples/human_seg/models/humanseg_mobile_params.tar) | [humanseg_mobile_inference](https://bj.bcebos.com/paddlex/examples/human_seg/models/humanseg_mobile_inference.tar) | [humanseg_mobile_quant](https://bj.bcebos.com/paddlex/examples/human_seg/models/humanseg_mobile_quant.tar) | Lightweight model for front-camera scenarios on mobile or server-side CPUs; architecture HRNet_w18_small_v1, input size (192, 192) |
+
+> * Checkpoint Parameter holds the model weights for fine-tuning, with the `__params__` parameters and the basic `model.yaml` configuration.
+> * Inference Model and Quant Inference Model are prediction/deployment models, with the `__model__` computation graph, the `__params__` parameters and the basic `model.yaml` configuration.
+> * The Inference Model targets server-side CPU and GPU prediction; the Quant Inference Model is its quantized version, for edge deployment via Paddle Lite.
+
+
+Model sizes and inference times are listed below; the mobile measurements used CPU: Snapdragon 855, RAM: 6GB, image size: 192*192
+
+| Model | Size | Inference time |
+| --- | --- | --- |
+|humanseg_server_inference| 158M | - |
+|humanseg_mobile_inference | 5.8 M | 42.35ms |
+|humanseg_mobile_quant | 1.6M | 24.93ms |
+
+Run the following to download all the pretrained models:
+
+* Get the PaddleX source:
+
+```bash
+git clone https://github.com/PaddlePaddle/PaddleX
+```
+
+* The download code lives in `PaddleX/examples/human_segmentation`; enter that directory:
+
+```bash
+cd PaddleX/examples/human_segmentation
+```
+
+* Run the download:
+
+```bash
+python pretrain_weights/download_pretrain_weights.py
+```
+
+#### Test data
+
+[supervise.ly](https://supervise.ly/) published the human segmentation dataset **Supervisely Persons**. This case randomly samples a small subset of it, converted into a format PaddleX can load directly. The code below downloads that data together with `video_test.mp4`, a portrait test video shot with a phone's front camera.
+
+* The download code lives in `PaddleX/examples/human_segmentation`; enter that directory and run:
+
+```bash
+python data/download_data.py
+```
+
+## Quick Start: Video-Stream Human Segmentation
+
+#### Prerequisites
+
+* PaddlePaddle >= 1.8.0
+* Python >= 3.5
+* PaddleX >= 1.0.0
+
+For installation issues see [PaddleX installation](../../docs/install.md)
+
+* Get the PaddleX source:
+
+```bash
+git clone https://github.com/PaddlePaddle/PaddleX
+```
+
+* The executables for video-stream human segmentation and background replacement live in `PaddleX/examples/human_segmentation`; enter that directory:
+
+```bash
+cd PaddleX/examples/human_segmentation
+```
+
+### Optical-Flow-Assisted Video-Stream Human Segmentation
+
+This case fuses the predictions of the DIS (Dense Inverse Search-based method) optical-flow tracking algorithm with PaddleX's segmentation results to improve video-stream human segmentation. Run the following to try it (the code lives in `PaddleX/examples/human_segmentation`):
+
+* Real-time segmentation from the computer's camera
+
+```bash
+python video_infer.py --model_dir pretrain_weights/humanseg_mobile_inference
+```
+* Segmentation of an offline portrait video
+
+```bash
+python video_infer.py --model_dir pretrain_weights/humanseg_mobile_inference --video_path data/video_test.mp4
+```
+
+Video segmentation results:
+
+
+
+### Human Background Replacement
+
+This case also implements background replacement: the background behind the person is replaced according to the chosen background, which can be an image or a video. The code lives in `PaddleX/examples/human_segmentation`; enter that directory and run:
+
+* Real-time background replacement from the computer's camera; pass the background video via '--background_video_path'
+```bash
+python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --background_image_path data/background.jpg
+```
+
+* Background replacement on a portrait video; pass the background video via '--background_video_path'
+```bash
+python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --video_path data/video_test.mp4 --background_image_path data/background.jpg
+```
+
+* Background replacement on a single image
+```bash
+python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --image_path data/human_image.jpg --background_image_path data/background.jpg
+```
+
+Background replacement results:
+
+
+
+**Note**:
+* Video processing takes several minutes; please be patient.
+* The provided models target portrait-orientation phone footage; landscape footage works slightly worse.
+
+## Model Fine-tuning
+
+#### Prerequisites
+
+* PaddlePaddle >= 1.8.0
+* Python >= 3.5
+* PaddleX >= 1.0.0
+
+For installation issues see [PaddleX installation](../../docs/install.md)
+
+* Get the PaddleX source:
+
+```bash
+git clone https://github.com/PaddlePaddle/PaddleX
+```
+
+* The executables for training, evaluation, prediction, model export and post-training quantization of human segmentation live in `PaddleX/examples/human_segmentation`; enter that directory:
+
+```bash
+cd PaddleX/examples/human_segmentation
+```
+
+### Training
+
+Fine-tune from a pretrained model with the command below; make sure the chosen architecture `model_type` matches the pretrained weights `pretrain_weights`. If you don't need the test data provided here, swap in your own data, pick a suitable model and adjust the training parameters.
+
+```bash
+# select the GPU card (card 0 here)
+export CUDA_VISIBLE_DEVICES=0
+# to run without a GPU, set CUDA_VISIBLE_DEVICES to empty
+# export CUDA_VISIBLE_DEVICES=
+python train.py --model_type HumanSegMobile \
+--save_dir output/ \
+--data_dir data/mini_supervisely \
+--train_list data/mini_supervisely/train.txt \
+--val_list data/mini_supervisely/val.txt \
+--pretrain_weights pretrain_weights/humanseg_mobile_params \
+--batch_size 8 \
+--learning_rate 0.001 \
+--num_epochs 10 \
+--image_shape 192 192
+```
+The parameters are:
+* `--model_type`: model type, HumanSegServer or HumanSegMobile
+* `--save_dir`: where to save the model
+* `--data_dir`: dataset path
+* `--train_list`: training list path
+* `--val_list`: validation list path
+* `--pretrain_weights`: pretrained weights path
+* `--batch_size`: batch size
+* `--learning_rate`: initial learning rate
+* `--num_epochs`: number of training epochs
+* `--image_shape`: network input size (w, h)
+
+For more command-line help run:
+```bash
+python train.py --help
+```
+**Note**: you can quickly try different models by switching `--model_type` and the matching `--pretrain_weights`.
+
+### Evaluation
+
+Evaluate the model's accuracy on the validation set:
+
+```bash
+python eval.py --model_dir output/best_model \
+--data_dir data/mini_supervisely \
+--val_list data/mini_supervisely/val.txt \
+--image_shape 192 192
+```
+The parameters are:
+* `--model_dir`: model path
+* `--data_dir`: dataset path
+* `--val_list`: validation list path
+* `--image_shape`: network input size (w, h)
+
+### Prediction
+
+Predict on the test set with the command below; visualized results are saved under `./output/result/` by default.
+```bash
+python infer.py --model_dir output/best_model \
+--data_dir data/mini_supervisely \
+--test_list data/mini_supervisely/test.txt \
+--save_dir output/result \
+--image_shape 192 192
+```
+The parameters are:
+* `--model_dir`: model path
+* `--data_dir`: dataset path
+* `--test_list`: test list path
+* `--image_shape`: network input size (w, h)
+
+### Model Export
+
+For server-side deployment the model must first be exported to the inference format, producing the three files `__model__`, `__params__` and `model.yml` — the network structure, the weights, and the configuration (including preprocessing parameters). After installing PaddleX, export with:
+
+```bash
+paddlex --export_inference --model_dir output/best_model \
+--save_dir output/export
+```
+The parameters are:
+* `--model_dir`: model path
+* `--save_dir`: where to save the exported model
+
+### Post-training Quantization
+```bash
+python quant_offline.py --model_dir output/best_model \
+--data_dir data/mini_supervisely \
+--quant_list data/mini_supervisely/val.txt \
+--save_dir output/quant_offline \
+--image_shape 192 192
+```
+The parameters are:
+* `--model_dir`: path of the model to quantize
+* `--data_dir`: dataset path
+* `--quant_list`: quantization list path, usually the training or validation list
+* `--save_dir`: where to save the quantized model
+* `--image_shape`: network input size (w, h)
+
+## Paddle Lite Mobile Deployment
+
+This case deploys the human segmentation model on mobile; the flow is shown below, and the generic mobile deployment guide is [Paddle Lite mobile deployment](../../docs/deploy/paddlelite/android.md).
+
+### 1. Export the PaddleX model as an inference model
+
+This case uses the humanseg_mobile_quant pretrained model, which is already an inference model, so no export step is needed. If you are not using the pretrained model, export your own trained model to the inference format via `Model Export` in the previous section.
+
+### 2. Optimize the inference model into a Paddle Lite model
+
+Download and unpack the [model optimization tool opt](https://bj.bcebos.com/paddlex/deploy/lite/model_optimize_tool_11cbd50e.tar.gz), enter the tool's directory and run:
+
+``` bash
+./opt --model_file= \
+      --param_file= \
+      --valid_targets=arm \
+      --optimize_out_type=naive_buffer \
+      --optimize_out=model_output_name
+```
+
+| Parameter | Description |
+| ---- | ---- |
+| --model_file | Path of the network structure file `__model__` inside the exported inference model |
+| --param_file | Path of the parameter file `__params__` inside the exported inference model |
+| --valid_targets | Backend the model runs on; specify `arm` here |
+| --optimize_out_type | Output model type, protobuf or naive_buffer; naive_buffer is the more lightweight serialization, specify `naive_buffer` here |
+| --optimize_out | Name of the output model |
+
+For detailed usage and parameter semantics see: [converting models with opt](https://paddle-lite.readthedocs.io/zh/latest/user_guides/opt/opt_bin.html)
+
+### 3. Mobile prediction
+
+PaddleX provides an Android demo based on the PaddleX Android SDK covering image classification, object detection, instance segmentation and semantic segmentation. It lives in `PaddleX/deploy/lite/android/demo`; copy your model, configuration file and test image into it to run prediction.
+
+#### 3.1 Prerequisites
+
+* Android Studio 3.4
+* An Android phone or dev board
+
+#### 3.2 Copy the model, configuration file and test image
+
+* Copy the Lite model (.nb file) into `PaddleX/deploy/lite/android/demo/app/src/main/assets/model/` and set `MODEL_PATH_DEFAULT` in `PaddleX/deploy/lite/android/demo/app/src/main/res/values/strings.xml` to its name;
+
+* Copy the configuration file (.yml) into `PaddleX/deploy/lite/android/demo/app/src/main/assets/config/` and set `YAML_PATH_DEFAULT` in `PaddleX/deploy/lite/android/demo/app/src/main/res/values/strings.xml` to its name;
+
+* Copy the test image into `PaddleX/deploy/lite/android/demo/app/src/main/assets/images/` and set `IMAGE_PATH_DEFAULT` in `PaddleX/deploy/lite/android/demo/app/src/main/res/values/strings.xml` to its name.
+
+#### 3.3 Import and run the project
+
+* Open Android Studio; in the "Welcome to Android Studio" window click "Open an existing Android Studio project", browse to `PaddleX/deploy/lite/android/demo` in the path selection window, then click the "Open" button in the lower right corner to import the project;
+
+* Connect an Android phone or dev board via USB;
+
+* Once the project builds, click Run->Run 'App' in the menu bar, pick the connected Android device in the "Select Deployment Target" window, then click "OK";
+
+* On success the Android device runs an app named PaddleX Demo. It loads a test image by default and also supports taking a photo or picking one from the gallery for prediction.
+
+Test image and its segmentation result:
+
+
diff --git a/docs/examples/images/MeterReader_Architecture.jpg b/docs/examples/images/MeterReader_Architecture.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f60b597c7431e1b43fb8d3fc7b168caacd13c37e
Binary files /dev/null and b/docs/examples/images/MeterReader_Architecture.jpg differ
diff --git a/docs/examples/images/PaddleX_Panorama.png b/docs/examples/images/PaddleX_Panorama.png
new file mode 100644
index 0000000000000000000000000000000000000000..363630673daa837420450c27f49b690bf0094349
Binary files /dev/null and b/docs/examples/images/PaddleX_Panorama.png differ
diff --git a/docs/examples/images/beauty.png b/docs/examples/images/beauty.png
new file mode 100644
index 0000000000000000000000000000000000000000..21343fa343a33620981041179e0fc8c6a717c598
Binary files /dev/null and b/docs/examples/images/beauty.png differ
diff --git a/docs/images/image_classification.png b/docs/examples/images/image_classification.png
similarity index 100%
rename from docs/images/image_classification.png
rename to docs/examples/images/image_classification.png
diff --git a/docs/images/instance_segmentation.png b/docs/examples/images/instance_segmentation.png
similarity index 100%
rename from docs/images/instance_segmentation.png
rename to docs/examples/images/instance_segmentation.png
diff --git a/docs/images/object_detection.png b/docs/examples/images/object_detection.png
similarity index 100%
rename from docs/images/object_detection.png
rename to docs/examples/images/object_detection.png
diff --git a/docs/images/semantic_segmentation.png b/docs/examples/images/semantic_segmentation.png
similarity index 100%
rename from docs/images/semantic_segmentation.png
rename to docs/examples/images/semantic_segmentation.png
diff --git a/docs/examples/index.rst b/docs/examples/index.rst
new file mode 100755
index 0000000000000000000000000000000000000000..0d660c9af9d0bde92767dd8a422c939abbf1984d
--- /dev/null
+++ b/docs/examples/index.rst
@@ -0,0 +1,14 @@
+Industry Case Collection
+=======================================
+
+PaddleX curates the mature model structures that the PaddlePaddle vision development kits have proven in industrial practice, provides unified, easy-to-use end-to-end APIs and a model deployment SDK, connects the deployment workflow across hardware devices, and opens hands-on tutorials covering everything from model training to secure multi-device deployment.
+
+.. figure:: images/PaddleX_Panorama.png
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Table of Contents:
+
+ solutions.md
+ meter_reader.md
+ human_segmentation.md
diff --git a/docs/examples/meter_reader.md b/docs/examples/meter_reader.md
new file mode 100644
index 0000000000000000000000000000000000000000..6eabe48aa124672ce33caba16f2e93cdb62edc92
--- /dev/null
+++ b/docs/examples/meter_reader.md
@@ -0,0 +1,272 @@
+# Industrial Meter Reading
+
+This case uses PaddleX to detect traditional mechanical pointer meters and read them automatically. It releases the meter data and pretrained models, and provides deployment guides for Windows servers and for Linux Jetson embedded devices.
+
+## Reading pipeline
+
+Meter reading is completed in three steps:
+
+* Step 1: detect the meters in the image with an object detection model
+* Step 2: segment each meter's pointer and scale marks with a semantic segmentation model
+* Step 3: compute each meter's reading from the pointer's relative position and the known measuring range
+
+
+
+* **Meter detection**: since this case contains no very small meters, the better-performing **YOLOv3** is chosen as the detection model. As deployment mainly targets devices with a GPU, the more accurate **DarkNet53** is chosen as the backbone.
+* **Scale and pointer segmentation**: since both scale marks and pointers are thin regions, the more effective **DeepLabv3** is chosen as the segmentation model.
+* **Reading post-processing**: first, the segmentation class map is eroded to separate the scale marks. The circular dial is then unwrapped into a rectangular image, from which a 1-D scale array and a 1-D pointer array are built from the class information. Next, the mean of the scale array is computed and used to binarize it. Finally, the pointer's position relative to the scale is located; the number of scale marks determines the meter type and hence its measuring range, and the reading is the pointer's relative position multiplied by that range (see the sketch below).
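+
+As a hedged illustration of that post-processing, here is a minimal sketch of the final reading computation. Names are illustrative, not those used in `reader_infer.py`, and the measuring range is passed in directly rather than inferred from the mark count:
+
+```python
+import numpy as np
+
+def compute_reading(scale_1d, pointer_1d, meter_range):
+    """scale_1d / pointer_1d: 1-D arrays over the unwrapped (rectangular) dial,
+    positive where the segmentation predicted scale marks / the pointer."""
+    # Binarize the scale array against its mean, as described above.
+    scale_bin = (scale_1d > scale_1d.mean()).astype(np.uint8)
+    # Locate the center of each scale mark (each run of 1s).
+    edges = np.flatnonzero(np.diff(np.concatenate(([0], scale_bin, [0]))))
+    marks = [(start + end - 1) / 2.0 for start, end in zip(edges[::2], edges[1::2])]
+    # Take the pointer position as the center of mass of the pointer response.
+    pointer_pos = np.flatnonzero(pointer_1d).mean()
+    # Relative position between the first and last mark, times the range.
+    relative = (pointer_pos - marks[0]) / (marks[-1] - marks[0])
+    return relative * meter_range
+```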
+
+
+## Meter data and pretrained models
+
+This case releases meter test images for experiencing the full meter-reading inference pipeline, as well as a meter detection dataset and a pointer/scale segmentation dataset that can be used to retrain the models.
+
+| Meter test images | Meter detection dataset | Pointer and scale segmentation dataset |
+| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| [meter_test](https://bj.bcebos.com/paddlex/examples/meter_reader/datasets/meter_test.tar.gz) | [meter_det](https://bj.bcebos.com/paddlex/examples/meter_reader/datasets/meter_det.tar.gz) | [meter_seg](https://bj.bcebos.com/paddlex/examples/meter_reader/datasets/meter_seg.tar.gz) |
+
+This case also releases pretrained detection and semantic segmentation models. They let you try the full meter-reading pipeline right away, and can be deployed directly on a server or a Jetson embedded device for inference.
+
+| Meter detection model | Pointer and scale segmentation model |
+| ------------------------------------------------------------ | ------------------------------------------------------------ |
+| [meter_det_inference_model](https://bj.bcebos.com/paddlex/examples/meter_reader/models/meter_det_inference_model.tar.gz) | [meter_seg_inference_model](https://bj.bcebos.com/paddlex/examples/meter_reader/models/meter_seg_inference_model.tar.gz) |
+
+
+## Quick experience of meter reading
+
+You can use the pretrained models provided in this case to quickly try the full automatic meter-reading pipeline. If you would rather not use the pretrained models, skip to the `Model Training` section to retrain them.
+
+#### Prerequisites
+
+* PaddlePaddle >= 1.8.0
+* Python >= 3.5
+* PaddleX >= 1.0.0
+
+For installation issues, see [PaddleX Installation](../install.md)
+
+#### Testing meter reading
+
+1. Download the PaddleX source code:
+
+```
+git clone https://github.com/PaddlePaddle/PaddleX
+```
+
+2. The prediction script lives in `PaddleX/examples/meter_reader/`; change into that directory:
+
+```
+cd PaddleX/examples/meter_reader/
+```
+
+The prediction script is `reader_infer.py`; its main parameters are:
+
+
+| Parameter | Description |
+| ---- | ---- |
+| detector_dir | Path to the meter detection model |
+| segmenter_dir | Path to the pointer and scale segmentation model |
+| image | Path to the image to predict |
+| image_dir | Path to a folder of images to predict |
+| save_dir | Path for saving visualized results; defaults to "output" |
+| score_threshold | Detection boxes whose score is below this threshold are filtered out; defaults to 0.5 |
+| seg_batch_size | Batch size for segmentation; defaults to 2 |
+| seg_thread_num | Number of threads for segmentation inference; defaults to the number of CPU cores |
+| use_camera | Whether to capture images from a camera; defaults to False |
+| camera_id | Camera device ID; defaults to 0 |
+| use_erode | Whether to refine the segmentation map with image erosion; defaults to False |
+| erode_kernel | Kernel size for the erosion operation; defaults to 4 |
+
+3. Prediction
+
+To use a GPU, specify the GPU card number (card 0 as an example):
+
+```shell
+export CUDA_VISIBLE_DEVICES=0
+```
+To run without a GPU, set CUDA_VISIBLE_DEVICES to empty:
+```shell
+export CUDA_VISIBLE_DEVICES=
+```
+
+* Predict a single image
+
+```shell
+python3 reader_infer.py --detector_dir /path/to/det_inference_model --segmenter_dir /path/to/seg_inference_model --image /path/to/meter_test/20190822_168.jpg --save_dir ./output --use_erode
+```
+
+* Predict multiple images
+
+```shell
+python3 reader_infer.py --detector_dir /path/to/det_inference_model --segmenter_dir /path/to/seg_inference_model --image_dir /path/to/meter_test --save_dir ./output --use_erode
+```
+
+* Predict from a camera
+
+```shell
+python3 reader_infer.py --detector_dir /path/to/det_inference_model --segmenter_dir /path/to/seg_inference_model --save_dir ./output --use_erode --use_camera
+```
+
+## Inference deployment
+
+### Secure server-side deployment on Windows
+
+#### C++ deployment
+
+1. Download the PaddleX source code:
+
+```
+git clone https://github.com/PaddlePaddle/PaddleX
+```
+
+2. Copy the `meter_reader` folder and `CMakeList.txt` from `PaddleX\examples\meter_reader\deploy\cpp` into the `PaddleX\deploy\cpp` directory; before copying, you may want to back up the original `CMakeList.txt` under `PaddleX\deploy\cpp`.
+
+3. Follow Step 2 through Step 4 of [Deployment on Windows](../deploy/server/cpp/windows.md) to compile the C++ prediction code.
+
+4. After a successful build, the executable is in the `out\build\x64-Release` directory; open `cmd` and switch to it:
+
+ ```
+ cd PaddleX\deploy\cpp\out\build\x64-Release
+ ```
+
+ The prediction program is `paddlex_inference\meter_reader.exe`; its main command-line parameters are:
+
+ | Parameter | Description |
+ | ---- | ---- |
+ | det_model_dir | Path to the meter detection model |
+ | seg_model_dir | Path to the pointer and scale segmentation model |
+ | image | Path to the image to predict |
+ | image_list | A .txt file with one image path per line |
+ | use_gpu | Whether to predict on the GPU; 0 or 1 (defaults to 0) |
+ | gpu_id | GPU device ID; defaults to 0 |
+ | save_dir | Path for saving visualized results; defaults to "output" |
+ | det_key | Key generated when encrypting the detection model; the default "" means an unencrypted detection model is loaded |
+ | seg_key | Key generated when encrypting the segmentation model; the default "" means an unencrypted segmentation model is loaded |
+ | seg_batch_size | Batch size for segmentation; defaults to 2 |
+ | thread_num | Number of threads for segmentation inference; defaults to the number of CPU cores |
+ | use_camera | Whether to capture images from a camera; 0 or 1 (defaults to 0) |
+ | camera_id | Camera device ID; defaults to 0 |
+ | use_erode | Whether to denoise the segmentation map with image erosion; 0 or 1 (defaults to 1) |
+ | erode_kernel | Kernel size for the erosion operation; defaults to 4 |
+ | score_threshold | Detection boxes whose score is below this threshold are filtered out; defaults to 0.5 |
+
+5. Inference:
+
+ The model used for deployment must be in the inference format. The pretrained models provided in this case are already in that format; if you retrained the model, export it to the inference format as described in [Export Deployment Model](../deploy/export_model.md).
+
+ * Predict a single image with unencrypted models
+
+ ```shell
+ .\paddlex_inference\meter_reader.exe --det_model_dir=\path\to\det_inference_model --seg_model_dir=\path\to\seg_inference_model --image=\path\to\meter_test\20190822_168.jpg --use_gpu=1 --use_erode=1 --save_dir=output
+ ```
+
+ * Predict an image list with unencrypted models
+
+ ```shell
+ .\paddlex_inference\meter_reader.exe --det_model_dir=\path\to\det_inference_model --seg_model_dir=\path\to\seg_inference_model --image_list=\path\to\meter_test\image_list.txt --use_gpu=1 --use_erode=1 --save_dir=output
+ ```
+
+ * Predict from a camera with unencrypted models
+
+ ```shell
+ .\paddlex_inference\meter_reader.exe --det_model_dir=\path\to\det_inference_model --seg_model_dir=\path\to\seg_inference_model --use_camera=1 --use_gpu=1 --use_erode=1 --save_dir=output
+ ```
+
+ * Predict a single image with encrypted models
+
+ If the models have not been encrypted yet, follow [Encrypting PaddleX Models](../deploy/server/encryption.html#paddlex) to encrypt them. Suppose the encrypted detection model is in `\path\to\encrypted_det_inference_model` with key `yEBLDiBOdlj+5EsNNrABhfDuQGkdcreYcHcncqwdbx0=`, and the encrypted segmentation model is in `\path\to\encrypted_seg_inference_model` with key `DbVS64I9pFRo5XmQ8MNV2kSGsfEr4FKA6OH9OUhRrsY=`
+
+ ```shell
+ .\paddlex_inference\meter_reader.exe --det_model_dir=\path\to\encrypted_det_inference_model --seg_model_dir=\path\to\encrypted_seg_inference_model --image=\path\to\test.jpg --use_gpu=1 --use_erode=1 --save_dir=output --det_key yEBLDiBOdlj+5EsNNrABhfDuQGkdcreYcHcncqwdbx0= --seg_key DbVS64I9pFRo5XmQ8MNV2kSGsfEr4FKA6OH9OUhRrsY=
+ ```
+
+### Secure deployment on Linux Jetson embedded devices
+
+#### C++ deployment
+
+1. Download the PaddleX source code:
+
+```
+git clone https://github.com/PaddlePaddle/PaddleX
+```
+
+2. Copy the `meter_reader` folder and `CMakeList.txt` from `PaddleX/examples/meter_reader/deploy/cpp` into the `PaddleX/deploy/cpp` directory; before copying, you may want to back up the original `CMakeList.txt` under `PaddleX/deploy/cpp`.
+
+3. Follow Step 2 through Step 3 of [Deployment on Nvidia Jetson boards](../deploy/nvidia-jetson.md) to compile the C++ prediction code.
+
+4. After a successful build, the executable is `build/meter_reader/meter_reader`; its main command-line parameters are:
+
+ | Parameter | Description |
+ | ---- | ---- |
+ | det_model_dir | Path to the meter detection model |
+ | seg_model_dir | Path to the pointer and scale segmentation model |
+ | image | Path to the image to predict |
+ | image_list | A .txt file with one image path per line |
+ | use_gpu | Whether to predict on the GPU; 0 or 1 (defaults to 0) |
+ | gpu_id | GPU device ID; defaults to 0 |
+ | save_dir | Path for saving visualized results; defaults to "output" |
+ | det_key | Key generated when encrypting the detection model; the default "" means an unencrypted detection model is loaded |
+ | seg_key | Key generated when encrypting the segmentation model; the default "" means an unencrypted segmentation model is loaded |
+ | seg_batch_size | Batch size for segmentation; defaults to 2 |
+ | thread_num | Number of threads for segmentation inference; defaults to the number of CPU cores |
+ | use_camera | Whether to capture images from a camera; 0 or 1 (defaults to 0) |
+ | camera_id | Camera device ID; defaults to 0 |
+ | use_erode | Whether to refine the segmentation map with image erosion; 0 or 1 (defaults to 1) |
+ | erode_kernel | Kernel size for the erosion operation; defaults to 4 |
+ | score_threshold | Detection boxes whose score is below this threshold are filtered out; defaults to 0.5 |
+
+5. Inference:
+
+ The model used for deployment must be in the inference format. The pretrained models provided in this case are already in that format; if you retrained the model, export it to the inference format as described in [Export Deployment Model](../deploy/export_model.md).
+
+ * Predict a single image with unencrypted models
+
+ ```shell
+ ./build/meter_reader/meter_reader --det_model_dir=/path/to/det_inference_model --seg_model_dir=/path/to/seg_inference_model --image=/path/to/meter_test/20190822_168.jpg --use_gpu=1 --use_erode=1 --save_dir=output
+ ```
+
+ * Predict an image list with unencrypted models
+
+ ```shell
+ ./build/meter_reader/meter_reader --det_model_dir=/path/to/det_inference_model --seg_model_dir=/path/to/seg_inference_model --image_list=/path/to/image_list.txt --use_gpu=1 --use_erode=1 --save_dir=output
+ ```
+
+ * Predict from a camera with unencrypted models
+
+ ```shell
+ ./build/meter_reader/meter_reader --det_model_dir=/path/to/det_inference_model --seg_model_dir=/path/to/seg_inference_model --use_camera=1 --use_gpu=1 --use_erode=1 --save_dir=output
+ ```
+
+ * Predict a single image with encrypted models
+
+ If the models have not been encrypted yet, follow [Encrypting PaddleX Models](../deploy/server/encryption.html#paddlex) to encrypt them. Suppose the encrypted detection model is in `/path/to/encrypted_det_inference_model` with key `yEBLDiBOdlj+5EsNNrABhfDuQGkdcreYcHcncqwdbx0=`, and the encrypted segmentation model is in `/path/to/encrypted_seg_inference_model` with key `DbVS64I9pFRo5XmQ8MNV2kSGsfEr4FKA6OH9OUhRrsY=`
+
+ ```shell
+ ./build/meter_reader/meter_reader --det_model_dir=/path/to/encrypted_det_inference_model --seg_model_dir=/path/to/encrypted_seg_inference_model --image=/path/to/test.jpg --use_gpu=1 --use_erode=1 --save_dir=output --det_key yEBLDiBOdlj+5EsNNrABhfDuQGkdcreYcHcncqwdbx0= --seg_key DbVS64I9pFRo5XmQ8MNV2kSGsfEr4FKA6OH9OUhRrsY=
+ ```
+
+
+## Model Training
+
+
+#### Prerequisites
+
+* PaddlePaddle >= 1.8.0
+* Python >= 3.5
+* PaddleX >= 1.0.0
+
+For installation issues, see [PaddleX Installation](../install.md)
+
+#### Training
+
+* Train the meter detection model
+```
+python3 /path/to/PaddleX/examples/meter_reader/train_detection.py
+```
+* Train the pointer and scale segmentation model
+
+```
+python3 /path/to/PaddleX/examples/meter_reader/train_segmentation.py
+```
+
+Running the scripts above trains this case's detection and segmentation models. If you do not want this case's data or model parameters, swap in your own data, pick a suitable model, and adjust the training parameters; a hedged sketch of such a customized script follows.
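+
+The sketch below follows the PaddleX API patterns used elsewhere in these docs (YOLOv3 with a DarkNet53 backbone, as chosen in the reading pipeline). Dataset paths, epoch counts and learning-rate settings are illustrative assumptions, not the values shipped in `train_detection.py`:
+
+```python
+import paddlex as pdx
+from paddlex.det import transforms
+
+# Standard YOLOv3 preprocessing for training and evaluation.
+train_transforms = transforms.ComposedYOLOv3Transforms(mode='train', shape=[608, 608])
+eval_transforms = transforms.ComposedYOLOv3Transforms(mode='eval', shape=[608, 608])
+
+# Assumed layout of the meter_det dataset (COCO-style annotations).
+train_dataset = pdx.datasets.CocoDetection(
+    data_dir='meter_det/train',
+    ann_file='meter_det/annotations/instance_train.json',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.CocoDetection(
+    data_dir='meter_det/test',
+    ann_file='meter_det/annotations/instance_test.json',
+    transforms=eval_transforms)
+
+# YOLOv3 with the DarkNet53 backbone, as described above.
+model = pdx.det.YOLOv3(num_classes=len(train_dataset.labels), backbone='DarkNet53')
+model.train(
+    num_epochs=270,
+    train_dataset=train_dataset,
+    train_batch_size=8,
+    eval_dataset=eval_dataset,
+    learning_rate=0.000125,
+    lr_decay_epochs=[210, 240],
+    save_dir='output/yolov3_darknet53')
+```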
diff --git a/docs/examples/remote_sensing/index.rst b/docs/examples/remote_sensing/index.rst
new file mode 100755
index 0000000000000000000000000000000000000000..dc375659be121c4bd04843fd281416a4d00ad865
--- /dev/null
+++ b/docs/examples/remote_sensing/index.rst
@@ -0,0 +1,5 @@
+Remote Sensing Segmentation Cases
+=======================================
+
+
+Placeholder: the remote sensing segmentation cases will be written here, and may be split into multiple documents as needed
diff --git a/docs/cv_solutions.md b/docs/examples/solutions.md
old mode 100755
new mode 100644
similarity index 63%
rename from docs/cv_solutions.md
rename to docs/examples/solutions.md
index 4d8482da94423ba5cc4f0695bf3f9669ef5f732a..329d78626b506d2e486d0ed77201f5863e99f40f
--- a/docs/cv_solutions.md
+++ b/docs/examples/solutions.md
@@ -1,10 +1,8 @@
-# Introduction to PaddleX Vision Solutions
+# Introduction to PaddleX Models
-PaddleX provides solutions covering model selection, compression-strategy selection and deployment-option selection for the four vision tasks of image classification, object detection, instance segmentation and semantic segmentation. Users choose a suitable model for their needs, pick a compression strategy to shrink the model's compute and storage footprint and speed up inference, and finally pick a deployment option to deploy the model on mobile devices or servers.
+PaddleX provides a rich set of model algorithms for the four vision tasks of image classification, object detection, instance segmentation and semantic segmentation; users choose a suitable model for the needs of their real-world scenario.
-## Model Selection
-
-### Image Classification
+## Image Classification
Image classification takes an image as input and predicts its category, e.g. recognizing it as scenery, an animal, a car, and so on.

@@ -28,7 +26,7 @@ PaddleX provides a rich set of model algorithms for the four vision tasks
Including the models above, PaddleX supports nearly 20 image classification models; for the rest, see the [PaddleX Model Zoo](../appendix/model_zoo.md)
-### Object Detection
+## Object Detection
Object detection takes an image as input and identifies the position of objects in it (drawing rectangular boxes and giving their locations) along with their categories, e.g. detecting surface defects in quality inspection of phones and other parts.

@@ -63,11 +61,11 @@ PaddleX currently provides the MaskRCNN instance segmentation model, supporting 5 different backbone
| Model | Characteristics | Model Size | GPU Inference Speed | CPU (x86) Inference Speed (ms) | Snapdragon 855 (ARM) Inference Speed (ms) | Box mmAP | Seg mmAP |
| :---- | :------- | :---------- | :---------- | :----- | :----- | :--- |:--- |
-| MaskRCNN-HRNet_W18-FPN | For server-side scenarios that are sensitive to image resolution and demand finer object detail | - | - | - | - | 37.0 | 33.4 |
-| MaskRCNN-ResNet50-FPN | High accuracy, suitable for most server-side scenarios | 185.5M | - | - | - | 37.9 | 34.2 |
-| MaskRCNN-ResNet101_vd-FPN | Higher accuracy but longer inference time; accurate on larger data volumes, for server-side scenarios | 268.6M | - | - | - | 41.4 | 36.8 |
+| MaskRCNN-HRNet_W18-FPN | For server-side scenarios that are sensitive to image resolution and demand finer object detail | 143.9MB | - | - | - | 38.2 | 33.4 |
+| MaskRCNN-ResNet50-FPN | High accuracy, suitable for most server-side scenarios | 177.7M | - | - | - | 38.7 | 34.7 |
+| MaskRCNN-ResNet101_vd-FPN | Higher accuracy but longer inference time; accurate on larger data volumes, for server-side scenarios | 253.7M | - | - | - | 41.4 | 36.8 |
-### Semantic Segmentation
+## Semantic Segmentation
Semantic segmentation performs pixel-level classification of an image, used in scenarios such as portrait segmentation and remote sensing image recognition.

@@ -85,48 +83,3 @@ PaddleX currently provides the MaskRCNN instance segmentation model, supporting 5 different backbone
| FastSCNN | Lightweight and fast, for mobile or server scenarios that need high-speed inference | - | - | - | 69.64 |
| HRNet_W18 | High-accuracy model for server scenarios that are sensitive to image resolution and demand finer object detail | - | - | - | 79.36 |
| DeepLabv3p-Xception65 | Higher accuracy but longer inference time; accurate on larger data volumes, for server scenarios with complex backgrounds | - | - | - | 79.3 |
-
-## Choosing a Compression Strategy
-
-PaddleX provides model pruning and fixed-point quantization strategies to shrink a model's compute and storage footprint and speed up inference after deployment. Accuracy and speed of the different strategies on classification, detection and segmentation models are detailed below; pick the strategy that fits your needs to further optimize model performance.
-
-| Strategy | Characteristics |
-| :---- | :------- |
-| Quantization | Noticeably reduces model storage size; suited to mobile or server-side TensorRT deployment, with clear speedups for MobileNet-series models on mobile |
-| Pruning | Removes redundant parameters, significantly reducing compute and model size and improving inference performance; suited to CPU or mobile deployment (no obvious speedup on GPU) |
-| Pruning then quantization | Can further improve inference performance; suited to mobile or server-side TensorRT deployment |
-
-### Performance Comparison
-
-* Each metric in the table is formatted XXX/YYY, where XXX is the metric without compression and YYY the metric after compression
-* Classification accuracy is Top-1 accuracy on ImageNet-1000 (input size 224x224); detection accuracy is mmAP on COCO2017 (input size 608x608); segmentation accuracy is mIOU on Cityscapes (input size 769x769)
-* For quantization, the Paddle Lite inference environment is Qualcomm Snapdragon 855 + armv8; speed is 4-thread latency
-* For pruning, the Paddle Lite inference environment is Qualcomm Snapdragon 845 + armv8; speed is 4-thread latency
-
-
-| Model | Strategy | Size (MB) | Accuracy (%) | Paddle Lite Latency (ms) |
-| :--: | :------: | :------: | :----: | :----------------: |
-| MobileNetV1 | Quantization | 17/4.4 | 70.99/70.18 | 10.0811/4.2934 |
-| MobileNetV1 | Pruning -30% | 17/12 | 70.99/70.4 | 19.5762/13.6982 |
-| YOLOv3-MobileNetV1 | Quantization | 95/25 | 29.3/27.9 | - |
-| YOLOv3-MobileNetV1 | Pruning -51.77% | 95/25 | 29.3/26 | - |
-| Deeplabv3-MobileNetV2 | Quantization | 7.4/1.8 | 63.26/62.03 | 593.4522/484.0018 |
-| FastSCNN | Pruning -47.60% | 11/5.7 | 69.64/66.68 | 415.664/291.748 |
-
-For before/after compression metrics of more models on different devices, see the [PaddleX Compressed Model Zoo](appendix/slim_model_zoo.md)
-
-For how to use the compression strategies, see [Model Compression](tutorials/compress)
-
-**Note: all image classification and semantic segmentation models in PaddleX support quantization and pruning; among detection models only YOLOv3 supports them.**
-
-## Model Deployment
-
-PaddleX offers five deployment options: server-side Python, server-side C++, server-side encrypted deployment, OpenVINO, and mobile. Pick the option that fits your needs; follow the links below for the concrete workflows.
-
-| Deployment Option | Workflow |
-| :------: | :------: |
-| Server-side Python | [Workflow](tutorials/deploy/deploy_server/deploy_python.html)|
-| Server-side C++ | [Workflow](tutorials/deploy/deploy_server/deploy_cpp/) |
-| Server-side encrypted | [Workflow](tutorials/deploy/deploy_server/encryption.html) |
-| OpenVINO | [Workflow](tutorials/deploy/deploy_openvino.html) |
-| Mobile | [Workflow](tutorials/deploy/deploy_lite.html) |
diff --git a/docs/paddlex_gui/download.md b/docs/gui/download.md
similarity index 99%
rename from docs/paddlex_gui/download.md
rename to docs/gui/download.md
index bf5d2ceaeadfc14612d2d83498796108469ae166..77bb9962b37498ec3279a51cdc1faa34da1f498b 100644
--- a/docs/paddlex_gui/download.md
+++ b/docs/gui/download.md
@@ -25,3 +25,4 @@
* **Disk space**: at least 1 TB of free SSD space is recommended (not required)

***Note: On Windows and Mac OS, PaddleX supports single-GPU mode only. NCCL is not yet supported on Windows.***
+
diff --git a/docs/gui/faq.md b/docs/gui/faq.md
new file mode 100644
index 0000000000000000000000000000000000000000..f90bcbf7dd878ecfcae077cb2cf07bd851ae03b4
--- /dev/null
+++ b/docs/gui/faq.md
@@ -0,0 +1,36 @@
+## FAQ
+
+1. **Why is training so slow?**
+
+ PaddleX does all computation on your local hardware, and deep learning genuinely demands a lot of compute. We support CPU-only setups so you can try PaddleX quickly, but we strongly recommend a GPU for better training speed and a better development experience.
+
+
+
+2. **Can I deploy PaddleX on a server or cloud platform?**
+
+ PaddleX GUI is a client designed for local single-machine installation and cannot be deployed directly on a server. You can use the PaddleX API directly, or the PaddlePaddle core framework, for server-side deployment. If you want public compute resources, we strongly recommend trying [EasyDL](https://ai.baidu.com/easydl/) or [AI Studio](https://aistudio.baidu.com/aistudio/index) from the PaddlePaddle product family.
+
+
+
+3. **Does PaddleX support data annotated with EasyData?**
+
+ Yes, PaddleX reads EasyData-annotated data smoothly. The current PaddleX GUI version cannot yet import the EasyData format directly; please [convert the dataset](https://paddlex.readthedocs.io/zh_CN/latest/appendix/how_to_convert_dataset.html) as documented before importing it into PaddleX GUI for further development.
+ Direct import of the EasyData format into PaddleX GUI is under active development.
+
+
+
+4. **Why does model pruning analysis take so long?**
+
+ Pruning analysis measures the sensitivity of each convolutional layer and prunes each parameter by a ratio matching its impact on model quality. The process is repeated until the FLOPS target is met, and finishes with fine-tuning to produce the final pruned model, hence the long runtime. For the theory behind pruning, see [Introduction to Pruning](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#2-%E5%8D%B7%E7%A7%AF%E6%A0%B8%E5%89%AA%E8%A3%81%E5%8E%9F%E7%90%86)
+
+
+
+5. **How do I call the backend code?**
+
+ The PaddleX team has compiled API reference documentation for you. See the [PaddleX API Reference](https://paddlex.readthedocs.io/zh_CN/latest/apis/index.html)
+
+
+
+**If you have any questions or suggestions, feel free to file an issue or join the official PaddleX QQ group (1045148026) to report your problems and needs directly**
+
+
\ No newline at end of file
diff --git a/docs/gui/how_to_use.md b/docs/gui/how_to_use.md
new file mode 100644
index 0000000000000000000000000000000000000000..0832cd84e6ef47a419186c35a07c4e662ea9e9e2
--- /dev/null
+++ b/docs/gui/how_to_use.md
@@ -0,0 +1,79 @@
+# PaddleX GUI Tutorial
+
+**Step 1: Prepare the data**
+
+Before training, annotate your data in the format required by the task type. PaddleX currently supports four task types: image classification, object detection, semantic segmentation and instance segmentation. See [Data Annotation](https://paddlex.readthedocs.io/zh_CN/latest/appendix/datasets.html) for how each type's data is handled.
+
+
+
+**Step 2: Import my dataset**
+
+① After annotation, rename the data and annotation files as prompted by the client and save them to the correct folders for your task.
+
+② Create a new dataset in the client, choose the task type that matches the dataset, select the dataset's path, and import it.
+
+
+
+③ Once a dataset is selected for import, the client automatically validates the data and annotation files. After validation succeeds, you can split the dataset into training, validation and test sets by ratio as needed.
+
+④ In the "Data Analysis" module you can preview the annotated dataset; double-click a single image to zoom in.
+
+
+
+**Step 3: Create a project**
+
+① After importing the data, click "New Project" to create a project.
+
+② Choose the project's task type according to your actual needs. Note that datasets also carry a task-type attribute, and the two must match.
+
+
+
+
+
+**Step 4: Project development**
+
+① **Data selection**: once the project is created, select a dataset that has been loaded into the client and validated, then click Next to open the parameter configuration page.
+
+
+
+② **Parameter configuration**: split into **model parameters**, **training parameters** and **optimization strategy**. Choose the model structure, backbone, and matching training parameters and optimization strategy for the best task results.
+
+
+
+After configuring the parameters, click Start Training; the model trains and is evaluated.
+
+③ **Training visualization**: during training, you can inspect parameter curves, log details, and the current best training/validation metrics via VisualDL. Training can be stopped at any time by clicking "Stop Training".
+
+
+
+When training finishes, you can go to "Model Pruning Analysis" or straight to "Model Evaluation".
+
+
+
+④ **Model pruning**: to reduce model size and compute and improve on-device inference performance, you can apply PaddleX's pruning strategy. Pruning analyzes the sensitivity of each convolutional layer, prunes each parameter by a ratio matching its impact on model quality, then fine-tunes to produce the final pruned model.
+
+
+
+⑤ **Model evaluation**: on the evaluation page, you can inspect the trained model's quality using methods such as the confusion matrix, precision and recall.
+
+
+
+You can also test the trained model with the test set held out during dataset splitting, or with one or more images imported from a local folder. Based on the results, decide whether to save the trained model as a pretrained model and proceed to the model release page, or go back and retrain with adjusted parameters.
+
+
+
+
+
+**Step 5: Model release**
+
+When you are satisfied with the model, release it in the version your production environment needs.
+
+
+
+
+
+
+
+**If you have any questions or suggestions, feel free to file an issue or join the official PaddleX QQ group (1045148026) to report your problems and needs directly**
+
+
diff --git a/docs/paddlex_gui/images/QR.jpg b/docs/gui/images/QR.jpg
similarity index 100%
rename from docs/paddlex_gui/images/QR.jpg
rename to docs/gui/images/QR.jpg
diff --git a/docs/paddlex_gui/images/ReadMe b/docs/gui/images/ReadMe
similarity index 100%
rename from docs/paddlex_gui/images/ReadMe
rename to docs/gui/images/ReadMe
diff --git a/docs/paddlex_gui/images/dataset2.jpg b/docs/gui/images/dataset2.jpg
similarity index 100%
rename from docs/paddlex_gui/images/dataset2.jpg
rename to docs/gui/images/dataset2.jpg
diff --git a/docs/paddlex_gui/images/datasets1.jpg b/docs/gui/images/datasets1.jpg
similarity index 100%
rename from docs/paddlex_gui/images/datasets1.jpg
rename to docs/gui/images/datasets1.jpg
diff --git a/docs/images/paddlex.png b/docs/gui/images/paddlex.png
similarity index 100%
rename from docs/images/paddlex.png
rename to docs/gui/images/paddlex.png
diff --git a/docs/paddlex_gui/images/project1.jpg b/docs/gui/images/project1.jpg
similarity index 100%
rename from docs/paddlex_gui/images/project1.jpg
rename to docs/gui/images/project1.jpg
diff --git a/docs/paddlex_gui/images/project2.jpg b/docs/gui/images/project2.jpg
similarity index 100%
rename from docs/paddlex_gui/images/project2.jpg
rename to docs/gui/images/project2.jpg
diff --git a/docs/paddlex_gui/images/project3.jpg b/docs/gui/images/project3.jpg
similarity index 100%
rename from docs/paddlex_gui/images/project3.jpg
rename to docs/gui/images/project3.jpg
diff --git a/docs/paddlex_gui/images/publish.jpg b/docs/gui/images/publish.jpg
similarity index 100%
rename from docs/paddlex_gui/images/publish.jpg
rename to docs/gui/images/publish.jpg
diff --git a/docs/paddlex_gui/images/visualization1.jpg b/docs/gui/images/visualization1.jpg
similarity index 100%
rename from docs/paddlex_gui/images/visualization1.jpg
rename to docs/gui/images/visualization1.jpg
diff --git a/docs/paddlex_gui/images/visualization2.jpg b/docs/gui/images/visualization2.jpg
similarity index 100%
rename from docs/paddlex_gui/images/visualization2.jpg
rename to docs/gui/images/visualization2.jpg
diff --git a/docs/paddlex_gui/images/visualization3.jpg b/docs/gui/images/visualization3.jpg
similarity index 100%
rename from docs/paddlex_gui/images/visualization3.jpg
rename to docs/gui/images/visualization3.jpg
diff --git a/docs/paddlex_gui/images/visualization4.jpg b/docs/gui/images/visualization4.jpg
similarity index 100%
rename from docs/paddlex_gui/images/visualization4.jpg
rename to docs/gui/images/visualization4.jpg
diff --git a/docs/paddlex_gui/images/visualization5.jpg b/docs/gui/images/visualization5.jpg
similarity index 100%
rename from docs/paddlex_gui/images/visualization5.jpg
rename to docs/gui/images/visualization5.jpg
diff --git a/docs/gui/index.rst b/docs/gui/index.rst
new file mode 100755
index 0000000000000000000000000000000000000000..a42ac0441fcb324ee2338367b854462c707c9d59
--- /dev/null
+++ b/docs/gui/index.rst
@@ -0,0 +1,51 @@
+PaddleX GUI
+=======================================
+
+PaddleX GUI is a visual development client built on PaddleX. Developers can experience the whole deep-learning model development workflow by pointing, clicking and typing. It serves both as a tool to boost your model-development efficiency and as a reference example for building your own industry software/applications on the PaddleX API.
+
+PaddleX GUI currently offers one-click, self-contained installers for Windows, Mac and Ubuntu. Download the version you need from the [PaddlePaddle website](https://www.paddlepaddle.org.cn/).
+
+Features
+---------------------------------------
+PaddleX GUI is derived from the PaddleX API. On top of the API's functionality it adds visual analysis, evaluation and other extras, aiming for the smoothest possible development experience. Its distinctive features:
+
+- **End-to-end workflow**
+PaddleX GUI covers the whole model-development workflow of **data processing**, **hyperparameter configuration**, **model training and optimization** and **model release**; you get a high-performance deep-learning inference model without writing a single line of code.
+
+- **Smart dataset analysis**
+Detailed data-structure descriptions plus **automatic label validation**. Practical features include **visual data preview**, **data distribution charts** and **one-click dataset splitting**
+
+- **Automatic hyperparameter suggestions**
+Drawing on the Paddle team's long industrial practice, offers multiple purpose-tuned **pretrained models** for the chosen model type and backbone, **suggests hyperparameter configurations**, and lets you **enable several optimization strategies with one click**
+
+- **Visual model evaluation**
+Integrates the **visual analysis tool VisualDL** to chart how acc, lr and other key values evolve during training. Provides practical methods such as the **confusion matrix** to localize problems quickly and speed up tuning. Evaluation reports can be exported with one click for project retrospectives.
+
+- **Model pruning and quantization**
+One-click pruning and quantization provide optimization strategies at different stages to meet the performance demands of different environments.
+
+- **Pretrained model management**
+Historical trained models can be saved and managed; unpruned models can be saved as pretrained models for later tasks.
+
+- **Visual model testing**
+Prediction results are shown directly in the client, so quality can be assessed without going live
+
+- **Multi-device model deployment**
+Pick the release platform and format with a few clicks, export the inference model in one step, and get matching deployment documentation to carry industrial projects end to end
+
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Table of Contents:
+
+ download.md
+ how_to_use.md
+ faq.md
+
+
+* PaddleX GUI version: v1.0
+* Website: http://www.paddlepaddle.org.cn/paddle/paddlex
+* GitHub: https://github.com/PaddlePaddle/PaddleX/tree/develop
+* Official QQ user group: 1045148026
+* GitHub Issues: http://www.github.com/PaddlePaddle/PaddleX/issues
+
diff --git a/docs/images/00_loaddata.png b/docs/images/00_loaddata.png
deleted file mode 100755
index d837e655b323943d1c094651d959c727e3b97145..0000000000000000000000000000000000000000
Binary files a/docs/images/00_loaddata.png and /dev/null differ
diff --git a/docs/images/01_datasplit.png b/docs/images/01_datasplit.png
deleted file mode 100755
index 6e415e7d81c2021b8e7f842d5a5a9b6f79c83b08..0000000000000000000000000000000000000000
Binary files a/docs/images/01_datasplit.png and /dev/null differ
diff --git a/docs/images/02_newproject.png b/docs/images/02_newproject.png
deleted file mode 100755
index adf65d113eb7f6d644a5aedbd051856a0f9f3f28..0000000000000000000000000000000000000000
Binary files a/docs/images/02_newproject.png and /dev/null differ
diff --git a/docs/images/03_choosedata.png b/docs/images/03_choosedata.png
deleted file mode 100755
index d9b0c83ec75978328e1f995b1d6f56a5ee4b5052..0000000000000000000000000000000000000000
Binary files a/docs/images/03_choosedata.png and /dev/null differ
diff --git a/docs/images/04_parameter.png b/docs/images/04_parameter.png
deleted file mode 100755
index 398c74c1fc3a00eb8ac1ceb7d811887584fcbbbe..0000000000000000000000000000000000000000
Binary files a/docs/images/04_parameter.png and /dev/null differ
diff --git a/docs/images/05_train.png b/docs/images/05_train.png
deleted file mode 100755
index a299238432098648259d622fb4d6017790478ea8..0000000000000000000000000000000000000000
Binary files a/docs/images/05_train.png and /dev/null differ
diff --git a/docs/images/06_VisualDL.png b/docs/images/06_VisualDL.png
deleted file mode 100755
index 3e9642f07809b85fe1652f81916ce5f3928e1c92..0000000000000000000000000000000000000000
Binary files a/docs/images/06_VisualDL.png and /dev/null differ
diff --git a/docs/images/07_evaluate.png b/docs/images/07_evaluate.png
deleted file mode 100755
index b663009afc974f87101825496ec4b0beac067953..0000000000000000000000000000000000000000
Binary files a/docs/images/07_evaluate.png and /dev/null differ
diff --git a/docs/images/08_deploy.png b/docs/images/08_deploy.png
deleted file mode 100755
index f0f6cbfedef8a58d6f7cc0e4105ca32bb8002031..0000000000000000000000000000000000000000
Binary files a/docs/images/08_deploy.png and /dev/null differ
diff --git a/docs/images/PaddleX-Pipe-Line.png b/docs/images/PaddleX-Pipe-Line.png
deleted file mode 100755
index 7831d256a7159d465a8cfd4977430639b30b9829..0000000000000000000000000000000000000000
Binary files a/docs/images/PaddleX-Pipe-Line.png and /dev/null differ
diff --git a/docs/images/QQGroup.jpeg b/docs/images/QQGroup.jpeg
deleted file mode 100755
index de6fa4fd70aee1631cc99e6fd1414287723ccdb2..0000000000000000000000000000000000000000
Binary files a/docs/images/QQGroup.jpeg and /dev/null differ
diff --git a/docs/images/garbage.bmp b/docs/images/garbage.bmp
deleted file mode 100755
index ba652f41584ed43abf0431208c6de597a4cacd7b..0000000000000000000000000000000000000000
Binary files a/docs/images/garbage.bmp and /dev/null differ
diff --git a/docs/images/mask_eval.png b/docs/images/mask_eval.png
deleted file mode 100755
index 6365f081de680dec735c3c30e03ab468e88315a7..0000000000000000000000000000000000000000
Binary files a/docs/images/mask_eval.png and /dev/null differ
diff --git a/docs/images/normlime.png b/docs/images/normlime.png
deleted file mode 100644
index dd9a2f8f96a3ade26179010f340c7c5185bf0656..0000000000000000000000000000000000000000
Binary files a/docs/images/normlime.png and /dev/null differ
diff --git a/docs/images/paddlex.jpg b/docs/images/paddlex.jpg
deleted file mode 100755
index 421e9e1155fb347a2e57f5e4a21f0a4e1ddbd21c..0000000000000000000000000000000000000000
Binary files a/docs/images/paddlex.jpg and /dev/null differ
diff --git a/docs/images/vdl1.jpg b/docs/images/vdl1.jpg
deleted file mode 100644
index 5b0c90d28bc9bda583008fe2fb9729a7c3e06df6..0000000000000000000000000000000000000000
Binary files a/docs/images/vdl1.jpg and /dev/null differ
diff --git a/docs/images/vdl2.jpg b/docs/images/vdl2.jpg
deleted file mode 100644
index 502a5f861104e2b20869b06cf8eb215ec58f0435..0000000000000000000000000000000000000000
Binary files a/docs/images/vdl2.jpg and /dev/null differ
diff --git a/docs/images/vdl3.jpg b/docs/images/vdl3.jpg
deleted file mode 100644
index a16d6924d8867949ecae258ee588296845c6da86..0000000000000000000000000000000000000000
Binary files a/docs/images/vdl3.jpg and /dev/null differ
diff --git a/docs/images/visualized_deeplab.jpg b/docs/images/visualized_deeplab.jpg
deleted file mode 100755
index b417728e3385f6eb83885f388c988e2893966e42..0000000000000000000000000000000000000000
Binary files a/docs/images/visualized_deeplab.jpg and /dev/null differ
diff --git a/docs/images/visualized_fasterrcnn.jpg b/docs/images/visualized_fasterrcnn.jpg
deleted file mode 100755
index fe1ca0ed6346ef3e62206527ce23577554596e4c..0000000000000000000000000000000000000000
Binary files a/docs/images/visualized_fasterrcnn.jpg and /dev/null differ
diff --git a/docs/images/visualized_maskrcnn.jpeg b/docs/images/visualized_maskrcnn.jpeg
deleted file mode 100755
index dead77b758aaa5e3755847c4af295be8b1d774be..0000000000000000000000000000000000000000
Binary files a/docs/images/visualized_maskrcnn.jpeg and /dev/null differ
diff --git a/docs/index.rst b/docs/index.rst
index 1d8d9c0c124dd0015e7b90634fcb1b1551db87f5..61220dbc9c2fb4046c91187b6f19d0e88d5c1951 100755
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -3,37 +3,23 @@
PaddleX is an end-to-end deep-learning development tool built on the PaddlePaddle core framework, development kits and tool components. It has three defining traits: **end-to-end workflow**, **distilled industrial practice**, and **easy to use and integrate**.
-End-to-end workflow
- | - **Data preparation**: supports the `EasyData Smart Data Service Platform data protocol `_ , enabling smart annotation and low-quality data cleaning on the platform; also compatible with mainstream annotation-tool protocols to help developers finish data preparation faster.
- | - **Model training**: integrates the `PaddleClas `_ , `PaddleDetection `_ and `PaddleSeg `_ vision development kits on the PaddlePaddle core framework, with a wealth of high-quality pretrained models for faster industrial-grade training.
- | - **Model tuning**: a built-in model interpretability module and the `VisualDL `_ visual analysis component provide rich information for understanding and optimizing models.
- | - **Secure multi-device deployment**: the built-in `PaddleSlim `_ compression tools and AES model-encryption SDK, combined with Paddle Inference and `Paddle Lite `_ , make high-performance, secure multi-device deployment easy.
-
-Distilled industrial practice
- | - Curates mature model structures from PaddlePaddle's industrial practice and opens hands-on case tutorials to speed developers' industrial adoption.
+* Website: http://www.paddlepaddle.org.cn/paddle/paddlex
+* GitHub: https://github.com/PaddlePaddle/PaddleX
+* Official QQ user group: 1045148026
+* GitHub Issues: http://www.github.com/PaddlePaddle/PaddleX/issues
-Easy to use and integrate
- | - Unified, easy-to-use end-to-end APIs: train a model in 5 steps, and achieve high-performance Python/C++ deployment with 10 lines of code.
- | - Offers PaddleX-GUI, a cross-platform visual development tool built around PaddleX, for a lower-barrier, fast taste of the full PaddlePaddle deep-learning workflow.
.. toctree::
:maxdepth: 2
-   :caption: Table of Contents:
+   :caption: PaddleX Documentation
quick_start.md
install.md
- tutorials/index.rst
- cv_solutions.md
- apis/index.rst
- paddlex_gui/index.rst
- tuning_strategy/index.rst
- update.md
- FAQ.md
- appendix/index.rst
-
-* PaddleX version: v1.0.0
-* Website: http://www.paddlepaddle.org.cn/paddle/paddlex
-* GitHub: https://github.com/PaddlePaddle/PaddleX
-* Official QQ user group: 1045148026
-* GitHub Issues: http://www.github.com/PaddlePaddle/PaddleX/issues
-
+ data/index
+ train/index
+ deploy/index
+ examples/index
+ gui/index
+ apis/index
+ change_log.md
+ appendix/index
diff --git a/docs/install.md b/docs/install.md
index cdbc84cab3dc825e69e325348b3c528d83bcd225..6bd4000ac5ff37d48f1e3228e852ed7c27ced8d0 100755
--- a/docs/install.md
+++ b/docs/install.md
@@ -1,19 +1,21 @@
# Quick Installation
-The installation below assumes **paddlepaddle-gpu or paddlepaddle (version >= 1.7.1)** is already installed; see the [PaddlePaddle website](https://www.paddlepaddle.org.cn/install/quick) for how to install it
-
-> The Anaconda Python environment is recommended; for installing PaddleX under Anaconda see [Installing with Anaconda](../appendix/anaconda_install.md)
+The installation below assumes **paddlepaddle-gpu or paddlepaddle (version >= 1.8.1)** is already installed; see the [PaddlePaddle website](https://www.paddlepaddle.org.cn/install/quick) for how to install it
## pip Installation
-> Note that installing pycocotools on Windows is special; see the Windows commands below
+Note that installing pycocotools on Windows is special; see the Windows commands below
```
pip install paddlex -i https://mirror.baidu.com/pypi/simple
```
+## Anaconda Installation
+Anaconda is an open-source Python distribution containing conda, Python and more than 180 scientific packages with their dependencies. With Anaconda you can create multiple isolated Python environments and avoid the conflicts caused by installing many differently versioned dependencies into one environment.
+- See [Installing PaddleX with Anaconda](appendix/anaconda_install.md)
+
+## Installation from Source
-## Installation from GitHub Code
The GitHub code is continuously updated as development progresses
```
@@ -24,18 +26,20 @@ python setup.py install
```
-## Installation Issues
-### 1. pycocotools installation issues
-> PaddleX depends on the pycocotools package; if installing pycocotools fails, install it as follows
+## pycocotools Installation Issues
+
+PaddleX depends on the pycocotools package; if installing pycocotools fails, install it as follows
+
+* On Windows the installer may report `Microsoft Visual C++ 14.0 is required`, causing the installation to fail; [download the VC build tools](https://go.microsoft.com/fwlink/?LinkId=691126), install them, then run the pip commands below
+> Note: after the installation, open a new terminal window
-**Windows**
-> On Windows the installer may report that `Microsoft Visual C++ 2015 build tools` is missing; [download it here](https://go.microsoft.com/fwlink/?LinkId=691126), install it, then run the pip commands below
```
pip install cython
pip install git+https://gitee.com/jiangjiajun/philferriere-cocoapi.git#subdirectory=PythonAPI
```
-**Linux/Mac installation**
+* On Linux/Mac, simply install the following two dependencies with pip
+
```
pip install cython
pip install pycocotools
diff --git a/docs/metrics.md b/docs/metrics.md
deleted file mode 100644
index 7834a43a00028935ccda5bdb89db5c15e886240c..0000000000000000000000000000000000000000
--- a/docs/metrics.md
+++ /dev/null
@@ -1 +0,0 @@
-This page has moved [here](./appendix/metrics.md)
diff --git a/docs/model_zoo.md b/docs/model_zoo.md
deleted file mode 100644
index e0e5c51b5922df35a89a69a1103d7f54edb73afd..0000000000000000000000000000000000000000
--- a/docs/model_zoo.md
+++ /dev/null
@@ -1 +0,0 @@
-This page has moved [here](./appendix/model_zoo.md)
diff --git a/docs/paddlex.png b/docs/paddlex.png
new file mode 100644
index 0000000000000000000000000000000000000000..bc1135abfab7aa48f29392da4bca614f688314af
Binary files /dev/null and b/docs/paddlex.png differ
diff --git a/docs/paddlex_gui/how_to_use.md b/docs/paddlex_gui/how_to_use.md
deleted file mode 100644
index db5e9b1f58b3012e1104a7dfe8ff63394ecf3eee..0000000000000000000000000000000000000000
--- a/docs/paddlex_gui/how_to_use.md
+++ /dev/null
@@ -1,148 +0,0 @@
-# PaddleX GUI User Guide
-
-An end-to-end PaddlePaddle development tool that bundles everything the deep-learning workflow needs, from the core framework to the model zoo, tools and components; easy to use and integrate, it is the best companion for getting started with deep learning quickly and developing projects more efficiently.
-
-PaddleX GUI is a graphical development client built on PaddleX. It lets developers complete the whole deep-learning model workflow through typed input, greatly improving project development efficiency. The PaddlePaddle team looks forward to developers building products that meet their own industrial needs on top of PaddleX.
-
-We sincerely invite you to download and try PaddleX GUI from the [official website](https://www.paddlepaddle.org.cn/paddlex) and to share your valuable feedback or open-source contributions.
-
-
-
-## Contents
-
-* **Product features**
-* **The PaddleX GUI visual front end**
-* **FAQ**
-
-
-
-## Product Features
-
-\- **End-to-end workflow**
-
-Connects the whole deep-learning development workflow behind a visual interface, removing the need to learn each stage's APIs or write repetitive code, and greatly improving development efficiency.
-
-\- **Easy to use and integrate**
-
-Provides the most complete and flexible Python API development mode, fully open source and easy to integrate and extend. The typed-input graphical interface lets non-specialist algorithm users run business PoCs quickly.
-
-\- **Distilled industrial practice**
-
-Distills PaddlePaddle's industrial deployment experience, curates high-quality vision model solutions, and opens real case tutorials that walk you through landing industrial requirements.
-
-\- **Tutorials and services**
-
-From dataset preparation to deployment, offers documentation and technical services for the whole development workflow. Developers can reach the PaddlePaddle team and industry partners through QQ groups, WeChat groups and the GitHub community.
-
-
-
-## The PaddleX GUI Visual Front End
-
-**Step 1: Prepare the data**
-
-Before training, annotate your data in the format required by the task type. PaddleX currently supports four task types: image classification, object detection, semantic segmentation and instance segmentation. See [Data Annotation](https://paddlex.readthedocs.io/zh_CN/latest/appendix/datasets.html) for how each type's data is handled.
-
-
-
-**Step 2: Import my dataset**
-
-① After annotation, rename the data and annotation files as prompted by the client and save them to the correct folders for your task.
-
-② Create a new dataset in the client, choose the task type that matches the dataset, select the dataset's path, and import it.
-
-
-
-③ Once a dataset is selected for import, the client automatically validates the data and annotation files. After validation succeeds, split the dataset into training, validation and test sets by ratio as needed.
-
-④ In the "Data Analysis" module you can preview the annotated dataset; double-click an image to zoom in.
-
-
-
-**Step 3: Create a project**
-
-① After importing the data, click "New Project" to create a project.
-
-② Choose the project's task type according to your actual needs; note that datasets also carry a task-type attribute, and the two must match.
-
-
-
-
-
-**Step 4: Project development**
-
-① **Data selection**: once the project is created, select a dataset that has been loaded into the client and validated, then click Next to open the parameter configuration page.
-
-
-
-② **Parameter configuration**: split into **model parameters**, **training parameters** and **optimization strategy**. Choose the model structure, backbone and matching training parameters and optimization strategy for the best task results.
-
-
-
-After configuring the parameters, click Start Training; the model trains and is evaluated.
-
-③ **Training visualization**: during training, inspect parameter curves, log details and the current best training/validation metrics via VisualDL. Training can be stopped at any time by clicking "Stop Training".
-
-
-
-When training finishes, go to "Model Pruning Analysis" or straight to "Model Evaluation".
-
-
-
-④ **Model pruning**: to reduce model size and compute and improve on-device inference performance, apply PaddleX's pruning strategy. Pruning analyzes each convolutional layer's sensitivity, prunes each parameter by a ratio matching its impact on model quality, then fine-tunes to produce the final pruned model.
-
-
-
-⑤ **Model evaluation**: on the evaluation page, inspect the trained model's quality with methods such as the confusion matrix, precision and recall.
-
-
-
-You can also test the trained model with the test set held out during dataset splitting, or with one or more images imported from a local folder. Based on the results, decide whether to save the model as a pretrained model and proceed to the release page, or go back and retrain with adjusted parameters.
-
-
-
-
-
-**Step 5: Model release**
-
-When you are satisfied with the model, release it in the version your production environment needs.
-
-
-
-
-
-## FAQ
-
-1. **Why is training so slow?**
-
-   PaddleX does all computation on your local hardware; deep learning genuinely demands a lot of compute. We support CPU-only setups so you can try PaddleX quickly, but we strongly recommend a GPU for better training speed and development experience.
-
-
-
-2. **Can I deploy PaddleX on a server or cloud platform?**
-
-   PaddleX GUI is a client for local single-machine installation and cannot be deployed directly on a server. Use the PaddleX API directly, or the PaddlePaddle core framework, for server-side deployment. For public compute, we strongly recommend [EasyDL](https://ai.baidu.com/easydl/) or [AI Studio](https://aistudio.baidu.com/aistudio/index).
-
-
-
-3. **Does PaddleX support data annotated with EasyData?**
-
-   Yes, PaddleX reads EasyData-annotated data smoothly. The current PaddleX GUI cannot yet import the EasyData format directly; please [convert the dataset](https://paddlex.readthedocs.io/zh_CN/latest/appendix/how_to_convert_dataset.html) as documented before importing it.
-   Direct EasyData import for PaddleX GUI is under active development.
-
-
-
-4. **Why does model pruning analysis take so long?**
-
-   Pruning analysis measures each convolutional layer's sensitivity and prunes each parameter by a ratio matching its impact on model quality. It repeats until the FLOPS target is met and finishes with fine-tuning, hence the long runtime. For the theory, see [Introduction to Pruning](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#2-%E5%8D%B7%E7%A7%AF%E6%A0%B8%E5%89%AA%E8%A3%81%E5%8E%9F%E7%90%86)
-
-
-
-5. **How do I call the backend code?**
-
-   The PaddleX team has compiled API reference docs for you. See the [PaddleX API Reference](https://paddlex.readthedocs.io/zh_CN/latest/apis/index.html)
-
-
-
-**If you have more questions or suggestions, feel free to file an issue or join the official PaddleX QQ group (1045148026) to report your problems and needs directly**
-
-
diff --git a/docs/paddlex_gui/index.rst b/docs/paddlex_gui/index.rst
deleted file mode 100755
index 3fc80fc0afa4e87bf60f28140ebcce673e5ee283..0000000000000000000000000000000000000000
--- a/docs/paddlex_gui/index.rst
+++ /dev/null
@@ -1,29 +0,0 @@
-PaddleX GUI User Guide
-=======================================
-
-PaddleX GUI is a visual model-training suite built on PaddleX that removes the coding step: models can be developed quickly through point-and-click operation. PaddleX GUI has three defining traits: **visual dataset analysis**, **automatic model-parameter suggestions** and **cross-platform use**.
-
-Visual dataset analysis
- | PaddleX imports common image classification, object detection, instance segmentation and semantic segmentation datasets and visualizes their sample distribution and annotations, so the state of a dataset is clear at a glance!
-
-Automatic model-parameter suggestions
- | Training parameters are suggested automatically from your machine configuration and dataset, sparing you from digging through docs and agonizing over parameters!
-
-Cross-platform use
- | PaddleX GUI is fully cross-platform, supporting Linux, Windows and Mac!
-
-
-.. toctree::
- :maxdepth: 2
- :caption: Table of Contents:
-
- download.md
- how_to_use.md
- xx.md
-
-* PaddleX GUI version: v1.0
-* Website: http://www.paddlepaddle.org.cn/paddle/paddlex
-* GitHub: https://github.com/PaddlePaddle/PaddleX/tree/develop
-* Official QQ user group: 1045148026
-* GitHub Issues: http://www.github.com/PaddlePaddle/PaddleX/issues
-
diff --git a/docs/paddlex_gui/xx.md b/docs/paddlex_gui/xx.md
deleted file mode 100644
index 97fd16977da33ba4f122f5c556527fdead2109e5..0000000000000000000000000000000000000000
--- a/docs/paddlex_gui/xx.md
+++ /dev/null
@@ -1 +0,0 @@
-# Miscellaneous
diff --git a/docs/quick_start.md b/docs/quick_start.md
index a24cdadf410abd738750a18b4f5e99b8265cb7d3..bd4a1960da89d599cf5b1d66a54ea7ba58709871 100644
--- a/docs/quick_start.md
+++ b/docs/quick_start.md
@@ -1,32 +1,43 @@
# Get Started in 10 Minutes
-This document shows how to train with PaddleX on a small dataset; read the PaddleX **tutorials** to learn how more model tasks are trained. This example is mirrored on AIStudio, where you can [try model training online](https://aistudio.baidu.com/aistudio/projectdetail/439860)
+This document shows how to train with PaddleX on a small dataset. This example is mirrored on AIStudio, where you can [try model training online](https://aistudio.baidu.com/aistudio/projectdetail/450220).
+The example code comes from Github [tutorials/train/classification/mobilenetv3_small_ssld.py](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/image_classification/mobilenetv3_small_ssld.py); you can download it and run it locally.
-## 1. Install PaddleX
+All model training in PaddleX follows the same 3 steps, so training code can be developed quickly!
+
+| Step | Task | Description |
+| :--- | :--------------- | :-------------- |
+| Step 1 | Define transforms | Defines the preprocessing and data augmentation applied to input images during training, validation and prediction |
+| Step 2 | Define datasets | Defines the training and validation datasets the model will load |
+| Step 3 | Define the model and start training | Pick the model you need and train it |
+
+> **Note**: transforms, datasets and training parameters differ considerably across models; to train other models, grab their training code directly from the docs. [Model Training Tutorials](train/index.html)
+
+Other PaddleX usage covered here
+
+- Use VisualDL to watch metric changes during training
+- Load a saved model for prediction
+
+
+
+**1. Install PaddleX**
> For installation steps and issues, see the PaddleX [installation docs](./install.md).
```
pip install paddlex -i https://mirror.baidu.com/pypi/simple
```
-## 2. Prepare the vegetable classification dataset
+
+**2. Prepare the vegetable classification dataset**
```
wget https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz
tar xzvf vegetables_cls.tar.gz
```
-## 3. Develop the training code
-All PaddleX model training and prediction involves just 5 API calls:
-> - [transforms](apis/transforms/index.html) image data processing
-> - [datasets](apis/datasets/classification.md) dataset loading
-> - [models](apis/models/classification.md) model definition
-> - [train](apis/models/classification.html#train) start training
-> - [predict](apis/models/classification.html#predict) model prediction
-
-In this example, training is run with the `train.py` code below on a single Tesla P40 GPU.
+
+**3. Define the training/validation image processing pipeline: transforms**
-### 3.1 Define the `transforms` data processing pipeline
-Because data augmentation is added at training time, the data pipelines for training and validation must be defined separately. As shown below, the code adds the [RandomCrop](apis/transforms/cls_transforms.html#RandomCrop) and [RandomHorizontalFlip](apis/transforms/cls_transforms.html#RandomHorizontalFlip) augmentations to `train_transforms`; see the [data augmentation docs](apis/transforms/augment.md) for more.
+Because data augmentation is added at training time, the model's data pipelines for training and validation must be defined separately. As shown below, the code adds the [RandomCrop](apis/transforms/cls_transforms.html#randomcrop) and [RandomHorizontalFlip](apis/transforms/cls_transforms.html#randomhorizontalflip) augmentations to `train_transforms`; see the [data augmentation docs](apis/transforms/augment.md) for more.
```
from paddlex.cls import transforms
train_transforms = transforms.Compose([
@@ -41,8 +52,13 @@ eval_transforms = transforms.Compose([
])
```
-### 3.2 Define the `dataset` to load the data
-Define the dataset; `pdx.datasets.ImageNet` reads a classification dataset in ImageNet format. For details see the [dataset format notes](datasets.md) and the [ImageNet API docs](apis/datasets/classification.md)
+
+**4. Define the `dataset` to load the image classification data**
+
+Define the dataset; `pdx.datasets.ImageNet` reads a classification dataset in ImageNet format
+- [paddlex.datasets.ImageNet API notes](apis/datasets.md)
+- [ImageNet data format notes](data/format/classification.md)
+
```
train_dataset = pdx.datasets.ImageNet(
data_dir='vegetables_cls',
@@ -57,54 +73,54 @@ eval_dataset = pdx.datasets.ImageNet(
transforms=eval_transforms)
```
-### 3.3 Define the classification model
+
+**5. Start training with the MobileNetV3_small_ssld model**
+
This document uses Baidu's distillation-based MobileNetV3 pretrained model; the structure matches MobileNetV3 but the accuracy is higher. PaddleX ships more than 20 classification models; see the [PaddleX Model Zoo](appendix/model_zoo.md) for more.
```
num_classes = len(train_dataset.labels)
model = pdx.cls.MobileNetV3_small_ssld(num_classes=num_classes)
-```
-### 3.4 Define the training parameters
-Once the model is defined, call the `train` API directly to set the training parameters. Classification models have the `piecewise_decay` learning-rate schedule built in; see the [classification train API docs](apis/models/classification.html#train) for the parameters.
-```
-model.train(num_epochs=10,
+model.train(num_epochs=20,
train_dataset=train_dataset,
train_batch_size=32,
eval_dataset=eval_dataset,
lr_decay_epochs=[4, 6, 8],
- learning_rate=0.025,
- save_dir='output/mobilenetv2',
+ save_dir='output/mobilenetv3_small_ssld',
use_vdl=True)
```
-## 4. Start training
-Put `train.py` in the same directory as the extracted dataset folder `vegetables_cls` and run `train.py` there to start training. With a GPU this finishes within 10 minutes; on CPU it takes roughly 30 minutes.
-```
-python train.py
-```
+
+**6. Watch training metrics with VisualDL during training**
-## 5. Inspect training metrics during training
-During training, all iteration information is written to the terminal as standard output. You can also inspect metric changes visually with visualdl: start it as follows, then open https://0.0.0.0:8001 (or https://localhost:8001) in a browser.
+During training, the model's metrics on the training and validation sets are written to the terminal as standard output. When `use_vdl=True` is set, the metrics are also logged in VisualDL format to the `vdl_log` folder under `save_dir`; run the command below in a terminal to start visualdl and view the metric curves.
```
-visualdl --logdir output/mobilenetv2/vdl_log --port 8001
+visualdl --logdir output/mobilenetv3_small_ssld --port 8001
```
-
+After the service starts, open https://0.0.0.0:8001 or https://localhost:8001 in a browser.
+
+If you are training on the AIStudio platform, visualdl cannot be started this way; please follow the AIStudio VisualDL startup tutorial instead
+
+
+**7. Load the saved model for prediction**
-## 6. Test the model after training
-The code below tests with the model saved at epoch 8 during training.
+During training the model is saved every fixed number of epochs, and the epoch that evaluates best on the validation set is kept in the `best_model` folder under `save_dir`. The model can be loaded and used for prediction as follows.
+- [load_model API notes](apis/load_model.md)
+- [Classification model predict API notes](apis/models/classification.html#predict)
```
import paddlex as pdx
-model = pdx.load_model('output/mobilenetv2/epoch_8')
-result = model.predict('vegetables_cls/bocai/100.jpg', topk=3)
-print("Predict Result:", result)
+model = pdx.load_model('output/mobilenetv3_small_ssld/best_model')
+result = model.predict('vegetables_cls/bocai/100.jpg')
+print("Predict Result: ", result)
```
-> The prediction output is as follows; results are sorted by score, giving the top-3 categories
+The prediction output is as follows:
```
-Predict Result: Predict Result: [{'score': 0.9999393, 'category': 'bocai', 'category_id': 0}, {'score': 6.010089e-05, 'category': 'hongxiancai', 'category_id': 2}, {'score': 5.593914e-07, 'category': 'xilanhua', 'category_id': 5}]
+Predict Result: [{'score': 0.9999393, 'category': 'bocai', 'category_id': 0}]
```
-## Other recommendations
-- 1. [Object detection model training](tutorials/train/detection.md)
-- 2. [Semantic segmentation model training](tutorials/train/segmentation.md)
-- 3. [Instance segmentation model training](tutorials/train/instance_segmentation.md)
-- 4. [Model too big? Try model pruning for a smaller one!](tutorials/compress/classification.md)
+
+**More tutorials**
+- 1. [Object detection model training](train/object_detection.md)
+- 2. [Semantic segmentation model training](train/semantic_segmentation.md)
+- 3. [Instance segmentation model training](train/instance_segmentation.md)
+- 4. [Model too big? Try model pruning for a smaller one!](https://github.com/PaddlePaddle/PaddleX/tree/develop/tutorials/compress)
diff --git a/docs/slim/index.rst b/docs/slim/index.rst
deleted file mode 100644
index 48a16f6e08f3f80a7048d1666719b9b08e150362..0000000000000000000000000000000000000000
--- a/docs/slim/index.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-Model Compression
-============================
-
-.. toctree::
- :maxdepth: 2
-
- prune.md
- quant.md
diff --git a/docs/slim/quant.md b/docs/slim/quant.md
deleted file mode 100644
index 1686a9fb8d33e770d55a378ebdf76876058514fb..0000000000000000000000000000000000000000
--- a/docs/slim/quant.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# Model Quantization
-
-## How it works
-Fixed-point quantization was proposed to meet demands for low memory bandwidth, low power, low compute usage and small model storage. We provide post-training quantization, which uses KL divergence to determine the quantization scale factors and converts an FP32 model to INT8 without retraining, yielding a quantized model quickly.
-
-
-## Quantizing a model with PaddleX
-PaddleX provides the `export_quant_model` API so users can quantize and export a model via post_quantization through a single call. See the [quantization API docs](../apis/slim.md).
-
-## Quantization performance comparison
-For before/after quantization performance metrics, see the [PaddleSlim model zoo](https://paddlepaddle.github.io/PaddleSlim/model_zoo.html)
diff --git a/docs/train/classification.md b/docs/train/classification.md
new file mode 100644
index 0000000000000000000000000000000000000000..76c947e8dda482d7c78d952ba2c593e61feadfd3
--- /dev/null
+++ b/docs/train/classification.md
@@ -0,0 +1,32 @@
+# Image Classification
+
+## Introduction
+
+PaddleX provides 20+ image classification models to cover developers' needs across different scenarios.
+
+- **Top-1 accuracy**: the model's test accuracy on the ImageNet dataset
+- **Inference speed**: prediction time for a single image (excluding pre/post-processing)
+- "-" means the metric is not yet available
+
+| Model (click for code) | Top-1 Accuracy | Model Size | GPU Inference Speed | Arm Inference Speed | Notes |
+| :---------------- | :------- | :------- | :--------- | :--------- | :----- |
+| [MobileNetV3_small_ssld](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/image_classification/mobilenetv3_small_ssld.py) | 71.3% | 21.0MB | 6.809ms | - | Small and fast; for low-power or mobile devices |
+| [MobileNetV2](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/image_classification/mobilenetv2.py) | 72.2% | 14.0MB | 4.546ms | - | Small and fast; for low-power or mobile devices |
+| [ShuffleNetV2](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/image_classification/shufflenetv2.py) | 68.8% | 9.0MB | 6.101ms | - | Small and fast; for low-power or mobile devices |
+| [ResNet50_vd_ssld](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/image_classification/resnet50_vd_ssld.py) | 82.4% | 102.8MB | 9.058ms | - | High accuracy; for server-side deployment |
+
+
+## Start training
+
+Save the code locally and run it (download links are in the table above); **the code downloads the training data automatically and starts training**. For example, if saved as `mobilenetv3_small_ssld.py`, start training with:
+
+```
+python mobilenetv3_small_ssld.py
+```
+
+
+## Related docs
+
+- [**Important**] Tuning training parameters for your own machine and data? First read what PaddleX's training parameters do. [->> link](../appendix/parameters.md)
+- [**Useful**] No compute available? Train online with AIStudio's free GPUs. [->> link](https://aistudio.baidu.com/aistudio/projectdetail/450925)
+- [**More**] For more image classification models, see the [PaddleX Model Zoo](../appendix/model_zoo.md) and the [API docs](../apis/models/index.html).
diff --git a/docs/train/images/deeplab_predict.jpg b/docs/train/images/deeplab_predict.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8b4b6c00a35f2183f0de7ed7eed9e93e5fb60edb
Binary files /dev/null and b/docs/train/images/deeplab_predict.jpg differ
diff --git a/docs/train/images/mask_predict.jpg b/docs/train/images/mask_predict.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..02ffca20ab8dab17655f5bce94712d3a3c5f4703
Binary files /dev/null and b/docs/train/images/mask_predict.jpg differ
diff --git a/docs/train/images/test.jpg b/docs/train/images/test.jpg
new file mode 100755
index 0000000000000000000000000000000000000000..0c4f58e304176bae537f8c0a5b399db2443c7541
Binary files /dev/null and b/docs/train/images/test.jpg differ
diff --git a/docs/train/images/yolo_predict.jpg b/docs/train/images/yolo_predict.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ff7eb75358629d53bfc859344697044d6ebce0ca
Binary files /dev/null and b/docs/train/images/yolo_predict.jpg differ
diff --git a/docs/train/index.rst b/docs/train/index.rst
new file mode 100755
index 0000000000000000000000000000000000000000..54a8a1a7d39019a33a87d1c94ce04b76eb6fb8e8
--- /dev/null
+++ b/docs/train/index.rst
@@ -0,0 +1,15 @@
+Model Training
+=======================================
+
+PaddleX integrates the industrially mature models from the PaddleClas, PaddleDetection and PaddleSeg CV suites behind a unified, easy-to-use API, helping users quickly train vision models for image classification, object detection, instance segmentation and semantic segmentation.
+
+
+.. toctree::
+ :maxdepth: 1
+ :caption: Table of Contents:
+
+ classification.md
+ object_detection.md
+ instance_segmentation.md
+ semantic_segmentation.md
+ prediction.md
diff --git a/docs/train/instance_segmentation.md b/docs/train/instance_segmentation.md
new file mode 100644
index 0000000000000000000000000000000000000000..de0f14eaea631e5b398b7fcc6669fcda96878907
--- /dev/null
+++ b/docs/train/instance_segmentation.md
@@ -0,0 +1,30 @@
+# Instance Segmentation
+
+## Introduction
+
+PaddleX currently provides the MaskRCNN instance segmentation architecture with multiple backbones, covering different scenario and performance needs.
+
+- **Box MMAP/Seg MMAP**: the model's test accuracy on the COCO dataset
+- **Inference speed**: prediction time for a single image (excluding pre/post-processing)
+- "-" means the metric is not yet available
+
+| Model (click for code) | Box MMAP/Seg MMAP | Model Size | GPU Inference Speed | Arm Inference Speed | Notes |
+| :---------------- | :------- | :------- | :--------- | :--------- | :----- |
+| [MaskRCNN-ResNet50-FPN](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/instance_segmentation/mask_rcnn_r50_fpn.py) | 38.7%/34.7% | 177.7MB | 160.185ms | - | High accuracy; for server-side deployment |
+| [MaskRCNN-ResNet18-FPN](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/instance_segmentation/mask_rcnn_r18_fpn.py) | 33.6%/30.5% | 189.1MB | - | - | High accuracy; for server-side deployment |
+| [MaskRCNN-HRNet-FPN](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/instance_segmentation/mask_rcnn_hrnet_fpn.py) | 38.7%/34.7% | 120.7MB | - | - | High accuracy and fast inference; for server-side deployment |
+
+
+## Start training
+
+Save the code locally and run it (download links are in the table above); **the code downloads the training data automatically and starts training**. For example, if saved as `mask_rcnn_r50_fpn.py`, start training with:
+
+```
+python mask_rcnn_r50_fpn.py
+```
+
+## Related docs
+
+- [**Important**] Tuning training parameters for your own machine and data? First read what PaddleX's training parameters do. [->> link](../appendix/parameters.md)
+- [**Useful**] No compute available? Train online with AIStudio's free GPUs. [->> link](https://aistudio.baidu.com/aistudio/projectdetail/450925)
+- [**More**] For more instance segmentation models, see the [PaddleX Model Zoo](../appendix/model_zoo.md) and the [API docs](../apis/models/index.html).
diff --git a/docs/train/object_detection.md b/docs/train/object_detection.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b7da69a865f07d73691f13045bfc7792df783c1
--- /dev/null
+++ b/docs/train/object_detection.md
@@ -0,0 +1,34 @@
+# Object Detection
+
+## Introduction
+
+PaddleX currently provides two detection architectures, FasterRCNN and YOLOv3, with multiple backbones, covering different scenario and performance needs.
+
+- **Box MMAP**: the model's test accuracy on the COCO dataset
+- **Inference speed**: prediction time for a single image (excluding pre/post-processing)
+- "-" means the metric is not yet available
+
+| Model (click for code) | Box MMAP | Model Size | GPU Inference Speed | Arm Inference Speed | Notes |
+| :---------------- | :------- | :------- | :--------- | :--------- | :----- |
+| [YOLOv3-MobileNetV1](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/object_detection/yolov3_mobilenetv1.py) | 29.3% | 99.2MB | 15.442ms | - | Small and fast; for low-power or mobile devices |
+| [YOLOv3-MobileNetV3](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/object_detection/yolov3_mobilenetv3.py) | 31.6% | 100.7MB | 143.322ms | - | Small model with a speed advantage on mobile |
+| [YOLOv3-DarkNet53](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/object_detection/yolov3_darknet53.py) | 38.9% | 249.2MB | 42.672ms | - | Larger model with fast inference; for servers |
+| [FasterRCNN-ResNet50-FPN](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/object_detection/faster_rcnn_r50_fpn.py) | 37.2% | 167.7MB | 197.715ms | - | High accuracy; for server-side deployment |
+| [FasterRCNN-ResNet18-FPN](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/object_detection/faster_rcnn_r18_fpn.py) | 32.6% | 173.2MB | - | - | High accuracy; for server-side deployment |
+| [FasterRCNN-HRNet-FPN](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/object_detection/faster_rcnn_hrnet_fpn.py) | 36.0% | 115.MB | 81.592ms | - | High accuracy and fast inference; for server-side deployment |
+
+
+## Start training
+
+Save the code locally and run it (download links are in the table above); **the code downloads the training data automatically and starts training**. For example, if saved as `yolov3_mobilenetv1.py`, start training with:
+
+```
+python yolov3_mobilenetv1.py
+```
+
+
+## Related docs
+
+- [**Important**] Tuning training parameters for your own machine and data? First read what PaddleX's training parameters do. [->> link](../appendix/parameters.md)
+- [**Useful**] No compute available? Train online with AIStudio's free GPUs. [->> link](https://aistudio.baidu.com/aistudio/projectdetail/450925)
+- [**More**] For more object detection models, see the [PaddleX Model Zoo](../appendix/model_zoo.md) and the [API docs](../apis/models/index.html).
diff --git a/docs/train/prediction.md b/docs/train/prediction.md
new file mode 100644
index 0000000000000000000000000000000000000000..977a93d4a55b31f3a842bd37cb65e0bcafad9548
--- /dev/null
+++ b/docs/train/prediction.md
@@ -0,0 +1,107 @@
+# Loading a Model for Prediction
+
+PaddleX can load models with the `paddlex.load_model` API (models saved during training, exported deployment models, quantized models and pruned models) and run prediction. PaddleX also ships a set of built-in visualization helper functions for conveniently checking model results.
+
+**Note**: a model loaded with `paddlex.load_model` can only be used for prediction. To continue training on top of it, use it as a pretrained model: in the training code, set the `pretrain_weights` parameter of the train function to the path of that model; a hedged sketch follows.
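+
+As an illustration of that note, the sketch below continues training from a previously saved model. It reuses the vegetables_cls dataset layout and output directory from the quick-start guide; all paths are placeholder assumptions:
+
+```python
+import paddlex as pdx
+from paddlex.cls import transforms
+
+train_transforms = transforms.Compose([
+    transforms.RandomCrop(crop_size=224),
+    transforms.Normalize()])
+eval_transforms = transforms.Compose([
+    transforms.ResizeByShort(short_size=256),
+    transforms.CenterCrop(crop_size=224),
+    transforms.Normalize()])
+
+train_dataset = pdx.datasets.ImageNet(
+    data_dir='vegetables_cls',
+    file_list='vegetables_cls/train_list.txt',
+    label_list='vegetables_cls/labels.txt',
+    transforms=train_transforms)
+eval_dataset = pdx.datasets.ImageNet(
+    data_dir='vegetables_cls',
+    file_list='vegetables_cls/val_list.txt',
+    label_list='vegetables_cls/labels.txt',
+    transforms=eval_transforms)
+
+model = pdx.cls.MobileNetV3_small_ssld(num_classes=len(train_dataset.labels))
+model.train(
+    num_epochs=10,
+    train_dataset=train_dataset,
+    train_batch_size=32,
+    eval_dataset=eval_dataset,
+    # Initialize from the previously saved best model instead of the
+    # default ImageNet pretrained weights.
+    pretrain_weights='output/mobilenetv3_small_ssld/best_model',
+    save_dir='output/finetune')
+```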
+
+## Image Classification
+
+[Click to download](https://bj.bcebos.com/paddlex/models/mobilenetv3_small_ssld_imagenet.tar.gz) the model used in the example code below
+
+```
+import paddlex as pdx
+test_jpg = 'mobilenetv3_small_ssld_imagenet/test.jpg'
+model = pdx.load_model('mobilenetv3_small_ssld_imagenet')
+result = model.predict(test_jpg)
+print("Predict Result: ", result)
+```
+The output is:
+```
+Predict Result: [{'category_id': 549, 'category': 'envelope', 'score': 0.29062933}]
+```
+
+The test image is shown below:
+
+
+
+- Classification model predict API [docs](../apis/models/classification.html#predict)
+
+
+## Object Detection
+
+[Click to download](https://bj.bcebos.com/paddlex/models/yolov3_mobilenetv1_coco.tar.gz) the model used in the example code below
+
+```
+import paddlex as pdx
+test_jpg = 'yolov3_mobilenetv1_coco/test.jpg'
+model = pdx.load_model('yolov3_mobilenetv1_coco')
+
+# The predict API does not filter out low-confidence results; filter by score as needed
+result = model.predict(test_jpg)
+
+# The visualized result is saved to ./visualized_test.jpg, shown below
+pdx.det.visualize(test_jpg, result, threshold=0.3, save_dir='./')
+```
+- YOLOv3 model predict API [docs](../apis/models/detection.html#predict)
+- Visualization API pdx.det.visualize [docs](../apis/visualize.html#paddlex-det-visualize)
+> Note: results from the `predict` API of detection and instance segmentation models must be filtered for low confidence by the user. The `paddlex.det.visualize` API provides a `threshold` for this: results scoring below it are filtered out and not visualized. A minimal filtering sketch follows.
+
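+As a hedged example of that filtering (the `score` key appears in the result dicts shown earlier in this document):
+
+```python
+# Keep only predictions whose confidence reaches the chosen threshold.
+threshold = 0.3
+filtered = [r for r in result if r['score'] >= threshold]
+```
+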
+
+## Instance Segmentation
+
+[Click to download](https://bj.bcebos.com/paddlex/models/mask_r50_fpn_coco.tar.gz) the model used in the example code below
+
+```
+import paddlex as pdx
+test_jpg = 'mask_r50_fpn_coco/test.jpg'
+model = pdx.load_model('mask_r50_fpn_coco')
+
+# The predict API does not filter out low-confidence results; filter by score as needed
+result = model.predict(test_jpg)
+
+# The visualized result is saved to ./visualized_test.jpg, shown below
+pdx.det.visualize(test_jpg, result, threshold=0.5, save_dir='./')
+```
+- MaskRCNN model predict API [docs](../apis/models/instance_segmentation.html#predict)
+- Visualization API pdx.det.visualize [docs](../apis/visualize.html#paddlex-det-visualize)
+
+**Note**: results from the `predict` API of detection and instance segmentation models must be filtered for low confidence by the user; the `paddlex.det.visualize` API provides a `threshold` for this, and results scoring below it are filtered out and not visualized.
+
+
+## Semantic Segmentation
+
+[Click to download](https://bj.bcebos.com/paddlex/models/deeplabv3p_mobilenetv2_voc.tar.gz) the model used in the example code below
+
+```
+import paddlex as pdx
+test_jpg = './deeplabv3p_mobilenetv2_voc/test.jpg'
+model = pdx.load_model('./deeplabv3p_mobilenetv2_voc')
+result = model.predict(test_jpg)
+# The visualized result is saved to ./visualized_test.jpg; see the right image below (left is the original)
+pdx.seg.visualize(test_jpg, result, weight=0.0, save_dir='./')
+```
+
+In the example above, calling `paddlex.seg.visualize` visualizes the semantic segmentation prediction; the result is saved under `save_dir`, shown below. The `weight` parameter controls how the prediction and the original image are blended when shown together, roughly weight x original + (1 - weight) x mask: at 0.0 only the predicted mask is visualized, at 1.0 only the original image.
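+
+A hedged, PaddleX-independent sketch of that blend (array shapes and dtypes are assumptions):
+
+```python
+import numpy as np
+
+def blend(original, colored_mask, weight=0.6):
+    """Emulate the weight semantics above: 1.0 keeps only the original image,
+    0.0 keeps only the colored mask. Both inputs are HxWx3 uint8 arrays."""
+    out = weight * original.astype(np.float32) \
+        + (1.0 - weight) * colored_mask.astype(np.float32)
+    return np.clip(out, 0, 255).astype(np.uint8)
+```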
+
+
+
+## Downloads of models trained on public datasets
+
+PaddleX provides some models trained on public datasets; download them and load them as described in this document.
+
+| Type | Model (click to download) | Dataset | Size | Metric | Value |
+|:--- | :---------- | :----------- | :---------- | :---------- | :------------- |
+| Image classification | [MobileNetV3_small_ssld](https://bj.bcebos.com/paddlex/models/mobilenetv3_small_ssld_imagenet.tar.gz) | ImageNet | 13MB | Accuracy | 71.3% |
+| Image classification | [ResNet50_vd_ssld](https://bj.bcebos.com/paddlex/models/resnet50_vd_ssld_imagenet.tar.gz) | ImageNet | 110MB | Accuracy | 82.4% |
+| Object detection | [FasterRCNN-ResNet50-FPN](https://bj.bcebos.com/paddlex/models/faster_r50_fpn_coco.tar.gz) | MSCOCO | 179MB | Box MAP | 37.7% |
+| Object detection | [YOLOv3-MobileNetV1](https://bj.bcebos.com/paddlex/models/yolov3_mobilenetv1_coco.tar.gz) | MSCOCO | 106MB | Box MAP | 29.3% |
+| Object detection | [YOLOv3-DarkNet53](https://bj.bcebos.com/paddlex/models/yolov3_darknet53_coco.tar.gz) | MSCOCO | 266MB | Box MAP | 34.8% |
+| Object detection | [YOLOv3-MobileNetV3](https://bj.bcebos.com/paddlex/models/yolov3_mobilenetv3_coco.tar.gz) | MSCOCO | 101MB | Box MAP | 31.6% |
+| Instance segmentation | [MaskRCNN-ResNet50-FPN](https://bj.bcebos.com/paddlex/models/mask_r50_fpn_coco.tar.gz) | MSCOCO | 193MB | Box MAP/Seg MAP | 38.7% / 34.7% |
+| Semantic segmentation | [DeepLabv3p-Xception65](https://bj.bcebos.com/paddlex/examples/human_seg/models/humanseg_server_params.tar) | Portrait segmentation | - | mIoU | - |
+| Semantic segmentation | [HRNet_w18_small](https://bj.bcebos.com/paddlex/examples/human_seg/models/humanseg_mobile_params.tar) | Portrait segmentation | - | mIoU | - |
+
+PaddleX's `load_model` API covers typical model-investigation needs; for higher-performance inference deployment, see the docs below
+
+- [Server-side Python deployment](../deploy/server/python.md)
+- [Server-side C++ deployment](../deploy/server/cpp/index.html)
diff --git a/docs/train/semantic_segmentation.md b/docs/train/semantic_segmentation.md
new file mode 100644
index 0000000000000000000000000000000000000000..391df0aca7b3103dc89068cc7a2603bcc86226b0
--- /dev/null
+++ b/docs/train/semantic_segmentation.md
@@ -0,0 +1,33 @@
+# 语义分割
+
+## 介绍
+
+PaddleX目前提供了DeepLabv3p、UNet、HRNet和FastSCNN四种语义分割结构,多种backbone模型,可满足开发者不同场景和性能的需求。
+
+- **mIOU**: 模型在CityScape数据集上的测试精度
+- **预测速度**:单张图片的预测用时(不包括预处理和后处理)
+- "-"表示指标暂未更新
+
+| 模型(点击获取代码) | mIOU | 模型大小 | GPU预测速度 | Arm预测速度 | 备注 |
+| :---------------- | :------- | :------- | :--------- | :--------- | :----- |
+| [DeepLabv3p-MobileNetV2-x0.25](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/semantic_segmentation/deeplabv3p_mobilenetv2_x0.25.py) | - | 2.9MB | - | - | 模型小,预测速度快,适用于低性能或移动端设备 |
+| [DeepLabv3p-MobileNetV2-x1.0](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/semantic_segmentation/deeplabv3p_mobilenetv2.py) | 69.8% | 11MB | - | - | 模型小,预测速度快,适用于低性能或移动端设备 |
+| [DeepLabv3p-Xception65](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/semantic_segmentation/deeplabv3p_xception65.pyy) | 79.3% | 158MB | - | - | 模型大,精度高,适用于服务端 |
+| [UNet](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/semantic_segmentation/unet.py) | - | 52MB | - | - | 模型较大,精度高,适用于服务端 |
+| [HRNet](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/semantic_segmentation/hrnet.py) | 79.4% | 37MB | - | - | 模型较小,模型精度高,适用于服务端部署 |
+| [FastSCNN](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/semantic_segmentation/fast_scnn.py) | - | 4.5MB | - | - | 模型小,预测速度快,适用于低性能或移动端设备 |
+
+
+## 开始训练
+
+将代码保存到本地后运行(代码下载链接位于上面的表格中),**代码会自动下载训练数据并开始训练**。如保存为`deeplabv3p_mobilenetv2_x0.25.py`,执行如下命令即可开始训练:
+```
+python deeplabv3p_mobilenetv2_x0.25.py
+```
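+
+以`deeplabv3p_mobilenetv2_x0.25.py`为例,训练脚本的主体流程大致如下(仅为示意,数据集与超参数以仓库中的实际脚本为准):
+
+```
+import paddlex as pdx
+from paddlex.seg import transforms
+
+# 自动下载并解压示例数据(此处以视盘分割数据为例,实际以脚本为准)
+dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
+pdx.utils.download_and_decompress(dataset, path='./')
+
+train_transforms = transforms.Compose([
+    transforms.RandomHorizontalFlip(), transforms.Resize(target_size=512),
+    transforms.Normalize()])
+eval_transforms = transforms.Compose([
+    transforms.Resize(target_size=512), transforms.Normalize()])
+
+train_dataset = pdx.datasets.SegDataset(
+    data_dir='optic_disc_seg',
+    file_list='optic_disc_seg/train_list.txt',
+    label_list='optic_disc_seg/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.SegDataset(
+    data_dir='optic_disc_seg',
+    file_list='optic_disc_seg/val_list.txt',
+    label_list='optic_disc_seg/labels.txt',
+    transforms=eval_transforms)
+
+model = pdx.seg.DeepLabv3p(
+    num_classes=len(train_dataset.labels), backbone='MobileNetV2_x0.25')
+model.train(
+    num_epochs=40,
+    train_dataset=train_dataset,
+    train_batch_size=4,
+    eval_dataset=eval_dataset,
+    learning_rate=0.01,
+    save_dir='output/deeplabv3p_mobilenetv2_x0.25')
+```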
+
+
+## 相关文档
+
+- 【**重要**】如何针对自己的机器环境和数据调整训练参数?先了解下PaddleX中训练参数的作用。[——>>传送门](../appendix/parameters.md)
+- 【**有用**】没有机器资源?使用AIStudio免费的GPU资源在线训练模型。[——>>传送门](https://aistudio.baidu.com/aistudio/projectdetail/450925)
+- 【**拓展**】更多语义分割模型,查阅[PaddleX模型库](../appendix/model_zoo.md)和[API使用文档](../apis/models/index.html)。
diff --git a/docs/tuning_strategy/detection/index.rst b/docs/tuning_strategy/detection/index.rst
deleted file mode 100644
index 5457adeeea053df4de9332bd4df61cd450830f96..0000000000000000000000000000000000000000
--- a/docs/tuning_strategy/detection/index.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-目标检测
-============================
-
-PaddleX针对目标检测任务提供了通过负样本学习降低误检率的策略,用户可根据需求及应用场景使用该策略对模型进行调优。
-
-.. toctree::
- :maxdepth: 1
-
- negatives_training.md
-
diff --git a/docs/tuning_strategy/detection/negatives_training.md b/docs/tuning_strategy/detection/negatives_training.md
deleted file mode 100644
index d3590e3222018faf90462935588a785b8fae4e7f..0000000000000000000000000000000000000000
--- a/docs/tuning_strategy/detection/negatives_training.md
+++ /dev/null
@@ -1,101 +0,0 @@
-# 通过负样本学习降低误检率
-
-## 应用场景
-
-在背景和目标相似的场景下,模型容易把背景误检成目标。此时可以通过负样本学习来降低误检率,即在训练过程中把无目标真值的图片加入训练。
-
-## 效果对比
-
-* 与基准模型相比,通过负样本学习后的模型**mmAP有3.6%的提升,mAP有0.1%的提升**。
-* 与基准模型相比,通过负样本学习后的模型在背景图片上的图片级别**误检率降低了49.68%**。
-
-表1 违禁品验证集上**框级别精度**对比
-
-||mmAP(AP@IoU=0.5:0.95)| mAP (AP@IoU=0.5)|
-|:---|:---|:---|
-|基准模型 | 45.8% | 83% |
-|通过负样本学习后的模型 | 49.4% | 83.1% |
-
-表2 违禁品验证集上**图片级别的召回率**、无违禁品验证集上**图片级别的误检率**对比
-
-||违禁品图片级别的召回率| 无违禁品图片级别的误检率|
-|:---|:--------------------|:------------------------|
-|基准模型 | 98.97% | 55.27% |
-|通过负样本学习后的模型 | 97.75% | 5.59% |
-
-【名词解释】
-
- * 图片级别的召回率:只要在有目标的图片上检测出目标(不论框的个数),该图片被认为召回。批量有目标图片中被召回图片所占的比例,即为图片级别的召回率。
-
- * 图片级别的误检率:只要在无目标的图片上检测出目标(不论框的个数),该图片被认为误检。批量无目标图片中被误检图片所占的比例,即为图片级别的误检率。
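-
-按上述定义,图片级别召回率和误检率的计算逻辑可用如下示意代码表达(每张图片的检出框个数为假设的输入,仅用于说明统计口径):
-
-```
-def image_level_metrics(boxes_per_pos_image, boxes_per_neg_image):
-    # boxes_per_pos_image: 每张有目标图片上检出框的个数
-    # boxes_per_neg_image: 每张无目标图片上检出框的个数
-    recall = sum(1 for n in boxes_per_pos_image if n > 0) / len(boxes_per_pos_image)
-    false_alarm = sum(1 for n in boxes_per_neg_image if n > 0) / len(boxes_per_neg_image)
-    return recall, false_alarm
-```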
-
-
-## 使用方法
-
-在定义训练所用的数据集之后,使用数据集类的成员函数`add_negative_samples`将无目标真值的背景图片所在路径传入给训练集。代码示例如下:
-
-```
-import os
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-from paddlex.det import transforms
-import paddlex as pdx
-
-# 定义训练和验证时的transforms
-train_transforms = transforms.ComposedRCNNTransforms(
- mode='train', min_max_size=[600, 1000])
-eval_transforms = transforms.ComposedRCNNTransforms(
- mode='eval', min_max_size=[600, 1000])
-
-# 定义训练所用的数据集
-train_dataset = pdx.datasets.CocoDetection(
- data_dir='jinnan2_round1_train_20190305/restricted/',
- ann_file='jinnan2_round1_train_20190305/train.json',
- transforms=train_transforms,
- shuffle=True,
- num_workers=2)
-# 训练集中加入无目标背景图片
-train_dataset.add_negative_samples(
- 'jinnan2_round1_train_20190305/normal_train_back/')
-
-# 定义验证所用的数据集
-eval_dataset = pdx.datasets.CocoDetection(
- data_dir='jinnan2_round1_train_20190305/restricted/',
- ann_file='jinnan2_round1_train_20190305/val.json',
- transforms=eval_transforms,
- num_workers=2)
-
-# 初始化模型,并进行训练
-model = pdx.det.FasterRCNN(num_classes=len(train_dataset.labels) + 1)
-model.train(
- num_epochs=17,
- train_dataset=train_dataset,
- eval_dataset=eval_dataset,
- train_batch_size=8,
- learning_rate=0.01,
- lr_decay_epochs=[13, 16],
- save_dir='./output')
-```
-
-## 实验细则
-
-(1) 数据集
-
-我们使用X光违禁品数据集对通过负样本学习降低误检率的策略有效性进行了实验验证。该数据集中背景比较繁杂,很多背景物体与目标物体较为相似。
-
-* 检测铁壳打火机、黑钉打火机、刀具、电源和电池、剪刀5种违禁品。
-
-* 训练集有883张违禁品图片,验证集有98张违禁品图片。
-
-* 无违禁品的X光图片有2540张。
-
-(2) 基准模型
-
-使用FasterRCNN-ResNet50作为检测模型,除了水平翻转外没有使用其他的数据增强方式,只使用违禁品训练集进行训练。模型在违禁品验证集上的精度见表1,mmAP有45.8%,mAP达到83%。
-
-(3) 通过负样本学习后的模型
-
-把无违禁品的X光图片按1:1分成无违禁品训练集和无违禁品验证集。我们将基准模型在无违禁品验证集进行测试,发现图片级别的误检率高达55.27%。为了降低该误检率,将基准模型在无违禁品训练集进行测试,挑选出被误检图片共663张,将这663张图片加入训练,训练参数配置与基准模型训练时一致。
-
-通过负样本学习后的模型在违禁品验证集上的精度见表1,mmAP有49.4%,mAP达到83.1%。与基准模型相比,**mmAP有3.6%的提升,mAP有0.1%的提升**。通过负样本学习后的模型在无违禁品验证集上的误检率仅有5.59%,与基准模型相比,**误检率降低了49.68%**。
-
-此外,还测试了两个模型在有违禁品验证集上图片级别的召回率,见表2,与基准模型相比,通过负样本学习后的模型仅漏检了1张图片,召回率几乎是无损的。
diff --git a/docs/tuning_strategy/index.rst b/docs/tuning_strategy/index.rst
deleted file mode 100644
index f9d5cd50f914609f864135dfba922f857f771dbf..0000000000000000000000000000000000000000
--- a/docs/tuning_strategy/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-PaddleX调优策略介绍
-============================
-
-.. toctree::
- :maxdepth: 2
-
- detection/index.rst
diff --git a/docs/tutorials/compress/classification.md b/docs/tutorials/compress/classification.md
deleted file mode 100755
index 515a4b3527c82b1a78f2ce598be961acf5926bd1..0000000000000000000000000000000000000000
--- a/docs/tutorials/compress/classification.md
+++ /dev/null
@@ -1,54 +0,0 @@
-# 分类模型裁剪
-
----
-本文档训练代码可直接在PaddleX的Repo中下载,[代码tutorials/compress/classification](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/compress/classification)
-本文档按如下方式对模型进行了裁剪
-> 第一步:在训练数据集上训练MobileNetV2
-> 第二步:在验证数据集上计算模型中各个参数的敏感度信息
-> 第三步:根据第二步计算的敏感度,设定`eval_metric_loss`,对模型裁剪后重新在训练数据集上训练
-
-## 步骤一 训练MobileNetV2
-> 模型训练使用文档可以直接参考[分类模型训练](../train/classification.md),本文档在该代码基础上添加了部分参数选项,用户可直接下载模型训练代码[tutorials/compress/classification/mobilenetv2.py](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/compress/classification/mobilenetv2.py)
-> 使用如下命令开始模型训练
-```
-python mobilenetv2.py
-```
-
-## 步骤二 计算参数敏感度
-> 参数敏感度的计算可以直接使用PaddleX提供的API`paddlex.slim.cal_params_sensitivities`,使用代码如下, 敏感度信息文件会保存至`save_file`
-
-```
-import os
-# 选择使用0号卡
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-import paddlex as pdx
-
-model_dir = './output/mobilenetv2/best_model'
-model = pdx.load_model(model_dir)
-
-# 数据集所在路径与敏感度信息文件的保存路径
-dataset = './vegetables_cls'
-save_file = './sensitivities.data'
-
-# 定义验证所用的数据集
-eval_dataset = pdx.datasets.ImageNet(
-    data_dir=dataset,
-    file_list=os.path.join(dataset, 'val_list.txt'),
-    label_list=os.path.join(dataset, 'labels.txt'),
-    transforms=model.eval_transforms)
-
-pdx.slim.cal_params_sensitivities(model,
- save_file,
- eval_dataset,
- batch_size=8)
-```
-> 本步骤代码已整理至[tutorials/compress/classification/cal_sensitivities_file.py](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/compress/classification/cal_sensitivities_file.py),用户可直接下载使用
-> 使用如下命令开始计算敏感度
-```
-python cal_sensitivities_file.py --model_dir output/mobilenetv2/best_model --dataset vegetables_cls --save_file sensitivities.data
-```
-
-## 步骤三 开始裁剪训练
-> 本步骤代码与步骤一使用同一份代码文件,使用如下命令开始裁剪训练
-```
-python mobilenetv2.py --model_dir output/mobilenetv2/best_model --sensitivities_file sensitivities.data --eval_metric_loss 0.10
-```
-
-## 实验效果
-本教程的实验效果可以查阅[模型压缩文档](../../slim/prune.md)
diff --git a/docs/tutorials/compress/detection.md b/docs/tutorials/compress/detection.md
deleted file mode 100755
index 514e19b5e9351b9c3a6debc059b7e521c2c40ba2..0000000000000000000000000000000000000000
--- a/docs/tutorials/compress/detection.md
+++ /dev/null
@@ -1,53 +0,0 @@
-# 检测模型裁剪
-
----
-本文档训练代码可直接在PaddleX的Repo中下载,[代码tutorials/compress/detection](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/compress/detection)
-本文档按如下方式对模型进行了裁剪
-> 第一步:在训练数据集上训练YOLOv3
-> 第二步:在验证数据集上计算模型中各个参数的敏感度信息
-> 第三步:根据第二步计算的敏感度,设定`eval_metric_loss`,对模型裁剪后重新在训练数据集上训练
-
-## 步骤一 训练YOLOv3
-> 模型训练使用文档可以直接参考[检测模型训练](../train/detection.md),本文档在该代码基础上添加了部分参数选项,用户可直接下载模型训练代码[tutorials/compress/detection/yolov3_mobilenet.py](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/compress/detection/yolov3_mobilenet.py)
-> 使用如下命令开始模型训练
-```
-python yolov3_mobilenet.py
-```
-
-## 步骤二 计算参数敏感度
-> 参数敏感度的计算可以直接使用PaddleX提供的API`paddlex.slim.cal_params_sensitivities`,使用代码如下, 敏感度信息文件会保存至`save_file`
-
-```
-import os
-# 选择使用0号卡
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-import paddlex as pdx
-
-# 模型所在路径、数据集所在路径与敏感度信息文件的保存路径
-model_dir = './output/yolov3_mobile/best_model'
-dataset = './insect_det'
-save_file = './sensitivities.data'
-
-model = pdx.load_model(model_dir)
-
-# 定义验证所用的数据集
-eval_dataset = pdx.datasets.VOCDetection(
-    data_dir=dataset,
-    file_list=os.path.join(dataset, 'val_list.txt'),
-    label_list=os.path.join(dataset, 'labels.txt'),
-    transforms=model.eval_transforms)
-
-pdx.slim.cal_params_sensitivities(model,
- save_file,
- eval_dataset,
- batch_size=8)
-```
-> 本步骤代码已整理至[tutorials/compress/detection/cal_sensitivities_file.py](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/compress/detection/cal_sensitivities_file.py),用户可直接下载使用
-> 使用如下命令开始计算敏感度
-```
-python cal_sensitivities_file.py --model_dir output/yolov3_mobile/best_model --dataset insect_det --save_file sensitivities.data
-```
-
-## 步骤三 开始裁剪训练
-> 本步骤代码与步骤一使用同一份代码文件,使用如下命令开始裁剪训练
-```
-python yolov3_mobilenet.py --model_dir output/yolov3_mobile/best_model --sensitivities_file sensitivities.data --eval_metric_loss 0.10
-```
-
-## 实验效果
-本教程的实验效果可以查阅[模型压缩文档](../../slim/prune.md)
diff --git a/docs/tutorials/compress/index.rst b/docs/tutorials/compress/index.rst
deleted file mode 100755
index 3e0dcd752cdf09b93b0beda01b33b77a060c0711..0000000000000000000000000000000000000000
--- a/docs/tutorials/compress/index.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-模型压缩
-=========================
-
-.. toctree::
- :maxdepth: 1
-
- classification.md
- detection.md
- segmentation.md
-
diff --git a/docs/tutorials/compress/segmentation.md b/docs/tutorials/compress/segmentation.md
deleted file mode 100755
index c19a14bb5eceace97454024e8212b7a7e675436e..0000000000000000000000000000000000000000
--- a/docs/tutorials/compress/segmentation.md
+++ /dev/null
@@ -1,53 +0,0 @@
-# 分割模型裁剪
-
----
-本文档训练代码可直接在PaddleX的Repo中下载,[代码tutorials/compress/segmentation](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/compress/segmentation)
-本文档按如下方式对模型进行了裁剪
-> 第一步:在训练数据集上训练UNet
-> 第二步:在验证数据集上计算模型中各个参数的敏感度信息
-> 第三步:根据第二步计算的敏感度,设定`eval_metric_loss`,对模型裁剪后重新在训练数据集上训练
-
-## 步骤一 训练UNet
-> 模型训练使用文档可以直接参考[分割模型训练](../train/segmentation.md),本文档在该代码基础上添加了部分参数选项,用户可直接下载模型训练代码[tutorials/compress/segmentation/unet.py](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/compress/segmentation/unet.py)
-> 使用如下命令开始模型训练
-```
-python unet.py
-```
-
-## 步骤二 计算参数敏感度
-> 参数敏感度的计算可以直接使用PaddleX提供的API`paddlex.slim.cal_params_sensitivities`,使用代码如下, 敏感度信息文件会保存至`save_file`
-
-```
-import os
-# 选择使用0号卡
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-import paddlex as pdx
-
-# 模型所在路径、数据集所在路径与敏感度信息文件的保存路径
-model_dir = './output/unet/best_model'
-dataset = './optic_disc_seg'
-save_file = './sensitivities.data'
-
-model = pdx.load_model(model_dir)
-
-# 定义验证所用的数据集
-eval_dataset = pdx.datasets.SegDataset(
-    data_dir=dataset,
-    file_list=os.path.join(dataset, 'val_list.txt'),
-    label_list=os.path.join(dataset, 'labels.txt'),
-    transforms=model.eval_transforms)
-
-pdx.slim.cal_params_sensitivities(model,
- save_file,
- eval_dataset,
- batch_size=8)
-```
-> 本步骤代码已整理至[tutorials/compress/segmentation/cal_sensitivities_file.py](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/compress/segmentation/cal_sensitivities_file.py),用户可直接下载使用
-> 使用如下命令开始计算敏感度
-```
-python cal_sensitivities_file.py --model_dir output/unet/best_model --dataset optic_disc_seg --save_file sensitivities.data
-```
-
-## 步骤三 开始裁剪训练
-> 本步骤代码与步骤一使用同一份代码文件,使用如下命令开始裁剪训练
-```
-python unet.py --model_dir output/unet/best_model --sensitivities_file sensitivities.data --eval_metric_loss 0.10
-```
-
-## 实验效果
-本教程的实验效果可以查阅[模型压缩文档](../../slim/prune.md)
diff --git a/docs/tutorials/compress/slim/index.rst b/docs/tutorials/compress/slim/index.rst
deleted file mode 100755
index 48a16f6e08f3f80a7048d1666719b9b08e150362..0000000000000000000000000000000000000000
--- a/docs/tutorials/compress/slim/index.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-模型压缩
-============================
-
-.. toctree::
- :maxdepth: 2
-
- prune.md
- quant.md
diff --git a/docs/tutorials/compress/slim/prune.md b/docs/tutorials/compress/slim/prune.md
deleted file mode 100755
index c1ff51e5e08c2ce8da5e2042d0a1c359a9e64dff..0000000000000000000000000000000000000000
--- a/docs/tutorials/compress/slim/prune.md
+++ /dev/null
@@ -1,54 +0,0 @@
-# 模型裁剪
-
-## 原理介绍
-
-模型裁剪是一种减小模型体积、降低模型计算复杂度的常用方式,可以加快模型部署后的预测速度。它通过裁剪卷积层中Kernel输出通道的大小及其关联层参数大小来实现,关联裁剪的原理可参见[PaddleSlim相关文档](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#id16)。**一般而言,在同等模型精度前提下,数据复杂度越低,模型可以被裁剪的比例就越高**。
-
-## 裁剪方法
-PaddleX提供了两种方式:
-
-**1.用户自行计算裁剪配置(推荐),整体流程包含三个步骤,**
-> **第一步**: 使用数据集训练原始模型
-> **第二步**:利用第一步训练好的模型,在验证数据集上计算模型中各个参数的敏感度,并将敏感度信息存储至本地文件
-> **第三步**:使用数据集训练裁剪模型(与第一步差异在于需要在`train`接口中,将第二步计算得到的敏感度信息文件传给接口的`sensitivities_file`参数)
-
-> 在如上三个步骤中,**相当于模型共需要训练两遍**,分别对应第一步和第三步,但其中第三步训练的是裁剪后的模型,因此训练速度较第一步会更快。
-> 第二步会遍历模型中的部分裁剪参数,分别计算各个参数裁剪后对于模型在验证集上效果的影响,**因此会反复在验证集上评估多次**。
-
-**2.使用PaddleX内置的裁剪方案**
-> PaddleX内置的模型裁剪方案是**基于标准数据集**上计算得到的参数敏感度信息,由于不同数据集特征分布会有较大差异,所以该方案相较于第1种方案训练得到的模型**精度一般而言会更低**(**且用户自定义数据集与标准数据集特征分布差异越大,导致训练的模型精度会越低**),仅在用户想节省时间的前提下可以参考使用,使用方式只需一步,
-
-> **一步**: 使用数据集训练裁剪模型,在训练调用`train`接口时,将接口中的`sensitivities_file`参数设置为'DEFAULT'字符串
-
-> 注:各模型内置的裁剪方案分别依据的数据集为: 图像分类——ImageNet数据集、目标检测——PascalVOC数据集、语义分割——Cityscapes数据集
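-
-方案2的调用方式示意如下(模型与数据集的定义与常规训练一致,此处仅示意`train`接口的相关参数设置):
-
-```
-model.train(
-    num_epochs=10,
-    train_dataset=train_dataset,
-    train_batch_size=32,
-    eval_dataset=eval_dataset,
-    learning_rate=0.025,
-    save_dir='output/mobilenetv2_prune',
-    # 设为'DEFAULT'即使用内置裁剪方案
-    sensitivities_file='DEFAULT',
-    eval_metric_loss=0.05)
-```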
-
-## 裁剪实验
-基于上述两种方案,我们在PaddleX上使用样例数据进行了实验,在Tesla P40上实验指标如下所示,
-
-### 图像分类
-实验背景:使用MobileNetV2模型,数据集为蔬菜分类示例数据,见[使用教程-模型压缩-图像分类](../classification.md)
-
-| 模型 | 裁剪情况 | 模型大小 | Top1准确率(%) |GPU预测速度 | CPU预测速度 |
-| :-----| :--------| :-------- | :---------- |:---------- |:----------|
-|MobileNetV2 | 无裁剪(原模型)| 13.0M | 97.50|6.47ms |47.44ms |
-|MobileNetV2 | 方案一(eval_metric_loss=0.10) | 2.1M | 99.58 |5.03ms |20.22ms |
-|MobileNetV2 | 方案二(eval_metric_loss=0.10) | 6.0M | 99.58 |5.42ms |29.06ms |
-
-### 目标检测
-实验背景:使用YOLOv3-MobileNetV1模型,数据集为昆虫检测示例数据,见[使用教程-模型压缩-目标检测](../detection.md)
-
-
-| 模型 | 裁剪情况 | 模型大小 | MAP(%) |GPU预测速度 | CPU预测速度 |
-| :-----| :--------| :-------- | :---------- |:---------- | :---------|
-|YOLOv3-MobileNetV1 | 无裁剪(原模型)| 139M | 67.57| 14.88ms |976.42ms |
-|YOLOv3-MobileNetV1 | 方案一(eval_metric_loss=0.10) | 34M | 75.49 |10.60ms |558.49ms |
-|YOLOv3-MobileNetV1 | 方案二(eval_metric_loss=0.05) | 29M | 50.27| 9.43ms |360.46ms |
-
-### 语义分割
-实验背景:使用UNet模型,数据集为视盘分割示例数据,见[使用教程-模型压缩-语义分割](../segmentation.md)
-
-| 模型 | 裁剪情况 | 模型大小 | mIOU(%) |GPU预测速度 | CPU预测速度 |
-| :-----| :--------| :-------- | :---------- |:---------- | :---------|
-|UNet | 无裁剪(原模型)| 77M | 91.22 |33.28ms |9523.55ms |
-|UNet | 方案一(eval_metric_loss=0.10) |26M | 90.37 |21.04ms |3936.20ms |
-|UNet | 方案二(eval_metric_loss=0.10) |23M | 91.21 |18.61ms |3447.75ms |
diff --git a/docs/tutorials/compress/slim/quant.md b/docs/tutorials/compress/slim/quant.md
deleted file mode 100755
index 1686a9fb8d33e770d55a378ebdf76876058514fb..0000000000000000000000000000000000000000
--- a/docs/tutorials/compress/slim/quant.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# 模型量化
-
-## 原理介绍
-为了满足低内存带宽、低功耗、低计算资源占用以及低模型存储开销等需求,定点量化被提出。为此我们提供了训练后量化,该方法使用KL散度确定量化比例因子,将FP32模型转成INT8模型,且不需要重新训练,可以快速得到量化模型。
-
-
-## 使用PaddleX量化模型
-PaddleX提供了`export_quant_model`接口,用户可通过该接口以post_quantization方式完成模型量化并导出。点击查看[量化接口使用文档](../../../apis/slim.md)。
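-
-该接口的调用方式示意如下(数据集与参数取值仅为示例,具体以上述接口文档为准):
-
-```
-import paddlex as pdx
-
-model = pdx.load_model('./output/mobilenetv2/best_model')
-# 定义用于确定量化比例因子的校准数据集,一般可直接使用验证集
-quant_dataset = pdx.datasets.ImageNet(
-    data_dir='./vegetables_cls',
-    file_list='./vegetables_cls/val_list.txt',
-    label_list='./vegetables_cls/labels.txt',
-    transforms=model.eval_transforms)
-pdx.slim.export_quant_model(
-    model, quant_dataset, batch_size=2, batch_num=10,
-    save_dir='./quant_model', cache_dir='./temp')
-```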
-
-## 量化性能对比
-模型量化后的性能对比指标请查阅[PaddleSlim模型库](https://paddlepaddle.github.io/PaddleSlim/model_zoo.html)
diff --git a/docs/tutorials/dataset_prepare.md b/docs/tutorials/dataset_prepare.md
deleted file mode 100644
index 95d1094c216857d4dc708cf39be74dca98d78f59..0000000000000000000000000000000000000000
--- a/docs/tutorials/dataset_prepare.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# 数据准备
-
-该部分内容已迁移至[附录](../appendix/datasets.md)
diff --git a/docs/tutorials/datasets.md b/docs/tutorials/datasets.md
deleted file mode 100755
index 8264d06a91ba1125036d4ab44f1fc06fe11d3049..0000000000000000000000000000000000000000
--- a/docs/tutorials/datasets.md
+++ /dev/null
@@ -1,366 +0,0 @@
-# 数据集格式说明
-
----
-## 图像分类ImageNet
-
-图像分类ImageNet数据集包含对应多个标签的图像文件夹、标签文件及图像列表文件。
-参考数据文件结构如下:
-```
-./dataset/ # 数据集根目录
-|--labelA # 标签为labelA的图像目录
-| |--a1.jpg
-| |--...
-| └--...
-|
-|--...
-|
-|--labelZ # 标签为labelZ的图像目录
-| |--z1.jpg
-| |--...
-| └--...
-|
-|--train_list.txt # 训练文件列表文件
-|
-|--val_list.txt # 验证文件列表文件
-|
-└--labels.txt # 标签列表文件
-
-```
-其中,相应的文件名可根据需要自行定义。
-
-`train_list.txt`和`val_list.txt`文本以空格为分隔符分为两列,第一列为图像文件相对于dataset的相对路径,第二列为图像文件对应的标签id(从0开始)。如下所示:
-```
-labelA/a1.jpg 0
-labelZ/z1.jpg 25
-...
-```
-
-`labels.txt`: 每一行为一个单独的类别,相应的行号即为类别对应的id(行号从0开始),如下所示:
-```
-labelA
-labelB
-...
-```
-[点击这里](https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz),下载蔬菜分类数据集。
-在PaddleX中,使用`paddlex.cv.datasets.ImageNet`([API说明](./apis/datasets.html#imagenet))加载分类数据集。
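-
-加载方式示意如下(`./dataset`即上述结构中的数据集根目录,transforms可按需定义):
-
-```
-import paddlex as pdx
-from paddlex.cls import transforms
-
-train_transforms = transforms.Compose([
-    transforms.RandomCrop(crop_size=224), transforms.Normalize()])
-train_dataset = pdx.datasets.ImageNet(
-    data_dir='./dataset',
-    file_list='./dataset/train_list.txt',
-    label_list='./dataset/labels.txt',
-    transforms=train_transforms,
-    shuffle=True)
-```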
-
-## 目标检测VOC
-目标检测VOC数据集包含图像文件夹、标注信息文件夹、标签文件及图像列表文件。
-参考数据文件结构如下:
-```
-./dataset/ # 数据集根目录
-|--JPEGImages # 图像目录
-| |--xxx1.jpg
-| |--...
-| └--...
-|
-|--Annotations # 标注信息目录
-| |--xxx1.xml
-| |--...
-| └--...
-|
-|--train_list.txt # 训练文件列表文件
-|
-|--val_list.txt # 验证文件列表文件
-|
-└--labels.txt # 标签列表文件
-
-```
-其中,相应的文件名可根据需要自行定义。
-
-`train_list.txt`和`val_list.txt`文本以空格为分隔符分为两列,第一列为图像文件相对于dataset的相对路径,第二列为标注文件相对于dataset的相对路径。如下所示:
-```
-JPEGImages/xxx1.jpg Annotations/xxx1.xml
-JPEGImages/xxx2.jpg Annotations/xxx2.xml
-...
-```
-
-`labels.txt`: 每一行为一个单独的类别,相应的行号即为类别对应的id(行号从0开始),如下所示:
-```
-labelA
-labelB
-...
-```
-[点击这里](https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz),下载昆虫检测数据集。
-在PaddleX中,使用`paddlex.cv.datasets.VOCDetection`([API说明](./apis/datasets.html#vocdetection))加载目标检测VOC数据集。
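-
-加载方式示意如下(`./dataset`即上述结构中的数据集根目录,transforms可按需定义):
-
-```
-import paddlex as pdx
-from paddlex.det import transforms
-
-train_transforms = transforms.Compose([
-    transforms.RandomHorizontalFlip(), transforms.Normalize()])
-train_dataset = pdx.datasets.VOCDetection(
-    data_dir='./dataset',
-    file_list='./dataset/train_list.txt',
-    label_list='./dataset/labels.txt',
-    transforms=train_transforms,
-    shuffle=True)
-```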
-
-## 目标检测和实例分割COCO
-目标检测和实例分割COCO数据集包含图像文件夹及图像标注信息文件。
-参考数据文件结构如下:
-```
-./dataset/ # 数据集根目录
-|--JPEGImages # 图像目录
-| |--xxx1.jpg
-| |--...
-| └--...
-|
-|--train.json # 训练相关信息文件
-|
-└--val.json # 验证相关信息文件
-
-```
-其中,相应的文件名可根据需要自行定义。
-
-`train.json`和`val.json`存储与标注信息、图像文件相关的信息。如下所示:
-
-```
-{
- "annotations": [
- {
- "iscrowd": 0,
- "category_id": 1,
- "id": 1,
- "area": 33672.0,
- "image_id": 1,
- "bbox": [232, 32, 138, 244],
- "segmentation": [[32, 168, 365, 117, ...]]
- },
- ...
- ],
- "images": [
- {
- "file_name": "xxx1.jpg",
- "height": 512,
- "id": 267,
- "width": 612
- },
- ...
- ],
- "categories": [
- {
- "name": "labelA",
- "id": 1,
- "supercategory": "component"
- }
- ]
-}
-```
-其中,每个字段的含义如下所示:
-
-| 域名 | 字段名 | 含义 | 数据类型 | 备注 |
-|:-----|:--------|:------------|------|:-----|
-| annotations | id | 标注信息id | int | 从1开始 |
-| annotations | iscrowd | 标注框是否为一组对象 | int | 只有0、1两种取值 |
-| annotations | category_id | 标注框类别id | int | |
-| annotations | area | 标注框的面积 | float | |
-| annotations | image_id | 当前标注信息所在图像的id | int | |
-| annotations | bbox | 标注框坐标 | list | 长度为4,分别代表x,y,w,h |
-| annotations | segmentation | 标注区域坐标 | list | list中有至少1个list,每个list由每个小区域坐标点的横纵坐标(x,y)组成 |
-| images | id | 图像id | int | 从1开始 |
-| images | file_name | 图像文件名 | str | |
-| images | height | 图像高度 | int | |
-| images | width | 图像宽度 | int | |
-| categories | id | 类别id | int | 从1开始 |
-| categories | name | 类别标签名 | str | |
-| categories | supercategory | 类别父类的标签名 | str | |
-
-
-[点击这里](https://bj.bcebos.com/paddlex/datasets/garbage_ins_det.tar.gz),下载垃圾实例分割数据集。
-在PaddleX中,使用`paddlex.cv.datasets.CocoDetection`([API说明](./apis/datasets.html#cocodetection))加载COCO格式数据集。
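-
-加载方式示意如下(`./dataset`即上述结构中的数据集根目录):
-
-```
-import paddlex as pdx
-from paddlex.det import transforms
-
-train_transforms = transforms.Compose([
-    transforms.RandomHorizontalFlip(), transforms.Normalize()])
-train_dataset = pdx.datasets.CocoDetection(
-    data_dir='./dataset/JPEGImages',
-    ann_file='./dataset/train.json',
-    transforms=train_transforms,
-    shuffle=True)
-```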
-
-## 语义分割数据
-语义分割数据集包含原图、标注图及相应的文件列表文件。
-参考数据文件结构如下:
-```
-./dataset/ # 数据集根目录
-|--images # 原图目录
-| |--xxx1.png
-| |--...
-| └--...
-|
-|--annotations # 标注图目录
-| |--xxx1.png
-| |--...
-| └--...
-|
-|--train_list.txt # 训练文件列表文件
-|
-|--val_list.txt # 验证文件列表文件
-|
-└--labels.txt # 标签列表
-
-```
-其中,相应的文件名可根据需要自行定义。
-
-`train_list.txt`和`val_list.txt`文本以空格为分隔符分为两列,第一列为图像文件相对于dataset的相对路径,第二列为标注图像文件相对于dataset的相对路径。如下所示:
-```
-images/xxx1.png annotations/xxx1.png
-images/xxx2.png annotations/xxx2.png
-...
-```
-
-`labels.txt`: 每一行为一个单独的类别,相应的行号即为类别对应的id(行号从0开始),如下所示:
-```
-background
-labelA
-labelB
-...
-```
-
-标注图像为单通道图像,像素值即为对应的类别,像素标注类别需要从0开始递增(一般第一个类别为`background`),
-例如0,1,2,3表示有4种类别,标注类别最多为256类。其中可以指定特定的像素值用于表示该值的像素不参与训练和评估(默认为255)。
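-
-可用如下示意代码快速检查一张标注图是否符合上述约定:
-
-```
-import numpy as np
-from PIL import Image
-
-mask = np.asarray(Image.open('./dataset/annotations/xxx1.png'))
-print(mask.shape)       # 单通道标注图,形状应为(高, 宽)
-print(np.unique(mask))  # 像素值应为从0开始的类别id,255(默认)表示不参与训练和评估
-```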
-
-[点击这里](https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz),下载视盘语义分割数据集。
-在PaddleX中,使用`paddlex.cv.datasets.SegDataset`([API说明](./apis/datasets.html#segdataset))加载语义分割数据集。
-
-
-## 图像分类EasyDataCls
-
-图像分类EasyDataCls数据集包含存放图像和json文件的文件夹、标签文件及图像列表文件。
-参考数据文件结构如下:
-```
-./dataset/ # 数据集根目录
-|--easydata # 存放图像和json文件的文件夹
-| |--0001.jpg
-| |--0001.json
-| |--0002.jpg
-| |--0002.json
-| └--...
-|
-|--train_list.txt # 训练文件列表文件
-|
-|--val_list.txt # 验证文件列表文件
-|
-└--labels.txt # 标签列表文件
-
-```
-其中,图像文件名应与json文件名一一对应。
-
-每个json文件存储与`labels`相关的信息。如下所示:
-```
-{"labels": [{"name": "labelA"}]}
-```
-其中,`name`字段代表对应图像的类别。
-
-`train_list.txt`和`val_list.txt`文本以空格为分隔符分为两列,第一列为图像文件相对于dataset的相对路径,第二列为json文件相对于dataset的相对路径。如下所示:
-```
-easydata/0001.jpg easydata/0001.json
-easydata/0002.jpg easydata/0002.json
-...
-```
-
-`labels.txt`: 每一行为一个单独的类别,相应的行号即为类别对应的id(行号从0开始),如下所示:
-```
-labelA
-labelB
-...
-```
-[点击这里](https://ai.baidu.com/easydata/),可使用EasyData平台标注生成EasyDataCls格式的图像分类数据集。
-在PaddleX中,使用`paddlex.cv.datasets.EasyDataCls`([API说明](./apis/datasets.html#easydatacls))加载分类数据集。
-
-
-## 目标检测和实例分割EasyDataDet
-
-目标检测和实例分割EasyDataDet数据集包含存放图像和json文件的文件夹、标签文件及图像列表文件。
-参考数据文件结构如下:
-```
-./dataset/ # 数据集根目录
-|--easydata # 存放图像和json文件的文件夹
-| |--0001.jpg
-| |--0001.json
-| |--0002.jpg
-| |--0002.json
-| └--...
-|
-|--train_list.txt # 训练文件列表文件
-|
-|--val_list.txt # 验证文件列表文件
-|
-└--labels.txt # 标签列表文件
-
-```
-其中,图像文件名应与json文件名一一对应。
-
-每个json文件存储与`labels`相关的信息。如下所示:
-```
-"labels": [{"y1": 18, "x2": 883, "x1": 371, "y2": 404, "name": "labelA",
- "mask": "kVfc0`0Zg0 PaddleX --> Inference Model --> PaddleLite Opt --> PaddleLite Inference
-
-以下介绍如何将PaddleX导出为inference model,然后使用PaddleLite的OPT模块对模型进行优化:
-
-step 1: 安装PaddleLite
-
-```
-pip install paddlelite
-```
-
-step 2: 将PaddleX模型导出为inference模型
-
-参考[导出inference模型](deploy_server/deploy_python.html#inference)将模型导出为inference格式模型。
-**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](./upgrade_version.md)对模型版本进行升级。**
-
-step 3: 将inference模型转换成PaddleLite模型
-
-```
-python /path/to/PaddleX/deploy/lite/export_lite.py --model_dir /path/to/inference_model --save_file /path/to/lite_model --place place/to/run
-
-```
-
-| 参数 | 说明 |
-| ---- | ---- |
-| model_dir | 预测模型所在路径,包含"__model__", "__params__"文件 |
-| save_file | 模型输出的名称,默认为"paddlex.nb" |
-| place | 运行的平台,可选:arm、opencl、x86、npu、xpu、rknpu、apu |
-
-
-step 4: 预测
-
-Lite模型预测正在集成中,即将开源...
diff --git a/docs/tutorials/deploy/deploy_openvino.md b/docs/tutorials/deploy/deploy_openvino.md
deleted file mode 100644
index 7011ca2cd78a366cb5aaebbe8e95fc18101a6e4b..0000000000000000000000000000000000000000
--- a/docs/tutorials/deploy/deploy_openvino.md
+++ /dev/null
@@ -1,136 +0,0 @@
-# OpenVINO部署
-
-## 方案简介
-OpenVINO部署方案位于目录`PaddleX/deploy/openvino/`下,且独立于PaddleX其他模块,该方案目前支持在 **Linux** 完成编译和部署运行。
-
-PaddleX到OpenVINO的部署流程如下:
-
-> PaddleX --> ONNX --> OpenVINO IR --> OpenVINO Inference Engine
-
-|目前支持OpenVINO部署的PaddleX模型|
-|-----|
-|ResNet18|
-|ResNet34|
-|ResNet50|
-|ResNet101|
-|ResNet50_vd|
-|ResNet101_vd|
-|ResNet50_vd_ssld|
-|ResNet101_vd_ssld|
-|DarkNet53|
-|MobileNetV1|
-|MobileNetV2|
-|DenseNet121|
-|DenseNet161|
-|DenseNet201|
-
-## 部署流程
-
-### 说明
-本文档在`Ubuntu`上使用`GCC 4.8.5`进行了验证,如果需要使用其他GCC版本或在其他平台上编译OpenVINO,请参考:[OpenVINO](https://github.com/openvinotoolkit/openvino/blob/2020/build-instruction.md)。
-
-
-### 验证环境
-* Ubuntu* 16.04 (64-bit) with GCC* 4.8.5
-* CMake 3.12
-* Python 2.7 or higher
-
-请确保系统已经安装好上述基本软件,**下面所有示例以工作目录 `/root/projects/`演示**。
-
-```
- git clone https://github.com/PaddlePaddle/PaddleX.git
-```
-
-**说明**:其中`C++`预测代码在`/root/projects/PaddleX/deploy/openvino` 目录,该目录不依赖任何`PaddleX`下其他目录。
-
-### Step1: 软件依赖
-
-- openvino:
-[编译文档](https://github.com/openvinotoolkit/openvino/blob/2020/build-instruction.md#build-steps)
-
-- gflags:
-[编译文档](https://gflags.github.io/gflags/#download)
-
-- opencv:
-[编译文档](https://docs.opencv.org/master/d7/d9f/tutorial_linux_install.html)
-说明:/root/projects/PaddleX/deploy/openvino/scripts/bootstrap.sh提供了预编译版本下载,也可自行编译。
-
-- ngraph:
-说明:openvino编译的过程中会生成ngraph的lib文件,位于{openvino根目录}/bin/intel64/Release/lib/下。
-
-### Step2: 编译
-
-
-编译`cmake`的命令在`scripts/build.sh`中,请根据Step1中编译软件的实际情况修改主要参数,其主要内容说明如下:
-```
-# openvino预编译库的路径
-OPENVINO_DIR=/path/to/inference_engine/
-# gflags预编译库的路径
-GFLAGS_DIR=/path/to/gflags
-# ngraph lib的路径,编译openvino时通常会生成
-NGRAPH_LIB=/path/to/ngraph/lib/
-# opencv预编译库的路径, 如果使用自带预编译版本可不修改
-OPENCV_DIR=$(pwd)/deps/opencv3gcc4.8/
-# 下载自带预编译版本
-sh $(pwd)/scripts/bootstrap.sh
-rm -rf build
-mkdir -p build
-cd build
-cmake .. \
- -DOPENCV_DIR=${OPENCV_DIR} \
- -DGFLAGS_DIR=${GFLAGS_DIR} \
- -DOPENVINO_DIR=${OPENVINO_DIR} \
- -DNGRAPH_LIB=${NGRAPH_LIB}
-make
-```
-
-修改脚本设置好主要参数后,执行`build`脚本:
- ```shell
- sh ./scripts/build.sh
- ```
-
-### Step3: 模型转换
-
-将PaddleX模型转换成ONNX模型:
-
-```
-paddlex --export_onnx --model_dir=/path/to/xiaoduxiong_epoch_12 --save_dir=/path/to/onnx_model
-```
-
-将生成的onnx模型转换为OpenVINO支持的格式,请参考:[Model Optimizer文档](https://docs.openvinotoolkit.org/latest/_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html)
-
-### Step4: 预测
-
-编译成功后,分类任务的预测可执行程序为`classifier`,其主要命令参数说明如下:
-
-| 参数 | 说明 |
-| ---- | ---- |
-| --model_dir | Model Optimizer生成的.xml文件路径,请保证Model Optimizer生成的三个文件在同一路径下|
-| --image | 要预测的图片文件路径 |
-| --image_list | 按行存储图片路径的.txt文件 |
-| --device | 运行的平台, 默认值为"CPU" |
-
-#### 样例
-
-`样例一`:
-
-测试图片 `/path/to/xiaoduxiong.jpeg`
-
-```shell
-./build/classifier --model_dir=/path/to/openvino_model --image=/path/to/xiaoduxiong.jpeg
-```
-
-
-`样例二`:
-
-预测多个图片`/path/to/image_list.txt`,image_list.txt内容的格式如下:
-```
-/path/to/images/xiaoduxiong1.jpeg
-/path/to/images/xiaoduxiong2.jpeg
-...
-/path/to/images/xiaoduxiongn.jpeg
-```
-
-```shell
-./build/classifier --model_dir=/path/to/models/openvino_model --image_list=/root/projects/images_list.txt
-```
diff --git a/docs/tutorials/deploy/deploy_server/deploy_cpp/index.rst b/docs/tutorials/deploy/deploy_server/deploy_cpp/index.rst
deleted file mode 100644
index 36a066b6eb269a00be34f8401e1083b9f61a91bd..0000000000000000000000000000000000000000
--- a/docs/tutorials/deploy/deploy_server/deploy_cpp/index.rst
+++ /dev/null
@@ -1,11 +0,0 @@
-C++部署
-==============
-
-
-C++部署方案位于目录PaddleX/deploy/cpp/下,且独立于PaddleX其他模块。该方案支持在 Windows 和 Linux 完成编译、二次开发集成和部署运行,支持在Linux上完成加密部署。
-
-.. toctree::
- :maxdepth: 1
-
- deploy_cpp_win_vs2019.md
- deploy_cpp_linux.md
diff --git a/docs/tutorials/deploy/deploy_server/deploy_python.md b/docs/tutorials/deploy/deploy_server/deploy_python.md
deleted file mode 100644
index 321d48077fd0478234e8ce6386c7355c36d1c63c..0000000000000000000000000000000000000000
--- a/docs/tutorials/deploy/deploy_server/deploy_python.md
+++ /dev/null
@@ -1,57 +0,0 @@
-# Python部署
-PaddleX已经集成了基于Python的高性能预测接口,在安装PaddleX后,可参照如下代码示例,进行预测。相关的接口文档可参考[paddlex.deploy](../../../apis/deploy.md)
-
-## 导出inference模型
-
-在服务端部署的模型需要首先将模型导出为inference格式模型,导出的模型将包括`__model__`、`__params__`和`model.yml`三个文件,分别为模型的网络结构、模型权重和模型的配置文件(包括数据预处理参数等)。在安装完PaddleX后,在命令行终端使用如下命令导出模型到当前目录`inference_model`下。
-> 可直接下载小度熊分拣模型测试本文档的流程[xiaoduxiong_epoch_12.tar.gz](https://bj.bcebos.com/paddlex/models/xiaoduxiong_epoch_12.tar.gz)
-
-```
-paddlex --export_inference --model_dir=./xiaoduxiong_epoch_12 --save_dir=./inference_model
-```
-
-使用TensorRT预测时,需指定模型的图像输入shape:[w,h]。
-**注**:
-- 分类模型请保持于训练时输入的shape一致。
-- 指定[w,h]时,w和h之间用逗号隔开,不允许存在空格等其他字符
-
-```
-paddlex --export_inference --model_dir=./xiaoduxiong_epoch_12 --save_dir=./inference_model --fixed_input_shape=[640,960]
-```
-
-## 预测部署
-**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](../upgrade_version.md)对模型版本进行升级。**
-
-> 点击下载测试图片 [xiaoduxiong_test_image.tar.gz](https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_test_image.tar.gz)
-
-```
-import paddlex as pdx
-predictor = pdx.deploy.Predictor('./inference_model')
-result = predictor.predict(image='xiaoduxiong_test_image/JPEGImages/WeChatIMG110.jpeg')
-```
-
-## 预测性能对比
-### 测试环境
-
-- CUDA 9.0
-- CUDNN 7.5
-- PaddlePaddle 1.7.1
-- GPU: Tesla P40
-- AnalysisPredictor 指采用Python的高性能预测方式
-- Executor 指采用paddlepaddle普通的python预测方式
-- Batch Size均为1,耗时单位为ms/image,只计算模型运行时间,不包括数据的预处理和后处理
-
-### 性能对比
-
-
-| 模型 | AnalysisPredictor耗时 | Executor耗时 | 输入图像大小 |
-| :---- | :--------------------- | :------------ | :------------ |
-| resnet50 | 4.84 | 7.57 | 224*224 |
-| mobilenet_v2 | 3.27 | 5.76 | 224*224 |
-| unet | 22.51 | 34.60 |513*513 |
-| deeplab_mobile | 63.44 | 358.31 |1025*2049 |
-| yolo_mobilenetv2 | 15.20 | 19.54 | 608*608 |
-| faster_rcnn_r50_fpn_1x | 50.05 | 69.58 |800*1088 |
-| faster_rcnn_r50_1x | 326.11 | 347.22 | 800*1067 |
-| mask_rcnn_r50_fpn_1x | 67.49 | 91.02 | 800*1088 |
-| mask_rcnn_r50_1x | 326.11 | 350.94 | 800*1067 |
diff --git a/docs/tutorials/deploy/deploy_server/index.rst b/docs/tutorials/deploy/deploy_server/index.rst
deleted file mode 100644
index e9635c0532bc56f3073a78f615c21019a699c68f..0000000000000000000000000000000000000000
--- a/docs/tutorials/deploy/deploy_server/index.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-服务端部署
-==============
-
-.. toctree::
- :maxdepth: 2
-
- deploy_python.md
- deploy_cpp/index.rst
- encryption.md
diff --git a/docs/tutorials/deploy/index.rst b/docs/tutorials/deploy/index.rst
deleted file mode 100644
index ebeb36ffb9227ff610f1f82ab9eccb16b82a89f8..0000000000000000000000000000000000000000
--- a/docs/tutorials/deploy/index.rst
+++ /dev/null
@@ -1,13 +0,0 @@
-多端安全部署
-==============
-
-本文档指引用户如何采用更高性能的方式来部署使用PaddleX训练的模型。本文档的模型部署采用Paddle Inference高性能部署方式,在模型运算过程中对计算图进行优化,同时减少内存操作,具体各模型的性能对比见服务端Python部署的预测性能对比章节。
-
-同时结合产业实践开发者对模型知识产权的保护需求,提供了轻量级模型加密部署的方案,提升深度学习模型部署的安全性。
-
-.. toctree::
- :maxdepth: 2
-
- deploy_server/index.rst
- deploy_openvino.md
- deploy_lite.md
diff --git a/docs/tutorials/index.rst b/docs/tutorials/index.rst
deleted file mode 100755
index b05bfae7f517a9ec64d82a590dc2bb5ff2404dfb..0000000000000000000000000000000000000000
--- a/docs/tutorials/index.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-PaddleX全流程开发教程
-=========================
-
-.. toctree::
- :maxdepth: 1
-
- dataset_prepare.md
- train/index.rst
- compress/index.rst
- deploy/index.rst
diff --git a/docs/tutorials/train/classification.md b/docs/tutorials/train/classification.md
deleted file mode 100755
index 0fe98608ca8fa31a663519f8ac8c64dd2225e61b..0000000000000000000000000000000000000000
--- a/docs/tutorials/train/classification.md
+++ /dev/null
@@ -1,108 +0,0 @@
-# 训练图像分类模型
-
----
-本文档训练代码可参考PaddleX的[代码tutorial/train/classification/mobilenetv2.py](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/classification/mobilenetv2.py)
-
-**1.下载并解压训练所需的数据集**
-
-> 使用1张显卡训练并指定使用0号卡。
-
-```python
-import os
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-import paddlex as pdx
-```
-
-> 这里使用蔬菜数据集,训练集、验证集和测试集共包含6189个样本,18个类别。
-
-```python
-veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
-pdx.utils.download_and_decompress(veg_dataset, path='./')
-```
-
-**2.定义训练和验证过程中的数据处理和增强操作**
-> transforms用于指定训练和验证过程中的数据处理和增强操作流程,如下代码在训练过程中使用了`RandomCrop`和`RandomHorizontalFlip`进行数据增强,transforms的使用见[paddlex.cls.transforms](../../apis/transforms/cls_transforms.html#paddlex-cls-transforms)
-
-```python
-from paddlex.cls import transforms
-train_transforms = transforms.Compose([
- transforms.RandomCrop(crop_size=224),
- transforms.RandomHorizontalFlip(),
- transforms.Normalize()
-])
-eval_transforms = transforms.Compose([
- transforms.ResizeByShort(short_size=256),
- transforms.CenterCrop(crop_size=224),
- transforms.Normalize()
-])
-```
-
-**3.创建数据集读取器,并绑定相应的数据预处理流程**
-> 通过不同的数据集读取器可以加载不同格式的数据集,数据集API的介绍见文档[paddlex.datasets](../../apis/datasets.md)
-
-```python
-train_dataset = pdx.datasets.ImageNet(
- data_dir='vegetables_cls',
- file_list='vegetables_cls/train_list.txt',
- label_list='vegetables_cls/labels.txt',
- transforms=train_transforms,
- shuffle=True)
-eval_dataset = pdx.datasets.ImageNet(
- data_dir='vegetables_cls',
- file_list='vegetables_cls/val_list.txt',
- label_list='vegetables_cls/labels.txt',
- transforms=eval_transforms)
-```
-
-**4.创建模型进行训练**
-> 模型训练会默认自动下载和使用imagenet图像数据集上的预训练模型,用户也可自行指定`pretrain_weights`参数来设置预训练权重。模型训练过程每间隔`save_interval_epochs`轮会保存一次模型在`save_dir`目录下,同时在保存的过程中也会在验证数据集上计算相关指标。
-
-> 分类模型的接口可见文档[paddlex.cls.models](../../apis/models.md)
-
-```python
-model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels))
-model.train(
- num_epochs=10,
- train_dataset=train_dataset,
- train_batch_size=32,
- eval_dataset=eval_dataset,
- lr_decay_epochs=[4, 6, 8],
- learning_rate=0.025,
- save_dir='output/mobilenetv2',
- use_vdl=True)
-```
-
-> 将`use_vdl`设置为`True`时可使用VisualDL查看训练指标。按以下方式启动VisualDL后,浏览器打开 http://0.0.0.0:8001即可。其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP。
-
-```shell
-visualdl --logdir output/mobilenetv2/vdl_log --port 8001
-```
-
-**5.验证或测试**
-> 利用训练完的模型可继续在验证集上进行验证。
-
-```python
-eval_metrics = model.evaluate(eval_dataset, batch_size=8)
-print("eval_metrics:", eval_metrics)
-```
-
-> 结果输出:
-```
-eval_metrics: OrderedDict([('acc1', 0.9895916733386709), ('acc5', 0.9983987189751802)])
-```
-
-> 训练完用模型对图片进行测试。
-
-```python
-predict_result = model.predict('./vegetables_cls/bocai/IMG_00000839.jpg', topk=5)
-print("predict_result:", predict_result)
-```
-
-> 结果输出:
-```
-predict_result: [{'category_id': 13, 'category': 'bocai', 'score': 0.8607276},
- {'category_id': 11, 'category': 'kongxincai', 'score': 0.06386806},
- {'category_id': 2, 'category': 'suanmiao', 'score': 0.03736042},
- {'category_id': 12, 'category': 'heiqiezi', 'score': 0.007879922},
- {'category_id': 17, 'category': 'huluobo', 'score': 0.006327283}]
-```
diff --git a/docs/tutorials/train/detection.md b/docs/tutorials/train/detection.md
deleted file mode 100755
index eefff4c075d7aba5e05de032ec3c3d73d253ac65..0000000000000000000000000000000000000000
--- a/docs/tutorials/train/detection.md
+++ /dev/null
@@ -1,119 +0,0 @@
-# 训练目标检测模型
-
-------
-
-更多检测模型在VOC数据集或COCO数据集上的训练代码可参考[代码tutorials/train/detection/faster_rcnn_r50_fpn.py](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/faster_rcnn_r50_fpn.py)、[代码tutorials/train/detection/yolov3_darknet53.py](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_darknet53.py)。
-
-**1.下载并解压训练所需的数据集**
-
-> 使用1张显卡训练并指定使用0号卡。
-
-```python
-import os
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-import paddlex as pdx
-```
-
-> 这里使用昆虫数据集,训练集、验证集和测试集共包含217个样本,6个类别。
-
-```python
-insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
-pdx.utils.download_and_decompress(insect_dataset, path='./')
-```
-
-**2.定义训练和验证过程中的数据处理和增强操作**
-
-> 在训练过程中使用`RandomHorizontalFlip`进行数据增强,由于接下来选择的模型是带FPN结构的Faster RCNN,所以使用`Padding`将输入图像的尺寸补齐到32的倍数,以保证FPN中两个需做相加操作的特征层的尺寸完全相同。transforms的使用见[paddlex.det.transforms](../../apis/transforms/det_transforms.md)
-
-```python
-from paddlex.det import transforms
-train_transforms = transforms.Compose([
- transforms.RandomHorizontalFlip(),
- transforms.Normalize(),
- transforms.ResizeByShort(short_size=800, max_size=1333),
- transforms.Padding(coarsest_stride=32)
-])
-
-eval_transforms = transforms.Compose([
- transforms.Normalize(),
- transforms.ResizeByShort(short_size=800, max_size=1333),
- transforms.Padding(coarsest_stride=32),
-])
-```
-
-**3.创建数据集读取器,并绑定相应的数据预处理流程**
-
-> 数据集读取器的介绍见文档[paddlex.datasets](../../apis/datasets.md)
-
-```python
-train_dataset = pdx.datasets.VOCDetection(
- data_dir='insect_det',
- file_list='insect_det/train_list.txt',
- label_list='insect_det/labels.txt',
- transforms=train_transforms,
- shuffle=True)
-eval_dataset = pdx.datasets.VOCDetection(
- data_dir='insect_det',
- file_list='insect_det/val_list.txt',
- label_list='insect_det/labels.txt',
- transforms=eval_transforms)
-```
-
-**4.创建Faster RCNN模型,并进行训练**
-
-> 创建带FPN结构的Faster RCNN模型,`num_classes` 需要设置为包含背景类的类别数,即: 目标类别数量(6) + 1
-
-```python
-num_classes = len(train_dataset.labels) + 1
-model = pdx.det.FasterRCNN(num_classes=num_classes)
-```
-
-> 模型训练默认下载并使用在ImageNet数据集上训练得到的Backone,用户也可自行指定`pretrain_weights`参数来设置预训练权重。训练过程每间隔`save_interval_epochs`会在`save_dir`保存一次模型,与此同时也会在验证数据集上计算指标。检测模型的接口可见文档[paddlex.cv.models](../../apis/models.md#fasterrcnn)
-
-```python
-model.train(
- num_epochs=12,
- train_dataset=train_dataset,
- train_batch_size=2,
- eval_dataset=eval_dataset,
- learning_rate=0.0025,
- lr_decay_epochs=[8, 11],
- save_dir='output/faster_rcnn_r50_fpn',
- use_vdl=True)
-```
-
-> 将`use_vdl`设置为`True`时可使用VisualDL查看训练指标。按以下方式启动VisualDL后,浏览器打开 http://0.0.0.0:8001即可。其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP。
-
-```shell
-visualdl --logdir output/faster_rcnn_r50_fpn/vdl_log --port 8001
-```
-
-**5.验证或测试**
-
-> 训练完利用模型可继续在验证集上进行验证。
-
-```python
-eval_metrics = model.evaluate(eval_dataset, batch_size=2)
-print("eval_metrics:", eval_metrics)
-```
-
-> 结果输出:
-
-```python
-eval_metrics: {'bbox_map': 76.085371}
-
-```
-
-> 训练完用模型对图片进行测试。
-
-```python
-predict_result = model.predict('./insect_det/JPEGImages/1968.jpg')
-```
-
-> 可视化测试结果:
-
-```python
-pdx.det.visualize('./insect_det/JPEGImages/1968.jpg', predict_result, threshold=0.5, save_dir='./output/faster_rcnn_r50_fpn')
-```
-
-
diff --git a/docs/tutorials/train/index.rst b/docs/tutorials/train/index.rst
deleted file mode 100755
index 3ba3b5498336d88a2bd573d1f5b16c33979b8e88..0000000000000000000000000000000000000000
--- a/docs/tutorials/train/index.rst
+++ /dev/null
@@ -1,11 +0,0 @@
-模型训练
-=========================
-
-.. toctree::
- :maxdepth: 1
-
- classification.md
- detection.md
- instance_segmentation.md
- segmentation.md
- visualdl.md
diff --git a/docs/tutorials/train/instance_segmentation.md b/docs/tutorials/train/instance_segmentation.md
deleted file mode 100755
index b096cd56cddf7f8626a652fe83ca1f6b2f92ea9c..0000000000000000000000000000000000000000
--- a/docs/tutorials/train/instance_segmentation.md
+++ /dev/null
@@ -1,117 +0,0 @@
-# 训练实例分割模型
-
-------
-
-本文档训练代码可直接下载[代码tutorials/train/detection/mask_rcnn_r50_fpn.py](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/mask_rcnn_r50_fpn.py)。
-
-**1.下载并解压训练所需的数据集**
-
-> 使用1张显卡训练并指定使用0号卡。
-
-```python
-import os
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-import paddlex as pdx
-```
-
-> 这里使用小度熊分拣数据集,训练集、验证集和测试集共包含21个样本,1个类别。
-
-```python
-xiaoduxiong_dataset = 'https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_det.tar.gz'
-pdx.utils.download_and_decompress(xiaoduxiong_dataset, path='./')
-```
-
-**2.定义训练和验证过程中的数据处理和增强操作**
-
-> 在训练过程中使用`RandomHorizontalFlip`进行数据增强,由于接下来选择的模型是带FPN结构的Mask RCNN,所以使用`Padding`将输入图像的尺寸补齐到32的倍数,以保证FPN中两个需做相加操作的特征层的尺寸完全相同。transforms的使用见[paddlex.det.transforms](../../apis/transforms/det_transforms.md)
-
-```python
-from paddlex.det import transforms
-train_transforms = transforms.Compose([
- transforms.RandomHorizontalFlip(),
- transforms.Normalize(),
- transforms.ResizeByShort(short_size=800, max_size=1333),
- transforms.Padding(coarsest_stride=32)
-])
-
-eval_transforms = transforms.Compose([
- transforms.Normalize(),
- transforms.ResizeByShort(short_size=800, max_size=1333),
- transforms.Padding(coarsest_stride=32)
-])
-```
-
-**3.创建数据集读取器,并绑定相应的数据预处理流程**
-
-> 数据集读取器的介绍见文档[paddlex.datasets](../../apis/datasets.md)
-
-```python
-train_dataset = pdx.datasets.CocoDetection(
- data_dir='xiaoduxiong_ins_det/JPEGImages',
- ann_file='xiaoduxiong_ins_det/train.json',
- transforms=train_transforms,
- shuffle=True)
-eval_dataset = pdx.datasets.CocoDetection(
- data_dir='xiaoduxiong_ins_det/JPEGImages',
- ann_file='xiaoduxiong_ins_det/val.json',
- transforms=eval_transforms)
-```
-
-**4.创建Mask RCNN模型,并进行训练**
-
-> 创建带FPN结构的Mask RCNN模型,`num_classes` 需要设置为包含背景类的类别数,即: 目标类别数量(1) + 1。
-
-```python
-num_classes = len(train_dataset.labels) + 1
-model = pdx.det.MaskRCNN(num_classes=num_classes)
-```
-
-> 模型训练默认下载并使用在ImageNet数据集上训练得到的Backone,用户也可自行指定`pretrain_weights`参数来设置预训练权重。训练过程每间隔`save_interval_epochs`会在`save_dir`保存一次模型,与此同时也会在验证数据集上计算指标。检测模型的接口可见文档[paddlex.det.models](../../apis/models.md)。
-
-```python
-model.train(
- num_epochs=12,
- train_dataset=train_dataset,
- train_batch_size=1,
- eval_dataset=eval_dataset,
- learning_rate=0.00125,
- warmup_steps=10,
- lr_decay_epochs=[8, 11],
- save_dir='output/mask_rcnn_r50_fpn',
- use_vdl=True)
-```
-
-> 将`use_vdl`设置为`True`时可使用VisualDL查看训练指标。按以下方式启动VisualDL后,浏览器打开 http://0.0.0.0:8001即可。其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP。
-
-```shell
-visualdl --logdir output/mask_rcnn_r50_fpn/vdl_log --port 8001
-```
-
-**5.验证或测试**
-
-> 训练完利用模型可继续在验证集上进行验证。
-
-```python
-eval_metrics = model.evaluate(eval_dataset, batch_size=1)
-print("eval_metrics:", eval_metrics)
-```
-
-> 结果输出:
-
-```python
-eval_metrics: OrderedDict([('bbox_mmap', 0.5038283828382838), ('segm_mmap', 0.7025202520252025)])
-
-```
-
-> 训练完用模型对图片进行测试。
-
-```python
-predict_result = model.predict('./xiaoduxiong_ins_det/JPEGImages/WechatIMG114.jpeg')
-```
-
-> 可视化测试结果:
-
-```python
-pdx.det.visualize('./xiaoduxiong_ins_det/JPEGImages/WechatIMG114.jpeg', predict_result, threshold=0.7, save_dir='./output/mask_rcnn_r50_fpn')
-```
-
diff --git a/docs/tutorials/train/segmentation.md b/docs/tutorials/train/segmentation.md
deleted file mode 100755
index b070b169fe020021e2e81e6d306ec87f657cc8b6..0000000000000000000000000000000000000000
--- a/docs/tutorials/train/segmentation.md
+++ /dev/null
@@ -1,117 +0,0 @@
-# 训练语义分割模型
-
----
-更多语义分割模型在视盘数据集上的训练代码可参考[代码tutorials/train/segmentation/deeplabv3p.py](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/segmentation/deeplabv3p.py)。
-
-**1.下载并解压训练所需的数据集**
-
-> 使用1张显卡训练并指定使用0号卡。
-
-```python
-import os
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-import paddlex as pdx
-```
-
-> 这里使用视盘分割数据集,训练集、验证集和测试集共包含343个样本,2个类别。
-
-```python
-optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
-pdx.utils.download_and_decompress(optic_dataset, path='./')
-```
-
-**2.定义训练和验证过程中的数据处理和增强操作**
-
-> 在训练过程中使用`RandomHorizontalFlip`和`RandomPaddingCrop`进行数据增强,transforms的使用见[paddlex.seg.transforms](../../apis/transforms/seg_transforms.md)
-
-```python
-from paddlex.seg import transforms
-
-train_transforms = transforms.Compose([
- transforms.RandomHorizontalFlip(),
- transforms.Resize(target_size=512),
- transforms.RandomPaddingCrop(crop_size=500),
- transforms.Normalize()
-])
-eval_transforms = transforms.Compose([
- transforms.Resize(512),
- transforms.Normalize()
-])
-```
-
-**3.创建数据集读取器,并绑定相应的数据预处理流程**
-
-> 数据集读取器的介绍见文档[paddlex.cv.datasets](../../apis/datasets.md)
-
-```python
-train_dataset = pdx.datasets.SegDataset(
- data_dir='optic_disc_seg',
- file_list='optic_disc_seg/train_list.txt',
- label_list='optic_disc_seg/labels.txt',
- transforms=train_transforms,
- shuffle=True)
-eval_dataset = pdx.datasets.SegDataset(
- data_dir='optic_disc_seg',
- file_list='optic_disc_seg/val_list.txt',
- label_list='optic_disc_seg/labels.txt',
- transforms=eval_transforms)
-```
-
-**4.创建DeepLabv3+模型,并进行训练**
-
-> 创建DeepLabv3+模型,`num_classes` 需要设置为包含背景类的类别数,本例中为背景类与视盘共2类,详细代码可参见[demo](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/segmentation/deeplabv3p.py#L44)。
-
-```python
-num_classes = len(train_dataset.labels)
-model = pdx.seg.DeepLabv3p(num_classes=num_classes)
-```
-
-> 模型训练默认下载并使用在ImageNet数据集上训练得到的Backone,用户也可自行指定`pretrain_weights`参数来设置预训练权重。
-训练过程每间隔`save_interval_epochs`会在`save_dir`保存一次模型,与此同时也会在验证数据集上计算指标。
-检测模型的接口可见文档[paddlex.seg.models](../../apis/models.md)。
-
-```python
-model.train(
- num_epochs=40,
- train_dataset=train_dataset,
- train_batch_size=4,
- eval_dataset=eval_dataset,
- learning_rate=0.01,
- save_dir='output/deeplab',
- use_vdl=True)
-```
-
-> 将`use_vdl`设置为`True`时可使用VisualDL查看训练指标。按以下方式启动VisualDL后,浏览器打开 http://0.0.0.0:8001即可。其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP。
-
-```shell
-visualdl --logdir output/deeplab/vdl_log --port 8001
-```
-
-**5.验证或测试**
-
-> 训练完利用模型可继续在验证集上进行验证。
-
-```python
-eval_metrics = model.evaluate(eval_dataset, batch_size=2)
-print("eval_metrics:", eval_metrics)
-```
-
-> 结果输出:
-
-```python
-eval_metrics: {'miou': 0.8915175875548873, 'category_iou': [0.9956445981924432, 0.7873905769173314], 'macc': 0.9957137358816046, 'category_acc': [0.9975360650317765, 0.8948120441157331], 'kappa': 0.8788684558629085}
-```
-
-> 训练完用模型对图片进行测试。
-
-```python
-image_name = 'optic_disc_seg/JPEGImages/H0005.jpg'
-predict_result = model.predict(image_name)
-```
-
-> 可视化测试结果:
-
-```python
-import paddlex as pdx
-pdx.seg.visualize(image_name, predict_result, weight=0.4)
-```
-
-
diff --git a/docs/tutorials/train/visualdl.md b/docs/tutorials/train/visualdl.md
deleted file mode 100755
index dc442b5847e048b7fe080c085e0192caada19c2b..0000000000000000000000000000000000000000
--- a/docs/tutorials/train/visualdl.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# VisualDL可视化训练指标
-在使用PaddleX训练模型过程中,各个训练指标和评估指标会直接输出到标准输出流,同时也可通过VisualDL对训练过程中的指标进行可视化,只需在调用`train`函数时,将`use_vdl`参数设为`True`即可,如下代码所示,
-```
-import paddlex
-
-model = paddlex.cls.ResNet50(num_classes=1000)
-model.train(num_epochs=120, train_dataset=train_dataset,
- train_batch_size=32, eval_dataset=eval_dataset,
- log_interval_steps=10, save_interval_epochs=10,
- save_dir='./output', use_vdl=True)
-```
-
-模型在训练过程中,会在`save_dir`下生成`vdl_log`目录,通过在命令行终端执行以下命令,启动VisualDL。
-```
-visualdl --logdir=output/vdl_log --port=8008
-```
-在浏览器打开`http://0.0.0.0:8008`便可直接查看随训练迭代动态变化的各个指标(0.0.0.0表示启动VisualDL所在服务器的IP,本机使用0.0.0.0即可)。
-
-在训练分类模型过程中,使用VisualDL进行可视化的示例图如下所示。
-
-> 训练过程中每个Step的`Loss`和相应`Top1准确率`变化趋势:
-
-
-> 训练过程中每个Step的`学习率lr`和相应`Top5准确率`变化趋势:
-
-
-> 训练过程中,每次保存模型时,模型在验证数据集上的`Top1准确率`和`Top5准确率`:
-
diff --git a/docs/update.md b/docs/update.md
deleted file mode 100644
index 9191aac6a5d84b6f9be8161ad56747b954b0a68d..0000000000000000000000000000000000000000
--- a/docs/update.md
+++ /dev/null
@@ -1,14 +0,0 @@
-# 更新日志
-
-- 2020.05.20
-> - 发布正式版 v1.0
-> - 增加模型C++部署和Python部署代码
-> - 增加模型加密部署方案
-> - 增加分类模型的OpenVINO部署方案
-> - 增加模型可解释性的接口
-
-- 2020.05.17
-> - 发布v0.1.8 pip更新
-> - 修复部分代码Bug
-> - 新增EasyData平台数据标注格式支持
-> - 支持imgaug数据增强库的pixel-level算子
diff --git a/examples/human_segmentation/README.md b/examples/human_segmentation/README.md
index 18d1f22f3b48979602028e13d1045b63991794b8..2a9d54ff27e75e62f21ff9d1add7e584df20a689 100644
--- a/examples/human_segmentation/README.md
+++ b/examples/human_segmentation/README.md
@@ -1,28 +1,32 @@
# HumanSeg人像分割模型
-本教程基于PaddleX核心分割网络,提供针对人像分割场景从预训练模型、Fine-tune、视频分割预测部署的全流程应用指南。
+本教程基于PaddleX核心分割模型实现人像分割,开放预训练模型和测试数据,支持视频流人像分割,并提供从模型Fine-tune到Paddle Lite移动端部署的全流程应用指南。
-## 安装
+## 目录
-**前置依赖**
-* paddlepaddle >= 1.8.0
-* python >= 3.5
+* [预训练模型和测试数据](#1)
+* [快速体验视频流人像分割](#2)
+* [模型Fine-tune](#3)
+* [Paddle Lite移动端部署](#4)
-```
-pip install paddlex -i https://mirror.baidu.com/pypi/simple
-```
-安装的相关问题参考[PaddleX安装](https://paddlex.readthedocs.io/zh_CN/latest/install.html)
-## 预训练模型
-HumanSeg开放了在大规模人像数据上训练的两个预训练模型,满足多种使用场景的需求
+## 预训练模型和测试数据
+
+#### 预训练模型
+
+本案例开放了两个在大规模人像数据集上训练好的模型,以满足服务器端场景和移动端场景的需求。使用这些模型可以快速体验视频流人像分割,也可以将其部署到移动端进行实时人像分割,还可以用于模型Fine-tune。
| 模型类型 | Checkpoint Parameter | Inference Model | Quant Inference Model | 备注 |
| --- | --- | --- | ---| --- |
-| HumanSeg-server | [humanseg_server_params](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_server.pdparams) | [humanseg_server_inference](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_server_inference.zip) | -- | 高精度模型,适用于服务端GPU且背景复杂的人像场景, 模型结构为Deeplabv3+/Xcetion65, 输入大小(512, 512) |
-| HumanSeg-mobile | [humanseg_mobile_params](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_mobile.pdparams) | [humanseg_mobile_inference](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_mobile_inference.zip) | [humanseg_mobile_quant](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_mobile_quant.zip) | 轻量级模型, 适用于移动端或服务端CPU的前置摄像头场景,模型结构为HRNet_w18_samll_v1,输入大小(192, 192) |
+| HumanSeg-server | [humanseg_server_params](https://bj.bcebos.com/paddlex/examples/human_seg/models/humanseg_server_params.tar) | [humanseg_server_inference](https://bj.bcebos.com/paddlex/examples/human_seg/models/humanseg_server_inference.tar) | -- | 高精度模型,适用于服务端GPU且背景复杂的人像场景, 模型结构为Deeplabv3+/Xception65, 输入大小(512, 512) |
+| HumanSeg-mobile | [humanseg_mobile_params](https://bj.bcebos.com/paddlex/examples/human_seg/models/humanseg_mobile_params.tar) | [humanseg_mobile_inference](https://bj.bcebos.com/paddlex/examples/human_seg/models/humanseg_mobile_inference.tar) | [humanseg_mobile_quant](https://bj.bcebos.com/paddlex/examples/human_seg/models/humanseg_mobile_quant.tar) | 轻量级模型, 适用于移动端或服务端CPU的前置摄像头场景,模型结构为HRNet_w18_small_v1,输入大小(192, 192) |
+
+> * Checkpoint Parameter为模型权重,用于Fine-tuning场景,包含`__params__`模型参数和`model.yaml`基础的模型配置信息。
+> * Inference Model和Quant Inference Model为预测部署模型,包含`__model__`计算图结构、`__params__`模型参数和`model.yaml`基础的模型配置信息。
+> * 其中Inference Model适用于服务端的CPU和GPU预测部署,Quant Inference Model为量化版本,适用于通过Paddle Lite进行移动端等端侧设备部署。
-模型性能
+预训练模型的存储大小和推理时长如下所示,其中移动端模型的运行环境为CPU:骁龙855,内存:6GB,图片大小:192*192。
| 模型 | 模型大小 | 计算耗时 |
| --- | --- | --- |
@@ -30,68 +34,132 @@ HumanSeg开放了在大规模人像数据上训练的两个预训练模型,满
|humanseg_mobile_inference | 5.8 M | 42.35ms |
|humanseg_mobile_quant | 1.6M | 24.93ms |
-计算耗时运行环境: 小米,cpu:骁龙855, 内存:6GB, 图片大小:192*192
+执行以下脚本下载全部的预训练模型:
+* 下载PaddleX源码:
-**NOTE:**
-其中Checkpoint Parameter为模型权重,用于Fine-tuning场景。
+```bash
+git clone https://github.com/PaddlePaddle/PaddleX
+```
+
+* 下载预训练模型的代码位于`PaddleX/examples/human_segmentation`,进入该目录:
-* Inference Model和Quant Inference Model为预测部署模型,包含`__model__`计算图结构、`__params__`模型参数和`model.yaml`基础的模型配置信息。
+```bash
+cd PaddleX/examples/human_segmentation
+```
-* 其中Inference Model适用于服务端的CPU和GPU预测部署,Qunat Inference Model为量化版本,适用于通过Paddle Lite进行移动端等端侧设备部署。
+* 执行下载
-执行以下脚本进行HumanSeg预训练模型的下载
```bash
python pretrain_weights/download_pretrain_weights.py
```
-## 下载测试数据
-我们提供了[supervise.ly](https://supervise.ly/)发布人像分割数据集**Supervisely Persons**, 从中随机抽取一小部分并转化成PaddleX可直接加载数据格式。通过运行以下代码进行快速下载,其中包含手机前置摄像头的人像测试视频`video_test.mp4`.
+#### 测试数据
+
+[supervise.ly](https://supervise.ly/)发布了人像分割数据集**Supervisely Persons**, 本案例从中随机抽取一小部分数据并转化成PaddleX可直接加载的数据格式,运行以下代码可下载该数据以及手机前置摄像头拍摄的人像测试视频`video_test.mp4`。
+
+* 下载测试数据的代码位于`PaddleX/examples/human_segmentation`,进入该目录并执行下载:
```bash
python data/download_data.py
```
-## 快速体验视频流人像分割
-结合DIS(Dense Inverse Search-basedmethod)光流算法预测结果与分割结果,改善视频流人像分割
+## 快速体验视频流人像分割
+
+#### 前置依赖
+
+* PaddlePaddle >= 1.8.0
+* Python >= 3.5
+* PaddleX >= 1.0.0
+
+安装的相关问题参考[PaddleX安装](../../docs/install.md)
+
+* 下载PaddleX源码:
+
+```bash
+git clone https://github.com/PaddlePaddle/PaddleX
+```
+
+* 视频流人像分割和背景替换的执行文件均位于`PaddleX/examples/human_segmentation`,进入该目录:
+
+```bash
+cd PaddleX/examples/human_segmentation
+```
+
+### 光流跟踪辅助的视频流人像分割
+
+本案例将DIS(Dense Inverse Search-based method)光流跟踪算法的预测结果与PaddleX的分割结果进行融合,以此改善视频流人像分割的效果。运行以下代码进行体验,以下代码位于`PaddleX/examples/human_segmentation`:
+
+* 通过电脑摄像头进行实时分割处理
+
```bash
-# 通过电脑摄像头进行实时分割处理
python video_infer.py --model_dir pretrain_weights/humanseg_mobile_inference
+```
+* 对离线人像视频进行分割处理
-# 对人像视频进行分割处理
+```bash
python video_infer.py --model_dir pretrain_weights/humanseg_mobile_inference --video_path data/video_test.mp4
```
-视频分割结果如下:
+视频分割结果如下所示:

-根据所选背景进行背景替换,背景可以是一张图片,也可以是一段视频。
+### 人像背景替换
+
+本案例还实现了人像背景替换功能,根据所选背景对人像的背景画面进行替换,背景可以是一张图片,也可以是一段视频。人像背景替换的代码位于`PaddleX/examples/human_segmentation`,进入该目录并执行:
+
+* 通过电脑摄像头进行实时背景替换处理,也可通过`--background_video_path`传入背景视频作为背景
```bash
-# 通过电脑摄像头进行实时背景替换处理, 也可通过'--background_video_path'传入背景视频
python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --background_image_path data/background.jpg
+```
-# 对人像视频进行背景替换处理, 也可通过'--background_video_path'传入背景视频
+* 对人像视频进行背景替换处理,也可通过`--background_video_path`传入背景视频作为背景
+```bash
python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --video_path data/video_test.mp4 --background_image_path data/background.jpg
+```
-# 对单张图像进行背景替换
+* 对单张图像进行背景替换
+```bash
python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --image_path data/human_image.jpg --background_image_path data/background.jpg
-
```
背景替换结果如下:

+**注意**:
+
+* 视频分割处理时间需要几分钟,请耐心等待。
+
+* 提供的模型适用于手机摄像头竖屏拍摄场景,宽屏效果会略差一些。
+
+## 模型Fine-tune
+
+#### 前置依赖
-**NOTE**:
+* PaddlePaddle >= 1.8.0
+* Python >= 3.5
+* PaddleX >= 1.0.0
-视频分割处理时间需要几分钟,请耐心等待。
+安装的相关问题参考[PaddleX安装](../../docs/install.md)
-提供的模型适用于手机摄像头竖屏拍摄场景,宽屏效果会略差一些。
+* 下载PaddleX源码:
+
+```bash
+git clone https://github.com/PaddlePaddle/PaddleX
+```
+
+* 人像分割训练、评估、预测、模型导出、离线量化的执行文件均位于`PaddleX/examples/human_segmentation`,进入该目录:
+
+```bash
+cd PaddleX/examples/human_segmentation
+```
+
+### 模型训练
+
+使用下述命令进行基于预训练模型的模型训练,请确保选用的模型结构`model_type`与模型参数`pretrain_weights`匹配。如果不需要本案例提供的测试数据,可更换数据、选择合适的模型并调整训练参数。
-## 训练
-使用下述命令基于与训练模型进行Fine-tuning,请确保选用的模型结构`model_type`与模型参数`pretrain_weights`匹配。
```bash
# 指定GPU卡号(以0号卡为例)
export CUDA_VISIBLE_DEVICES=0
@@ -124,11 +192,12 @@ python train.py --model_type HumanSegMobile \
```bash
python train.py --help
```
-**NOTE**
-可通过更换`--model_type`变量与对应的`--pretrain_weights`使用不同的模型快速尝试。
+**注意**:可以通过更换`--model_type`变量与对应的`--pretrain_weights`使用不同的模型快速尝试。
+
+### 评估
+
+使用下述命令对模型在验证集上的精度进行评估:
-## 评估
-使用下述命令进行评估
```bash
python eval.py --model_dir output/best_model \
--data_dir data/mini_supervisely \
@@ -141,8 +210,9 @@ python eval.py --model_dir output/best_model \
* `--val_list`: 验证集列表路径
* `--image_shape`: 网络输入图像大小(w, h)
-## 预测
-使用下述命令进行预测, 预测结果默认保存在`./output/result/`文件夹中。
+### 预测
+
+使用下述命令对测试集进行预测,预测可视化结果默认保存在`./output/result/`文件夹中。
```bash
python infer.py --model_dir output/best_model \
--data_dir data/mini_supervisely \
@@ -156,7 +226,10 @@ python infer.py --model_dir output/best_model \
* `--test_list`: 测试集列表路径
* `--image_shape`: 网络输入图像大小(w, h)
-## 模型导出
+### 模型导出
+
+在服务端部署的模型需要首先将模型导出为inference格式模型,导出的模型将包括`__model__`、`__params__`和`model.yml`三个文件,分别为模型的网络结构、模型权重和模型的配置文件(包括数据预处理参数等)。在安装完PaddleX后,在命令行终端使用如下命令完成模型导出:
+
```bash
paddlex --export_inference --model_dir output/best_model \
--save_dir output/export
@@ -165,7 +238,7 @@ paddlex --export_inference --model_dir output/best_model \
* `--model_dir`: 模型路径
* `--save_dir`: 导出模型保存路径
-## 离线量化
+### 离线量化
```bash
python quant_offline.py --model_dir output/best_model \
--data_dir data/mini_supervisely \
@@ -179,3 +252,64 @@ python quant_offline.py --model_dir output/best_model \
* `--quant_list`: 量化数据集列表路径,一般直接选择训练集或验证集
* `--save_dir`: 量化模型保存路径
* `--image_shape`: 网络输入图像大小(w, h)
+
+## Paddle Lite移动端部署
+
+本案例将人像分割模型在移动端进行部署,部署流程展示如下,通用的移动端部署流程参见[Paddle Lite移动端部署](../../docs/deploy/paddlelite/android.md)。
+
+### 1. 将PaddleX模型导出为inference模型
+
+本案例使用humanseg_mobile_quant预训练模型,该模型已经是inference模型,不需要再执行模型导出步骤。如果不使用预训练模型,则参照上一章节`模型Fine-tune`中的`模型导出`小节,将自己训练的模型导出为inference格式。
+
+### 2. 将inference模型优化为Paddle Lite模型
+
+下载并解压 [模型优化工具opt](https://bj.bcebos.com/paddlex/deploy/lite/model_optimize_tool_11cbd50e.tar.gz),进入模型优化工具opt所在路径后,执行以下命令:
+
+``` bash
+./opt --model_file=<导出模型目录>/__model__ \
+      --param_file=<导出模型目录>/__params__ \
+ --valid_targets=arm \
+ --optimize_out_type=naive_buffer \
+ --optimize_out=model_output_name
+```
+
+| 参数 | 说明 |
+| ---- | ---- |
+| --model_file | 导出inference模型中包含的网络结构文件:`__model__`所在的路径|
+| --param_file | 导出inference模型中包含的参数文件:`__params__`所在的路径|
+| --valid_targets |指定模型可执行的backend,这里请指定为`arm`|
+| --optimize_out_type | 输出模型类型,目前支持两种类型:protobuf和naive_buffer,其中naive_buffer是一种更轻量级的序列化/反序列化,这里请指定为`naive_buffer`|
+| --optimize_out | 输出模型的名称 |
+
+更详细的使用方法和参数含义请参考: [使用opt转化模型](https://paddle-lite.readthedocs.io/zh/latest/user_guides/opt/opt_bin.html)
+
+### 3. 移动端预测
+
+PaddleX提供了基于PaddleX Android SDK的安卓demo,可供用户体验图像分类、目标检测、实例分割和语义分割,该demo位于`PaddleX/deploy/lite/android/demo`,用户将模型、配置文件和测试图片拷贝至该demo下进行预测。
+
+#### 3.1 前置依赖
+
+* Android Studio 3.4
+* Android手机或开发板
+
+#### 3.2 拷贝模型、配置文件和测试图片
+
+* 将Lite模型(.nb文件)拷贝到`PaddleX/deploy/lite/android/demo/app/src/main/assets/model/`目录下, 根据.nb文件的名字,修改文件`PaddleX/deploy/lite/android/demo/app/src/main/res/values/strings.xml`中的`MODEL_PATH_DEFAULT`;
+
+* 将配置文件(.yml文件)拷贝到`PaddleX/deploy/lite/android/demo/app/src/main/assets/config/`目录下,根据.yml文件的名字,修改文件`PaddleX/deploy/lite/android/demo/app/src/main/res/values/strings.xml`中的`YAML_PATH_DEFAULT`;
+
+* 将测试图片拷贝到`PaddleX/deploy/lite/android/demo/app/src/main/assets/images/`目录下,根据图片文件的名字,修改文件`PaddleX/deploy/lite/android/demo/app/src/main/res/values/strings.xml`中的`IMAGE_PATH_DEFAULT`。
+
+#### 3.3 导入工程并运行
+
+* 打开Android Studio,在"Welcome to Android Studio"窗口点击"Open an existing Android Studio project",在弹出的路径选择窗口中进入`PaddleX/deploy/lite/android/demo`目录,然后点击右下角的"Open"按钮,导入工程;
+
+* 通过USB连接Android手机或开发板;
+
+* 工程编译完成后,点击菜单栏的Run->Run 'App'按钮,在弹出的"Select Deployment Target"窗口选择已经连接的Android设备,然后点击"OK"按钮;
+
+* 运行成功后,Android设备将加载一个名为PaddleX Demo的App,默认会加载一个测试图片,同时还支持拍照和从图库选择照片进行预测。
+
+测试图片及其分割结果如下所示:
+
+
diff --git a/examples/human_segmentation/data/beauty.png b/examples/human_segmentation/data/beauty.png
new file mode 100644
index 0000000000000000000000000000000000000000..21343fa343a33620981041179e0fc8c6a717c598
Binary files /dev/null and b/examples/human_segmentation/data/beauty.png differ
diff --git a/examples/meter_reader/README.md b/examples/meter_reader/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f8c8388f395bbf64e7111e873f4e269702b3c6eb
--- /dev/null
+++ b/examples/meter_reader/README.md
@@ -0,0 +1,281 @@
+# MeterReader工业表计读数
+
+本案例基于PaddleX实现对传统机械式指针表计的检测与自动读数功能,开放表计数据和预训练模型,并提供在Windows系统的服务器端以及Linux系统的Jetson嵌入式设备上的部署指南。
+
+## 目录
+
+* [读数流程](#1)
+* [表计数据和预训练模型](#2)
+* [快速体验表盘读数](#3)
+* [推理部署](#4)
+* [模型训练](#5)
+
+
+## 读数流程
+
+表计读数共分为三个步骤:
+
+* 第一步,使用目标检测模型检测出图像中的表计
+* 第二步,使用语义分割模型将各表计的指针和刻度分割出来
+* 第三步,根据指针的相对位置和预知的量程计算出各表计的读数。
+
+
+
+* **表计检测**:由于本案例中没有面积较小的表计,所以目标检测模型选择性能更优的**YOLOv3**。考虑到本案例主要在有GPU的设备上部署,所以骨干网络选择精度更高的**DarkNet53**。
+* **刻度和指针分割**:考虑到刻度和指针均为细小区域,语义分割模型选择效果更好的**DeepLabv3**。
+* **读数后处理**:首先,对语义分割的预测类别图进行图像腐蚀操作,以达到刻度细分的目的。然后把环形的表盘展开为矩形图像,根据图像中类别信息生成一维的刻度数组和一维的指针数组。接着计算刻度数组的均值,用均值对刻度数组进行二值化操作。最后定位出指针相对刻度的位置,根据刻度的根数判断表盘的类型以此获取表盘的量程,将指针相对位置与量程做乘积得到表盘的读数。
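+
+下面以一段简化的示意代码说明读数后处理的核心计算逻辑(假设已得到展开为矩形的分割类别图,其中1为刻度、2为指针;函数与变量均为示意,完整实现以本案例的预测脚本为准):
+
+```python
+import numpy as np
+
+def compute_reading(rect_label_map, meter_range):
+    """rect_label_map: 表盘展开后的矩形类别图,1表示刻度,2表示指针
+    meter_range: 根据刻度根数判断出的表盘量程"""
+    # 按列统计刻度、指针像素个数,得到一维刻度数组和一维指针数组
+    scale_1d = (rect_label_map == 1).sum(axis=0)
+    pointer_1d = (rect_label_map == 2).sum(axis=0)
+    # 用刻度数组的均值对其进行二值化
+    scale_bin = scale_1d > scale_1d.mean()
+    scale_locs = np.flatnonzero(scale_bin)
+    # 指针位置取指针像素最集中的一列
+    pointer_loc = int(pointer_1d.argmax())
+    # 指针相对刻度的位置与量程相乘得到读数
+    relative = (scale_locs < pointer_loc).sum() / max(len(scale_locs), 1)
+    return relative * meter_range
+```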
+
+
+## 表计数据和预训练模型
+
+本案例开放了表计测试图片,用于体验表计读数的预测推理全流程。还开放了表计检测数据集、指针和刻度分割数据集,用户可以使用这些数据集重新训练模型。
+
+| 表计测试图片 | 表计检测数据集 | 指针和刻度分割数据集 |
+| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| [meter_test](https://bj.bcebos.com/paddlex/examples/meter_reader/datasets/meter_test.tar.gz) | [meter_det](https://bj.bcebos.com/paddlex/examples/meter_reader/datasets/meter_det.tar.gz) | [meter_seg](https://bj.bcebos.com/paddlex/examples/meter_reader/datasets/meter_seg.tar.gz) |
+
+This example also releases a pretrained detection model and a pretrained semantic segmentation model. You can use them to quickly experience the full meter-reading pipeline, or deploy them directly on a server or a Jetson embedded device for inference.
+
+| Meter detection model | Pointer and scale segmentation model |
+| ------------------------------------------------------------ | ------------------------------------------------------------ |
+| [meter_det_inference_model](https://bj.bcebos.com/paddlex/examples/meter_reader/models/meter_det_inference_model.tar.gz) | [meter_seg_inference_model](https://bj.bcebos.com/paddlex/examples/meter_reader/models/meter_seg_inference_model.tar.gz) |
+
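+The archives can be fetched and unpacked with standard tools, for example:
+
+```shell
+wget https://bj.bcebos.com/paddlex/examples/meter_reader/datasets/meter_test.tar.gz
+wget https://bj.bcebos.com/paddlex/examples/meter_reader/models/meter_det_inference_model.tar.gz
+wget https://bj.bcebos.com/paddlex/examples/meter_reader/models/meter_seg_inference_model.tar.gz
+tar -xzf meter_test.tar.gz
+tar -xzf meter_det_inference_model.tar.gz
+tar -xzf meter_seg_inference_model.tar.gz
+```
+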
+
+## Quick Experience of Meter Reading
+
+You can use the pretrained models provided by this example to quickly experience the full automatic meter-reading pipeline. If you would rather not use the pretrained models, jump to the section `Model Training` and retrain them.
+
+#### Prerequisites
+
+* PaddlePaddle >= 1.8.0
+* Python >= 3.5
+* PaddleX >= 1.0.0
+
+For installation issues, see [PaddleX Installation](../../docs/install.md)
+
+#### Test Meter Reading
+
+1. Download the PaddleX source code:
+
+```
+git clone https://github.com/PaddlePaddle/PaddleX
+```
+
+2. The inference script lives in `PaddleX/examples/meter_reader/`; change into that directory:
+
+```
+cd PaddleX/examples/meter_reader/
+```
+
+The inference script is `reader_infer.py`; its main arguments are described below:
+
+
+| Parameter | Description |
+| ---- | ---- |
+| detector_dir | Path of the meter detection model |
+| segmenter_dir | Path of the pointer and scale segmentation model |
+| image | Path of the image to predict |
+| image_dir | Path of the directory holding the images to predict |
+| save_dir | Path for saving visualized results; defaults to "output" |
+| score_threshold | Detected boxes whose score is below this threshold are filtered out; defaults to 0.5 |
+| seg_batch_size | Batch size of segmentation inference; defaults to 2 |
+| seg_thread_num | Number of segmentation inference threads; defaults to the number of CPU cores |
+| use_camera | Whether to capture images from a camera; defaults to False |
+| camera_id | Camera device ID; defaults to 0 |
+| use_erode | Whether to erode the segmentation prediction map to separate scale marks; defaults to False |
+| erode_kernel | Kernel size of the erosion operation; defaults to 4 |
+
+3. Predict
+
+To use a GPU, set the GPU card ID (card 0 as an example):
+
+```shell
+export CUDA_VISIBLE_DEVICES=0
+```
+To run without a GPU, set CUDA_VISIBLE_DEVICES to empty:
+```shell
+export CUDA_VISIBLE_DEVICES=
+```
+
+* Predict a single image
+
+```shell
+python3 reader_infer.py --detector_dir /path/to/det_inference_model --segmenter_dir /path/to/seg_inference_model --image /path/to/meter_test/20190822_168.jpg --save_dir ./output --use_erode
+```
+
+* Predict multiple images
+
+```shell
+python3 reader_infer.py --detector_dir /path/to/det_inference_model --segmenter_dir /path/to/seg_inference_model --image_dir /path/to/meter_test --save_dir ./output --use_erode
+```
+
+* Predict with the camera enabled
+
+```shell
+python3 reader_infer.py --detector_dir /path/to/det_inference_model --segmenter_dir /path/to/seg_inference_model --save_dir ./output --use_erode --use_camera
+```
+
+## Inference Deployment
+
+### Secure Server-side Deployment on Windows
+
+#### C++ Deployment
+
+1. Download the PaddleX source code:
+
+```
+git clone https://github.com/PaddlePaddle/PaddleX
+```
+
+2. Copy the `meter_reader` folder and `CMakeLists.txt` from `PaddleX\examples\meter_reader\deploy\cpp` into the `PaddleX\deploy\cpp` directory. Before copying, back up the original `CMakeLists.txt` under `PaddleX\deploy\cpp`.
+
+3. Follow Step 2 through Step 4 of [Deployment on Windows](../../docs/deploy/server/cpp/windows.md) to build the C++ inference code.
+
+4. After a successful build, the executables are under the `out\build\x64-Release` directory; open `cmd` and change into it:
+
+ ```
+ cd PaddleX\deploy\cpp\out\build\x64-Release
+ ```
+
+ The inference program is `paddlex_inference\meter_reader.exe`; its main command-line arguments are described below:
+
+ | Parameter | Description |
+ | ---- | ---- |
+ | det_model_dir | Path of the meter detection model |
+ | seg_model_dir | Path of the pointer and scale segmentation model |
+ | image | Path of the image to predict |
+ | image_list | A .txt file holding one image path per line |
+ | use_gpu | Whether to run inference on GPU; supported values are 0 and 1 (defaults to 0) |
+ | gpu_id | GPU device ID; defaults to 0 |
+ | save_dir | Path for saving visualized results; defaults to "output" |
+ | det_key | Key generated when the detection model was encrypted; the default "" means an unencrypted detection model is loaded |
+ | seg_key | Key generated when the segmentation model was encrypted; the default "" means an unencrypted segmentation model is loaded |
+ | seg_batch_size | Batch size of segmentation inference; defaults to 2 |
+ | thread_num | Number of segmentation inference threads; defaults to the number of CPU cores |
+ | use_camera | Whether to capture images from a camera; supported values are 0 and 1 (defaults to 0) |
+ | camera_id | Camera device ID; defaults to 0 |
+ | use_erode | Whether to erode the segmentation prediction map to denoise it; supported values are 0 and 1 (defaults to 1) |
+ | erode_kernel | Kernel size of the erosion operation; defaults to 4 |
+ | score_threshold | Detected boxes whose score is below this threshold are filtered out; defaults to 0.5 |
+
+5. Inference:
+
+ The model used for deployment must be in the inference format. The pretrained models provided by this example are already in that format; if you retrained a model, export it to the inference format by following [Export Inference Model](https://paddlex.readthedocs.io/zh_CN/latest/tutorials/deploy/deploy_server/deploy_python.html#inference). A sketch of the export command is shown below.
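+
+ For reference, an export with the PaddleX command line looks roughly like this (the model path is an illustrative assumption):
+
+ ```shell
+ paddlex --export_inference --model_dir=./output/best_model --save_dir=./inference_model
+ ```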
+
+ * Predict a single image with the unencrypted models
+
+ ```shell
+ .\paddlex_inference\meter_reader.exe --det_model_dir=\path\to\det_inference_model --seg_model_dir=\path\to\seg_inference_model --image=\path\to\meter_test\20190822_168.jpg --use_gpu=1 --use_erode=1 --save_dir=output
+ ```
+
+ * Predict an image list with the unencrypted models
+
+ ```shell
+ .\paddlex_inference\meter_reader.exe --det_model_dir=\path\to\det_inference_model --seg_model_dir=\path\to\seg_inference_model --image_list=\path\to\meter_test\image_list.txt --use_gpu=1 --use_erode=1 --save_dir=output
+ ```
+
+ * Predict with the camera enabled, using the unencrypted models
+
+ ```shell
+ .\paddlex_inference\meter_reader.exe --det_model_dir=\path\to\det_inference_model --seg_model_dir=\path\to\seg_inference_model --use_camera=1 --use_gpu=1 --use_erode=1 --save_dir=output
+ ```
+
+ * Predict a single image with the encrypted models
+
+ If you have not encrypted the models yet, follow [Encrypt PaddleX Models](../../docs/deploy/server/encryption.md#13-加密paddlex模型) to do so. For example, suppose the encrypted detection model lives in `\path\to\encrypted_det_inference_model` with key `yEBLDiBOdlj+5EsNNrABhfDuQGkdcreYcHcncqwdbx0=`, and the encrypted segmentation model lives in `\path\to\encrypted_seg_inference_model` with key `DbVS64I9pFRo5XmQ8MNV2kSGsfEr4FKA6OH9OUhRrsY=`:
+
+ ```shell
+ .\paddlex_inference\meter_reader.exe --det_model_dir=\path\to\encrypted_det_inference_model --seg_model_dir=\path\to\encrypted_seg_inference_model --image=\path\to\test.jpg --use_gpu=1 --use_erode=1 --save_dir=output --det_key yEBLDiBOdlj+5EsNNrABhfDuQGkdcreYcHcncqwdbx0= --seg_key DbVS64I9pFRo5XmQ8MNV2kSGsfEr4FKA6OH9OUhRrsY=
+ ```
+
+### Secure Deployment on Jetson Embedded Devices Running Linux
+
+#### C++ Deployment
+
+1. Download the PaddleX source code:
+
+```
+git clone https://github.com/PaddlePaddle/PaddleX
+```
+
+2. Copy the `meter_reader` folder and `CMakeLists.txt` from `PaddleX/examples/meter_reader/deploy/cpp` into the `PaddleX/deploy/cpp` directory. Before copying, back up the original `CMakeLists.txt` under `PaddleX/deploy/cpp`.
+
+3. Follow Step 2 through Step 3 of [Deployment on Nvidia Jetson](../../docs/deploy/nvidia-jetson.md) to build the C++ inference code.
+
+4. After a successful build, the executable is `build/meter_reader/meter_reader`; its main command-line arguments are described below:
+
+ | Parameter | Description |
+ | ---- | ---- |
+ | det_model_dir | Path of the meter detection model |
+ | seg_model_dir | Path of the pointer and scale segmentation model |
+ | image | Path of the image to predict |
+ | image_list | A .txt file holding one image path per line |
+ | use_gpu | Whether to run inference on GPU; supported values are 0 and 1 (defaults to 0) |
+ | gpu_id | GPU device ID; defaults to 0 |
+ | save_dir | Path for saving visualized results; defaults to "output" |
+ | det_key | Key generated when the detection model was encrypted; the default "" means an unencrypted detection model is loaded |
+ | seg_key | Key generated when the segmentation model was encrypted; the default "" means an unencrypted segmentation model is loaded |
+ | seg_batch_size | Batch size of segmentation inference; defaults to 2 |
+ | thread_num | Number of segmentation inference threads; defaults to the number of CPU cores |
+ | use_camera | Whether to capture images from a camera; supported values are 0 and 1 (defaults to 0) |
+ | camera_id | Camera device ID; defaults to 0 |
+ | use_erode | Whether to erode the segmentation prediction map to separate scale marks; supported values are 0 and 1 (defaults to 1) |
+ | erode_kernel | Kernel size of the erosion operation; defaults to 4 |
+ | score_threshold | Detected boxes whose score is below this threshold are filtered out; defaults to 0.5 |
+
+5. Inference:
+
+ The model used for deployment must be in the inference format. The pretrained models provided by this example are already in that format; if you retrained a model, export it to the inference format by following [Export Deployment Model](../../docs/deploy/export_model.md) (see also the export sketch in the Windows section above).
+
+ * Predict a single image with the unencrypted models
+
+ ```shell
+ ./build/meter_reader/meter_reader --det_model_dir=/path/to/det_inference_model --seg_model_dir=/path/to/seg_inference_model --image=/path/to/meter_test/20190822_168.jpg --use_gpu=1 --use_erode=1 --save_dir=output
+ ```
+
+ * Predict an image list with the unencrypted models
+
+ ```shell
+ ./build/meter_reader/meter_reader --det_model_dir=/path/to/det_inference_model --seg_model_dir=/path/to/seg_inference_model --image_list=/path/to/image_list.txt --use_gpu=1 --use_erode=1 --save_dir=output
+ ```
+
+ * Predict with the camera enabled, using the unencrypted models
+
+ ```shell
+ ./build/meter_reader/meter_reader --det_model_dir=/path/to/det_inference_model --seg_model_dir=/path/to/seg_inference_model --use_camera=1 --use_gpu=1 --use_erode=1 --save_dir=output
+ ```
+
+ * Predict a single image with the encrypted models
+
+ If you have not encrypted the models yet, follow [Encrypt PaddleX Models](../../docs/deploy/server/encryption.md#13-加密paddlex模型) to do so. For example, suppose the encrypted detection model lives in `/path/to/encrypted_det_inference_model` with key `yEBLDiBOdlj+5EsNNrABhfDuQGkdcreYcHcncqwdbx0=`, and the encrypted segmentation model lives in `/path/to/encrypted_seg_inference_model` with key `DbVS64I9pFRo5XmQ8MNV2kSGsfEr4FKA6OH9OUhRrsY=`:
+
+ ```shell
+ ./build/meter_reader/meter_reader --det_model_dir=/path/to/encrypted_det_inference_model --seg_model_dir=/path/to/encrypted_seg_inference_model --image=/path/to/test.jpg --use_gpu=1 --use_erode=1 --save_dir=output --det_key yEBLDiBOdlj+5EsNNrABhfDuQGkdcreYcHcncqwdbx0= --seg_key DbVS64I9pFRo5XmQ8MNV2kSGsfEr4FKA6OH9OUhRrsY=
+ ```
+
+
+## Model Training
+
+#### Prerequisites
+
+* PaddlePaddle >= 1.8.0
+* Python >= 3.5
+* PaddleX >= 1.0.0
+
+For installation issues, see [PaddleX Installation](../../docs/install.md)
+
+#### Training
+
+* Train the meter detector
+```
+python3 /path/to/PaddleX/examples/meter_reader/train_detection.py
+```
+* Train the pointer and scale segmenter
+
+```
+python3 /path/to/PaddleX/examples/meter_reader/train_segmentation.py
+```
+
+Running the scripts above trains this example's detection and segmentation models. If you do not want this example's data and model settings, swap in your own data, pick a suitable model and adjust the training parameters; a minimal training sketch follows.
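+
+As a reference only, a custom detector training script written against the PaddleX 1.x API looks roughly like the sketch below. The dataset layout and hyperparameters are illustrative assumptions; the shipped `train_detection.py`/`train_segmentation.py` remain the reference implementations.
+
+```python
+import paddlex as pdx
+from paddlex.det import transforms
+
+# Basic augmentation/normalization pipeline for YOLOv3.
+train_transforms = transforms.Compose([
+    transforms.Resize(target_size=608, interp='RANDOM'),
+    transforms.RandomHorizontalFlip(),
+    transforms.Normalize(),
+])
+
+# Assumed VOC-style layout of the meter detection dataset.
+train_dataset = pdx.datasets.VOCDetection(
+    data_dir='meter_det',
+    file_list='meter_det/train_list.txt',
+    label_list='meter_det/labels.txt',
+    transforms=train_transforms)
+
+# YOLOv3 with the DarkNet53 backbone, as chosen in the reading pipeline.
+model = pdx.det.YOLOv3(
+    num_classes=len(train_dataset.labels), backbone='DarkNet53')
+model.train(
+    num_epochs=270,
+    train_dataset=train_dataset,
+    train_batch_size=8,
+    learning_rate=0.000125,
+    save_dir='output/meter_det')
+```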
diff --git a/examples/meter_reader/deploy/cpp/CMakeLists.txt b/examples/meter_reader/deploy/cpp/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..30084a8f22c533b884aadf373671513250399b9f
--- /dev/null
+++ b/examples/meter_reader/deploy/cpp/CMakeLists.txt
@@ -0,0 +1,309 @@
+cmake_minimum_required(VERSION 3.0)
+project(PaddleX CXX C)
+
+option(WITH_MKL "Compile meter_reader with MKL/OpenBlas support, default use MKL." ON)
+option(WITH_GPU "Compile meter_reader with GPU/CPU, default use CPU." ON)
+option(WITH_STATIC_LIB "Compile meter_reader with static/shared library, default use static." OFF)
+option(WITH_TENSORRT "Compile meter_reader with TensorRT." OFF)
+option(WITH_ENCRYPTION "Compile meter_reader with encryption tool." OFF)
+
+SET(TENSORRT_DIR "" CACHE PATH "Location of libraries")
+SET(PADDLE_DIR "" CACHE PATH "Location of libraries")
+SET(OPENCV_DIR "" CACHE PATH "Location of libraries")
+SET(ENCRYPTION_DIR"" CACHE PATH "Location of libraries")
+SET(CUDA_LIB "" CACHE PATH "Location of libraries")
+
+if (NOT WIN32)
+ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/meter_reader)
+else()
+ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/paddlex_inference)
+ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/paddlex_inference)
+ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/paddlex_inference)
+endif()
+
+if (NOT WIN32)
+ SET(YAML_BUILD_TYPE ON CACHE BOOL "yaml build shared library.")
+else()
+ SET(YAML_BUILD_TYPE OFF CACHE BOOL "yaml build shared library.")
+endif()
+include(cmake/yaml-cpp.cmake)
+
+include_directories("${CMAKE_SOURCE_DIR}/")
+include_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/src/ext-yaml-cpp/include")
+link_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/lib")
+
+macro(safe_set_static_flag)
+ foreach(flag_var
+ CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
+ CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
+ if(${flag_var} MATCHES "/MD")
+ string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
+ endif(${flag_var} MATCHES "/MD")
+ endforeach(flag_var)
+endmacro()
+
+
+if (WITH_ENCRYPTION)
+add_definitions( -DWITH_ENCRYPTION=${WITH_ENCRYPTION})
+endif()
+
+if (WITH_MKL)
+ ADD_DEFINITIONS(-DUSE_MKL)
+endif()
+
+if (NOT DEFINED PADDLE_DIR OR ${PADDLE_DIR} STREQUAL "")
+ message(FATAL_ERROR "please set PADDLE_DIR with -DPADDLE_DIR=/path/paddle_influence_dir")
+endif()
+
+if (NOT DEFINED OPENCV_DIR OR ${OPENCV_DIR} STREQUAL "")
+ message(FATAL_ERROR "please set OPENCV_DIR with -DOPENCV_DIR=/path/opencv")
+endif()
+
+include_directories("${CMAKE_SOURCE_DIR}/")
+include_directories("${PADDLE_DIR}/")
+include_directories("${PADDLE_DIR}/third_party/install/protobuf/include")
+include_directories("${PADDLE_DIR}/third_party/install/glog/include")
+include_directories("${PADDLE_DIR}/third_party/install/gflags/include")
+include_directories("${PADDLE_DIR}/third_party/install/xxhash/include")
+if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/include")
+ include_directories("${PADDLE_DIR}/third_party/install/snappy/include")
+endif()
+if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include")
+ include_directories("${PADDLE_DIR}/third_party/install/snappystream/include")
+endif()
+# zlib does not exist in 1.8.1
+if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/include")
+ include_directories("${PADDLE_DIR}/third_party/install/zlib/include")
+endif()
+
+include_directories("${PADDLE_DIR}/third_party/boost")
+include_directories("${PADDLE_DIR}/third_party/eigen3")
+
+if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
+ link_directories("${PADDLE_DIR}/third_party/install/snappy/lib")
+endif()
+if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
+ link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib")
+endif()
+
+if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/lib")
+ link_directories("${PADDLE_DIR}/third_party/install/zlib/lib")
+endif()
+
+link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib")
+link_directories("${PADDLE_DIR}/third_party/install/glog/lib")
+link_directories("${PADDLE_DIR}/third_party/install/gflags/lib")
+link_directories("${PADDLE_DIR}/third_party/install/xxhash/lib")
+link_directories("${PADDLE_DIR}/paddle/lib/")
+link_directories("${CMAKE_CURRENT_BINARY_DIR}")
+
+if (WIN32)
+ include_directories("${PADDLE_DIR}/paddle/fluid/inference")
+ include_directories("${PADDLE_DIR}/paddle/include")
+ link_directories("${PADDLE_DIR}/paddle/fluid/inference")
+ find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/build/ NO_DEFAULT_PATH)
+ unset(OpenCV_DIR CACHE)
+else ()
+ find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/share/OpenCV NO_DEFAULT_PATH)
+ include_directories("${PADDLE_DIR}/paddle/include")
+ link_directories("${PADDLE_DIR}/paddle/lib")
+endif ()
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+if (WIN32)
+ add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
+ find_package(OpenMP REQUIRED)
+ if (OPENMP_FOUND)
+ message("OPENMP FOUND")
+ set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${OpenMP_C_FLAGS}")
+ set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${OpenMP_C_FLAGS}")
+ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${OpenMP_CXX_FLAGS}")
+ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${OpenMP_CXX_FLAGS}")
+ endif()
+ set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
+ set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
+ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
+ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
+ if (WITH_STATIC_LIB)
+ safe_set_static_flag()
+ add_definitions(-DSTATIC_LIB)
+ endif()
+else()
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O2 -fopenmp -std=c++11")
+ set(CMAKE_STATIC_LIBRARY_PREFIX "")
+endif()
+
+if (WITH_GPU)
+ if (NOT DEFINED CUDA_LIB OR ${CUDA_LIB} STREQUAL "")
+ message(FATAL_ERROR "please set CUDA_LIB with -DCUDA_LIB=/path/cuda/lib64")
+ endif()
+ if (NOT WIN32)
+ if (NOT DEFINED CUDNN_LIB)
+ message(FATAL_ERROR "please set CUDNN_LIB with -DCUDNN_LIB=/path/cudnn/")
+ endif()
+ endif(NOT WIN32)
+endif()
+
+
+if (NOT WIN32)
+ if (WITH_TENSORRT AND WITH_GPU)
+ include_directories("${TENSORRT_DIR}/include")
+ link_directories("${TENSORRT_DIR}/lib")
+ endif()
+endif(NOT WIN32)
+
+if (NOT WIN32)
+ set(NGRAPH_PATH "${PADDLE_DIR}/third_party/install/ngraph")
+ if(EXISTS ${NGRAPH_PATH})
+ include(GNUInstallDirs)
+ include_directories("${NGRAPH_PATH}/include")
+ link_directories("${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}")
+ set(NGRAPH_LIB ${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}/libngraph${CMAKE_SHARED_LIBRARY_SUFFIX})
+ endif()
+endif()
+
+if(WITH_MKL)
+ include_directories("${PADDLE_DIR}/third_party/install/mklml/include")
+ if (WIN32)
+ set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.lib
+ ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.lib)
+ else ()
+ set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
+ ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
+ execute_process(COMMAND cp -r ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} /usr/lib)
+ endif ()
+ set(MKLDNN_PATH "${PADDLE_DIR}/third_party/install/mkldnn")
+ if(EXISTS ${MKLDNN_PATH})
+ include_directories("${MKLDNN_PATH}/include")
+ if (WIN32)
+ set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib)
+ else ()
+ set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
+ endif ()
+ endif()
+else()
+ set(MATH_LIB ${PADDLE_DIR}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
+endif()
+
+if (WIN32)
+ if(EXISTS "${PADDLE_DIR}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}")
+ set(DEPS
+ ${PADDLE_DIR}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
+ else()
+ set(DEPS
+ ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
+ endif()
+endif()
+
+if(WITH_STATIC_LIB)
+ set(DEPS
+ ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
+else()
+ if (NOT WIN32)
+ set(DEPS
+ ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
+ else()
+ set(DEPS
+ ${PADDLE_DIR}/paddle/lib/paddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
+ endif()
+endif()
+
+if (NOT WIN32)
+ set(DEPS ${DEPS}
+ ${MATH_LIB} ${MKLDNN_LIB}
+ glog gflags protobuf z xxhash yaml-cpp
+ )
+ if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
+ set(DEPS ${DEPS} snappystream)
+ endif()
+ if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
+ set(DEPS ${DEPS} snappy)
+ endif()
+else()
+ set(DEPS ${DEPS}
+ ${MATH_LIB} ${MKLDNN_LIB}
+ glog gflags_static libprotobuf xxhash libyaml-cppmt)
+
+ if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/lib")
+ set(DEPS ${DEPS} zlibstatic)
+ endif()
+ set(DEPS ${DEPS} libcmt shlwapi)
+ if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
+ set(DEPS ${DEPS} snappy)
+ endif()
+ if (EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
+ set(DEPS ${DEPS} snappystream)
+ endif()
+endif(NOT WIN32)
+
+if(WITH_GPU)
+ if(NOT WIN32)
+ if (WITH_TENSORRT)
+ set(DEPS ${DEPS} ${TENSORRT_DIR}/lib/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${TENSORRT_DIR}/lib/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX})
+ endif()
+ set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX})
+ else()
+ set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} )
+ set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} )
+ set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX})
+ endif()
+endif()
+
+if(WITH_ENCRYPTION)
+ if(NOT WIN32)
+ include_directories("${ENCRYPTION_DIR}/include")
+ link_directories("${ENCRYPTION_DIR}/lib")
+ set(DEPS ${DEPS} ${ENCRYPTION_DIR}/lib/libpmodel-decrypt${CMAKE_SHARED_LIBRARY_SUFFIX})
+ else()
+ include_directories("${ENCRYPTION_DIR}/include")
+ link_directories("${ENCRYPTION_DIR}/lib")
+ set(DEPS ${DEPS} ${ENCRYPTION_DIR}/lib/pmodel-decrypt${CMAKE_STATIC_LIBRARY_SUFFIX})
+ endif()
+endif()
+
+if (NOT WIN32)
+ set(EXTERNAL_LIB "-ldl -lrt -lgomp -lz -lm -lpthread")
+ set(DEPS ${DEPS} ${EXTERNAL_LIB})
+endif()
+
+set(DEPS ${DEPS} ${OpenCV_LIBS})
+add_library(paddlex_inference SHARED src/visualize.cpp src/transforms.cpp src/paddlex.cpp)
+ADD_DEPENDENCIES(paddlex_inference ext-yaml-cpp)
+target_link_libraries(paddlex_inference ${DEPS})
+
+add_executable(meter_reader meter_reader/meter_reader.cpp meter_reader/global.cpp meter_reader/postprocess.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
+ADD_DEPENDENCIES(meter_reader ext-yaml-cpp)
+target_link_libraries(meter_reader ${DEPS})
+
+if (WIN32 AND WITH_MKL)
+ add_custom_command(TARGET meter_reader POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./mklml.dll
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./libiomp5md.dll
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./release/mkldnn.dll
+ )
+ # for encryption
+ if (EXISTS "${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll")
+ add_custom_command(TARGET meter_reader POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll
+ )
+ endif()
+endif()
+
+file(COPY "${CMAKE_SOURCE_DIR}/include/paddlex/visualize.h"
+DESTINATION "${CMAKE_BINARY_DIR}/include/" )
+file(COPY "${CMAKE_SOURCE_DIR}/include/paddlex/config_parser.h"
+DESTINATION "${CMAKE_BINARY_DIR}/include/" )
+file(COPY "${CMAKE_SOURCE_DIR}/include/paddlex/transforms.h"
+DESTINATION "${CMAKE_BINARY_DIR}/include/" )
+file(COPY "${CMAKE_SOURCE_DIR}/include/paddlex/results.h"
+DESTINATION "${CMAKE_BINARY_DIR}/include/" )
+file(COPY "${CMAKE_SOURCE_DIR}/include/paddlex/paddlex.h"
+DESTINATION "${CMAKE_BINARY_DIR}/include/" )
diff --git a/examples/meter_reader/deploy/cpp/meter_reader/global.cpp b/examples/meter_reader/deploy/cpp/meter_reader/global.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2e6dfa42c0f1becadf1c067be584ba89330a66b7
--- /dev/null
+++ b/examples/meter_reader/deploy/cpp/meter_reader/global.cpp
@@ -0,0 +1,34 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+#include "meter_reader/global.h"
+
+std::vector<int> IMAGE_SHAPE = {1920, 1080};
+std::vector<int> RESULT_SHAPE = {1280, 720};
+std::vector<int> METER_SHAPE = {512, 512};
+
+#define METER_TYPE_NUM 2
+MeterConfig_T meter_config[METER_TYPE_NUM] = {
+{25.0f/50.0f, 25.0f, "(MPa)"},
+{1.6f/32.0f, 1.6f, "(MPa)"}
+};
diff --git a/examples/meter_reader/deploy/cpp/meter_reader/global.h b/examples/meter_reader/deploy/cpp/meter_reader/global.h
new file mode 100644
index 0000000000000000000000000000000000000000..a1ea12b3f8f5355b036091e5afe6ab0cebe474bf
--- /dev/null
+++ b/examples/meter_reader/deploy/cpp/meter_reader/global.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <vector>
+
+typedef struct MeterConfig {
+ float scale_value;
+ float range;
+ char str[10];
+} MeterConfig_T;
+
+extern std::vector<int> IMAGE_SHAPE;
+extern std::vector<int> RESULT_SHAPE;
+extern std::vector<int> METER_SHAPE;
+extern MeterConfig_T meter_config[];
+
+#define TYPE_THRESHOLD 40
diff --git a/examples/meter_reader/deploy/cpp/meter_reader/meter_reader.cpp b/examples/meter_reader/deploy/cpp/meter_reader/meter_reader.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..79307fa05eb7b99c753fd978bcec9f0eb1e2f534
--- /dev/null
+++ b/examples/meter_reader/deploy/cpp/meter_reader/meter_reader.cpp
@@ -0,0 +1,315 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <glog/logging.h>
+#include <omp.h>
+
+#include <algorithm>
+#include <chrono>  // NOLINT
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+#include "meter_reader/global.h"
+#include "meter_reader/postprocess.h"
+#include "include/paddlex/paddlex.h"
+#include "include/paddlex/visualize.h"
+
+using namespace std::chrono; // NOLINT
+
+DEFINE_string(det_model_dir, "", "Path of detection inference model");
+DEFINE_string(seg_model_dir, "", "Path of segmentation inference model");
+DEFINE_bool(use_gpu, false, "Inferring with GPU or CPU");
+DEFINE_bool(use_trt, false, "Inferring with TensorRT");
+DEFINE_bool(use_camera, false, "Inferring with a camera");
+DEFINE_bool(use_erode, true, "Eroding predicted label map");
+DEFINE_int32(gpu_id, 0, "GPU card id");
+DEFINE_int32(camera_id, 0, "Camera id");
+DEFINE_int32(thread_num,
+ omp_get_num_procs(),
+ "Number of preprocessing threads");
+DEFINE_int32(erode_kernel, 4, "Eroding kernel size");
+DEFINE_int32(seg_batch_size, 2, "Batch size of segmentation inferring");
+DEFINE_string(det_key, "", "Detector key of encryption");
+DEFINE_string(seg_key, "", "Segmenter model key of encryption");
+DEFINE_string(image, "", "Path of test image file");
+DEFINE_string(image_list, "", "Path of test image list file");
+DEFINE_string(save_dir, "output", "Path to save visualized image");
+DEFINE_double(score_threshold, 0.5, "Detected bbox whose score is lower than this threshold is filtered");
+
+void predict(const cv::Mat &input_image, PaddleX::Model *det_model,
+ PaddleX::Model *seg_model, const std::string save_dir,
+ const std::string image_path, const bool use_erode,
+ const int erode_kernel, const int thread_num,
+ const int seg_batch_size, const double threshold) {
+ PaddleX::DetResult det_result;
+ det_model->predict(input_image, &det_result);
+
+ PaddleX::DetResult filter_result;
+ int num_bboxes = det_result.boxes.size();
+ for (int i = 0; i < num_bboxes; ++i) {
+ double score = det_result.boxes[i].score;
+ if (score >= threshold) {
+ PaddleX::Box box;
+ box.category_id = det_result.boxes[i].category_id;
+ box.category = det_result.boxes[i].category;
+ box.score = det_result.boxes[i].score;
+ box.coordinate = det_result.boxes[i].coordinate;
+ filter_result.boxes.push_back(std::move(box));
+ }
+ }
+
+ int meter_num = filter_result.boxes.size();
+ if (!meter_num) {
+ std::cout << "Don't find any meter." << std::endl;
+ return;
+ }
+
+ std::vector<std::vector<int64_t>> seg_result(meter_num);
+ for (int i = 0; i < meter_num; i += seg_batch_size) {
+ int im_vec_size =
+ std::min(static_cast<int>(meter_num), i + seg_batch_size);
+ std::vector<cv::Mat> meters_image(im_vec_size - i);
+ int batch_thread_num = std::min(thread_num, im_vec_size - i);
+ #pragma omp parallel for num_threads(batch_thread_num)
+ for (int j = i; j < im_vec_size; ++j) {
+ int left = static_cast<int>(filter_result.boxes[j].coordinate[0]);
+ int top = static_cast<int>(filter_result.boxes[j].coordinate[1]);
+ int width = static_cast<int>(filter_result.boxes[j].coordinate[2]);
+ int height = static_cast<int>(filter_result.boxes[j].coordinate[3]);
+ int right = left + width - 1;
+ int bottom = top + height - 1;
+
+ cv::Mat sub_image = input_image(
+ cv::Range(top, bottom + 1), cv::Range(left, right + 1));
+ float scale_x =
+ static_cast<float>(METER_SHAPE[0]) / static_cast<float>(sub_image.cols);
+ float scale_y =
+ static_cast<float>(METER_SHAPE[1]) / static_cast<float>(sub_image.rows);
+ cv::resize(sub_image,
+ sub_image,
+ cv::Size(),
+ scale_x,
+ scale_y,
+ cv::INTER_LINEAR);
+ meters_image[j - i] = std::move(sub_image);
+ }
+ std::vector<PaddleX::SegResult> batch_result(im_vec_size - i);
+ seg_model->predict(meters_image, &batch_result, batch_thread_num);
+ #pragma omp parallel for num_threads(batch_thread_num)
+ for (int j = i; j < im_vec_size; ++j) {
+ if (use_erode) {
+ cv::Mat kernel(erode_kernel, erode_kernel, CV_8U, cv::Scalar(1));
+ std::vector<uint8_t> label_map(
+ batch_result[j - i].label_map.data.begin(),
+ batch_result[j - i].label_map.data.end());
+ cv::Mat mask(batch_result[j - i].label_map.shape[0],
+ batch_result[j - i].label_map.shape[1],
+ CV_8UC1,
+ label_map.data());
+ cv::erode(mask, mask, kernel);
+ std::vector<int64_t> map;
+ if (mask.isContinuous()) {
+ map.assign(mask.data, mask.data + mask.total() * mask.channels());
+ } else {
+ for (int r = 0; r < mask.rows; r++) {
+ map.insert(map.end(),
+ mask.ptr<uchar>(r),
+ mask.ptr<uchar>(r) + mask.cols * mask.channels());
+ }
+ }
+ seg_result[j] = std::move(map);
+ } else {
+ seg_result[j] = std::move(batch_result[j - i].label_map.data);
+ }
+ }
+ }
+
+ std::vector<READ_RESULT> read_results(meter_num);
+ int all_thread_num = std::min(thread_num, meter_num);
+ read_process(seg_result, &read_results, all_thread_num);
+
+ cv::Mat output_image = input_image.clone();
+ for (int i = 0; i < meter_num; i++) {
+ float result = 0;
+ if (read_results[i].scale_num > TYPE_THRESHOLD) {
+ result = read_results[i].scales * meter_config[0].scale_value;
+ } else {
+ result = read_results[i].scales * meter_config[1].scale_value;
+ }
+ std::cout << "-- Meter " << i
+ << " -- result: " << result
+ << " --" << std::endl;
+
+ int lx = static_cast<int>(filter_result.boxes[i].coordinate[0]);
+ int ly = static_cast<int>(filter_result.boxes[i].coordinate[1]);
+ int w = static_cast<int>(filter_result.boxes[i].coordinate[2]);
+ int h = static_cast<int>(filter_result.boxes[i].coordinate[3]);
+
+ cv::Rect bounding_box = cv::Rect(lx, ly, w, h) &
+ cv::Rect(0, 0, output_image.cols, output_image.rows);
+ if (w > 0 && h > 0) {
+ cv::Scalar color = cv::Scalar(237, 189, 101);
+ cv::rectangle(output_image, bounding_box, color);
+ cv::rectangle(output_image,
+ cv::Point2d(lx, ly),
+ cv::Point2d(lx + w, ly - 30),
+ color, -1);
+
+ std::string class_name = "Meter";
+ cv::putText(output_image,
+ class_name + " " + std::to_string(result),
+ cv::Point2d(lx, ly-5),
+ cv::FONT_HERSHEY_SIMPLEX,
+ 1, cv::Scalar(255, 255, 255), 2);
+ }
+ }
+
+ cv::Mat result_image;
+ cv::Size resize_size(RESULT_SHAPE[0], RESULT_SHAPE[1]);
+ cv::resize(output_image, result_image, resize_size, 0, 0, cv::INTER_LINEAR);
+ std::string save_path = PaddleX::generate_save_path(save_dir, image_path);
+ cv::imwrite(save_path, result_image);
+
+ return;
+}
+
+
+int main(int argc, char **argv) {
+ google::ParseCommandLineFlags(&argc, &argv, true);
+ if (FLAGS_det_model_dir == "") {
+ std::cerr << "--det_model_dir need to be defined" << std::endl;
+ return -1;
+ }
+ if (FLAGS_seg_model_dir == "") {
+ std::cerr << "--seg_model_dir need to be defined" << std::endl;
+ return -1;
+ }
+ if (FLAGS_image == "" & FLAGS_image_list == "" & FLAGS_use_camera == false) {
+ std::cerr << "--image or --image_list need to be defined "
+ << "when the camera is not been used" << std::endl;
+ return -1;
+ }
+
+ // Load the detection and segmentation models
+ PaddleX::Model det_model;
+ det_model.Init(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_use_trt,
+ FLAGS_gpu_id, FLAGS_det_key);
+ PaddleX::Model seg_model;
+ seg_model.Init(FLAGS_seg_model_dir, FLAGS_use_gpu, FLAGS_use_trt,
+ FLAGS_gpu_id, FLAGS_seg_key);
+
+ double total_running_time_s = 0.0;
+ double total_imread_time_s = 0.0;
+ int imgs = 1;
+ if (FLAGS_use_camera) {
+ cv::VideoCapture cap(FLAGS_camera_id);
+ cap.set(CV_CAP_PROP_FRAME_WIDTH, IMAGE_SHAPE[0]);
+ cap.set(CV_CAP_PROP_FRAME_HEIGHT, IMAGE_SHAPE[1]);
+ if (!cap.isOpened()) {
+ std::cout << "Open the camera unsuccessfully." << std::endl;
+ return -1;
+ }
+ std::cout << "Open the camera successfully." << std::endl;
+
+ while (1) {
+ auto start = system_clock::now();
+ cv::Mat im;
+ cap >> im;
+ auto imread_end = system_clock::now();
+ std::cout << "-------------------------" << std::endl;
+ std::cout << "Got a camera image." << std::endl;
+ std::string ext_name = ".jpg";
+ predict(im, &det_model, &seg_model, FLAGS_save_dir,
+ std::to_string(imgs) + ext_name, FLAGS_use_erode,
+ FLAGS_erode_kernel, FLAGS_thread_num,
+ FLAGS_seg_batch_size, FLAGS_score_threshold);
+ imgs++;
+ auto imread_duration = duration_cast<microseconds>(imread_end - start);
+ total_imread_time_s += static_cast<double>(imread_duration.count()) *
+ microseconds::period::num /
+ microseconds::period::den;
+
+ auto end = system_clock::now();
+ auto duration = duration_cast<microseconds>(end - start);
+ total_running_time_s += static_cast<double>(duration.count()) *
+ microseconds::period::num /
+ microseconds::period::den;
+ }
+ cap.release();
+ cv::destroyAllWindows();
+ } else {
+ if (FLAGS_image_list != "") {
+ std::ifstream inf(FLAGS_image_list);
+ if (!inf) {
+ std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
+ return -1;
+ }
+ std::string image_path;
+ while (getline(inf, image_path)) {
+ auto start = system_clock::now();
+ cv::Mat im = cv::imread(image_path, 1);
+ imgs++;
+ auto imread_end = system_clock::now();
+
+ predict(im, &det_model, &seg_model, FLAGS_save_dir,
+ image_path, FLAGS_use_erode, FLAGS_erode_kernel,
+ FLAGS_thread_num, FLAGS_seg_batch_size,
+ FLAGS_score_threshold);
+
+ auto imread_duration = duration_cast<microseconds>(imread_end - start);
+ total_imread_time_s += static_cast<double>(imread_duration.count()) *
+ microseconds::period::num /
+ microseconds::period::den;
+
+ auto end = system_clock::now();
+ auto duration = duration_cast<microseconds>(end - start);
+ total_running_time_s += static_cast<double>(duration.count()) *
+ microseconds::period::num /
+ microseconds::period::den;
+ }
+ } else {
+ auto start = system_clock::now();
+ cv::Mat im = cv::imread(FLAGS_image, 1);
+ auto imread_end = system_clock::now();
+
+ predict(im, &det_model, &seg_model, FLAGS_save_dir,
+ FLAGS_image, FLAGS_use_erode, FLAGS_erode_kernel,
+ FLAGS_thread_num, FLAGS_seg_batch_size,
+ FLAGS_score_threshold);
+
+ auto imread_duration = duration_cast<microseconds>(imread_end - start);
+ total_imread_time_s += static_cast<double>(imread_duration.count()) *
+ microseconds::period::num /
+ microseconds::period::den;
+
+ auto end = system_clock::now();
+ auto duration = duration_cast<microseconds>(end - start);
+ total_running_time_s += static_cast<double>(duration.count()) *
+ microseconds::period::num /
+ microseconds::period::den;
+ }
+ }
+ std::cout << "Total running time: " << total_running_time_s
+ << " s, average running time: " << total_running_time_s / imgs
+ << " s/img, total read img time: " << total_imread_time_s
+ << " s, average read time: " << total_imread_time_s / imgs
+ << " s/img" << std::endl;
+ return 0;
+}
diff --git a/examples/meter_reader/deploy/cpp/meter_reader/postprocess.cpp b/examples/meter_reader/deploy/cpp/meter_reader/postprocess.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a3bc321c79ebec6235cd2675910092aa6e23af42
--- /dev/null
+++ b/examples/meter_reader/deploy/cpp/meter_reader/postprocess.cpp
@@ -0,0 +1,190 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <utility>
+#include <vector>
+#include <chrono>  // NOLINT
+
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+#include "meter_reader/global.h"
+#include "meter_reader/postprocess.h"
+
+using namespace std::chrono; // NOLINT
+
+#define SEG_IMAGE_SIZE 512
+#define LINE_HEIGHT 120
+#define LINE_WIDTH 1570
+#define CIRCLE_RADIUS 250
+
+const float pi = 3.1415926536f;
+const int circle_center[] = {256, 256};
+
+
+void creat_line_image(const std::vector<int64_t> &seg_image,
+ std::vector<int64_t> *output) {
+ float theta;
+ int rho;
+ int image_x;
+ int image_y;
+
+ for (int row = 0; row < LINE_HEIGHT; row++) {
+ for (int col = 0; col < LINE_WIDTH; col++) {
+ theta = pi * 2 / LINE_WIDTH * (col + 1);
+ rho = CIRCLE_RADIUS - row - 1;
+ image_x = static_cast<int>(circle_center[0] + rho * cos(theta) + 0.5);
+ image_y = static_cast<int>(circle_center[1] - rho * sin(theta) + 0.5);
+ (*output)[row * LINE_WIDTH + col] =
+ seg_image[image_x * SEG_IMAGE_SIZE + image_y];
+ }
+ }
+
+ return;
+}
+
+void convert_1D_data(const std::vector<int64_t> &line_image,
+ std::vector<int> *scale_data,
+ std::vector<int> *pointer_data) {
+ for (int col = 0; col < LINE_WIDTH; col++) {
+ (*scale_data)[col] = 0;
+ (*pointer_data)[col] = 0;
+ for (int row = 0; row < LINE_HEIGHT; row++) {
+ if (line_image[row * LINE_WIDTH + col] == 1) {
+ (*pointer_data)[col]++;
+ } else if (line_image[row * LINE_WIDTH + col] == 2) {
+ (*scale_data)[col]++;
+ }
+ }
+ }
+ return;
+}
+
+void scale_mean_filtration(const std::vector<int> &scale_data,
+ std::vector<int> *scale_mean_data) {
+ int sum = 0;
+ float mean = 0;
+ int size = scale_data.size();
+ for (int i = 0; i < size; i++) {
+ sum = sum + scale_data[i];
+ }
+ mean = static_cast<float>(sum) / static_cast<float>(size);
+
+ for (int i = 0; i < size; i++) {
+ if (static_cast<float>(scale_data[i]) >= mean) {
+ (*scale_mean_data)[i] = scale_data[i];
+ }
+ }
+
+ return;
+}
+
+void get_meter_reader(const std::vector<int> &scale,
+ const std::vector<int> &pointer,
+ READ_RESULT *result) {
+ std::vector scale_location;
+ float one_scale_location = 0;
+ bool scale_flag = 0;
+ unsigned int one_scale_start = 0;
+ unsigned int one_scale_end = 0;
+
+ float pointer_location = 0;
+ bool pointer_flag = 0;
+ unsigned int one_pointer_start = 0;
+ unsigned int one_pointer_end = 0;
+
+ for (int i = 0; i < LINE_WIDTH - 1; i++) {  // i + 1 is accessed below
+ // scale location
+ if (scale[i] > 0 && scale[i+1] > 0) {
+ if (scale_flag == 0) {
+ one_scale_start = i;
+ scale_flag = 1;
+ }
+ }
+ if (scale_flag == 1) {
+ if (scale[i] == 0 && scale[i+1] == 0) {
+ one_scale_end = i - 1;
+ one_scale_location = (one_scale_start + one_scale_end) / 2.;
+ scale_location.push_back(one_scale_location);
+ one_scale_start = 0;
+ one_scale_end = 0;
+ scale_flag = 0;
+ }
+ }
+
+ // pointer location
+ if (pointer[i] > 0 && pointer[i+1] > 0) {
+ if (pointer_flag == 0) {
+ one_pointer_start = i;
+ pointer_flag = 1;
+ }
+ }
+ if (pointer_flag == 1) {
+ if ((pointer[i] == 0) && (pointer[i+1] == 0)) {
+ one_pointer_end = i - 1;
+ pointer_location = (one_pointer_start + one_pointer_end) / 2.;
+ one_pointer_start = 0;
+ one_pointer_end = 0;
+ pointer_flag = 0;
+ }
+ }
+ }
+
+ int scale_num = scale_location.size();
+ result->scale_num = scale_num;
+ result->scales = -1;
+ result->ratio = -1;
+ if (scale_num > 0) {
+ for (int i = 0; i < scale_num - 1; i++) {
+ if (scale_location[i] <= pointer_location &&
+ pointer_location < scale_location[i + 1]) {
+ result->scales = i + 1 +
+ (pointer_location-scale_location[i]) /
+ (scale_location[i+1]-scale_location[i] + 1e-05);
+ }
+ }
+ result->ratio =
+ (pointer_location - scale_location[0]) /
+ (scale_location[scale_num - 1] - scale_location[0] + 1e-05);
+ }
+ return;
+}
+
+void read_process(const std::vector<std::vector<int64_t>> &seg_image,
+ std::vector<READ_RESULT> *read_results,
+ const int thread_num) {
+ int read_num = seg_image.size();
+ #pragma omp parallel for num_threads(thread_num)
+ for (int i_read = 0; i_read < read_num; i_read++) {
+ std::vector<int64_t> line_result(LINE_WIDTH*LINE_HEIGHT, 0);
+ creat_line_image(seg_image[i_read], &line_result);
+
+ std::vector<int> scale_data(LINE_WIDTH);
+ std::vector<int> pointer_data(LINE_WIDTH);
+ convert_1D_data(line_result, &scale_data, &pointer_data);
+ std::vector<int> scale_mean_data(LINE_WIDTH);
+ scale_mean_filtration(scale_data, &scale_mean_data);
+
+ READ_RESULT result;
+ get_meter_reader(scale_mean_data, pointer_data, &result);
+
+ (*read_results)[i_read] = std::move(result);
+ }
+ return;
+}
diff --git a/examples/meter_reader/deploy/cpp/meter_reader/postprocess.h b/examples/meter_reader/deploy/cpp/meter_reader/postprocess.h
new file mode 100644
index 0000000000000000000000000000000000000000..66006366d780acfc561b4af15eb7f5ecd2955c9b
--- /dev/null
+++ b/examples/meter_reader/deploy/cpp/meter_reader/postprocess.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+struct READ_RESULT {
+ int scale_num;
+ float scales;
+ float ratio;
+};
+
+void creat_line_image(const std::vector<int64_t> &seg_image,
+ std::vector<int64_t> *output);
+
+void convert_1D_data(const std::vector<int64_t> &line_image,
+ std::vector<int> *scale_data,
+ std::vector<int> *pointer_data);
+
+void scale_mean_filtration(const std::vector<int> &scale_data,
+ std::vector<int> *scale_mean_data);
+
+void get_meter_reader(const std::vector<int> &scale,
+ const std::vector<int> &pointer,
+ READ_RESULT *result);
+
+void read_process(const std::vector<std::vector<int64_t>> &seg_image,
+ std::vector<READ_RESULT> *read_results,
+ const int thread_num);
+ const int thread_num);
diff --git a/examples/meter_reader/deploy/python/reader_deploy.py b/examples/meter_reader/deploy/python/reader_deploy.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5f5d18b0edad902217b6392cfc53dfb4709daf9
--- /dev/null
+++ b/examples/meter_reader/deploy/python/reader_deploy.py
@@ -0,0 +1,360 @@
+# coding: utf8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import os.path as osp
+import numpy as np
+import math
+import cv2
+import argparse
+
+from paddlex.seg import transforms
+import paddlex as pdx
+
+METER_SHAPE = 512
+CIRCLE_CENTER = [256, 256]
+CIRCLE_RADIUS = 250
+PI = 3.1415926536
+LINE_HEIGHT = 120
+LINE_WIDTH = 1570
+TYPE_THRESHOLD = 40
+METER_CONFIG = [{
+ 'scale_value': 25.0 / 50.0,
+ 'range': 25.0,
+ 'unit': "(MPa)"
+}, {
+ 'scale_value': 1.6 / 32.0,
+ 'range': 1.6,
+ 'unit': "(MPa)"
+}]
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Meter Reader Inferring')
+ parser.add_argument(
+ '--detector_dir',
+ dest='detector_dir',
+ help='The directory of models to do detection',
+ type=str)
+ parser.add_argument(
+ '--segmenter_dir',
+ dest='segmenter_dir',
+ help='The directory of models to do segmentation',
+ type=str)
+ parser.add_argument(
+ '--image_dir',
+ dest='image_dir',
+ help='The directory of images to be inferred',
+ type=str,
+ default=None)
+ parser.add_argument(
+ '--image',
+ dest='image',
+ help='The image to be inferred',
+ type=str,
+ default=None)
+ parser.add_argument(
+ '--use_camera',
+ dest='use_camera',
+ help='Whether use camera or not',
+ action='store_true')
+ parser.add_argument(
+ '--camera_id',
+ dest='camera_id',
+ type=int,
+ help='The camera id',
+ default=0)
+ parser.add_argument(
+ '--use_erode',
+ dest='use_erode',
+ help='Whether to erode the predicted label map',
+ action='store_true')
+ parser.add_argument(
+ '--erode_kernel',
+ dest='erode_kernel',
+ help='Erode kernel size',
+ type=int,
+ default=4)
+ parser.add_argument(
+ '--save_dir',
+ dest='save_dir',
+ help='The directory for saving the inference results',
+ type=str,
+ default='./output/result')
+ parser.add_argument(
+ '--score_threshold',
+ dest='score_threshold',
+ help="Detected bbox whose score is lower than this threshlod is filtered",
+ type=float,
+ default=0.5)
+ parser.add_argument(
+ '--seg_batch_size',
+ dest='seg_batch_size',
+ help="Segmentation batch size",
+ type=int,
+ default=2)
+ parser.add_argument(
+ '--seg_thread_num',
+ dest='seg_thread_num',
+ help="Thread number of segmentation preprocess",
+ type=int,
+ default=2)
+
+ return parser.parse_args()
+
+
+def is_pic(img_name):
+ valid_suffix = ['JPEG', 'jpeg', 'JPG', 'jpg', 'BMP', 'bmp', 'PNG', 'png']
+ suffix = img_name.split('.')[-1]
+ if suffix not in valid_suffix:
+ return False
+ return True
+
+
+class MeterReader:
+ def __init__(self, detector_dir, segmenter_dir):
+ if not osp.exists(detector_dir):
+ raise Exception("Model path {} does not exist".format(
+ detector_dir))
+ if not osp.exists(segmenter_dir):
+ raise Exception("Model path {} does not exist".format(
+ segmenter_dir))
+ self.detector = pdx.deploy.Predictor(detector_dir)
+ self.segmenter = pdx.deploy.Predictor(segmenter_dir)
+ # Because images are resized to (METER_SHAPE, METER_SHAPE) before being fed
+ # into the segmenter, the transform here is composed of normalization only.
+ self.seg_transforms = transforms.Compose([transforms.Normalize()])
+
+ def predict(self,
+ im_file,
+ save_dir='./',
+ use_erode=True,
+ erode_kernel=4,
+ score_threshold=0.5,
+ seg_batch_size=2,
+ seg_thread_num=2):
+ if isinstance(im_file, str):
+ im = cv2.imread(im_file).astype('float32')
+ else:
+ im = im_file.copy()
+ # Get detection results
+ det_results = self.detector.predict(im)
+ # Filter bbox whose score is lower than score_threshold
+ filtered_results = list()
+ for res in det_results:
+ if res['score'] > score_threshold:
+ filtered_results.append(res)
+
+ resized_meters = list()
+ for res in filtered_results:
+ # Crop the bbox area
+ xmin, ymin, w, h = res['bbox']
+ xmin = max(0, int(xmin))
+ ymin = max(0, int(ymin))
+ xmax = min(im.shape[1], int(xmin + w - 1))
+ ymax = min(im.shape[0], int(ymin + h - 1))
+ sub_image = im[ymin:(ymax + 1), xmin:(xmax + 1), :]
+
+ # Resize the image with shape (METER_SHAPE, METER_SHAPE)
+ meter_shape = sub_image.shape
+ scale_x = float(METER_SHAPE) / float(meter_shape[1])
+ scale_y = float(METER_SHAPE) / float(meter_shape[0])
+ meter_meter = cv2.resize(
+ sub_image,
+ None,
+ None,
+ fx=scale_x,
+ fy=scale_y,
+ interpolation=cv2.INTER_LINEAR)
+ meter_meter = meter_meter.astype('float32')
+ resized_meters.append(meter_meter)
+
+ meter_num = len(resized_meters)
+ seg_results = list()
+ for i in range(0, meter_num, seg_batch_size):
+ im_size = min(meter_num, i + seg_batch_size)
+ meter_images = list()
+ for j in range(i, im_size):
+ meter_images.append(resized_meters[j])  # index into the full list, not the batch
+ result = self.segmenter.batch_predict(
+ transforms=self.seg_transforms,
+ img_file_list=meter_images,
+ thread_num=seg_thread_num)
+ if use_erode:
+ kernel = np.ones((erode_kernel, erode_kernel), np.uint8)
+ for i in range(len(result)):
+ result[i]['label_map'] = cv2.erode(result[i]['label_map'],
+ kernel)
+ seg_results.extend(result)
+
+ results = list()
+ for i, seg_result in enumerate(seg_results):
+ result = self.read_process(seg_result['label_map'])
+ results.append(result)
+
+ meter_values = list()
+ for i, result in enumerate(results):
+ if result['scale_num'] > TYPE_THRESHOLD:
+ value = result['scales'] * METER_CONFIG[0]['scale_value']
+ else:
+ value = result['scales'] * METER_CONFIG[1]['scale_value']
+ meter_values.append(value)
+ print("-- Meter {} -- result: {} --\n".format(i, value))
+
+ # visualize the results
+ visual_results = list()
+ for i, res in enumerate(filtered_results):
+ # Use `score` to represent the meter value
+ res['score'] = meter_values[i]
+ visual_results.append(res)
+ pdx.det.visualize(im_file, visual_results, -1, save_dir=save_dir)
+
+ def read_process(self, label_maps):
+ # Convert the circular meter into rectangular meter
+ line_images = self.creat_line_image(label_maps)
+ # Convert the 2d meter into 1d meter
+ scale_data, pointer_data = self.convert_1d_data(line_images)
+ # Filter out scale data whose value is lower than the mean value
+ self.scale_mean_filtration(scale_data)
+ # Get scale_num, scales and ratio of meters
+ result = self.get_meter_reader(scale_data, pointer_data)
+ return result
+
+ def creat_line_image(self, meter_image):
+ line_image = np.zeros((LINE_HEIGHT, LINE_WIDTH), dtype=np.uint8)
+ for row in range(LINE_HEIGHT):
+ for col in range(LINE_WIDTH):
+ theta = PI * 2 / LINE_WIDTH * (col + 1)
+ rho = CIRCLE_RADIUS - row - 1
+ x = int(CIRCLE_CENTER[0] + rho * math.cos(theta) + 0.5)
+ y = int(CIRCLE_CENTER[1] - rho * math.sin(theta) + 0.5)
+ line_image[row, col] = meter_image[x, y]
+ return line_image
+
+ def convert_1d_data(self, meter_image):
+ scale_data = np.zeros((LINE_WIDTH), dtype=np.uint8)
+ pointer_data = np.zeros((LINE_WIDTH), dtype=np.uint8)
+ for col in range(LINE_WIDTH):
+ for row in range(LINE_HEIGHT):
+ if meter_image[row, col] == 1:
+ pointer_data[col] += 1
+ elif meter_image[row, col] == 2:
+ scale_data[col] += 1
+ return scale_data, pointer_data
+
+ def scale_mean_filtration(self, scale_data):
+ mean_data = np.mean(scale_data)
+ for col in range(LINE_WIDTH):
+ if scale_data[col] < mean_data:
+ scale_data[col] = 0
+
+ def get_meter_reader(self, scale_data, pointer_data):
+ scale_flag = False
+ pointer_flag = False
+ one_scale_start = 0
+ one_scale_end = 0
+ one_pointer_start = 0
+ one_pointer_end = 0
+ scale_location = list()
+ pointer_location = 0
+ for i in range(LINE_WIDTH - 1):
+ if scale_data[i] > 0 and scale_data[i + 1] > 0:
+ if scale_flag == False:
+ one_scale_start = i
+ scale_flag = True
+ if scale_flag:
+ if scale_data[i] == 0 and scale_data[i + 1] == 0:
+ one_scale_end = i - 1
+ one_scale_location = (one_scale_start + one_scale_end) / 2
+ scale_location.append(one_scale_location)
+ one_scale_start = 0
+ one_scale_end = 0
+ scale_flag = False
+ if pointer_data[i] > 0 and pointer_data[i + 1] > 0:
+ if pointer_flag == False:
+ one_pointer_start = i
+ pointer_flag = True
+ if pointer_flag:
+ if pointer_data[i] == 0 and pointer_data[i + 1] == 0:
+ one_pointer_end = i - 1
+ pointer_location = (
+ one_pointer_start + one_pointer_end) / 2
+ one_pointer_start = 0
+ one_pointer_end = 0
+ pointer_flag = False
+
+ scale_num = len(scale_location)
+ scales = -1
+ ratio = -1
+ if scale_num > 0:
+ for i in range(scale_num - 1):
+ if scale_location[i] <= pointer_location < scale_location[i + 1]:
+ scales = i + 1 + (pointer_location - scale_location[i]) / (
+ scale_location[i + 1] - scale_location[i] + 1e-05)
+ ratio = (pointer_location - scale_location[0]) / (
+ scale_location[scale_num - 1] - scale_location[0] + 1e-05)
+ result = {'scale_num': scale_num, 'scales': scales, 'ratio': ratio}
+ return result
+
+
+def infer(args):
+ image_lists = list()
+ if args.image is not None:
+ if not osp.exists(args.image):
+ raise Exception("Image {} does not exist.".format(args.image))
+ if not is_pic(args.image):
+ raise Exception("{} is not a picture.".format(args.image))
+ image_lists.append(args.image)
+ elif args.image_dir is not None:
+ if not osp.exists(args.image_dir):
+ raise Exception("Directory {} does not exist.".format(
+ args.image_dir))
+ for im_file in os.listdir(args.image_dir):
+ if not is_pic(im_file):
+ continue
+ im_file = osp.join(args.image_dir, im_file)
+ image_lists.append(im_file)
+
+ meter_reader = MeterReader(args.detector_dir, args.segmenter_dir)
+ if len(image_lists) > 0:
+ for im_file in image_lists:
+ meter_reader.predict(im_file, args.save_dir, args.use_erode,
+ args.erode_kernel, args.score_threshold,
+ args.seg_batch_size, args.seg_thread_num)
+ elif args.use_camera:
+ cap_video = cv2.VideoCapture(args.camera_id)
+ if not cap_video.isOpened():
+ raise Exception(
+ "Error opening video stream, please make sure the camera is working"
+ )
+
+ while cap_video.isOpened():
+ ret, frame = cap_video.read()
+ if ret:
+ meter_reader.predict(frame, args.save_dir, args.use_erode,
+ args.erode_kernel, args.score_threshold,
+ args.seg_batch_size, args.seg_thread_num)
+ if cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+ else:
+ break
+ cap_video.release()
+
+
+if __name__ == '__main__':
+ args = parse_args()
+ infer(args)
diff --git a/examples/meter_reader/image/MeterReader_Architecture.jpg b/examples/meter_reader/image/MeterReader_Architecture.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f60b597c7431e1b43fb8d3fc7b168caacd13c37e
Binary files /dev/null and b/examples/meter_reader/image/MeterReader_Architecture.jpg differ
diff --git a/examples/meter_reader/reader_infer.py b/examples/meter_reader/reader_infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..c7f7d7367a7ef3d0b6bba4fd1c6a3258cd5145ac
--- /dev/null
+++ b/examples/meter_reader/reader_infer.py
@@ -0,0 +1,360 @@
+# coding: utf8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import os.path as osp
+import numpy as np
+import math
+import cv2
+import argparse
+
+from paddlex.seg import transforms
+import paddlex as pdx
+
+METER_SHAPE = 512
+CIRCLE_CENTER = [256, 256]
+CIRCLE_RADIUS = 250
+PI = 3.1415926536
+LINE_HEIGHT = 120
+LINE_WIDTH = 1570
+TYPE_THRESHOLD = 40
+METER_CONFIG = [{
+ 'scale_value': 25.0 / 50.0,
+ 'range': 25.0,
+ 'unit': "(MPa)"
+}, {
+ 'scale_value': 1.6 / 32.0,
+ 'range': 1.6,
+ 'unit': "(MPa)"
+}]
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Meter Reader Inferring')
+ parser.add_argument(
+ '--detector_dir',
+ dest='detector_dir',
+ help='The directory of models to do detection',
+ type=str)
+ parser.add_argument(
+ '--segmenter_dir',
+ dest='segmenter_dir',
+ help='The directory of models to do segmentation',
+ type=str)
+ parser.add_argument(
+ '--image_dir',
+ dest='image_dir',
+ help='The directory of images to be inferred',
+ type=str,
+ default=None)
+ parser.add_argument(
+ '--image',
+ dest='image',
+ help='The image to be inferred',
+ type=str,
+ default=None)
+ parser.add_argument(
+ '--use_camera',
+ dest='use_camera',
+ help='Whether use camera or not',
+ action='store_true')
+ parser.add_argument(
+ '--camera_id',
+ dest='camera_id',
+ type=int,
+ help='The camera id',
+ default=0)
+ parser.add_argument(
+ '--use_erode',
+ dest='use_erode',
+ help='Whether to erode the predicted label map',
+ action='store_true')
+ parser.add_argument(
+ '--erode_kernel',
+ dest='erode_kernel',
+ help='Erode kernel size',
+ type=int,
+ default=4)
+ parser.add_argument(
+ '--save_dir',
+ dest='save_dir',
+ help='The directory for saving the inference results',
+ type=str,
+ default='./output/result')
+ parser.add_argument(
+ '--score_threshold',
+ dest='score_threshold',
+ help="Detected bbox whose score is lower than this threshlod is filtered",
+ type=float,
+ default=0.5)
+ parser.add_argument(
+ '--seg_batch_size',
+ dest='seg_batch_size',
+ help="Segmentation batch size",
+ type=int,
+ default=2)
+ parser.add_argument(
+ '--seg_thread_num',
+ dest='seg_thread_num',
+ help="Thread number of segmentation preprocess",
+ type=int,
+ default=2)
+
+ return parser.parse_args()
+
+
+def is_pic(img_name):
+ valid_suffix = ['JPEG', 'jpeg', 'JPG', 'jpg', 'BMP', 'bmp', 'PNG', 'png']
+ suffix = img_name.split('.')[-1]
+ if suffix not in valid_suffix:
+ return False
+ return True
+
+
+class MeterReader:
+ def __init__(self, detector_dir, segmenter_dir):
+ if not osp.exists(detector_dir):
+ raise Exception("Model path {} does not exist".format(
+ detector_dir))
+ if not osp.exists(segmenter_dir):
+ raise Exception("Model path {} does not exist".format(
+ segmenter_dir))
+ self.detector = pdx.load_model(detector_dir)
+ self.segmenter = pdx.load_model(segmenter_dir)
+ # Because images are resized to (METER_SHAPE, METER_SHAPE) before being fed
+ # into the segmenter, the transform here is composed of normalization only.
+ self.seg_transforms = transforms.Compose([transforms.Normalize()])
+
+ def predict(self,
+ im_file,
+ save_dir='./',
+ use_erode=True,
+ erode_kernel=4,
+ score_threshold=0.5,
+ seg_batch_size=2,
+ seg_thread_num=2):
+ if isinstance(im_file, str):
+ im = cv2.imread(im_file).astype('float32')
+ else:
+ im = im_file.copy()
+ # Get detection results
+ det_results = self.detector.predict(im)
+        # Filter out bboxes whose score is lower than score_threshold
+ filtered_results = list()
+ for res in det_results:
+ if res['score'] > score_threshold:
+ filtered_results.append(res)
+
+ resized_meters = list()
+ for res in filtered_results:
+ # Crop the bbox area
+ xmin, ymin, w, h = res['bbox']
+ xmin = max(0, int(xmin))
+ ymin = max(0, int(ymin))
+ xmax = min(im.shape[1], int(xmin + w - 1))
+ ymax = min(im.shape[0], int(ymin + h - 1))
+ sub_image = im[ymin:(ymax + 1), xmin:(xmax + 1), :]
+
+            # Resize the image to (METER_SHAPE, METER_SHAPE)
+ meter_shape = sub_image.shape
+ scale_x = float(METER_SHAPE) / float(meter_shape[1])
+ scale_y = float(METER_SHAPE) / float(meter_shape[0])
+            resized_meter = cv2.resize(
+                sub_image,
+                None,
+                None,
+                fx=scale_x,
+                fy=scale_y,
+                interpolation=cv2.INTER_LINEAR)
+            resized_meter = resized_meter.astype('float32')
+            resized_meters.append(resized_meter)
+
+ meter_num = len(resized_meters)
+ seg_results = list()
+        for i in range(0, meter_num, seg_batch_size):
+            im_size = min(meter_num, i + seg_batch_size)
+            # Collect the meters of the current batch
+            meter_images = list()
+            for j in range(i, im_size):
+                meter_images.append(resized_meters[j])
+ result = self.segmenter.batch_predict(
+ transforms=self.seg_transforms,
+ img_file_list=meter_images,
+ thread_num=seg_thread_num)
+            if use_erode:
+                kernel = np.ones((erode_kernel, erode_kernel), np.uint8)
+                for k in range(len(result)):
+                    result[k]['label_map'] = cv2.erode(
+                        result[k]['label_map'], kernel)
+ seg_results.extend(result)
+
+ results = list()
+ for i, seg_result in enumerate(seg_results):
+ result = self.read_process(seg_result['label_map'])
+ results.append(result)
+
+ meter_values = list()
+ for i, result in enumerate(results):
+ if result['scale_num'] > TYPE_THRESHOLD:
+ value = result['scales'] * METER_CONFIG[0]['scale_value']
+ else:
+ value = result['scales'] * METER_CONFIG[1]['scale_value']
+ meter_values.append(value)
+ print("-- Meter {} -- result: {} --\n".format(i, value))
+
+ # visualize the results
+ visual_results = list()
+ for i, res in enumerate(filtered_results):
+ # Use `score` to represent the meter value
+ res['score'] = meter_values[i]
+ visual_results.append(res)
+ pdx.det.visualize(im_file, visual_results, -1, save_dir=save_dir)
+
+ def read_process(self, label_maps):
+        # Unwarp the circular meter into a rectangular image
+        line_images = self.create_line_image(label_maps)
+        # Project the 2-D rectangular meter into 1-D scale and pointer arrays
+        scale_data, pointer_data = self.convert_1d_data(line_images)
+        # Filter out scale data whose value is lower than the mean value
+        self.scale_mean_filtration(scale_data)
+        # Get scale_num, scales and ratio of the meter
+ result = self.get_meter_reader(scale_data, pointer_data)
+ return result
+
+    def create_line_image(self, meter_image):
+ line_image = np.zeros((LINE_HEIGHT, LINE_WIDTH), dtype=np.uint8)
+ for row in range(LINE_HEIGHT):
+ for col in range(LINE_WIDTH):
+ theta = PI * 2 / LINE_WIDTH * (col + 1)
+ rho = CIRCLE_RADIUS - row - 1
+ x = int(CIRCLE_CENTER[0] + rho * math.cos(theta) + 0.5)
+ y = int(CIRCLE_CENTER[1] - rho * math.sin(theta) + 0.5)
+ line_image[row, col] = meter_image[x, y]
+ return line_image
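+
+    # Note: create_line_image performs a polar-to-Cartesian unwarp. Each of the
+    # LINE_WIDTH output columns corresponds to one angle theta around the dial,
+    # and each of the LINE_HEIGHT rows to one radius rho just inside
+    # CIRCLE_RADIUS, so an annular ring of the circular label map is read out
+    # as a rectangular LINE_HEIGHT x LINE_WIDTH strip.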
+
+ def convert_1d_data(self, meter_image):
+ scale_data = np.zeros((LINE_WIDTH), dtype=np.uint8)
+ pointer_data = np.zeros((LINE_WIDTH), dtype=np.uint8)
+ for col in range(LINE_WIDTH):
+ for row in range(LINE_HEIGHT):
+ if meter_image[row, col] == 1:
+ pointer_data[col] += 1
+ elif meter_image[row, col] == 2:
+ scale_data[col] += 1
+ return scale_data, pointer_data
+
+ def scale_mean_filtration(self, scale_data):
+ mean_data = np.mean(scale_data)
+ for col in range(LINE_WIDTH):
+ if scale_data[col] < mean_data:
+ scale_data[col] = 0
+
+ def get_meter_reader(self, scale_data, pointer_data):
+ scale_flag = False
+ pointer_flag = False
+ one_scale_start = 0
+ one_scale_end = 0
+ one_pointer_start = 0
+ one_pointer_end = 0
+ scale_location = list()
+ pointer_location = 0
+ for i in range(LINE_WIDTH - 1):
+ if scale_data[i] > 0 and scale_data[i + 1] > 0:
+                if not scale_flag:
+ one_scale_start = i
+ scale_flag = True
+ if scale_flag:
+ if scale_data[i] == 0 and scale_data[i + 1] == 0:
+ one_scale_end = i - 1
+ one_scale_location = (one_scale_start + one_scale_end) / 2
+ scale_location.append(one_scale_location)
+ one_scale_start = 0
+ one_scale_end = 0
+ scale_flag = False
+ if pointer_data[i] > 0 and pointer_data[i + 1] > 0:
+                if not pointer_flag:
+ one_pointer_start = i
+ pointer_flag = True
+ if pointer_flag:
+ if pointer_data[i] == 0 and pointer_data[i + 1] == 0:
+ one_pointer_end = i - 1
+ pointer_location = (
+ one_pointer_start + one_pointer_end) / 2
+ one_pointer_start = 0
+ one_pointer_end = 0
+ pointer_flag = False
+
+ scale_num = len(scale_location)
+ scales = -1
+ ratio = -1
+ if scale_num > 0:
+            for i in range(scale_num - 1):
+                if scale_location[i] <= pointer_location < scale_location[i + 1]:
+                    scales = i + (pointer_location - scale_location[i]) / (
+                        scale_location[i + 1] - scale_location[i] + 1e-05) + 1
+ ratio = (pointer_location - scale_location[0]) / (
+ scale_location[scale_num - 1] - scale_location[0] + 1e-05)
+ result = {'scale_num': scale_num, 'scales': scales, 'ratio': ratio}
+ return result
+
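+# A worked example of the conversion done in predict() (numbers illustrative):
+# if read_process finds 50 scale marks and the pointer sits 40% of the way
+# between the 10th and 11th mark, get_meter_reader returns scales ~= 10.4;
+# since scale_num = 50 > TYPE_THRESHOLD, the reading is
+# 10.4 * METER_CONFIG[0]['scale_value'] = 10.4 * 0.5 = 5.2 (MPa).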
+
+def infer(args):
+ image_lists = list()
+ if args.image is not None:
+ if not osp.exists(args.image):
+ raise Exception("Image {} does not exist.".format(args.image))
+ if not is_pic(args.image):
+ raise Exception("{} is not a picture.".format(args.image))
+ image_lists.append(args.image)
+ elif args.image_dir is not None:
+ if not osp.exists(args.image_dir):
+ raise Exception("Directory {} does not exist.".format(
+ args.image_dir))
+ for im_file in os.listdir(args.image_dir):
+ if not is_pic(im_file):
+ continue
+ im_file = osp.join(args.image_dir, im_file)
+ image_lists.append(im_file)
+
+ meter_reader = MeterReader(args.detector_dir, args.segmenter_dir)
+ if len(image_lists) > 0:
+ for im_file in image_lists:
+ meter_reader.predict(im_file, args.save_dir, args.use_erode,
+ args.erode_kernel, args.score_threshold,
+ args.seg_batch_size, args.seg_thread_num)
+ elif args.use_camera:
+ cap_video = cv2.VideoCapture(args.camera_id)
+ if not cap_video.isOpened():
+ raise Exception(
+ "Error opening video stream, please make sure the camera is working"
+ )
+
+ while cap_video.isOpened():
+ ret, frame = cap_video.read()
+ if ret:
+ meter_reader.predict(frame, args.save_dir, args.use_erode,
+ args.erode_kernel, args.score_threshold,
+ args.seg_batch_size, args.seg_thread_num)
+ if cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+ else:
+ break
+ cap_video.release()
+
+
+if __name__ == '__main__':
+ args = parse_args()
+ infer(args)
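+
+# Example invocation (script name and paths are illustrative; the model
+# directories match the save_dir values used by the training scripts below):
+#
+#   python reader_infer.py \
+#       --detector_dir output/meter_det/best_model \
+#       --segmenter_dir output/meter_seg/best_model \
+#       --image demo.jpg --use_erode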
diff --git a/examples/meter_reader/train_detection.py b/examples/meter_reader/train_detection.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a54361f19ff73ea3ce34d8df25c940b0e1308f3
--- /dev/null
+++ b/examples/meter_reader/train_detection.py
@@ -0,0 +1,60 @@
+import os
+# Use GPU card 0
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.det import transforms
+import paddlex as pdx
+
+# Download and decompress the meter detection dataset
+meter_det_dataset = 'https://bj.bcebos.com/paddlex/examples/meter_reader/datasets/meter_det.tar.gz'
+pdx.utils.download_and_decompress(meter_det_dataset, path='./')
+
+# Define the transforms for training and validation
+train_transforms = transforms.Compose([
+ transforms.MixupImage(mixup_epoch=250),
+ transforms.RandomDistort(),
+ transforms.RandomExpand(),
+ transforms.RandomCrop(),
+ transforms.Resize(
+ target_size=608, interp='RANDOM'),
+ transforms.RandomHorizontalFlip(),
+ transforms.Normalize(),
+])
+
+eval_transforms = transforms.Compose([
+ transforms.Resize(
+ target_size=608, interp='CUBIC'),
+ transforms.Normalize(),
+])
+
+# Define the training and validation datasets
+train_dataset = pdx.datasets.CocoDetection(
+ data_dir='meter_det/train/',
+ ann_file='meter_det/annotations/instance_train.json',
+ transforms=train_transforms,
+ shuffle=True)
+eval_dataset = pdx.datasets.CocoDetection(
+ data_dir='meter_det/test/',
+ ann_file='meter_det/annotations/instance_test.json',
+ transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be inspected with VisualDL
+# Launch VisualDL with: visualdl --logdir output/yolov3_darknet/vdl_log --port 8001
+# Then open http://0.0.0.0:8001 in a browser (VisualDL serves plain HTTP)
+# 0.0.0.0 works for local access; for a remote server, use that machine's IP instead
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3
+num_classes = len(train_dataset.labels)
+model = pdx.det.YOLOv3(
+ num_classes=num_classes, backbone='DarkNet53', label_smooth=True)
+model.train(
+ num_epochs=270,
+ train_dataset=train_dataset,
+ train_batch_size=8,
+ eval_dataset=eval_dataset,
+ learning_rate=0.001,
+ warmup_steps=4000,
+ lr_decay_epochs=[210, 240],
+ save_dir='output/meter_det',
+ use_vdl=True)
diff --git a/examples/meter_reader/train_segmentation.py b/examples/meter_reader/train_segmentation.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2f7e3b81ba97f585c7c80c2fa585fdcf3e1a222
--- /dev/null
+++ b/examples/meter_reader/train_segmentation.py
@@ -0,0 +1,55 @@
+import os
+# Use GPU card 0
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+import paddlex as pdx
+from paddlex.seg import transforms
+
+# Download and decompress the meter (dial) segmentation dataset
+meter_seg_dataset = 'https://bj.bcebos.com/paddlex/examples/meter_reader/datasets/meter_seg.tar.gz'
+pdx.utils.download_and_decompress(meter_seg_dataset, path='./')
+
+# Define the transforms for training and validation
+train_transforms = transforms.Compose([
+ transforms.Resize([512, 512]),
+ transforms.RandomHorizontalFlip(prob=0.5),
+ transforms.Normalize(),
+])
+
+eval_transforms = transforms.Compose([
+ transforms.Resize([512, 512]),
+ transforms.Normalize(),
+])
+# Define the training and validation datasets
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
+train_dataset = pdx.datasets.SegDataset(
+ data_dir='meter_seg/',
+ file_list='meter_seg/train.txt',
+ label_list='meter_seg/labels.txt',
+ transforms=train_transforms,
+ shuffle=True)
+eval_dataset = pdx.datasets.SegDataset(
+ data_dir='meter_seg/',
+ file_list='meter_seg/val.txt',
+ label_list='meter_seg/labels.txt',
+ transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be inspected with VisualDL
+# Launch VisualDL with: visualdl --logdir output/deeplab/vdl_log --port 8001
+# Then open http://0.0.0.0:8001 in a browser (VisualDL serves plain HTTP)
+# 0.0.0.0 works for local access; for a remote server, use that machine's IP instead
+#
+# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p
+model = pdx.seg.DeepLabv3p(
+ num_classes=len(train_dataset.labels), backbone='Xception65')
+model.train(
+ num_epochs=20,
+ train_dataset=train_dataset,
+ train_batch_size=4,
+ eval_dataset=eval_dataset,
+ learning_rate=0.1,
+ pretrain_weights='COCO',
+ save_interval_epochs=5,
+ save_dir='output/meter_seg',
+ use_vdl=True)
diff --git a/paddlex/__init__.py b/paddlex/__init__.py
index 7743882a6334e257c1a7a4b94566aff3a8a55667..312da5638ea8b8fd02a782d18d8a04f2000ce420 100644
--- a/paddlex/__init__.py
+++ b/paddlex/__init__.py
@@ -13,6 +13,7 @@
# limitations under the License.
from __future__ import absolute_import
+
import os
if 'FLAGS_eager_delete_tensor_gb' not in os.environ:
os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0'
@@ -21,6 +22,7 @@ if 'FLAGS_allocator_strategy' not in os.environ:
if "CUDA_VISIBLE_DEVICES" in os.environ:
if os.environ["CUDA_VISIBLE_DEVICES"].count("-1") > 0:
os.environ["CUDA_VISIBLE_DEVICES"] = ""
+
from .utils.utils import get_environ_info
from . import cv
from . import det
@@ -38,7 +40,7 @@ except:
"[WARNING] pycocotools is not installed, detection model is not available now."
)
print(
- "[WARNING] pycocotools install: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/install.md"
+ "[WARNING] pycocotools install: https://paddlex.readthedocs.io/zh_CN/develop/install.html#pycocotools"
)
import paddlehub as hub
@@ -54,4 +56,4 @@ log_level = 2
from . import interpret
-__version__ = '1.0.7'
+__version__ = '1.1.0'
diff --git a/paddlex/command.py b/paddlex/command.py
index 612bc5f3f2b2c3bbec23f56c2983a722d76e21fc..fb79cef4df0c00408e95ca950827bdf7c5528f13 100644
--- a/paddlex/command.py
+++ b/paddlex/command.py
@@ -50,6 +50,36 @@ def arg_parser():
action="store_true",
default=False,
help="export onnx model for deployment")
+ parser.add_argument(
+ "--data_conversion",
+ "-dc",
+ action="store_true",
+ default=False,
+ help="convert the dataset to the standard format")
+ parser.add_argument(
+ "--source",
+ "-se",
+ type=_text_type,
+ default=None,
+ help="define dataset format before the conversion")
+ parser.add_argument(
+ "--to",
+ "-to",
+ type=_text_type,
+ default=None,
+ help="define dataset format after the conversion")
+ parser.add_argument(
+ "--pics",
+ "-p",
+ type=_text_type,
+ default=None,
+ help="define pictures directory path")
+ parser.add_argument(
+ "--annotations",
+ "-a",
+ type=_text_type,
+ default=None,
+ help="define annotations directory path")
parser.add_argument(
"--fixed_input_shape",
"-fs",
@@ -105,6 +135,24 @@ def main():
"paddlex --export_inference --model_dir model_path --save_dir infer_model"
)
pdx.convertor.export_onnx_model(model, args.save_dir)
+
+ if args.data_conversion:
+        assert args.source is not None, "--source should be defined while converting dataset"
+        assert args.to is not None, "--to should be defined to confirm the target dataset format"
+        assert args.pics is not None, "--pics should be defined to confirm the pictures path"
+        assert args.annotations is not None, "--annotations should be defined to confirm the annotations path"
+        assert args.save_dir is not None, "--save_dir should be defined to store the target dataset"
+ if args.source == 'labelme' and args.to == 'ImageNet':
+ logging.error(
+ "The labelme dataset can not convert to the ImageNet dataset.",
+ exit=False)
+ if args.source == 'jingling' and args.to == 'PascalVOC':
+ logging.error(
+ "The jingling dataset can not convert to the PascalVOC dataset.",
+ exit=False)
+        pdx.tools.convert.dataset_conversion(args.source, args.to, args.pics,
+                                             args.annotations, args.save_dir)
+
if __name__ == "__main__":
diff --git a/paddlex/cv/datasets/__init__.py b/paddlex/cv/datasets/__init__.py
index b8bf8f9c3fc0355a02a1802db706546c2f6fa4b7..926f4942d844c7562de9e977b10446328b9f8303 100644
--- a/paddlex/cv/datasets/__init__.py
+++ b/paddlex/cv/datasets/__init__.py
@@ -18,4 +18,5 @@ from .coco import CocoDetection
from .seg_dataset import SegDataset
from .easydata_cls import EasyDataCls
from .easydata_det import EasyDataDet
-from .easydata_seg import EasyDataSeg
\ No newline at end of file
+from .easydata_seg import EasyDataSeg
+from .dataset import generate_minibatch
diff --git a/paddlex/cv/datasets/dataset.py b/paddlex/cv/datasets/dataset.py
index 3cc8b52c10f41ce17bbe8da7fd5289b96aac409e..b91f408f44e6beff9354142ff44ac8458ed9160e 100644
--- a/paddlex/cv/datasets/dataset.py
+++ b/paddlex/cv/datasets/dataset.py
@@ -46,7 +46,7 @@ def is_valid(sample):
return False
elif isinstance(s, np.ndarray) and s.size == 0:
return False
- elif isinstance(s, collections.Sequence) and len(s) == 0:
+ elif isinstance(s, collections.abc.Sequence) and len(s) == 0:
return False
return True
@@ -55,6 +55,7 @@ def get_encoding(path):
f = open(path, 'rb')
data = f.read()
file_encoding = chardet.detect(data).get('encoding')
+ f.close()
return file_encoding
@@ -114,7 +115,7 @@ def multithread_reader(mapper,
while not isinstance(sample, EndSignal):
batch_data.append(sample)
if len(batch_data) == batch_size:
- batch_data = GenerateMiniBatch(batch_data)
+ batch_data = generate_minibatch(batch_data)
yield batch_data
batch_data = []
sample = out_queue.get()
@@ -126,11 +127,11 @@ def multithread_reader(mapper,
else:
batch_data.append(sample)
if len(batch_data) == batch_size:
- batch_data = GenerateMiniBatch(batch_data)
+ batch_data = generate_minibatch(batch_data)
yield batch_data
batch_data = []
if not drop_last and len(batch_data) != 0:
- batch_data = GenerateMiniBatch(batch_data)
+ batch_data = generate_minibatch(batch_data)
yield batch_data
batch_data = []
@@ -187,32 +188,65 @@ def multiprocess_reader(mapper,
else:
batch_data.append(sample)
if len(batch_data) == batch_size:
- batch_data = GenerateMiniBatch(batch_data)
+ batch_data = generate_minibatch(batch_data)
yield batch_data
batch_data = []
if len(batch_data) != 0 and not drop_last:
- batch_data = GenerateMiniBatch(batch_data)
+ batch_data = generate_minibatch(batch_data)
yield batch_data
batch_data = []
return queue_reader
-def GenerateMiniBatch(batch_data):
+def generate_minibatch(batch_data, label_padding_value=255):
+ # if batch_size is 1, do not pad the image
if len(batch_data) == 1:
return batch_data
width = [data[0].shape[2] for data in batch_data]
height = [data[0].shape[1] for data in batch_data]
+ # if the sizes of images in a mini-batch are equal,
+ # do not pad the image
if len(set(width)) == 1 and len(set(height)) == 1:
return batch_data
max_shape = np.array([data[0].shape for data in batch_data]).max(axis=0)
padding_batch = []
for data in batch_data:
+        # pad each image to the max shape of the batch
im_c, im_h, im_w = data[0].shape[:]
padding_im = np.zeros(
(im_c, max_shape[1], max_shape[2]), dtype=np.float32)
padding_im[:, :im_h, :im_w] = data[0]
- padding_batch.append((padding_im, ) + data[1:])
+ if len(data) > 1:
+ if isinstance(data[1], np.ndarray) and len(data[1].shape) > 1:
+                # pad both the image and the label of a segmentation sample
+                # during the training and evaluating phases;
+                # for segmentation, data[1] is an image array,
+                # so len(data[1].shape) > 1
+ padding_label = np.zeros(
+ (1, max_shape[1], max_shape[2]
+ )).astype('int64') + label_padding_value
+ _, label_h, label_w = data[1].shape
+ padding_label[:, :label_h, :label_w] = data[1]
+ padding_batch.append((padding_im, padding_label))
+ elif len(data[1]) == 0 or isinstance(
+ data[1][0],
+ tuple) and data[1][0][0] in ['resize', 'padding']:
+                # pad the image and record 'padding' in `im_info`
+                # for segmentation during the inference phase
+ if len(data[1]) == 0 or 'padding' not in [
+ data[1][i][0] for i in range(len(data[1]))
+ ]:
+ data[1].append(('padding', [im_h, im_w]))
+ padding_batch.append((padding_im, ) + tuple(data[1:]))
+ else:
+                # pad the image of detection, or
+                # pad the image of classification during the training
+                # and evaluating phases
+ padding_batch.append((padding_im, ) + tuple(data[1:]))
+ else:
+            # pad the image of classification during the inference phase
+            padding_batch.append((padding_im, ))
return padding_batch
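+
+# A minimal sketch of the behavior above (shapes illustrative, numpy only):
+#
+#   a = (np.zeros((3, 200, 300), dtype=np.float32), )
+#   b = (np.zeros((3, 180, 320), dtype=np.float32), )
+#   batch = generate_minibatch([a, b])
+#   # both images are zero-padded to the per-batch max shape (3, 200, 320);
+#   # segmentation labels would instead be padded with label_padding_value
+#   # (255) so the padded pixels can be excluded from loss and metrics.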
diff --git a/paddlex/cv/datasets/easydata_cls.py b/paddlex/cv/datasets/easydata_cls.py
index 9b6dddc4843616ff0a09712e6766e3ea9552b466..9c07aa3cfaf87ecf569cebf670dc523efee96fdd 100644
--- a/paddlex/cv/datasets/easydata_cls.py
+++ b/paddlex/cv/datasets/easydata_cls.py
@@ -18,6 +18,7 @@ import random
import copy
import json
import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
from .imagenet import ImageNet
from .dataset import is_pic
from .dataset import get_encoding
@@ -68,6 +69,8 @@ class EasyDataCls(ImageNet):
for line in f:
img_file, json_file = [osp.join(data_dir, x) \
for x in line.strip().split()[:2]]
+ img_file = path_normalization(img_file)
+ json_file = path_normalization(json_file)
if not is_pic(img_file):
continue
if not osp.isfile(json_file):
diff --git a/paddlex/cv/datasets/easydata_det.py b/paddlex/cv/datasets/easydata_det.py
index 173720f0d5df56eb553b5e4a828962e0554403ad..65d5471bfd6ab8651cbdc856963d5b7f65dc9acf 100644
--- a/paddlex/cv/datasets/easydata_det.py
+++ b/paddlex/cv/datasets/easydata_det.py
@@ -20,6 +20,7 @@ import json
import cv2
import numpy as np
import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
from .voc import VOCDetection
from .dataset import is_pic
from .dataset import get_encoding
@@ -87,6 +88,8 @@ class EasyDataDet(VOCDetection):
for line in f:
img_file, json_file = [osp.join(data_dir, x) \
for x in line.strip().split()[:2]]
+ img_file = path_normalization(img_file)
+ json_file = path_normalization(json_file)
if not is_pic(img_file):
continue
if not osp.isfile(json_file):
diff --git a/paddlex/cv/datasets/easydata_seg.py b/paddlex/cv/datasets/easydata_seg.py
index 4988248d291accc0469494d0f00e16440784ae7a..5e938cca10a346bf1c92ae65413c801d589da5e9 100644
--- a/paddlex/cv/datasets/easydata_seg.py
+++ b/paddlex/cv/datasets/easydata_seg.py
@@ -20,6 +20,7 @@ import json
import cv2
import numpy as np
import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
from .dataset import Dataset
from .dataset import get_encoding
from .dataset import is_pic
@@ -71,6 +72,8 @@ class EasyDataSeg(Dataset):
for line in f:
img_file, json_file = [osp.join(data_dir, x) \
for x in line.strip().split()[:2]]
+ img_file = path_normalization(img_file)
+ json_file = path_normalization(json_file)
if not is_pic(img_file):
continue
if not osp.isfile(json_file):
diff --git a/paddlex/cv/datasets/imagenet.py b/paddlex/cv/datasets/imagenet.py
index 0986f823add893c6fb746168f3c2bcfa438f5e10..75567d01efc43d2ecaa39720fa6590d8f1f4342e 100644
--- a/paddlex/cv/datasets/imagenet.py
+++ b/paddlex/cv/datasets/imagenet.py
@@ -17,6 +17,7 @@ import os.path as osp
import random
import copy
import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
from .dataset import Dataset
from .dataset import is_pic
from .dataset import get_encoding
@@ -66,6 +67,7 @@ class ImageNet(Dataset):
with open(file_list, encoding=get_encoding(file_list)) as f:
for line in f:
items = line.strip().split()
+ items[0] = path_normalization(items[0])
if not is_pic(items[0]):
continue
full_path = osp.join(data_dir, items[0])
diff --git a/paddlex/cv/datasets/seg_dataset.py b/paddlex/cv/datasets/seg_dataset.py
index 6e8bfae1ca623ed90a6d583042627cf4aecb2ea6..b4ee7184a7f5de3210282cbab9726e1a306f52d3 100644
--- a/paddlex/cv/datasets/seg_dataset.py
+++ b/paddlex/cv/datasets/seg_dataset.py
@@ -17,6 +17,7 @@ import os.path as osp
import random
import copy
import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
from .dataset import Dataset
from .dataset import get_encoding
from .dataset import is_pic
@@ -61,10 +62,11 @@ class SegDataset(Dataset):
for line in f:
item = line.strip()
self.labels.append(item)
-
with open(file_list, encoding=get_encoding(file_list)) as f:
for line in f:
items = line.strip().split()
+ items[0] = path_normalization(items[0])
+ items[1] = path_normalization(items[1])
if not is_pic(items[0]):
continue
full_path_im = osp.join(data_dir, items[0])
diff --git a/paddlex/cv/datasets/voc.py b/paddlex/cv/datasets/voc.py
index 276891894b9636e5de8bed566fe234bf212bcad3..410c9f7d4a7d02c5743491723226a5cfbdd6c182 100644
--- a/paddlex/cv/datasets/voc.py
+++ b/paddlex/cv/datasets/voc.py
@@ -22,6 +22,7 @@ import numpy as np
from collections import OrderedDict
import xml.etree.ElementTree as ET
import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
from .dataset import Dataset
from .dataset import is_pic
from .dataset import get_encoding
@@ -92,6 +93,8 @@ class VOCDetection(Dataset):
break
img_file, xml_file = [osp.join(data_dir, x) \
for x in line.strip().split()[:2]]
+ img_file = path_normalization(img_file)
+ xml_file = path_normalization(xml_file)
if not is_pic(img_file):
continue
if not osp.isfile(xml_file):
diff --git a/paddlex/cv/models/base.py b/paddlex/cv/models/base.py
index 1bf3a2c97a3ef9680aae64206aeb72207b759642..399a6708faeeb694052d5b4c27c95dd13bf71d6b 100644
--- a/paddlex/cv/models/base.py
+++ b/paddlex/cv/models/base.py
@@ -26,6 +26,7 @@ import functools
import paddlex.utils.logging as logging
from paddlex.utils import seconds_to_hms
from paddlex.utils.utils import EarlyStop
+from paddlex.cv.transforms import arrange_transforms
import paddlex
from collections import OrderedDict
from os import path as osp
@@ -102,23 +103,6 @@ class BaseAPI:
mode='test')
self.test_prog = self.test_prog.clone(for_test=True)
- def arrange_transforms(self, transforms, mode='train'):
- # 给transforms添加arrange操作
- if self.model_type == 'classifier':
- arrange_transform = paddlex.cls.transforms.ArrangeClassifier
- elif self.model_type == 'segmenter':
- arrange_transform = paddlex.seg.transforms.ArrangeSegmenter
- elif self.model_type == 'detector':
- arrange_name = 'Arrange{}'.format(self.__class__.__name__)
- arrange_transform = getattr(paddlex.det.transforms, arrange_name)
- else:
- raise Exception("Unrecognized model type: {}".format(
- self.model_type))
- if type(transforms.transforms[-1]).__name__.startswith('Arrange'):
- transforms.transforms[-1] = arrange_transform(mode=mode)
- else:
- transforms.transforms.append(arrange_transform(mode=mode))
-
def build_train_data_loader(self, dataset, batch_size):
# 初始化data_loader
if self.train_data_loader is None:
@@ -140,7 +124,11 @@ class BaseAPI:
batch_size=1,
batch_num=10,
cache_dir="./temp"):
- self.arrange_transforms(transforms=dataset.transforms, mode='quant')
+ arrange_transforms(
+ model_type=self.model_type,
+ class_name=self.__class__.__name__,
+ transforms=dataset.transforms,
+ mode='quant')
dataset.num_samples = batch_size * batch_num
try:
from .slim.post_quantization import PaddleXPostTrainingQuantization
@@ -204,22 +192,31 @@ class BaseAPI:
if self.model_type == 'classifier':
if pretrain_weights not in ['IMAGENET']:
logging.warning(
- "Pretrain_weights for classifier should be defined as directory path or parameter file or 'IMAGENET' or None, but it is {}, so we force to set it as 'IMAGENET'".
+ "Path of pretrain_weights('{}') is not exists!".
format(pretrain_weights))
+ logging.warning(
+ "Pretrain_weights will be forced to set as 'IMAGENET', if you don't want to use pretrain weights, set pretrain_weights=None."
+ )
pretrain_weights = 'IMAGENET'
elif self.model_type == 'detector':
if pretrain_weights not in ['IMAGENET', 'COCO']:
logging.warning(
- "Pretrain_weights for detector should be defined as directory path or parameter file or 'IMAGENET' or 'COCO' or None, but it is {}, so we force to set it as 'IMAGENET'".
+ "Path of pretrain_weights('{}') is not exists!".
format(pretrain_weights))
+ logging.warning(
+ "Pretrain_weights will be forced to set as 'IMAGENET', if you don't want to use pretrain weights, set pretrain_weights=None."
+ )
pretrain_weights = 'IMAGENET'
elif self.model_type == 'segmenter':
if pretrain_weights not in [
'IMAGENET', 'COCO', 'CITYSCAPES'
]:
logging.warning(
- "Pretrain_weights for segmenter should be defined as directory path or parameter file or 'IMAGENET' or 'COCO' or 'CITYSCAPES', but it is {}, so we force to set it as 'IMAGENET'".
+ "Path of pretrain_weights('{}') is not exists!".
format(pretrain_weights))
+ logging.warning(
+ "Pretrain_weights will be forced to set as 'IMAGENET', if you don't want to use pretrain weights, set pretrain_weights=None."
+ )
pretrain_weights = 'IMAGENET'
if hasattr(self, 'backbone'):
backbone = self.backbone
@@ -416,8 +413,11 @@ class BaseAPI:
from visualdl import LogWriter
vdl_logdir = osp.join(save_dir, 'vdl_log')
# 给transform添加arrange操作
- self.arrange_transforms(
- transforms=train_dataset.transforms, mode='train')
+ arrange_transforms(
+ model_type=self.model_type,
+ class_name=self.__class__.__name__,
+ transforms=train_dataset.transforms,
+ mode='train')
# 构建train_data_loader
self.build_train_data_loader(
dataset=train_dataset, batch_size=train_batch_size)
diff --git a/paddlex/cv/models/classifier.py b/paddlex/cv/models/classifier.py
index b329d90e0de7b246c43cde8ffdfe17e6dd406b91..3e8b70ea35a2f40ba2dadd98a385de68480bcd8e 100644
--- a/paddlex/cv/models/classifier.py
+++ b/paddlex/cv/models/classifier.py
@@ -17,10 +17,13 @@ import numpy as np
import time
import math
import tqdm
+from multiprocessing.pool import ThreadPool
import paddle.fluid as fluid
import paddlex.utils.logging as logging
from paddlex.utils import seconds_to_hms
import paddlex
+from paddlex.cv.transforms import arrange_transforms
+from paddlex.cv.datasets import generate_minibatch
from collections import OrderedDict
from .base import BaseAPI
@@ -54,7 +57,8 @@ class BaseClassifier(BaseAPI):
input_shape = [
None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0]
]
- image = fluid.data(dtype='float32', shape=input_shape, name='image')
+ image = fluid.data(
+ dtype='float32', shape=input_shape, name='image')
else:
image = fluid.data(
dtype='float32', shape=[None, 3, None, None], name='image')
@@ -219,7 +223,11 @@ class BaseClassifier(BaseAPI):
tuple (metrics, eval_details): 当return_details为True时,增加返回dict,
包含关键字:'true_labels'、'pred_scores',分别代表真实类别id、每个类别的预测得分。
"""
- self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
+ arrange_transforms(
+ model_type=self.model_type,
+ class_name=self.__class__.__name__,
+ transforms=eval_dataset.transforms,
+ mode='eval')
data_generator = eval_dataset.generator(
batch_size=batch_size, drop_last=False)
k = min(5, self.num_classes)
@@ -232,8 +240,9 @@ class BaseClassifier(BaseAPI):
self.test_prog).with_data_parallel(
share_vars_from=self.parallel_train_prog)
batch_size_each_gpu = self._get_single_card_bs(batch_size)
- logging.info("Start to evaluating(total_samples={}, total_steps={})...".
- format(eval_dataset.num_samples, total_steps))
+ logging.info(
+ "Start to evaluating(total_samples={}, total_steps={})...".format(
+ eval_dataset.num_samples, total_steps))
for step, data in tqdm.tqdm(
enumerate(data_generator()), total=total_steps):
images = np.array([d[0] for d in data]).astype('float32')
@@ -269,38 +278,106 @@ class BaseClassifier(BaseAPI):
return metrics, eval_details
return metrics
+ @staticmethod
+ def _preprocess(images, transforms, model_type, class_name, thread_num=1):
+ arrange_transforms(
+ model_type=model_type,
+ class_name=class_name,
+ transforms=transforms,
+ mode='test')
+ pool = ThreadPool(thread_num)
+ batch_data = pool.map(transforms, images)
+ pool.close()
+ pool.join()
+ padding_batch = generate_minibatch(batch_data)
+ im = np.array([data[0] for data in padding_batch])
+
+ return im
+
+ @staticmethod
+ def _postprocess(results, true_topk, labels):
+ preds = list()
+ for i, pred in enumerate(results[0]):
+ pred_label = np.argsort(pred)[::-1][:true_topk]
+ preds.append([{
+ 'category_id': l,
+ 'category': labels[l],
+ 'score': results[0][i][l]
+ } for l in pred_label])
+
+ return preds
+
def predict(self, img_file, transforms=None, topk=1):
"""预测。
Args:
- img_file (str): 预测图像路径。
+            img_file (str|np.ndarray): Path of the image to predict, or a decoded BGR image array in (H, W, C) layout with float32 dtype.
transforms (paddlex.cls.transforms): 数据预处理操作。
topk (int): 预测时前k个最大值。
Returns:
list: 其中元素均为字典。字典的关键字为'category_id'、'category'、'score',
分别对应预测类别id、预测类别标签、预测得分。
"""
+
if transforms is None and not hasattr(self, 'test_transforms'):
raise Exception("transforms need to be defined, now is None.")
true_topk = min(self.num_classes, topk)
- if transforms is not None:
- self.arrange_transforms(transforms=transforms, mode='test')
- im = transforms(img_file)
+ if isinstance(img_file, (str, np.ndarray)):
+ images = [img_file]
else:
- self.arrange_transforms(
- transforms=self.test_transforms, mode='test')
- im = self.test_transforms(img_file)
+ raise Exception("img_file must be str/np.ndarray")
+
+ if transforms is None:
+ transforms = self.test_transforms
+ im = BaseClassifier._preprocess(images, transforms, self.model_type,
+ self.__class__.__name__)
+
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog,
feed={'image': im},
fetch_list=list(self.test_outputs.values()),
use_program_cache=True)
- pred_label = np.argsort(result[0][0])[::-1][:true_topk]
- res = [{
- 'category_id': l,
- 'category': self.labels[l],
- 'score': result[0][0][l]
- } for l in pred_label]
- return res
+
+ preds = BaseClassifier._postprocess(result, true_topk, self.labels)
+
+ return preds[0]
+
+ def batch_predict(self,
+ img_file_list,
+ transforms=None,
+ topk=1,
+ thread_num=2):
+ """预测。
+ Args:
+ img_file_list(list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径
+ 也可以是解码后的排列格式为(H,W,C)且类型为float32且为BGR格式的数组。
+ transforms (paddlex.cls.transforms): 数据预处理操作。
+ topk (int): 预测时前k个最大值。
+ thread_num (int): 并发执行各图像预处理时的线程数。
+ Returns:
+ list: 每个元素都为列表,表示各图像的预测结果。在各图像的预测列表中,其中元素均为字典。字典的关键字为'category_id'、'category'、'score',
+ 分别对应预测类别id、预测类别标签、预测得分。
+ """
+ if transforms is None and not hasattr(self, 'test_transforms'):
+ raise Exception("transforms need to be defined, now is None.")
+ true_topk = min(self.num_classes, topk)
+ if not isinstance(img_file_list, (list, tuple)):
+ raise Exception("im_file must be list/tuple")
+
+ if transforms is None:
+ transforms = self.test_transforms
+ im = BaseClassifier._preprocess(img_file_list, transforms,
+ self.model_type,
+ self.__class__.__name__, thread_num)
+
+ with fluid.scope_guard(self.scope):
+ result = self.exe.run(self.test_prog,
+ feed={'image': im},
+ fetch_list=list(self.test_outputs.values()),
+ use_program_cache=True)
+
+ preds = BaseClassifier._postprocess(result, true_topk, self.labels)
+
+ return preds
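+
+    # A usage sketch for the new batch API (model path illustrative):
+    #
+    #   import paddlex as pdx
+    #   model = pdx.load_model('output/mobilenetv2/best_model')
+    #   results = model.batch_predict(['1.jpg', '2.jpg'], topk=3)
+    #   # results[0] is the top-3 prediction list for '1.jpg'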
class ResNet18(BaseClassifier):
diff --git a/paddlex/cv/models/deeplabv3p.py b/paddlex/cv/models/deeplabv3p.py
index cd9240e18594bd44a5acc9b03e4077fbf0f4434a..e0eb47c5c5fbaaf4f526f032f559869625069cfc 100644
--- a/paddlex/cv/models/deeplabv3p.py
+++ b/paddlex/cv/models/deeplabv3p.py
@@ -18,9 +18,12 @@ import numpy as np
import tqdm
import math
import cv2
+from multiprocessing.pool import ThreadPool
import paddle.fluid as fluid
import paddlex.utils.logging as logging
import paddlex
+from paddlex.cv.transforms import arrange_transforms
+from paddlex.cv.datasets import generate_minibatch
from collections import OrderedDict
from .base import BaseAPI
from .utils.seg_eval import ConfusionMatrix
@@ -317,7 +320,11 @@ class DeepLabv3p(BaseAPI):
tuple (metrics, eval_details):当return_details为True时,增加返回dict (eval_details),
包含关键字:'confusion_matrix',表示评估的混淆矩阵。
"""
- self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
+ arrange_transforms(
+ model_type=self.model_type,
+ class_name=self.__class__.__name__,
+ transforms=eval_dataset.transforms,
+ mode='eval')
total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
conf_mat = ConfusionMatrix(self.num_classes, streaming=True)
data_generator = eval_dataset.generator(
@@ -327,21 +334,13 @@ class DeepLabv3p(BaseAPI):
self.parallel_test_prog = fluid.CompiledProgram(
self.test_prog).with_data_parallel(
share_vars_from=self.parallel_train_prog)
- logging.info("Start to evaluating(total_samples={}, total_steps={})...".
- format(eval_dataset.num_samples, total_steps))
+ logging.info(
+ "Start to evaluating(total_samples={}, total_steps={})...".format(
+ eval_dataset.num_samples, total_steps))
for step, data in tqdm.tqdm(
enumerate(data_generator()), total=total_steps):
images = np.array([d[0] for d in data])
-
- _, _, im_h, im_w = images.shape
- labels = list()
- for d in data:
- padding_label = np.zeros(
- (1, im_h, im_w)).astype('int64') + self.ignore_index
- _, label_h, label_w = d[1].shape
- padding_label[:, :label_h, :label_w] = d[1]
- labels.append(padding_label)
- labels = np.array(labels)
+ labels = np.array([d[1] for d in data])
num_samples = images.shape[0]
if num_samples < batch_size:
@@ -379,10 +378,56 @@ class DeepLabv3p(BaseAPI):
return metrics, eval_details
return metrics
- def predict(self, im_file, transforms=None):
+ @staticmethod
+ def _preprocess(images, transforms, model_type, class_name, thread_num=1):
+ arrange_transforms(
+ model_type=model_type,
+ class_name=class_name,
+ transforms=transforms,
+ mode='test')
+ pool = ThreadPool(thread_num)
+ batch_data = pool.map(transforms, images)
+ pool.close()
+ pool.join()
+ padding_batch = generate_minibatch(batch_data)
+ im = np.array(
+ [data[0] for data in padding_batch],
+ dtype=padding_batch[0][0].dtype)
+ im_info = [data[1] for data in padding_batch]
+ return im, im_info
+
+ @staticmethod
+ def _postprocess(results, im_info):
+ pred_list = list()
+ logit_list = list()
+ for i, (pred, logit) in enumerate(zip(results[0], results[1])):
+            pred = np.squeeze(pred).astype('uint8')
+ logit = np.transpose(logit, (1, 2, 0))
+ for info in im_info[i][::-1]:
+ if info[0] == 'resize':
+ w, h = info[1][1], info[1][0]
+ pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
+ logit = cv2.resize(logit, (w, h), cv2.INTER_LINEAR)
+ elif info[0] == 'padding':
+ w, h = info[1][1], info[1][0]
+ pred = pred[0:h, 0:w]
+ logit = logit[0:h, 0:w, :]
+ else:
+ raise Exception("Unexpected info '{}' in im_info".format(
+ info[0]))
+ pred_list.append(pred)
+ logit_list.append(logit)
+
+ preds = list()
+ for pred, logit in zip(pred_list, logit_list):
+ preds.append({'label_map': pred, 'score_map': logit})
+ return preds
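+
+    # Note on _postprocess: im_info records the preprocessing steps in
+    # application order, so the loop above walks each im_info[i] in reverse to
+    # undo the padding first and then the resize, restoring every prediction
+    # to the original image size.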
+
+ def predict(self, img_file, transforms=None):
"""预测。
Args:
- img_file(str): 预测图像路径。
+            img_file(str|np.ndarray): Path of the image to predict, or a decoded BGR image array in (H, W, C) layout with float32 dtype.
transforms(paddlex.cv.transforms): 数据预处理操作。
Returns:
@@ -392,34 +437,53 @@ class DeepLabv3p(BaseAPI):
if transforms is None and not hasattr(self, 'test_transforms'):
raise Exception("transforms need to be defined, now is None.")
- if transforms is not None:
- self.arrange_transforms(transforms=transforms, mode='test')
- im, im_info = transforms(im_file)
+ if isinstance(img_file, (str, np.ndarray)):
+ images = [img_file]
else:
- self.arrange_transforms(
- transforms=self.test_transforms, mode='test')
- im, im_info = self.test_transforms(im_file)
- im = np.expand_dims(im, axis=0)
+ raise Exception("img_file must be str/np.ndarray")
+
+ if transforms is None:
+ transforms = self.test_transforms
+ im, im_info = DeepLabv3p._preprocess(
+ images, transforms, self.model_type, self.__class__.__name__)
+
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog,
feed={'image': im},
fetch_list=list(self.test_outputs.values()),
use_program_cache=True)
- pred = result[0]
- pred = np.squeeze(pred).astype('uint8')
- logit = result[1]
- logit = np.squeeze(logit)
- logit = np.transpose(logit, (1, 2, 0))
- for info in im_info[::-1]:
- if info[0] == 'resize':
- w, h = info[1][1], info[1][0]
- pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
- logit = cv2.resize(logit, (w, h), cv2.INTER_LINEAR)
- elif info[0] == 'padding':
- w, h = info[1][1], info[1][0]
- pred = pred[0:h, 0:w]
- logit = logit[0:h, 0:w, :]
- else:
- raise Exception("Unexpected info '{}' in im_info".format(info[
- 0]))
- return {'label_map': pred, 'score_map': logit}
+
+ preds = DeepLabv3p._postprocess(result, im_info)
+ return preds[0]
+
+ def batch_predict(self, img_file_list, transforms=None, thread_num=2):
+ """预测。
+ Args:
+ img_file_list(list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径
+ 也可以是解码后的排列格式为(H,W,C)且类型为float32且为BGR格式的数组。
+ transforms(paddlex.cv.transforms): 数据预处理操作。
+ thread_num (int): 并发执行各图像预处理时的线程数。
+
+ Returns:
+ list: 每个元素都为列表,表示各图像的预测结果。各图像的预测结果用字典表示,包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,
+ 像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)
+ """
+
+ if transforms is None and not hasattr(self, 'test_transforms'):
+ raise Exception("transforms need to be defined, now is None.")
+ if not isinstance(img_file_list, (list, tuple)):
+ raise Exception("im_file must be list/tuple")
+ if transforms is None:
+ transforms = self.test_transforms
+ im, im_info = DeepLabv3p._preprocess(
+ img_file_list, transforms, self.model_type,
+ self.__class__.__name__, thread_num)
+
+ with fluid.scope_guard(self.scope):
+ result = self.exe.run(self.test_prog,
+ feed={'image': im},
+ fetch_list=list(self.test_outputs.values()),
+ use_program_cache=True)
+
+ preds = DeepLabv3p._postprocess(result, im_info)
+ return preds
diff --git a/paddlex/cv/models/faster_rcnn.py b/paddlex/cv/models/faster_rcnn.py
index 408c9deab07ea22f3150778f3d7bb9dc636bafa8..a6b8f2a118c6aa1681f853da243b812aaf8b030a 100644
--- a/paddlex/cv/models/faster_rcnn.py
+++ b/paddlex/cv/models/faster_rcnn.py
@@ -16,11 +16,14 @@ from __future__ import absolute_import
import math
import tqdm
import numpy as np
+from multiprocessing.pool import ThreadPool
import paddle.fluid as fluid
import paddlex.utils.logging as logging
import paddlex
import os.path as osp
import copy
+from paddlex.cv.transforms import arrange_transforms
+from paddlex.cv.datasets import generate_minibatch
from .base import BaseAPI
from collections import OrderedDict
from .utils.detection_eval import eval_results, bbox2out
@@ -291,7 +294,11 @@ class FasterRCNN(BaseAPI):
eval_details为dict,包含关键字:'bbox',对应元素预测结果列表,每个预测结果由图像id、
预测框类别id、预测框坐标、预测框得分;’gt‘:真实标注框相关信息。
"""
- self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
+ arrange_transforms(
+ model_type=self.model_type,
+ class_name=self.__class__.__name__,
+ transforms=eval_dataset.transforms,
+ mode='eval')
if metric is None:
if hasattr(self, 'metric') and self.metric is not None:
metric = self.metric
@@ -310,12 +317,14 @@ class FasterRCNN(BaseAPI):
logging.warning(
"Faster RCNN supports batch_size=1 only during evaluating, so batch_size is forced to be set to 1."
)
- dataset = eval_dataset.generator(batch_size=batch_size, drop_last=False)
+ dataset = eval_dataset.generator(
+ batch_size=batch_size, drop_last=False)
total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
results = list()
- logging.info("Start to evaluating(total_samples={}, total_steps={})...".
- format(eval_dataset.num_samples, total_steps))
+ logging.info(
+ "Start to evaluating(total_samples={}, total_steps={})...".format(
+ eval_dataset.num_samples, total_steps))
for step, data in tqdm.tqdm(enumerate(dataset()), total=total_steps):
images = np.array([d[0] for d in data]).astype('float32')
im_infos = np.array([d[1] for d in data]).astype('float32')
@@ -366,11 +375,42 @@ class FasterRCNN(BaseAPI):
return metrics, eval_details
return metrics
+ @staticmethod
+ def _preprocess(images, transforms, model_type, class_name, thread_num=1):
+ arrange_transforms(
+ model_type=model_type,
+ class_name=class_name,
+ transforms=transforms,
+ mode='test')
+ pool = ThreadPool(thread_num)
+ batch_data = pool.map(transforms, images)
+ pool.close()
+ pool.join()
+ padding_batch = generate_minibatch(batch_data)
+ im = np.array([data[0] for data in padding_batch])
+ im_resize_info = np.array([data[1] for data in padding_batch])
+ im_shape = np.array([data[2] for data in padding_batch])
+
+ return im, im_resize_info, im_shape
+
+ @staticmethod
+ def _postprocess(res, batch_size, num_classes, labels):
+ clsid2catid = dict({i: i for i in range(num_classes)})
+ xywh_results = bbox2out([res], clsid2catid)
+ preds = [[] for i in range(batch_size)]
+ for xywh_res in xywh_results:
+ image_id = xywh_res['image_id']
+ del xywh_res['image_id']
+ xywh_res['category'] = labels[xywh_res['category_id']]
+ preds[image_id].append(xywh_res)
+
+ return preds
+
def predict(self, img_file, transforms=None):
"""预测。
Args:
- img_file (str): 预测图像路径。
+            img_file(str|np.ndarray): Path of the image to predict, or a decoded BGR image array in (H, W, C) layout with float32 dtype.
transforms (paddlex.det.transforms): 数据预处理操作。
Returns:
@@ -380,36 +420,84 @@ class FasterRCNN(BaseAPI):
"""
if transforms is None and not hasattr(self, 'test_transforms'):
raise Exception("transforms need to be defined, now is None.")
- if transforms is not None:
- self.arrange_transforms(transforms=transforms, mode='test')
- im, im_resize_info, im_shape = transforms(img_file)
+ if isinstance(img_file, (str, np.ndarray)):
+ images = [img_file]
else:
- self.arrange_transforms(
- transforms=self.test_transforms, mode='test')
- im, im_resize_info, im_shape = self.test_transforms(img_file)
- im = np.expand_dims(im, axis=0)
- im_resize_info = np.expand_dims(im_resize_info, axis=0)
- im_shape = np.expand_dims(im_shape, axis=0)
+ raise Exception("img_file must be str/np.ndarray")
+
+ if transforms is None:
+ transforms = self.test_transforms
+ im, im_resize_info, im_shape = FasterRCNN._preprocess(
+ images, transforms, self.model_type, self.__class__.__name__)
+
with fluid.scope_guard(self.scope):
- outputs = self.exe.run(self.test_prog,
- feed={
- 'image': im,
- 'im_info': im_resize_info,
- 'im_shape': im_shape
- },
- fetch_list=list(self.test_outputs.values()),
- return_numpy=False,
- use_program_cache=True)
+ result = self.exe.run(self.test_prog,
+ feed={
+ 'image': im,
+ 'im_info': im_resize_info,
+ 'im_shape': im_shape
+ },
+ fetch_list=list(self.test_outputs.values()),
+ return_numpy=False,
+ use_program_cache=True)
+
res = {
k: (np.array(v), v.recursive_sequence_lengths())
- for k, v in zip(list(self.test_outputs.keys()), outputs)
+ for k, v in zip(list(self.test_outputs.keys()), result)
}
- res['im_id'] = (np.array([[0]]).astype('int32'), [])
- clsid2catid = dict({i: i for i in range(self.num_classes)})
- xywh_results = bbox2out([res], clsid2catid)
- results = list()
- for xywh_res in xywh_results:
- del xywh_res['image_id']
- xywh_res['category'] = self.labels[xywh_res['category_id']]
- results.append(xywh_res)
- return results
+ res['im_id'] = (np.array(
+ [[i] for i in range(len(images))]).astype('int32'), [])
+ preds = FasterRCNN._postprocess(res,
+ len(images), self.num_classes,
+ self.labels)
+
+ return preds[0]
+
+ def batch_predict(self, img_file_list, transforms=None, thread_num=2):
+ """预测。
+
+ Args:
+ img_file_list(list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径
+ 也可以是解码后的排列格式为(H,W,C)且类型为float32且为BGR格式的数组。
+ transforms (paddlex.det.transforms): 数据预处理操作。
+ thread_num (int): 并发执行各图像预处理时的线程数。
+
+ Returns:
+ list: 每个元素都为列表,表示各图像的预测结果。在各图像的预测结果列表中,每个预测结果由预测框类别标签、
+ 预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、
+ 预测框得分组成。
+ """
+ if transforms is None and not hasattr(self, 'test_transforms'):
+ raise Exception("transforms need to be defined, now is None.")
+
+ if not isinstance(img_file_list, (list, tuple)):
+ raise Exception("im_file must be list/tuple")
+
+ if transforms is None:
+ transforms = self.test_transforms
+ im, im_resize_info, im_shape = FasterRCNN._preprocess(
+ img_file_list, transforms, self.model_type,
+ self.__class__.__name__, thread_num)
+
+ with fluid.scope_guard(self.scope):
+ result = self.exe.run(self.test_prog,
+ feed={
+ 'image': im,
+ 'im_info': im_resize_info,
+ 'im_shape': im_shape
+ },
+ fetch_list=list(self.test_outputs.values()),
+ return_numpy=False,
+ use_program_cache=True)
+
+ res = {
+ k: (np.array(v), v.recursive_sequence_lengths())
+ for k, v in zip(list(self.test_outputs.keys()), result)
+ }
+ res['im_id'] = (np.array(
+ [[i] for i in range(len(img_file_list))]).astype('int32'), [])
+ preds = FasterRCNN._postprocess(res,
+ len(img_file_list), self.num_classes,
+ self.labels)
+
+ return preds
diff --git a/paddlex/cv/models/hrnet.py b/paddlex/cv/models/hrnet.py
index d3af363ceac925d40552da22360759553c0090f7..691114da8caffb2bf86860ed51cd07e449ae7cd7 100644
--- a/paddlex/cv/models/hrnet.py
+++ b/paddlex/cv/models/hrnet.py
@@ -25,7 +25,7 @@ class HRNet(DeepLabv3p):
Args:
num_classes (int): 类别数。
width (int|str): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64, '18_small_v1']。
- '18_small_v1'是18的轻量级版本。
+        '18_small_v1' is a lightweight version of 18. Defaults to 18.
use_bce_loss (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。
use_dice_loss (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。
当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。
diff --git a/paddlex/cv/models/load_model.py b/paddlex/cv/models/load_model.py
index a80e2e93856621e12a38bfc174f2aba078bda312..7c9aa265d4e863e0e3b97e4460a98313b58e40dd 100644
--- a/paddlex/cv/models/load_model.py
+++ b/paddlex/cv/models/load_model.py
@@ -21,10 +21,13 @@ import paddle.fluid as fluid
from paddle.fluid.framework import Parameter
import paddlex
import paddlex.utils.logging as logging
+from paddlex.cv.transforms import build_transforms, build_transforms_v1
def load_model(model_dir, fixed_input_shape=None):
model_scope = fluid.Scope()
+ if not osp.exists(model_dir):
+ logging.error("model_dir '{}' is not exists!".format(model_dir))
if not osp.exists(osp.join(model_dir, "model.yml")):
raise Exception("There's not model.yml in {}".format(model_dir))
with open(osp.join(model_dir, "model.yml")) as f:
@@ -128,67 +131,3 @@ def fix_input_shape(info, fixed_input_shape=None):
padding['Padding']['target_size'] = list(fixed_input_shape)
info['Transforms'].append(resize)
info['Transforms'].append(padding)
-
-
-def build_transforms(model_type, transforms_info, to_rgb=True):
- if model_type == "classifier":
- import paddlex.cv.transforms.cls_transforms as T
- elif model_type == "detector":
- import paddlex.cv.transforms.det_transforms as T
- elif model_type == "segmenter":
- import paddlex.cv.transforms.seg_transforms as T
- transforms = list()
- for op_info in transforms_info:
- op_name = list(op_info.keys())[0]
- op_attr = op_info[op_name]
- if not hasattr(T, op_name):
- raise Exception(
- "There's no operator named '{}' in transforms of {}".format(
- op_name, model_type))
- transforms.append(getattr(T, op_name)(**op_attr))
- eval_transforms = T.Compose(transforms)
- eval_transforms.to_rgb = to_rgb
- return eval_transforms
-
-
-def build_transforms_v1(model_type, transforms_info, batch_transforms_info):
- """ 老版本模型加载,仅支持PaddleX前端导出的模型
- """
- logging.debug("Use build_transforms_v1 to reconstruct transforms")
- if model_type == "classifier":
- import paddlex.cv.transforms.cls_transforms as T
- elif model_type == "detector":
- import paddlex.cv.transforms.det_transforms as T
- elif model_type == "segmenter":
- import paddlex.cv.transforms.seg_transforms as T
- transforms = list()
- for op_info in transforms_info:
- op_name = op_info[0]
- op_attr = op_info[1]
- if op_name == 'DecodeImage':
- continue
- if op_name == 'Permute':
- continue
- if op_name == 'ResizeByShort':
- op_attr_new = dict()
- if 'short_size' in op_attr:
- op_attr_new['short_size'] = op_attr['short_size']
- else:
- op_attr_new['short_size'] = op_attr['target_size']
- op_attr_new['max_size'] = op_attr.get('max_size', -1)
- op_attr = op_attr_new
- if op_name.startswith('Arrange'):
- continue
- if not hasattr(T, op_name):
- raise Exception(
- "There's no operator named '{}' in transforms of {}".format(
- op_name, model_type))
- transforms.append(getattr(T, op_name)(**op_attr))
- if model_type == "detector" and len(batch_transforms_info) > 0:
- op_name = batch_transforms_info[0][0]
- op_attr = batch_transforms_info[0][1]
- assert op_name == "PaddingMiniBatch", "Only PaddingMiniBatch transform is supported for batch transform"
- padding = T.Padding(coarsest_stride=op_attr['coarsest_stride'])
- transforms.append(padding)
- eval_transforms = T.Compose(transforms)
- return eval_transforms
diff --git a/paddlex/cv/models/mask_rcnn.py b/paddlex/cv/models/mask_rcnn.py
index 9e08808c94d7a9491bac21da9f3fd17f2d1a2bf8..888cd21725b68ea7e467681f2ac42789c2a72d81 100644
--- a/paddlex/cv/models/mask_rcnn.py
+++ b/paddlex/cv/models/mask_rcnn.py
@@ -16,11 +16,13 @@ from __future__ import absolute_import
import math
import tqdm
import numpy as np
+from multiprocessing.pool import ThreadPool
import paddle.fluid as fluid
import paddlex.utils.logging as logging
import paddlex
import copy
import os.path as osp
+from paddlex.cv.transforms import arrange_transforms
from collections import OrderedDict
from .faster_rcnn import FasterRCNN
from .utils.detection_eval import eval_results, bbox2out, mask2out
@@ -253,7 +255,11 @@ class MaskRCNN(FasterRCNN):
预测框坐标、预测框得分;'mask',对应元素预测区域结果列表,每个预测结果由图像id、
预测区域类别id、预测区域坐标、预测区域得分;’gt‘:真实标注框和标注区域相关信息。
"""
- self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
+ arrange_transforms(
+ model_type=self.model_type,
+ class_name=self.__class__.__name__,
+ transforms=eval_dataset.transforms,
+ mode='eval')
if metric is None:
if hasattr(self, 'metric') and self.metric is not None:
metric = self.metric
@@ -333,56 +339,118 @@ class MaskRCNN(FasterRCNN):
return metrics, eval_details
return metrics
+ @staticmethod
+ def _postprocess(res, batch_size, num_classes, mask_head_resolution,
+ labels):
+ clsid2catid = dict({i: i for i in range(num_classes)})
+ xywh_results = bbox2out([res], clsid2catid)
+ segm_results = mask2out([res], clsid2catid, mask_head_resolution)
+ preds = [[] for i in range(batch_size)]
+ import pycocotools.mask as mask_util
+ for index, xywh_res in enumerate(xywh_results):
+ image_id = xywh_res['image_id']
+ del xywh_res['image_id']
+ xywh_res['mask'] = mask_util.decode(segm_results[index][
+ 'segmentation'])
+ xywh_res['category'] = labels[xywh_res['category_id']]
+ preds[image_id].append(xywh_res)
+
+ return preds
+
def predict(self, img_file, transforms=None):
"""预测。
Args:
- img_file (str): 预测图像路径。
+            img_file(str|np.ndarray): Path of the image to predict, or a decoded BGR image array in (H, W, C) layout with float32 dtype.
transforms (paddlex.det.transforms): 数据预处理操作。
Returns:
- dict: 预测结果列表,每个预测结果由预测框类别标签、预测框类别名称、
+            list: 预测结果列表,每个预测结果由预测框类别标签、预测框类别名称、
预测框坐标(坐标格式为[xmin, ymin, w, h])、
原图大小的预测二值图(1表示预测框类别,0表示背景类)、
预测框得分组成。
"""
if transforms is None and not hasattr(self, 'test_transforms'):
raise Exception("transforms need to be defined, now is None.")
- if transforms is not None:
- self.arrange_transforms(transforms=transforms, mode='test')
- im, im_resize_info, im_shape = transforms(img_file)
+ if isinstance(img_file, (str, np.ndarray)):
+ images = [img_file]
else:
- self.arrange_transforms(
- transforms=self.test_transforms, mode='test')
- im, im_resize_info, im_shape = self.test_transforms(img_file)
- im = np.expand_dims(im, axis=0)
- im_resize_info = np.expand_dims(im_resize_info, axis=0)
- im_shape = np.expand_dims(im_shape, axis=0)
+ raise Exception("img_file must be str/np.ndarray")
+
+ if transforms is None:
+ transforms = self.test_transforms
+ im, im_resize_info, im_shape = FasterRCNN._preprocess(
+ images, transforms, self.model_type, self.__class__.__name__)
+
with fluid.scope_guard(self.scope):
- outputs = self.exe.run(self.test_prog,
- feed={
- 'image': im,
- 'im_info': im_resize_info,
- 'im_shape': im_shape
- },
- fetch_list=list(self.test_outputs.values()),
- return_numpy=False,
- use_program_cache=True)
+ result = self.exe.run(self.test_prog,
+ feed={
+ 'image': im,
+ 'im_info': im_resize_info,
+ 'im_shape': im_shape
+ },
+ fetch_list=list(self.test_outputs.values()),
+ return_numpy=False,
+ use_program_cache=True)
+
res = {
k: (np.array(v), v.recursive_sequence_lengths())
- for k, v in zip(list(self.test_outputs.keys()), outputs)
+ for k, v in zip(list(self.test_outputs.keys()), result)
}
- res['im_id'] = (np.array([[0]]).astype('int32'), [])
+ res['im_id'] = (np.array(
+ [[i] for i in range(len(images))]).astype('int32'), [])
res['im_shape'] = (np.array(im_shape), [])
- clsid2catid = dict({i: i for i in range(self.num_classes)})
- xywh_results = bbox2out([res], clsid2catid)
- segm_results = mask2out([res], clsid2catid, self.mask_head_resolution)
- results = list()
- import pycocotools.mask as mask_util
- for index, xywh_res in enumerate(xywh_results):
- del xywh_res['image_id']
- xywh_res['mask'] = mask_util.decode(segm_results[index][
- 'segmentation'])
- xywh_res['category'] = self.labels[xywh_res['category_id']]
- results.append(xywh_res)
- return results
+ preds = MaskRCNN._postprocess(res,
+ len(images), self.num_classes,
+ self.mask_head_resolution, self.labels)
+
+ return preds[0]
+
+ def batch_predict(self, img_file_list, transforms=None, thread_num=2):
+ """预测。
+
+ Args:
+ img_file_list(list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径
+ 也可以是解码后的排列格式为(H,W,C)且类型为float32且为BGR格式的数组。
+ transforms (paddlex.det.transforms): 数据预处理操作。
+ thread_num (int): 并发执行各图像预处理时的线程数。
+ Returns:
+ dict: 每个元素都为列表,表示各图像的预测结果。在各图像的预测结果列表中,每个预测结果由预测框类别标签、预测框类别名称、
+ 预测框坐标(坐标格式为[xmin, ymin, w, h])、
+ 原图大小的预测二值图(1表示预测框类别,0表示背景类)、
+ 预测框得分组成。
+ """
+ if transforms is None and not hasattr(self, 'test_transforms'):
+ raise Exception("transforms need to be defined, now is None.")
+
+ if not isinstance(img_file_list, (list, tuple)):
+ raise Exception("im_file must be list/tuple")
+
+ if transforms is None:
+ transforms = self.test_transforms
+ im, im_resize_info, im_shape = FasterRCNN._preprocess(
+ img_file_list, transforms, self.model_type, self.__class__.__name__,
+ thread_num)
+
+ with fluid.scope_guard(self.scope):
+ result = self.exe.run(self.test_prog,
+ feed={
+ 'image': im,
+ 'im_info': im_resize_info,
+ 'im_shape': im_shape
+ },
+ fetch_list=list(self.test_outputs.values()),
+ return_numpy=False,
+ use_program_cache=True)
+
+ res = {
+ k: (np.array(v), v.recursive_sequence_lengths())
+ for k, v in zip(list(self.test_outputs.keys()), result)
+ }
+ res['im_id'] = (np.array(
+ [[i] for i in range(len(img_file_list))]).astype('int32'), [])
+ res['im_shape'] = (np.array(im_shape), [])
+ preds = MaskRCNN._postprocess(res,
+ len(img_file_list), self.num_classes,
+ self.mask_head_resolution, self.labels)
+ return preds
diff --git a/paddlex/cv/models/slim/prune_config.py b/paddlex/cv/models/slim/prune_config.py
index 4ca4215cd31dcf47bed7d3ae25c9ccae3c9a3dc8..64d7c45c7d5072f5d3826cc041ac175baa76f4fa 100644
--- a/paddlex/cv/models/slim/prune_config.py
+++ b/paddlex/cv/models/slim/prune_config.py
@@ -67,8 +67,7 @@ sensitivities_data = {
'https://bj.bcebos.com/paddlex/slim_prune/yolov3_darknet53.sensitivities',
'YOLOv3_ResNet34':
'https://bj.bcebos.com/paddlex/slim_prune/yolov3_resnet34.sensitivities',
- 'UNet':
- 'https://bj.bcebos.com/paddlex/slim_prune/unet.sensitivities',
+ 'UNet': 'https://bj.bcebos.com/paddlex/slim_prune/unet.sensitivities',
'DeepLabv3p_MobileNetV2_x0.25':
'https://bj.bcebos.com/paddlex/slim_prune/deeplab_mobilenetv2_x0.25_no_aspp_decoder.sensitivities',
'DeepLabv3p_MobileNetV2_x0.5':
@@ -103,8 +102,8 @@ def get_sensitivities(flag, model, save_dir):
model_type = model_name + '_' + model.backbone
if model_type.startswith('DeepLabv3p_Xception'):
model_type = model_type + '_' + 'aspp' + '_' + 'decoder'
- elif hasattr(model, 'encoder_with_aspp') or hasattr(
- model, 'enable_decoder'):
+ elif hasattr(model, 'encoder_with_aspp') or hasattr(model,
+ 'enable_decoder'):
model_type = model_type + '_' + 'aspp' + '_' + 'decoder'
if osp.isfile(flag):
return flag
@@ -116,7 +115,6 @@ def get_sensitivities(flag, model, save_dir):
paddlex.utils.download(url, path=save_dir)
return osp.join(save_dir, fname)
-
# try:
# hub.download(fname, save_path=save_dir)
# except Exception as e:
@@ -126,7 +124,7 @@ def get_sensitivities(flag, model, save_dir):
# model_type, fname))
# elif isinstance(e, hub.ServerConnectionError):
# raise Exception(
-# "Cannot get reource for model {}(key='{}'), please check your internet connecgtion"
+# "Cannot get reource for model {}(key='{}'), please check your internet connection"
# .format(model_type, fname))
# else:
# raise Exception(
@@ -162,27 +160,29 @@ def get_prune_params(model):
if model_type == 'AlexNet':
prune_names.remove('conv5_weights')
if model_type == 'ShuffleNetV2':
- not_prune_names = ['stage_2_1_conv5_weights',
- 'stage_2_1_conv3_weights',
- 'stage_2_2_conv3_weights',
- 'stage_2_3_conv3_weights',
- 'stage_2_4_conv3_weights',
- 'stage_3_1_conv5_weights',
- 'stage_3_1_conv3_weights',
- 'stage_3_2_conv3_weights',
- 'stage_3_3_conv3_weights',
- 'stage_3_4_conv3_weights',
- 'stage_3_5_conv3_weights',
- 'stage_3_6_conv3_weights',
- 'stage_3_7_conv3_weights',
- 'stage_3_8_conv3_weights',
- 'stage_4_1_conv5_weights',
- 'stage_4_1_conv3_weights',
- 'stage_4_2_conv3_weights',
- 'stage_4_3_conv3_weights',
- 'stage_4_4_conv3_weights',]
+ not_prune_names = [
+ 'stage_2_1_conv5_weights',
+ 'stage_2_1_conv3_weights',
+ 'stage_2_2_conv3_weights',
+ 'stage_2_3_conv3_weights',
+ 'stage_2_4_conv3_weights',
+ 'stage_3_1_conv5_weights',
+ 'stage_3_1_conv3_weights',
+ 'stage_3_2_conv3_weights',
+ 'stage_3_3_conv3_weights',
+ 'stage_3_4_conv3_weights',
+ 'stage_3_5_conv3_weights',
+ 'stage_3_6_conv3_weights',
+ 'stage_3_7_conv3_weights',
+ 'stage_3_8_conv3_weights',
+ 'stage_4_1_conv5_weights',
+ 'stage_4_1_conv3_weights',
+ 'stage_4_2_conv3_weights',
+ 'stage_4_3_conv3_weights',
+ 'stage_4_4_conv3_weights',
+ ]
for name in not_prune_names:
- prune_names.remove(name)
+ prune_names.remove(name)
elif model_type == "MobileNetV1":
prune_names.append("conv1_weights")
for param in program.global_block().all_parameters():
diff --git a/paddlex/cv/models/utils/pretrain_weights.py b/paddlex/cv/models/utils/pretrain_weights.py
index 97018acb827c41381f2e3e29df87ee0620ee2f40..0d969981a5fae2ae015beed74e852fa06514ec79 100644
--- a/paddlex/cv/models/utils/pretrain_weights.py
+++ b/paddlex/cv/models/utils/pretrain_weights.py
@@ -65,6 +65,8 @@ image_pretrain = {
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W32_C_pretrained.tar',
'HRNet_W40':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W40_C_pretrained.tar',
+ 'HRNet_W44':
+ 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W44_C_pretrained.tar',
'HRNet_W48':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W48_C_pretrained.tar',
'HRNet_W60':
@@ -86,6 +88,8 @@ coco_pretrain = {
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar',
'YOLOv3_ResNet50_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn.tar',
+ 'FasterRCNN_ResNet18_COCO':
+ 'https://bj.bcebos.com/paddlex/pretrained_weights/faster_rcnn_r18_fpn_1x.tar',
'FasterRCNN_ResNet50_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_2x.tar',
'FasterRCNN_ResNet50_vd_COCO':
@@ -96,6 +100,8 @@ coco_pretrain = {
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_2x.tar',
'FasterRCNN_HRNet_W18_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_hrnetv2p_w18_2x.tar',
+ 'MaskRCNN_ResNet18_COCO':
+ 'https://bj.bcebos.com/paddlex/pretrained_weights/mask_rcnn_r18_fpn_1x.tar',
'MaskRCNN_ResNet50_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_fpn_2x.tar',
'MaskRCNN_ResNet50_vd_COCO':
@@ -104,6 +110,8 @@ coco_pretrain = {
'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_fpn_1x.tar',
'MaskRCNN_ResNet101_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_vd_fpn_1x.tar',
+ 'MaskRCNN_HRNet_W18_COCO':
+ 'https://bj.bcebos.com/paddlex/pretrained_weights/mask_rcnn_hrnetv2p_w18_2x.tar',
'UNet_COCO': 'https://paddleseg.bj.bcebos.com/models/unet_coco_v3.tgz',
'DeepLabv3p_MobileNetV2_x1.0_COCO':
'https://bj.bcebos.com/v1/paddleseg/deeplab_mobilenet_x1_0_coco.tgz',
@@ -132,9 +140,10 @@ def get_pretrain_weights(flag, class_name, backbone, save_dir):
return flag
warning_info = "{} does not support to be finetuned with weights pretrained on the {} dataset, so pretrain_weights is forced to be set to {}"
if flag == 'COCO':
- if class_name == "FasterRCNN" and backbone in ['ResNet18'] or \
- class_name == "MaskRCNN" and backbone in ['ResNet18', 'HRNet_W18'] or \
- class_name == 'DeepLabv3p' and backbone in ['Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0']:
+ if class_name == 'DeepLabv3p' and backbone in [
+ 'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5',
+ 'MobileNetV2_x1.5', 'MobileNetV2_x2.0'
+ ]:
model_name = '{}_{}'.format(class_name, backbone)
logging.warning(warning_info.format(model_name, flag, 'IMAGENET'))
flag = 'IMAGENET'
@@ -194,14 +203,20 @@ def get_pretrain_weights(flag, class_name, backbone, save_dir):
# paddlex.utils.download_and_decompress(url, path=new_save_dir)
# return osp.join(new_save_dir, fname)
try:
+ logging.info(
+ "Connecting PaddleHub server to get pretrain weights...")
hub.download(backbone, save_path=new_save_dir)
except Exception as e:
+ logging.error(
+ "Couldn't download pretrain weight, you can download it manualy from {} (decompress the file if it is a compressed file), and set pretrain weights by your self".
+ format(image_pretrain[backbone]),
+ exit=False)
if isinstance(e, hub.ResourceNotFoundError):
raise Exception("Resource for backbone {} not found".format(
backbone))
elif isinstance(e, hub.ServerConnectionError):
raise Exception(
- "Cannot get reource for backbone {}, please check your internet connecgtion"
+ "Cannot get reource for backbone {}, please check your internet connection"
.format(backbone))
else:
raise Exception(
@@ -222,20 +237,25 @@ def get_pretrain_weights(flag, class_name, backbone, save_dir):
# paddlex.utils.download_and_decompress(url, path=new_save_dir)
# return osp.join(new_save_dir, fname)
try:
+ logging.info(
+ "Connecting PaddleHub server to get pretrain weights...")
hub.download(backbone, save_path=new_save_dir)
except Exception as e:
+ logging.error(
+ "Couldn't download pretrain weight, you can download it manualy from {} (decompress the file if it is a compressed file), and set pretrain weights by your self".
+ format(url),
+ exit=False)
if isinstance(e, hub.ResourceNotFoundError):
raise Exception("Resource for backbone {} not found".format(
backbone))
elif isinstance(e, hub.ServerConnectionError):
raise Exception(
- "Cannot get reource for backbone {}, please check your internet connecgtion"
+ "Cannot get reource for backbone {}, please check your internet connection"
.format(backbone))
else:
raise Exception(
"Unexpected error, please make sure paddlehub >= 1.6.2")
return osp.join(new_save_dir, backbone)
else:
- raise Exception(
- "pretrain_weights need to be defined as directory path or 'IMAGENET' or 'COCO' or 'Cityscapes' (download pretrain weights automatically)."
- )
+ logging.error("Path of retrain weights '{}' is not exists!".format(
+ flag))
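
Since ResNet18-based FasterRCNN/MaskRCNN (and HRNet_W18 MaskRCNN) now ship COCO pretrained weights, finetuning from COCO no longer falls back to IMAGENET for these combinations. A hedged sketch of what this enables (dataset paths, transform parameters and training settings are all illustrative):

```python
import paddlex as pdx
from paddlex.det import transforms

# Illustrative detection transforms for an RCNN-style model.
train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.Normalize(),
    transforms.ResizeByShort(short_size=800, max_size=1333),
    transforms.Padding(coarsest_stride=32)])

train_dataset = pdx.datasets.VOCDetection(
    data_dir='insect_det',
    file_list='insect_det/train_list.txt',
    label_list='insect_det/labels.txt',
    transforms=train_transforms,
    shuffle=True)

# ResNet18 FasterRCNN can now start from the newly added COCO weights.
model = pdx.det.FasterRCNN(
    num_classes=len(train_dataset.labels) + 1, backbone='ResNet18')
model.train(
    num_epochs=12,
    train_dataset=train_dataset,
    pretrain_weights='COCO',
    save_dir='output/faster_rcnn_r18')
```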
diff --git a/paddlex/cv/models/utils/visualize.py b/paddlex/cv/models/utils/visualize.py
index 89875114f13e2b275019e3a65fc19576013dd68a..7e1fbbc74932cd9cca06327bf757a566b6d30547 100644
--- a/paddlex/cv/models/utils/visualize.py
+++ b/paddlex/cv/models/utils/visualize.py
@@ -1,11 +1,11 @@
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
+#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -28,7 +28,7 @@ def visualize_detection(image, result, threshold=0.5, save_dir='./'):
"""
if isinstance(image, np.ndarray):
- image_name = str(int(time.time())) + '.jpg'
+ image_name = str(int(time.time() * 1000)) + '.jpg'
else:
image_name = os.path.split(image)[-1]
image = cv2.imread(image)
@@ -64,7 +64,7 @@ def visualize_segmentation(image, result, weight=0.6, save_dir='./'):
if isinstance(image, np.ndarray):
im = image
- image_name = str(int(time.time())) + '.jpg'
+ image_name = str(int(time.time() * 1000)) + '.jpg'
else:
image_name = os.path.split(image)[-1]
im = cv2.imread(image)
@@ -145,8 +145,8 @@ def draw_bbox_mask(image, results, threshold=0.5):
assert brightness_factor >= -1.0 and brightness_factor <= 1.0
color = mplc.to_rgb(color)
polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color))
- modified_lightness = polygon_color[1] + (
- brightness_factor * polygon_color[1])
+ modified_lightness = polygon_color[1] + (brightness_factor *
+ polygon_color[1])
modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness
modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness
modified_color = colorsys.hls_to_rgb(
@@ -161,8 +161,7 @@ def draw_bbox_mask(image, results, threshold=0.5):
dpi = fig.get_dpi()
fig.set_size_inches(
(width * scale + 1e-2) / dpi,
- (height * scale + 1e-2) / dpi,
- )
+ (height * scale + 1e-2) / dpi, )
canvas = FigureCanvasAgg(fig)
ax = fig.add_axes([0.0, 0.0, 1.0, 1.0])
ax.axis("off")
@@ -208,8 +207,7 @@ def draw_bbox_mask(image, results, threshold=0.5):
edgecolor=color,
linewidth=linewidth * scale,
alpha=0.8,
- linestyle="-",
- ))
+ linestyle="-", ))
# draw mask
if 'mask' in dt:
@@ -232,23 +230,22 @@ def draw_bbox_mask(image, results, threshold=0.5):
fill=True,
facecolor=mplc.to_rgb(color) + (alpha, ),
edgecolor=edge_color,
- linewidth=max(default_font_size // 15 * scale, 1),
- )
+ linewidth=max(default_font_size // 15 * scale, 1), )
ax.add_patch(polygon)
# draw label
text_pos = (xmin, ymin)
horiz_align = "left"
instance_area = w * h
- if (instance_area < _SMALL_OBJECT_AREA_THRESH * scale
- or h < 40 * scale):
+ if (instance_area < _SMALL_OBJECT_AREA_THRESH * scale or
+ h < 40 * scale):
if ymin >= height - 5:
text_pos = (xmin, ymin)
else:
text_pos = (xmin, ymax)
height_ratio = h / np.sqrt(height * width)
- font_size = (np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 *
- default_font_size)
+ font_size = (np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2,
+ 2) * 0.5 * default_font_size)
text = "{} {:.2f}".format(cname, score)
color = np.maximum(list(mplc.to_rgb(color)), 0.2)
color[np.argmax(color)] = max(0.8, np.max(color))
@@ -269,8 +266,7 @@ def draw_bbox_mask(image, results, threshold=0.5):
horizontalalignment=horiz_align,
color=color,
zorder=10,
- rotation=0,
- )
+ rotation=0, )
s, (width, height) = canvas.print_to_buffer()
buffer = np.frombuffer(s, dtype="uint8")
@@ -408,8 +404,8 @@ def draw_pr_curve(eval_details_file=None,
plt.plot(x, sr_array, color=color, label=nm, linewidth=1)
plt.legend(loc="lower left", fontsize=5)
plt.savefig(
- os.path.join(save_dir, "./{}_pr_curve(iou-{}).png".format(
- style, iou_thresh)),
+ os.path.join(save_dir,
+ "./{}_pr_curve(iou-{}).png".format(style, iou_thresh)),
dpi=800)
plt.close()
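
One consequence of the timestamp change above: when an np.ndarray (rather than a file path) is visualized, the saved file name now has millisecond resolution, so rapid successive calls no longer overwrite each other. A hedged sketch (model path and image are illustrative; `pdx.det.visualize` is assumed to wrap `visualize_detection`):

```python
import cv2
import paddlex as pdx

model = pdx.load_model('output/yolov3_darknet53/best_model')
img = cv2.imread('street.jpg')  # decoded ndarray input, BGR order

result = model.predict(img)
# The output file is named from int(time.time() * 1000), e.g. 1589960000123.jpg
pdx.det.visualize(img, result, threshold=0.5, save_dir='./vis')
```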
diff --git a/paddlex/cv/models/yolo_v3.py b/paddlex/cv/models/yolo_v3.py
index 0417431bdda69f109fc0a40f30d0ddac85174e82..32b74df408b0ce68b632b81cb08536a8d6c9115a 100644
--- a/paddlex/cv/models/yolo_v3.py
+++ b/paddlex/cv/models/yolo_v3.py
@@ -17,13 +17,16 @@ import math
import tqdm
import os.path as osp
import numpy as np
+from multiprocessing.pool import ThreadPool
import paddle.fluid as fluid
import paddlex.utils.logging as logging
import paddlex
+import copy
+from paddlex.cv.transforms import arrange_transforms
+from paddlex.cv.datasets import generate_minibatch
from .base import BaseAPI
from collections import OrderedDict
from .utils.detection_eval import eval_results, bbox2out
-import copy
class YOLOv3(BaseAPI):
@@ -286,7 +289,11 @@ class YOLOv3(BaseAPI):
eval_details为dict,包含关键字:'bbox',对应元素预测结果列表,每个预测结果由图像id、
预测框类别id、预测框坐标、预测框得分;’gt‘:真实标注框相关信息。
"""
- self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
+ arrange_transforms(
+ model_type=self.model_type,
+ class_name=self.__class__.__name__,
+ transforms=eval_dataset.transforms,
+ mode='eval')
if metric is None:
if hasattr(self, 'metric') and self.metric is not None:
metric = self.metric
@@ -306,8 +313,9 @@ class YOLOv3(BaseAPI):
data_generator = eval_dataset.generator(
batch_size=batch_size, drop_last=False)
- logging.info("Start to evaluating(total_samples={}, total_steps={})...".
- format(eval_dataset.num_samples, total_steps))
+ logging.info(
+ "Start to evaluating(total_samples={}, total_steps={})...".format(
+ eval_dataset.num_samples, total_steps))
for step, data in tqdm.tqdm(
enumerate(data_generator()), total=total_steps):
images = np.array([d[0] for d in data])
@@ -345,11 +353,43 @@ class YOLOv3(BaseAPI):
return evaluate_metrics, eval_details
return evaluate_metrics
+ @staticmethod
+ def _preprocess(images, transforms, model_type, class_name, thread_num=1):
+ arrange_transforms(
+ model_type=model_type,
+ class_name=class_name,
+ transforms=transforms,
+ mode='test')
+ pool = ThreadPool(thread_num)
+ batch_data = pool.map(transforms, images)
+ pool.close()
+ pool.join()
+ padding_batch = generate_minibatch(batch_data)
+ im = np.array(
+ [data[0] for data in padding_batch],
+ dtype=padding_batch[0][0].dtype)
+ im_size = np.array([data[1] for data in padding_batch], dtype=np.int32)
+
+ return im, im_size
+
+ @staticmethod
+ def _postprocess(res, batch_size, num_classes, labels):
+ clsid2catid = dict({i: i for i in range(num_classes)})
+ xywh_results = bbox2out([res], clsid2catid)
+ preds = [[] for i in range(batch_size)]
+ for xywh_res in xywh_results:
+ image_id = xywh_res['image_id']
+ del xywh_res['image_id']
+ xywh_res['category'] = labels[xywh_res['category_id']]
+ preds[image_id].append(xywh_res)
+
+ return preds
+
def predict(self, img_file, transforms=None):
"""预测。
Args:
- img_file (str): 预测图像路径。
+ img_file (str|np.ndarray): Path of the image to predict, or a decoded array of shape (H, W, C), dtype float32, in BGR channel order.
transforms (paddlex.det.transforms): Preprocessing transforms.
Returns:
@@ -359,32 +399,74 @@ class YOLOv3(BaseAPI):
"""
if transforms is None and not hasattr(self, 'test_transforms'):
raise Exception("transforms need to be defined, now is None.")
- if transforms is not None:
- self.arrange_transforms(transforms=transforms, mode='test')
- im, im_size = transforms(img_file)
+ if isinstance(img_file, (str, np.ndarray)):
+ images = [img_file]
else:
- self.arrange_transforms(
- transforms=self.test_transforms, mode='test')
- im, im_size = self.test_transforms(img_file)
- im = np.expand_dims(im, axis=0)
- im_size = np.expand_dims(im_size, axis=0)
+ raise Exception("img_file must be str/np.ndarray")
+
+ if transforms is None:
+ transforms = self.test_transforms
+ im, im_size = YOLOv3._preprocess(images, transforms, self.model_type,
+ self.__class__.__name__)
+
with fluid.scope_guard(self.scope):
- outputs = self.exe.run(self.test_prog,
- feed={'image': im,
- 'im_size': im_size},
- fetch_list=list(self.test_outputs.values()),
- return_numpy=False,
- use_program_cache=True)
+ result = self.exe.run(self.test_prog,
+ feed={'image': im,
+ 'im_size': im_size},
+ fetch_list=list(self.test_outputs.values()),
+ return_numpy=False,
+ use_program_cache=True)
+
res = {
k: (np.array(v), v.recursive_sequence_lengths())
- for k, v in zip(list(self.test_outputs.keys()), outputs)
+ for k, v in zip(list(self.test_outputs.keys()), result)
}
- res['im_id'] = (np.array([[0]]).astype('int32'), [])
- clsid2catid = dict({i: i for i in range(self.num_classes)})
- xywh_results = bbox2out([res], clsid2catid)
- results = list()
- for xywh_res in xywh_results:
- del xywh_res['image_id']
- xywh_res['category'] = self.labels[xywh_res['category_id']]
- results.append(xywh_res)
- return results
+ res['im_id'] = (np.array(
+ [[i] for i in range(len(images))]).astype('int32'), [[]])
+ preds = YOLOv3._postprocess(res,
+ len(images), self.num_classes, self.labels)
+ return preds[0]
+
+ def batch_predict(self, img_file_list, transforms=None, thread_num=2):
+ """预测。
+
+ Args:
+ img_file_list (list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径,也可以是解码后的排列格式为(H,W,C)
+ 且类型为float32且为BGR格式的数组。
+ transforms (paddlex.det.transforms): 数据预处理操作。
+ thread_num (int): 并发执行各图像预处理时的线程数。
+ Returns:
+ list: 每个元素都为列表,表示各图像的预测结果。在各图像的预测结果列表中,每个预测结果由预测框类别标签、
+ 预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、
+ 预测框得分组成。
+ """
+ if transforms is None and not hasattr(self, 'test_transforms'):
+ raise Exception("transforms need to be defined, now is None.")
+
+ if not isinstance(img_file_list, (list, tuple)):
+ raise Exception("im_file must be list/tuple")
+
+ if transforms is None:
+ transforms = self.test_transforms
+ im, im_size = YOLOv3._preprocess(img_file_list, transforms,
+ self.model_type,
+ self.__class__.__name__, thread_num)
+
+ with fluid.scope_guard(self.scope):
+ result = self.exe.run(self.test_prog,
+ feed={'image': im,
+ 'im_size': im_size},
+ fetch_list=list(self.test_outputs.values()),
+ return_numpy=False,
+ use_program_cache=True)
+
+ res = {
+ k: (np.array(v), v.recursive_sequence_lengths())
+ for k, v in zip(list(self.test_outputs.keys()), result)
+ }
+ res['im_id'] = (np.array(
+ [[i] for i in range(len(img_file_list))]).astype('int32'), [[]])
+ preds = YOLOv3._postprocess(res,
+ len(img_file_list), self.num_classes,
+ self.labels)
+ return preds
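
A minimal sketch of the reworked single/batch prediction pair (the model directory and image paths are illustrative):

```python
import paddlex as pdx

model = pdx.load_model('output/yolov3_darknet53/best_model')

# Single image: returns one list of detections.
single = model.predict('street.jpg')

# Batch: returns one list of detections per input image.
batch = model.batch_predict(['street1.jpg', 'street2.jpg'], thread_num=2)
print(len(single), len(batch))  # detections in the first image, then 2 result lists
```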
diff --git a/paddlex/cv/transforms/__init__.py b/paddlex/cv/transforms/__init__.py
index c74b5b19e8d1e007674f6d17a30736f42dde1789..fc8494c7fd279fce03e70993a64349be38d11cfb 100644
--- a/paddlex/cv/transforms/__init__.py
+++ b/paddlex/cv/transforms/__init__.py
@@ -15,5 +15,87 @@
from . import cls_transforms
from . import det_transforms
from . import seg_transforms
+
+# logging is needed by build_transforms_v1 below
+import paddlex.utils.logging as logging
from . import visualize
visualize = visualize.visualize
+
+
+def build_transforms(model_type, transforms_info, to_rgb=True):
+ if model_type == "classifier":
+ from . import cls_transforms as T
+ elif model_type == "detector":
+ from . import det_transforms as T
+ elif model_type == "segmenter":
+ from . import seg_transforms as T
+ transforms = list()
+ for op_info in transforms_info:
+ op_name = list(op_info.keys())[0]
+ op_attr = op_info[op_name]
+ if not hasattr(T, op_name):
+ raise Exception(
+ "There's no operator named '{}' in transforms of {}".format(
+ op_name, model_type))
+ transforms.append(getattr(T, op_name)(**op_attr))
+ eval_transforms = T.Compose(transforms)
+ eval_transforms.to_rgb = to_rgb
+ return eval_transforms
+
+
+def build_transforms_v1(model_type, transforms_info, batch_transforms_info):
+ """ 老版本模型加载,仅支持PaddleX前端导出的模型
+ """
+ logging.debug("Use build_transforms_v1 to reconstruct transforms")
+ if model_type == "classifier":
+ from . import cls_transforms as T
+ elif model_type == "detector":
+ from . import det_transforms as T
+ elif model_type == "segmenter":
+ from . import seg_transforms as T
+ transforms = list()
+ for op_info in transforms_info:
+ op_name = op_info[0]
+ op_attr = op_info[1]
+ if op_name == 'DecodeImage':
+ continue
+ if op_name == 'Permute':
+ continue
+ if op_name == 'ResizeByShort':
+ op_attr_new = dict()
+ if 'short_size' in op_attr:
+ op_attr_new['short_size'] = op_attr['short_size']
+ else:
+ op_attr_new['short_size'] = op_attr['target_size']
+ op_attr_new['max_size'] = op_attr.get('max_size', -1)
+ op_attr = op_attr_new
+ if op_name.startswith('Arrange'):
+ continue
+ if not hasattr(T, op_name):
+ raise Exception(
+ "There's no operator named '{}' in transforms of {}".format(
+ op_name, model_type))
+ transforms.append(getattr(T, op_name)(**op_attr))
+ if model_type == "detector" and len(batch_transforms_info) > 0:
+ op_name = batch_transforms_info[0][0]
+ op_attr = batch_transforms_info[0][1]
+ assert op_name == "PaddingMiniBatch", "Only PaddingMiniBatch transform is supported for batch transform"
+ padding = T.Padding(coarsest_stride=op_attr['coarsest_stride'])
+ transforms.append(padding)
+ eval_transforms = T.Compose(transforms)
+ return eval_transforms
+
+
+def arrange_transforms(model_type, class_name, transforms, mode='train'):
+ # Append the model-specific Arrange* operator to the transforms
+ if model_type == 'classifier':
+ arrange_transform = cls_transforms.ArrangeClassifier
+ elif model_type == 'segmenter':
+ arrange_transform = seg_transforms.ArrangeSegmenter
+ elif model_type == 'detector':
+ arrange_name = 'Arrange{}'.format(class_name)
+ arrange_transform = getattr(det_transforms, arrange_name)
+ else:
+ raise Exception("Unrecognized model type: {}".format(self.model_type))
+ if type(transforms.transforms[-1]).__name__.startswith('Arrange'):
+ transforms.transforms[-1] = arrange_transform(mode=mode)
+ else:
+ transforms.transforms.append(arrange_transform(mode=mode))
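
For illustration, a sketch of rebuilding eval transforms from a serialized transform list as stored in a model's yml metadata (the operator names and parameters below are illustrative, not taken from a real model file):

```python
from paddlex.cv.transforms import build_transforms

# Each entry maps an operator name to its keyword arguments.
transforms_info = [
    {'ResizeByShort': {'short_size': 256, 'max_size': -1}},
    {'CenterCrop': {'crop_size': 224}},
    {'Normalize': {}},
]
eval_transforms = build_transforms('classifier', transforms_info, to_rgb=True)
```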
diff --git a/paddlex/cv/transforms/cls_transforms.py b/paddlex/cv/transforms/cls_transforms.py
index 606bb5b8d6eb4605510f734d9b737811ec22c477..4166cd170ecf1a0f1b840a804b0d0e28615a04ba 100644
--- a/paddlex/cv/transforms/cls_transforms.py
+++ b/paddlex/cv/transforms/cls_transforms.py
@@ -68,13 +68,14 @@ class Compose(ClsTransform):
if isinstance(im, np.ndarray):
if len(im.shape) != 3:
raise Exception(
- "im should be 3-dimension, but now is {}-dimensions".format(
- len(im.shape)))
+ "im should be 3-dimension, but now is {}-dimensions".
+ format(len(im.shape)))
else:
try:
- im = cv2.imread(im).astype('float32')
+ im = cv2.imread(im)
except:
raise TypeError('Can\'t read The image file {}!'.format(im))
+ im = im.astype('float32')
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
for op in self.transforms:
if isinstance(op, ClsTransform):
@@ -139,8 +140,8 @@ class RandomCrop(ClsTransform):
tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据;
当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。
"""
- im = random_crop(im, self.crop_size, self.lower_scale, self.lower_ratio,
- self.upper_ratio)
+ im = random_crop(im, self.crop_size, self.lower_scale,
+ self.lower_ratio, self.upper_ratio)
if label is None:
return (im, )
else:
@@ -270,12 +271,14 @@ class ResizeByShort(ClsTransform):
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
- if self.max_size > 0 and np.round(scale * im_long_size) > self.max_size:
+ if self.max_size > 0 and np.round(scale *
+ im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im = cv2.resize(
- im, (resized_width, resized_height), interpolation=cv2.INTER_LINEAR)
+ im, (resized_width, resized_height),
+ interpolation=cv2.INTER_LINEAR)
if label is None:
return (im, )
diff --git a/paddlex/cv/transforms/det_transforms.py b/paddlex/cv/transforms/det_transforms.py
index dd193d2f14aac37b58b2af9ee49e8d99d51f05ad..9154f03cf9975625041728d8656bf838ad36c434 100644
--- a/paddlex/cv/transforms/det_transforms.py
+++ b/paddlex/cv/transforms/det_transforms.py
@@ -108,10 +108,11 @@ class Compose(DetTransform):
im = im_file
else:
try:
- im = cv2.imread(im_file).astype('float32')
+ im = cv2.imread(im_file)
except:
raise TypeError('Can\'t read The image file {}!'.format(
im_file))
+ im = im.astype('float32')
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
# make default im_info with [h, w, 1]
im_info['im_resize_info'] = np.array(
@@ -220,13 +221,15 @@ class ResizeByShort(DetTransform):
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
- if self.max_size > 0 and np.round(scale * im_long_size) > self.max_size:
+ if self.max_size > 0 and np.round(scale *
+ im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im_resize_info = [resized_height, resized_width, scale]
im = cv2.resize(
- im, (resized_width, resized_height), interpolation=cv2.INTER_LINEAR)
+ im, (resized_width, resized_height),
+ interpolation=cv2.INTER_LINEAR)
im_info['im_resize_info'] = np.array(im_resize_info).astype(np.float32)
if label_info is None:
return (im, im_info)
@@ -266,7 +269,8 @@ class Padding(DetTransform):
if not isinstance(target_size, tuple) and not isinstance(
target_size, list):
raise TypeError(
- "Padding: Type of target_size must in (int|list|tuple).")
+ "Padding: Type of target_size must in (int|list|tuple)."
+ )
elif len(target_size) != 2:
raise ValueError(
"Padding: Length of target_size must equal 2.")
@@ -451,7 +455,8 @@ class RandomHorizontalFlip(DetTransform):
ValueError: 数据长度不匹配。
"""
if not isinstance(im, np.ndarray):
- raise TypeError("RandomHorizontalFlip: image is not a numpy array.")
+ raise TypeError(
+ "RandomHorizontalFlip: image is not a numpy array.")
if len(im.shape) != 3:
raise ValueError(
"RandomHorizontalFlip: image is not 3-dimensional.")
@@ -782,7 +787,9 @@ class RandomExpand(DetTransform):
fill_value (list): 扩张图像的初始填充值(0-255)。默认为[123.675, 116.28, 103.53]。
"""
- def __init__(self, ratio=4., prob=0.5,
+ def __init__(self,
+ ratio=4.,
+ prob=0.5,
fill_value=[123.675, 116.28, 103.53]):
super(RandomExpand, self).__init__()
assert ratio > 1.01, "expand ratio must be larger than 1.01"
diff --git a/paddlex/cv/transforms/seg_transforms.py b/paddlex/cv/transforms/seg_transforms.py
index f2bfb32ebeed11f84c27ab7f1d8e8920f21699a7..f5a565f649218c45d6d4c11db88ae08d842a8f9e 100644
--- a/paddlex/cv/transforms/seg_transforms.py
+++ b/paddlex/cv/transforms/seg_transforms.py
@@ -81,9 +81,10 @@ class Compose(SegTransform):
format(len(im.shape)))
else:
try:
- im = cv2.imread(im).astype('float32')
+ im = cv2.imread(im)
except:
raise ValueError('Can\'t read The image file {}!'.format(im))
+ im = im.astype('float32')
if self.to_rgb:
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
if label is not None:
@@ -399,7 +400,8 @@ class ResizeByShort(SegTransform):
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
- if self.max_size > 0 and np.round(scale * im_long_size) > self.max_size:
+ if self.max_size > 0 and np.round(scale *
+ im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
@@ -1102,20 +1104,21 @@ class ArrangeSegmenter(SegTransform):
class ComposedSegTransforms(Compose):
""" 语义分割模型(UNet/DeepLabv3p)的图像处理流程,具体如下
训练阶段:
- 1. 随机对图像以0.5的概率水平翻转
- 2. 按不同的比例随机Resize原图
+ 1. 随机对图像以0.5的概率水平翻转,若random_horizontal_flip为False,则跳过此步骤
+ 2. 按不同的比例随机Resize原图, 处理方式参考[paddlex.seg.transforms.ResizeRangeScaling](#resizerangescaling)。若min_max_size为None,则跳过此步骤
3. 从原图中随机crop出大小为train_crop_size大小的子图,如若crop出来的图小于train_crop_size,则会将图padding到对应大小
4. 图像归一化
- 预测阶段:
- 1. 图像归一化
+ 预测阶段:
+ 1. 将图像的最长边resize至(min_max_size[0] + min_max_size[1])//2, 短边按比例resize。若min_max_size为None,则跳过此步骤
+ 2. 图像归一化
Args:
- mode(str): 图像处理所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
- min_max_size(list): 训练过程中,图像的最长边会随机resize至此区间(短边按比例相应resize);预测阶段,图像最长边会resize至此区间中间值,即(min_size+max_size)/2。默认为[400, 600]
- train_crop_size(list): 仅在mode为'train`时生效,训练过程中,随机从图像中裁剪出对应大小的子图(如若原图小于此大小,则会padding到此大小),默认为[400, 600]
- mean(list): 图像均值
- std(list): 图像方差
- random_horizontal_flip(bool): 数据增强方式,仅在mode为`train`时生效,表示训练过程是否随机水平翻转图像,默认为True
+ mode(str): Transforms所处的阶段,包括`train', 'eval'或'test'
+ min_max_size(list): 用于对图像进行resize,具体作用参见上述步骤。
+ train_crop_size(list): 训练过程中随机裁剪原图用于训练,具体作用参见上述步骤。此参数仅在mode为`train`时生效。
+ mean(list): 图像均值, 默认为[0.485, 0.456, 0.406]。
+ std(list): 图像方差,默认为[0.229, 0.224, 0.225]。
+ random_horizontal_flip(bool): 数据增强,是否随机水平翻转图像,此参数仅在mode为`train`时生效。
"""
def __init__(self,
@@ -1127,19 +1130,29 @@ class ComposedSegTransforms(Compose):
random_horizontal_flip=True):
if mode == 'train':
# 训练时的transforms,包含数据增强
- transforms = [
- ResizeRangeScaling(
- min_value=min(min_max_size), max_value=max(min_max_size)),
- RandomPaddingCrop(crop_size=train_crop_size), Normalize(
- mean=mean, std=std)
- ]
+ if min_max_size is None:
+ transforms = [
+ RandomPaddingCrop(crop_size=train_crop_size), Normalize(
+ mean=mean, std=std)
+ ]
+ else:
+ transforms = [
+ ResizeRangeScaling(
+ min_value=min(min_max_size),
+ max_value=max(min_max_size)),
+ RandomPaddingCrop(crop_size=train_crop_size), Normalize(
+ mean=mean, std=std)
+ ]
if random_horizontal_flip:
transforms.insert(0, RandomHorizontalFlip())
else:
# 验证/预测时的transforms
- long_size = (min(min_max_size) + max(min_max_size)) // 2
- transforms = [
- ResizeByLong(long_size=long_size), Normalize(
- mean=mean, std=std)
- ]
+ if min_max_size is None:
+ transforms = [Normalize(mean=mean, std=std)]
+ else:
+ long_size = (min(min_max_size) + max(min_max_size)) // 2
+ transforms = [
+ ResizeByLong(long_size=long_size), Normalize(
+ mean=mean, std=std)
+ ]
super(ComposedSegTransforms, self).__init__(transforms)
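
A small sketch of the new `min_max_size=None` path (it assumes `ComposedSegTransforms` is reachable via `paddlex.seg.transforms`; the crop size is illustrative):

```python
from paddlex.seg import transforms

# With min_max_size=None all resize steps are skipped: training keeps only
# random flip/crop plus normalization, eval/test keeps only normalization.
train_transforms = transforms.ComposedSegTransforms(
    mode='train', min_max_size=None, train_crop_size=[400, 600])
eval_transforms = transforms.ComposedSegTransforms(
    mode='eval', min_max_size=None)
```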
diff --git a/paddlex/deploy.py b/paddlex/deploy.py
index 0aee491ecdda1609b8827f94d0412a26bf053650..c5d114a230f83241df743166ad51bb04ad71f499 100644
--- a/paddlex/deploy.py
+++ b/paddlex/deploy.py
@@ -18,6 +18,8 @@ import numpy as np
import yaml
import paddlex
import paddle.fluid as fluid
+from paddlex.cv.transforms import build_transforms
+from paddlex.cv.models import BaseClassifier, YOLOv3, FasterRCNN, MaskRCNN, DeepLabv3p
class Predictor:
@@ -68,8 +70,8 @@ class Predictor:
to_rgb = True
else:
to_rgb = False
- self.transforms = self.build_transforms(self.info['Transforms'],
- to_rgb)
+ self.transforms = build_transforms(self.model_type,
+ self.info['Transforms'], to_rgb)
self.predictor = self.create_predictor(
use_gpu, gpu_id, use_mkl, use_trt, use_glog, memory_optimize)
@@ -105,77 +107,101 @@ class Predictor:
predictor = fluid.core.create_paddle_predictor(config)
return predictor
- def build_transforms(self, transforms_info, to_rgb=True):
- if self.model_type == "classifier":
- from paddlex.cls import transforms
- elif self.model_type == "detector":
- from paddlex.det import transforms
- elif self.model_type == "segmenter":
- from paddlex.seg import transforms
- op_list = list()
- for op_info in transforms_info:
- op_name = list(op_info.keys())[0]
- op_attr = op_info[op_name]
- if not hasattr(transforms, op_name):
- raise Exception(
- "There's no operator named '{}' in transforms of {}".
- format(op_name, self.model_type))
- op_list.append(getattr(transforms, op_name)(**op_attr))
- eval_transforms = transforms.Compose(op_list)
- if hasattr(eval_transforms, 'to_rgb'):
- eval_transforms.to_rgb = to_rgb
- self.arrange_transforms(eval_transforms)
- return eval_transforms
-
- def arrange_transforms(self, transforms):
- if self.model_type == 'classifier':
- arrange_transform = paddlex.cls.transforms.ArrangeClassifier
- elif self.model_type == 'segmenter':
- arrange_transform = paddlex.seg.transforms.ArrangeSegmenter
- elif self.model_type == 'detector':
- arrange_name = 'Arrange{}'.format(self.model_name)
- arrange_transform = getattr(paddlex.det.transforms, arrange_name)
- else:
- raise Exception("Unrecognized model type: {}".format(
- self.model_type))
- if type(transforms.transforms[-1]).__name__.startswith('Arrange'):
- transforms.transforms[-1] = arrange_transform(mode='test')
- else:
- transforms.transforms.append(arrange_transform(mode='test'))
-
- def preprocess(self, image):
+ def preprocess(self, image, thread_num=1):
""" 对图像做预处理
Args:
- image(str|np.ndarray): 图片路径或np.ndarray,如为后者,要求是BGR格式
+ image(list|tuple): 数组中的元素可以是图像路径,也可以是解码后的排列格式为(H,W,C)
+ 且类型为float32且为BGR格式的数组。
"""
res = dict()
if self.model_type == "classifier":
- im, = self.transforms(image)
- im = np.expand_dims(im, axis=0).copy()
+ im = BaseClassifier._preprocess(
+ image,
+ self.transforms,
+ self.model_type,
+ self.model_name,
+ thread_num=thread_num)
res['image'] = im
elif self.model_type == "detector":
if self.model_name == "YOLOv3":
- im, im_shape = self.transforms(image)
- im = np.expand_dims(im, axis=0).copy()
- im_shape = np.expand_dims(im_shape, axis=0).copy()
+ im, im_size = YOLOv3._preprocess(
+ image,
+ self.transforms,
+ self.model_type,
+ self.model_name,
+ thread_num=thread_num)
res['image'] = im
- res['im_size'] = im_shape
+ res['im_size'] = im_size
if self.model_name.count('RCNN') > 0:
- im, im_resize_info, im_shape = self.transforms(image)
- im = np.expand_dims(im, axis=0).copy()
- im_resize_info = np.expand_dims(im_resize_info, axis=0).copy()
- im_shape = np.expand_dims(im_shape, axis=0).copy()
+ im, im_resize_info, im_shape = FasterRCNN._preprocess(
+ image,
+ self.transforms,
+ self.model_type,
+ self.model_name,
+ thread_num=thread_num)
res['image'] = im
res['im_info'] = im_resize_info
res['im_shape'] = im_shape
elif self.model_type == "segmenter":
- im, im_info = self.transforms(image)
- im = np.expand_dims(im, axis=0).copy()
+ im, im_info = DeepLabv3p._preprocess(
+ image,
+ self.transforms,
+ self.model_type,
+ self.model_name,
+ thread_num=thread_num)
res['image'] = im
res['im_info'] = im_info
return res
+ def postprocess(self,
+ results,
+ topk=1,
+ batch_size=1,
+ im_shape=None,
+ im_info=None):
+ """ 对预测结果做后处理
+
+ Args:
+ results (list): 预测结果
+ topk (int): 分类预测时前k个最大值
+ batch_size (int): 预测时图像批量大小
+ im_shape (list): MaskRCNN的图像输入大小
+ im_info (list):RCNN系列和分割网络的原图大小
+ """
+
+ def offset_to_lengths(lod):
+ offset = lod[0]
+ lengths = [
+ offset[i + 1] - offset[i] for i in range(len(offset) - 1)
+ ]
+ return [lengths]
+
+ if self.model_type == "classifier":
+ true_topk = min(self.num_classes, topk)
+ preds = BaseClassifier._postprocess([results[0][0]], true_topk,
+ self.labels)
+ elif self.model_type == "detector":
+ res = {'bbox': (results[0][0], offset_to_lengths(results[0][1])), }
+ res['im_id'] = (np.array(
+ [[i] for i in range(batch_size)]).astype('int32'), [[]])
+ if self.model_name == "YOLOv3":
+ preds = YOLOv3._postprocess(res, batch_size, self.num_classes,
+ self.labels)
+ elif self.model_name == "FasterRCNN":
+ preds = FasterRCNN._postprocess(res, batch_size,
+ self.num_classes, self.labels)
+ elif self.model_name == "MaskRCNN":
+ res['mask'] = (results[1][0], offset_to_lengths(results[1][1]))
+ res['im_shape'] = (im_shape, [])
+ preds = MaskRCNN._postprocess(
+ res, batch_size, self.num_classes,
+ self.mask_head_resolution, self.labels)
+ elif self.model_type == "segmenter":
+ res = [results[0][0], results[1][0]]
+ preds = DeepLabv3p._postprocess(res, im_info)
+ return preds
+
def raw_predict(self, inputs):
""" 接受预处理过后的数据进行预测
@@ -193,82 +219,54 @@ class Predictor:
output_results = list()
for name in output_names:
output_tensor = self.predictor.get_output_tensor(name)
- output_results.append(output_tensor.copy_to_cpu())
+ output_tensor_lod = output_tensor.lod()
+ output_results.append(
+ [output_tensor.copy_to_cpu(), output_tensor_lod])
return output_results
- def classifier_postprocess(self, preds, topk=1):
- """ 对分类模型的预测结果做后处理
- """
- true_topk = min(self.num_classes, topk)
- pred_label = np.argsort(preds[0][0])[::-1][:true_topk]
- result = [{
- 'category_id': l,
- 'category': self.labels[l],
- 'score': preds[0][0, l],
- } for l in pred_label]
- return result
+ def predict(self, image, topk=1):
+ """ 图片预测
- def segmenter_postprocess(self, preds, preprocessed_inputs):
- """ 对语义分割结果做后处理
+ Args:
+ image(str|np.ndarray): 图像路径;或者是解码后的排列格式为(H, W, C)且类型为float32且为BGR格式的数组。
+ topk(int): 分类预测时使用,表示预测前topk的结果
"""
- label_map = np.squeeze(preds[0]).astype('uint8')
- score_map = np.squeeze(preds[1])
- score_map = np.transpose(score_map, (1, 2, 0))
- im_info = preprocessed_inputs['im_info']
- for info in im_info[::-1]:
- if info[0] == 'resize':
- w, h = info[1][1], info[1][0]
- label_map = cv2.resize(label_map, (w, h), cv2.INTER_NEAREST)
- score_map = cv2.resize(score_map, (w, h), cv2.INTER_LINEAR)
- elif info[0] == 'padding':
- w, h = info[1][1], info[1][0]
- label_map = label_map[0:h, 0:w]
- score_map = score_map[0:h, 0:w, :]
- else:
- raise Exception("Unexpected info '{}' in im_info".format(info[
- 0]))
- return {'label_map': label_map, 'score_map': score_map}
+ preprocessed_input = self.preprocess([image])
+ model_pred = self.raw_predict(preprocessed_input)
+ im_shape = None if 'im_shape' not in preprocessed_input else preprocessed_input[
+ 'im_shape']
+ im_info = None if 'im_info' not in preprocessed_input else preprocessed_input[
+ 'im_info']
+ results = self.postprocess(
+ model_pred,
+ topk=topk,
+ batch_size=1,
+ im_shape=im_shape,
+ im_info=im_info)
- def detector_postprocess(self, preds, preprocessed_inputs):
- """ 对目标检测和实例分割结果做后处理
- """
- bboxes = {'bbox': (np.array(preds[0]), [[len(preds[0])]])}
- bboxes['im_id'] = (np.array([[0]]).astype('int32'), [])
- clsid2catid = dict({i: i for i in range(self.num_classes)})
- xywh_results = paddlex.cv.models.utils.detection_eval.bbox2out(
- [bboxes], clsid2catid)
- results = list()
- for xywh_res in xywh_results:
- del xywh_res['image_id']
- xywh_res['category'] = self.labels[xywh_res['category_id']]
- results.append(xywh_res)
- if len(preds) > 1:
- im_shape = preprocessed_inputs['im_shape']
- bboxes['im_shape'] = (im_shape, [])
- bboxes['mask'] = (np.array(preds[1]), [[len(preds[1])]])
- segm_results = paddlex.cv.models.utils.detection_eval.mask2out(
- [bboxes], clsid2catid, self.mask_head_resolution)
- import pycocotools.mask as mask_util
- for i in range(len(results)):
- results[i]['mask'] = mask_util.decode(segm_results[i][
- 'segmentation'])
- return results
+ return results[0]
- def predict(self, image, topk=1, threshold=0.5):
+ def batch_predict(self, image_list, topk=1, thread_num=2):
""" 图片预测
Args:
- image(str|np.ndarray): 图片路径或np.ndarray格式,如果后者,要求为BGR输入格式
+ image_list(list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径
+ 也可以是解码后的排列格式为(H,W,C)且类型为float32且为BGR格式的数组。
+ thread_num (int): 并发执行各图像预处理时的线程数。
+
topk(int): 分类预测时使用,表示预测前topk的结果
"""
- preprocessed_input = self.preprocess(image)
+ preprocessed_input = self.preprocess(image_list, thread_num=thread_num)
model_pred = self.raw_predict(preprocessed_input)
+ im_shape = None if 'im_shape' not in preprocessed_input else preprocessed_input[
+ 'im_shape']
+ im_info = None if 'im_info' not in preprocessed_input else preprocessed_input[
+ 'im_info']
+ results = self.postprocess(
+ model_pred,
+ topk=topk,
+ batch_size=len(image_list),
+ im_shape=im_shape,
+ im_info=im_info)
- if self.model_type == "classifier":
- results = self.classifier_postprocess(model_pred, topk)
- elif self.model_type == "detector":
- results = self.detector_postprocess(model_pred, preprocessed_input)
- elif self.model_type == "segmenter":
- results = self.segmenter_postprocess(model_pred,
- preprocessed_input)
return results
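
A rough sketch of the reworked deployment flow (the exported model directory and image paths are illustrative):

```python
import paddlex as pdx

# Load an exported inference model (illustrative path).
predictor = pdx.deploy.Predictor('./inference_model', use_gpu=False)

# Single-image prediction returns the result for that image.
single = predictor.predict('demo.jpg')

# Batch prediction returns one result per input image;
# preprocessing is parallelized across thread_num threads.
batch = predictor.batch_predict(['demo1.jpg', 'demo2.jpg'], thread_num=2)
```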
diff --git a/paddlex/interpret/interpretation_predict.py b/paddlex/interpret/interpretation_predict.py
index 31b3b47e86613f62ba1c63b4ba2041357cc6bdc7..2ebe9a87d7fc80a6379331b7e4d0ef7c2da304bb 100644
--- a/paddlex/interpret/interpretation_predict.py
+++ b/paddlex/interpret/interpretation_predict.py
@@ -15,11 +15,17 @@
import numpy as np
import cv2
import copy
+import paddle.fluid as fluid
+from paddlex.cv.transforms import arrange_transforms
def interpretation_predict(model, images):
images = images.astype('float32')
- model.arrange_transforms(transforms=model.test_transforms, mode='test')
+ arrange_transforms(
+ model.model_type,
+ model.__class__.__name__,
+ transforms=model.test_transforms,
+ mode='test')
tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
model.test_transforms.transforms = model.test_transforms.transforms[-2:]
@@ -29,9 +35,11 @@ def interpretation_predict(model, images):
new_imgs.append(model.test_transforms(images[i])[0])
new_imgs = np.array(new_imgs)
- out = model.exe.run(model.test_prog,
- feed={'image': new_imgs},
- fetch_list=list(model.interpretation_feats.values()))
+ with fluid.scope_guard(model.scope):
+ out = model.exe.run(
+ model.test_prog,
+ feed={'image': new_imgs},
+ fetch_list=list(model.interpretation_feats.values()))
model.test_transforms.transforms = tmp_transforms
diff --git a/paddlex/interpret/visualize.py b/paddlex/interpret/visualize.py
index 6c3570b05d99f359452116542c82cb9a8cbc555b..2d7c096175ce0ff7f10c33696cac42a9f1a64e99 100644
--- a/paddlex/interpret/visualize.py
+++ b/paddlex/interpret/visualize.py
@@ -1,11 +1,11 @@
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
+#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -22,6 +22,7 @@ from .interpretation_predict import interpretation_predict
from .core.interpretation import Interpretation
from .core.normlime_base import precompute_global_classifier
from .core._session_preparation import gen_user_home
+from paddlex.cv.transforms import arrange_transforms
def lime(img_file, model, num_samples=3000, batch_size=50, save_dir='./'):
@@ -48,7 +49,11 @@ def lime(img_file, model, num_samples=3000, batch_size=50, save_dir='./'):
'The interpretation only can deal with the Normal model')
if not osp.exists(save_dir):
os.makedirs(save_dir)
- model.arrange_transforms(transforms=model.test_transforms, mode='test')
+ arrange_transforms(
+ model.model_type,
+ model.__class__.__name__,
+ transforms=model.test_transforms,
+ mode='test')
tmp_transforms = copy.deepcopy(model.test_transforms)
tmp_transforms.transforms = tmp_transforms.transforms[:-2]
img = tmp_transforms(img_file)[0]
@@ -94,7 +99,11 @@ def normlime(img_file,
'The interpretation only can deal with the Normal model')
if not osp.exists(save_dir):
os.makedirs(save_dir)
- model.arrange_transforms(transforms=model.test_transforms, mode='test')
+ arrange_transforms(
+ model.model_type,
+ model.__class__.__name__,
+ transforms=model.test_transforms,
+ mode='test')
tmp_transforms = copy.deepcopy(model.test_transforms)
tmp_transforms.transforms = tmp_transforms.transforms[:-2]
img = tmp_transforms(img_file)[0]
diff --git a/paddlex/tools/base.py b/paddlex/tools/base.py
index 94f9fa672f93154c963e75c47867368c42b535ca..fc5d04cb01dac37ac15976299dc50cac0cb7f5b6 100644
--- a/paddlex/tools/base.py
+++ b/paddlex/tools/base.py
@@ -40,4 +40,5 @@ def get_encoding(path):
f = open(path, 'rb')
data = f.read()
file_encoding = chardet.detect(data).get('encoding')
+ f.close()
return file_encoding
\ No newline at end of file
diff --git a/paddlex/tools/convert.py b/paddlex/tools/convert.py
index b2755f2fbb3ad2876df936612af9a12019523878..02f94ce4835260d0c2d7d86e9b4ec7b205f0943f 100644
--- a/paddlex/tools/convert.py
+++ b/paddlex/tools/convert.py
@@ -15,8 +15,10 @@
# limitations under the License.
from .x2imagenet import EasyData2ImageNet
+from .x2imagenet import JingLing2ImageNet
from .x2coco import LabelMe2COCO
from .x2coco import EasyData2COCO
+from .x2coco import JingLing2COCO
from .x2voc import LabelMe2VOC
from .x2voc import EasyData2VOC
from .x2seg import JingLing2Seg
@@ -24,10 +26,34 @@ from .x2seg import LabelMe2Seg
from .x2seg import EasyData2Seg
easydata2imagenet = EasyData2ImageNet().convert
+jingling2imagenet = JingLing2ImageNet().convert
labelme2coco = LabelMe2COCO().convert
easydata2coco = EasyData2COCO().convert
+jingling2coco = JingLing2COCO().convert
labelme2voc = LabelMe2VOC().convert
easydata2voc = EasyData2VOC().convert
jingling2seg = JingLing2Seg().convert
labelme2seg = LabelMe2Seg().convert
easydata2seg = EasyData2Seg().convert
+
+def dataset_conversion(source, to, pics, anns, save_dir):
+ if source == 'labelme' and to == 'PascalVOC':
+ labelme2voc(pics, anns, save_dir)
+ elif source == 'labelme' and to == 'MSCOCO':
+ labelme2coco(pics, anns, save_dir)
+ elif source == 'labelme' and to == 'SEG':
+ labelme2seg(pics, anns, save_dir)
+ elif source == 'jingling' and to == 'ImageNet':
+ jingling2imagenet(pics, anns, save_dir)
+ elif source == 'jingling' and to == 'MSCOCO':
+ jingling2coco(pics, anns, save_dir)
+ elif source == 'jingling' and to == 'SEG':
+ jingling2seg(pics, anns, save_dir)
+ elif source == 'easydata' and to == 'ImageNet':
+ easydata2imagenet(pics, anns, save_dir)
+ elif source == 'easydata' and to == 'PascalVOC':
+ easydata2voc(pics, anns, save_dir)
+ elif source == 'easydata' and to == 'MSCOCO':
+ easydata2coco(pics, anns, save_dir)
+ elif source == 'easydata' and to == 'SEG':
+ easydata2seg(pics, anns, save_dir)
\ No newline at end of file
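
A minimal sketch of the new one-stop conversion entry point (paths are illustrative; `source` and `to` must match one of the branches above):

```python
from paddlex.tools.convert import dataset_conversion

# Convert a LabelMe-annotated detection dataset to MSCOCO format.
dataset_conversion(
    source='labelme',
    to='MSCOCO',
    pics='./labelme_dataset/images',
    anns='./labelme_dataset/annotations',
    save_dir='./coco_dataset')
```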
diff --git a/paddlex/tools/x2coco.py b/paddlex/tools/x2coco.py
index 48a8b3d8ba4cc6a4261ad809d9e9c957390da40f..a90716084cccccd748e64effbd6a0cc92bccb0fa 100644
--- a/paddlex/tools/x2coco.py
+++ b/paddlex/tools/x2coco.py
@@ -22,6 +22,7 @@ import shutil
import numpy as np
import PIL.ImageDraw
from .base import MyEncoder, is_pic, get_encoding
+from paddlex.utils import path_normalization
class X2COCO(object):
@@ -100,6 +101,7 @@ class LabelMe2COCO(X2COCO):
image["height"] = json_info["imageHeight"]
image["width"] = json_info["imageWidth"]
image["id"] = image_id + 1
+ json_info["imagePath"] = path_normalization(json_info["imagePath"])
image["file_name"] = osp.split(json_info["imagePath"])[-1]
return image
@@ -144,7 +146,7 @@ class LabelMe2COCO(X2COCO):
img_name_part = osp.splitext(img_file)[0]
json_file = osp.join(json_dir, img_name_part + ".json")
if not osp.exists(json_file):
- os.remove(os.remove(osp.join(image_dir, img_file)))
+ os.remove(osp.join(image_dir, img_file))
continue
image_id = image_id + 1
with open(json_file, mode='r', \
@@ -187,6 +189,7 @@ class EasyData2COCO(X2COCO):
image["height"] = img.shape[0]
image["width"] = img.shape[1]
image["id"] = image_id + 1
+ img_path = path_normalization(img_path)
image["file_name"] = osp.split(img_path)[-1]
return image
@@ -216,7 +219,7 @@ class EasyData2COCO(X2COCO):
img_name_part = osp.splitext(img_file)[0]
json_file = osp.join(json_dir, img_name_part + ".json")
if not osp.exists(json_file):
- os.remove(os.remove(osp.join(image_dir, img_file)))
+ os.remove(osp.join(image_dir, img_file))
continue
image_id = image_id + 1
with open(json_file, mode='r', \
@@ -255,3 +258,108 @@ class EasyData2COCO(X2COCO):
self.annotations_list.append(
self.generate_polygon_anns_field(points, segmentation, label, image_id, object_id,
label_to_num))
+
+
+class JingLing2COCO(X2COCO):
+ """将使用EasyData标注的检测或分割数据集转换为COCO数据集。
+ """
+ def __init__(self):
+ super(JingLing2COCO, self).__init__()
+
+ def generate_images_field(self, json_info, image_id):
+ image = {}
+ image["height"] = json_info["size"]["height"]
+ image["width"] = json_info["size"]["width"]
+ image["id"] = image_id + 1
+ json_info["path"] = path_normalization(json_info["path"])
+ image["file_name"] = osp.split(json_info["path"])[-1]
+ return image
+
+ def generate_polygon_anns_field(self, height, width,
+ points, label, image_id,
+ object_id, label_to_num):
+ annotation = {}
+ annotation["segmentation"] = [list(np.asarray(points).flatten())]
+ annotation["iscrowd"] = 0
+ annotation["image_id"] = image_id + 1
+ annotation["bbox"] = list(map(float, self.get_bbox(height, width, points)))
+ annotation["area"] = annotation["bbox"][2] * annotation["bbox"][3]
+ annotation["category_id"] = label_to_num[label]
+ annotation["id"] = object_id + 1
+ return annotation
+
+ def get_bbox(self, height, width, points):
+ polygons = points
+ mask = np.zeros([height, width], dtype=np.uint8)
+ mask = PIL.Image.fromarray(mask)
+ xy = list(map(tuple, polygons))
+ PIL.ImageDraw.Draw(mask).polygon(xy=xy, outline=1, fill=1)
+ mask = np.array(mask, dtype=bool)
+ index = np.argwhere(mask == 1)
+ rows = index[:, 0]
+ cols = index[:, 1]
+ left_top_r = np.min(rows)
+ left_top_c = np.min(cols)
+ right_bottom_r = np.max(rows)
+ right_bottom_c = np.max(cols)
+ return [
+ left_top_c, left_top_r, right_bottom_c - left_top_c,
+ right_bottom_r - left_top_r
+ ]
+
+ def parse_json(self, img_dir, json_dir):
+ image_id = -1
+ object_id = -1
+ labels_list = []
+ label_to_num = {}
+ for img_file in os.listdir(img_dir):
+ img_name_part = osp.splitext(img_file)[0]
+ json_file = osp.join(json_dir, img_name_part + ".json")
+ if not osp.exists(json_file):
+ os.remove(osp.join(img_dir, img_file))
+ continue
+ image_id = image_id + 1
+ with open(json_file, mode='r', \
+ encoding=get_encoding(json_file)) as j:
+ json_info = json.load(j)
+ img_info = self.generate_images_field(json_info, image_id)
+ self.images_list.append(img_info)
+ anns_type = "bndbox"
+ for i, obj in enumerate(json_info["outputs"]["object"]):
+ if i == 0:
+ if "polygon" in obj:
+ anns_type = "polygon"
+ else:
+ if anns_type not in obj:
+ continue
+ object_id = object_id + 1
+ label = obj["name"]
+ if label not in labels_list:
+ self.categories_list.append(\
+ self.generate_categories_field(label, labels_list))
+ labels_list.append(label)
+ label_to_num[label] = len(labels_list)
+ if anns_type == "polygon":
+ points = []
+ for j in range(int(len(obj["polygon"]) / 2.0)):
+ points.append([obj["polygon"]["x" + str(j + 1)],
+ obj["polygon"]["y" + str(j + 1)]])
+ self.annotations_list.append(
+ self.generate_polygon_anns_field(json_info["size"]["height"],
+ json_info["size"]["width"],
+ points,
+ label,
+ image_id,
+ object_id,
+ label_to_num))
+ if anns_type == "bndbox":
+ points = []
+ points.append([obj["bndbox"]["xmin"], obj["bndbox"]["ymin"]])
+ points.append([obj["bndbox"]["xmax"], obj["bndbox"]["ymax"]])
+ points.append([obj["bndbox"]["xmin"], obj["bndbox"]["ymax"]])
+ points.append([obj["bndbox"]["xmax"], obj["bndbox"]["ymin"]])
+ self.annotations_list.append(
+ self.generate_rectangle_anns_field(points, label, image_id,
+ object_id, label_to_num))
+
+
\ No newline at end of file
diff --git a/paddlex/tools/x2imagenet.py b/paddlex/tools/x2imagenet.py
index 676eaec8d1193c230b01695c968e76536e8632e0..0b9de7c15e2effc2a90726f4c1a0ae964df36e86 100644
--- a/paddlex/tools/x2imagenet.py
+++ b/paddlex/tools/x2imagenet.py
@@ -22,9 +22,8 @@ import shutil
import numpy as np
from .base import MyEncoder, is_pic, get_encoding
-class EasyData2ImageNet(object):
- """将使用EasyData标注的分类数据集转换为COCO数据集。
- """
+
+class X2ImageNet(object):
def __init__(self):
pass
@@ -46,8 +45,8 @@ class EasyData2ImageNet(object):
continue
with open(json_file, mode="r", \
encoding=get_encoding(json_file)) as j:
- json_info = json.load(j)
- for output in json_info['labels']:
+ json_info = self.get_json_info(j)
+ for output in json_info:
cls_name = output['name']
new_image_dir = osp.join(dataset_save_dir, cls_name)
if not osp.exists(new_image_dir):
@@ -55,4 +54,28 @@ class EasyData2ImageNet(object):
if is_pic(img_name):
shutil.copyfile(
osp.join(image_dir, img_name),
- osp.join(new_image_dir, img_name))
\ No newline at end of file
+ osp.join(new_image_dir, img_name))
+
+
+class EasyData2ImageNet(X2ImageNet):
+ """将使用EasyData标注的分类数据集转换为ImageNet数据集。
+ """
+ def __init__(self):
+ super(EasyData2ImageNet, self).__init__()
+
+ def get_json_info(self, json_file):
+ json_info = json.load(json_file)
+ json_info = json_info['labels']
+ return json_info
+
+class JingLing2ImageNet(X2ImageNet):
+ """将使用标注精灵标注的分类数据集转换为ImageNet数据集。
+ """
+ def __init__(self):
+ super(JingLing2ImageNet, self).__init__()
+
+ def get_json_info(self, json_file):
+ json_info = json.load(json_file)
+ json_info = json_info['outputs']['object']
+ return json_info
+
\ No newline at end of file
diff --git a/paddlex/utils/__init__.py b/paddlex/utils/__init__.py
index 19c86d754b9b99219fdbf7be4b5e7fa6cffe6346..2e7d1bb3899fd42490416c391ec8f60e54493b5f 100644
--- a/paddlex/utils/__init__.py
+++ b/paddlex/utils/__init__.py
@@ -17,6 +17,7 @@ from . import logging
from . import utils
from . import save
from .utils import seconds_to_hms
+from .utils import path_normalization
from .download import download
from .download import decompress
from .download import download_and_decompress
diff --git a/paddlex/utils/utils.py b/paddlex/utils/utils.py
index d9005875ea6c793269a8c67e065b69bd7100dbe8..6af574bb3403bb47f6f41dcb1223ec43407f8e92 100644
--- a/paddlex/utils/utils.py
+++ b/paddlex/utils/utils.py
@@ -20,6 +20,7 @@ import numpy as np
import six
import yaml
import math
+import platform
from . import logging
@@ -49,18 +50,26 @@ def get_environ_info():
info['num'] = fluid.core.get_cuda_device_count()
return info
+def path_normalization(path):
+ win_sep = "\\"
+ other_sep = "/"
+ if platform.system() == "Windows":
+ path = win_sep.join(path.split(other_sep))
+ else:
+ path = other_sep.join(path.split(win_sep))
+ return path
def parse_param_file(param_file, return_shape=True):
from paddle.fluid.proto.framework_pb2 import VarType
f = open(param_file, 'rb')
- version = np.fromstring(f.read(4), dtype='int32')
- lod_level = np.fromstring(f.read(8), dtype='int64')
+ version = np.frombuffer(f.read(4), dtype='int32')
+ lod_level = np.frombuffer(f.read(8), dtype='int64')
for i in range(int(lod_level)):
- _size = np.fromstring(f.read(8), dtype='int64')
+ _size = np.frombuffer(f.read(8), dtype='int64')
_ = f.read(_size)
- version = np.fromstring(f.read(4), dtype='int32')
+ version = np.frombuffer(f.read(4), dtype='int32')
tensor_desc = VarType.TensorDesc()
- tensor_desc_size = np.fromstring(f.read(4), dtype='int32')
+ tensor_desc_size = np.frombuffer(f.read(4), dtype='int32')
tensor_desc.ParseFromString(f.read(int(tensor_desc_size)))
tensor_shape = tuple(tensor_desc.dims)
if return_shape:
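
For illustration, what the new `path_normalization` helper does (the output depends on the OS the code runs on):

```python
from paddlex.utils import path_normalization

# On Linux/macOS backslashes are rewritten to forward slashes:
#   'a\b/c.jpg' -> 'a/b/c.jpg'
# On Windows forward slashes are rewritten to backslashes:
#   'a\b/c.jpg' -> 'a\b\c.jpg'
print(path_normalization('a\\b/c.jpg'))
```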
diff --git a/requirements.txt b/requirements.txt
index 5a50e8b80c712ea98b488ed7a00ff01f906748a7..f7804c2e632fcc7cad515e42e325ba797222f81f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,5 @@ pycocotools
visualdl >= 2.0.0b
paddleslim == 1.0.1
shapely
+x2paddle
+paddlepaddle-gpu
diff --git a/setup.py b/setup.py
index 1f42da4da4099b6b651a41b65aaedde7b76093ca..1894367a645ec33979ec0db2843ecdb50a6a6380 100644
--- a/setup.py
+++ b/setup.py
@@ -19,7 +19,7 @@ long_description = "PaddlePaddle Entire Process Development Toolkit"
setuptools.setup(
name="paddlex",
- version='1.0.7',
+ version='1.1.0',
author="paddlex",
author_email="paddlex@baidu.com",
description=long_description,
diff --git a/tutorials/compress/README.md b/tutorials/compress/README.md
index 5bfcbd44ea63ac4f03932cb3b476e2a47f054f50..e85b4485143bcaa6b0f8cc7845eaa05ca17e83c8 100644
--- a/tutorials/compress/README.md
+++ b/tutorials/compress/README.md
@@ -22,9 +22,9 @@ PaddleX提供了两种裁剪训练方式,
# 训练模型
python classification/mobilenetv2.py
# 计算模型参数敏感度
-python classification/cal_sensitivities_file.py --model_dir=output/mobilenetv2/epoch_10 --save_file=./sensitivities.data
+python classification/cal_sensitivities_file.py --model_dir=output/mobilenetv2/best_model --save_file=./sensitivities.data
# 裁剪训练
-python classification/mobilenetv2.py --model_dir=output/mobilenetv2/epoch_10 --sensitivities_file=./sensitivities.data --eval_metric_loss=0.05
+python classification/mobilenetv2.py --model_dir=output/mobilenetv2/best_model --sensitivities_file=./sensitivities.data --eval_metric_loss=0.05
```
2. 第2种方法,使用PaddleX预先计算好的参数敏感度文件
```
diff --git a/tutorials/compress/classification/README.md b/tutorials/compress/classification/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e711c1906366ad4d66798ccf9ef95b79804b5805
--- /dev/null
+++ b/tutorials/compress/classification/README.md
@@ -0,0 +1,3 @@
+# Notes
+
+See the [documentation](../README.md) for usage.
diff --git a/tutorials/compress/detection/README.md b/tutorials/compress/detection/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e711c1906366ad4d66798ccf9ef95b79804b5805
--- /dev/null
+++ b/tutorials/compress/detection/README.md
@@ -0,0 +1,3 @@
+# Notes
+
+See the [documentation](../README.md) for usage.
diff --git a/tutorials/compress/segmentation/README.md b/tutorials/compress/segmentation/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e711c1906366ad4d66798ccf9ef95b79804b5805
--- /dev/null
+++ b/tutorials/compress/segmentation/README.md
@@ -0,0 +1,3 @@
+# Notes
+
+See the [documentation](../README.md) for usage.
diff --git a/tutorials/interpret/lime.py b/tutorials/interpret/lime.py
index ae862aa9e41f4ad95c335c8e2a6de5a3b76a4ea2..8028b50e12a470a00cbd41e16029cfe07c7a94d8 100644
--- a/tutorials/interpret/lime.py
+++ b/tutorials/interpret/lime.py
@@ -1,8 +1,3 @@
-import os
-# Use GPU card 0
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
-import os.path as osp
import paddlex as pdx
# Download and decompress the mini ImageNet fruit & vegetable classification dataset
@@ -18,6 +13,4 @@ model = pdx.load_model('mini_imagenet_veg_mobilenetv2')
# Interpretability visualization
pdx.interpret.lime(
- 'mini_imagenet_veg/mushroom/n07734744_1106.JPEG',
- model,
- save_dir='./')
+ 'mini_imagenet_veg/mushroom/n07734744_1106.JPEG', model, save_dir='./')
diff --git a/tutorials/train/image_classification/alexnet.py b/tutorials/train/image_classification/alexnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..bec066962abd8955f6021c8d578e6543eefa0a70
--- /dev/null
+++ b/tutorials/train/image_classification/alexnet.py
@@ -0,0 +1,59 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.cls import transforms
+import paddlex as pdx
+
+# Download and decompress the vegetables classification dataset
+veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
+pdx.utils.download_and_decompress(veg_dataset, path='./')
+
+# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/cls_transforms.html
+train_transforms = transforms.Compose([
+ transforms.RandomCrop(crop_size=224),
+ transforms.RandomHorizontalFlip(),
+ transforms.Normalize()
+])
+eval_transforms = transforms.Compose([
+ transforms.ResizeByShort(short_size=256),
+ transforms.CenterCrop(crop_size=224),
+ transforms.Normalize()
+])
+
+# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-imagenet
+train_dataset = pdx.datasets.ImageNet(
+ data_dir='vegetables_cls',
+ file_list='vegetables_cls/train_list.txt',
+ label_list='vegetables_cls/labels.txt',
+ transforms=train_transforms,
+ shuffle=True)
+eval_dataset = pdx.datasets.ImageNet(
+ data_dir='vegetables_cls',
+ file_list='vegetables_cls/val_list.txt',
+ label_list='vegetables_cls/labels.txt',
+ transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be inspected with VisualDL
+# Launch VisualDL with: visualdl --logdir output/alexnet/vdl_log --port 8001
+# Then open http://0.0.0.0:8001 or http://localhost:8001 in a browser
+# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
+model = pdx.cls.AlexNet(num_classes=len(train_dataset.labels))
+# AlexNet requires a fixed, explicit input_shape
+model.fixed_input_shape = [224, 224]
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/classification.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
+model.train(
+ num_epochs=10,
+ train_dataset=train_dataset,
+ train_batch_size=32,
+ eval_dataset=eval_dataset,
+ lr_decay_epochs=[4, 6, 8],
+ learning_rate=0.0025,
+ save_dir='output/alexnet',
+ use_vdl=True)
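Because `fixed_input_shape` is pinned to `[224, 224]` here, inference must use the same resolution. A hedged sketch of loading the resulting checkpoint for prediction with the standard PaddleX 1.x loading API; `test.jpg` is a placeholder path, not part of the diff:

```
# Hedged sketch: load the best checkpoint saved by the script above and
# classify one image. 'test.jpg' is a placeholder path.
import paddlex as pdx

model = pdx.load_model('output/alexnet/best_model')
result = model.predict('test.jpg')  # top-k categories with scores
print(result)
```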
diff --git a/tutorials/train/classification/mobilenetv2.py b/tutorials/train/image_classification/mobilenetv2.py
similarity index 71%
rename from tutorials/train/classification/mobilenetv2.py
rename to tutorials/train/image_classification/mobilenetv2.py
index 3f637125b760de6d992d6a062e4d456bf5038426..7533aab7bc0fc2498d17fd1bd554f595253c05b8 100644
--- a/tutorials/train/classification/mobilenetv2.py
+++ b/tutorials/train/image_classification/mobilenetv2.py
@@ -1,5 +1,6 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
import os
-# Use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.cls import transforms
@@ -10,18 +11,20 @@ veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
pdx.utils.download_and_decompress(veg_dataset, path='./')
# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/cls_transforms.html
train_transforms = transforms.Compose([
- transforms.RandomCrop(crop_size=224),
+ transforms.RandomCrop(crop_size=224),
transforms.RandomHorizontalFlip(),
transforms.Normalize()
])
eval_transforms = transforms.Compose([
transforms.ResizeByShort(short_size=256),
- transforms.CenterCrop(crop_size=224),
+ transforms.CenterCrop(crop_size=224),
transforms.Normalize()
])
# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-imagenet
train_dataset = pdx.datasets.ImageNet(
data_dir='vegetables_cls',
file_list='vegetables_cls/train_list.txt',
@@ -40,6 +43,9 @@ eval_dataset = pdx.datasets.ImageNet(
# Then open http://0.0.0.0:8001 in a browser
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels))
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/classification.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
model.train(
num_epochs=10,
train_dataset=train_dataset,
diff --git a/tutorials/train/image_classification/mobilenetv3_small_ssld.py b/tutorials/train/image_classification/mobilenetv3_small_ssld.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f13312d835b582ec673635f11b4c3fff1c95dda
--- /dev/null
+++ b/tutorials/train/image_classification/mobilenetv3_small_ssld.py
@@ -0,0 +1,57 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.cls import transforms
+import paddlex as pdx
+
+# Download and decompress the vegetables classification dataset
+veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
+pdx.utils.download_and_decompress(veg_dataset, path='./')
+
+# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/cls_transforms.html
+train_transforms = transforms.Compose([
+ transforms.RandomCrop(crop_size=224),
+ transforms.RandomHorizontalFlip(),
+ transforms.Normalize()
+])
+eval_transforms = transforms.Compose([
+ transforms.ResizeByShort(short_size=256),
+ transforms.CenterCrop(crop_size=224),
+ transforms.Normalize()
+])
+
+# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-imagenet
+train_dataset = pdx.datasets.ImageNet(
+ data_dir='vegetables_cls',
+ file_list='vegetables_cls/train_list.txt',
+ label_list='vegetables_cls/labels.txt',
+ transforms=train_transforms,
+ shuffle=True)
+eval_dataset = pdx.datasets.ImageNet(
+ data_dir='vegetables_cls',
+ file_list='vegetables_cls/val_list.txt',
+ label_list='vegetables_cls/labels.txt',
+ transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be inspected with VisualDL
+# Launch VisualDL with: visualdl --logdir output/mobilenetv3_small_ssld/vdl_log --port 8001
+# Then open http://0.0.0.0:8001 in a browser
+# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
+model = pdx.cls.MobileNetV3_small_ssld(num_classes=len(train_dataset.labels))
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/classification.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
+model.train(
+ num_epochs=10,
+ train_dataset=train_dataset,
+ train_batch_size=32,
+ eval_dataset=eval_dataset,
+ lr_decay_epochs=[4, 6, 8],
+ learning_rate=0.025,
+ save_dir='output/mobilenetv3_small_ssld',
+ use_vdl=True)
diff --git a/tutorials/train/image_classification/resnet50_vd_ssld.py b/tutorials/train/image_classification/resnet50_vd_ssld.py
new file mode 100644
index 0000000000000000000000000000000000000000..b72ebc52d74f6a0023b830c33f5afc31fb4b7196
--- /dev/null
+++ b/tutorials/train/image_classification/resnet50_vd_ssld.py
@@ -0,0 +1,57 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.cls import transforms
+import paddlex as pdx
+
+# Download and decompress the vegetables classification dataset
+veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
+pdx.utils.download_and_decompress(veg_dataset, path='./')
+
+# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/cls_transforms.html
+train_transforms = transforms.Compose([
+ transforms.RandomCrop(crop_size=224),
+ transforms.RandomHorizontalFlip(),
+ transforms.Normalize()
+])
+eval_transforms = transforms.Compose([
+ transforms.ResizeByShort(short_size=256),
+ transforms.CenterCrop(crop_size=224),
+ transforms.Normalize()
+])
+
+# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-imagenet
+train_dataset = pdx.datasets.ImageNet(
+ data_dir='vegetables_cls',
+ file_list='vegetables_cls/train_list.txt',
+ label_list='vegetables_cls/labels.txt',
+ transforms=train_transforms,
+ shuffle=True)
+eval_dataset = pdx.datasets.ImageNet(
+ data_dir='vegetables_cls',
+ file_list='vegetables_cls/val_list.txt',
+ label_list='vegetables_cls/labels.txt',
+ transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be inspected with VisualDL
+# Launch VisualDL with: visualdl --logdir output/resnet50_vd_ssld/vdl_log --port 8001
+# Then open http://0.0.0.0:8001 in a browser
+# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
+model = pdx.cls.ResNet50_vd_ssld(num_classes=len(train_dataset.labels))
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/classification.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
+model.train(
+ num_epochs=10,
+ train_dataset=train_dataset,
+ train_batch_size=32,
+ eval_dataset=eval_dataset,
+ lr_decay_epochs=[4, 6, 8],
+ learning_rate=0.025,
+ save_dir='output/resnet50_vd_ssld',
+ use_vdl=True)
diff --git a/tutorials/train/classification/resnet50.py b/tutorials/train/image_classification/shufflenetv2.py
similarity index 55%
rename from tutorials/train/classification/resnet50.py
rename to tutorials/train/image_classification/shufflenetv2.py
index 2e5a9b4820c7e66a83abaca0b13e057b15ceb830..cdfa1889ba926f4728277929b76536ddaea75c04 100644
--- a/tutorials/train/classification/resnet50.py
+++ b/tutorials/train/image_classification/shufflenetv2.py
@@ -1,8 +1,8 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
import os
-# Use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-import paddle.fluid as fluid
from paddlex.cls import transforms
import paddlex as pdx
@@ -11,16 +11,20 @@ veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
pdx.utils.download_and_decompress(veg_dataset, path='./')
# Define the transforms for training and evaluation
-train_transforms = transforms.Compose(
- [transforms.RandomCrop(crop_size=224),
- transforms.Normalize()])
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/cls_transforms.html
+train_transforms = transforms.Compose([
+ transforms.RandomCrop(crop_size=224),
+ transforms.RandomHorizontalFlip(),
+ transforms.Normalize()
+])
eval_transforms = transforms.Compose([
transforms.ResizeByShort(short_size=256),
- transforms.CenterCrop(crop_size=224),
+ transforms.CenterCrop(crop_size=224),
transforms.Normalize()
])
# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-imagenet
train_dataset = pdx.datasets.ImageNet(
data_dir='vegetables_cls',
file_list='vegetables_cls/train_list.txt',
@@ -33,26 +37,21 @@ eval_dataset = pdx.datasets.ImageNet(
label_list='vegetables_cls/labels.txt',
transforms=eval_transforms)
-# PaddleX supports custom-built optimizers
-step_each_epoch = train_dataset.num_samples // 32
-learning_rate = fluid.layers.cosine_decay(
- learning_rate=0.025, step_each_epoch=step_each_epoch, epochs=10)
-optimizer = fluid.optimizer.Momentum(
- learning_rate=learning_rate,
- momentum=0.9,
- regularization=fluid.regularizer.L2Decay(4e-5))
-
# Initialize the model and start training
# Training metrics can be inspected with VisualDL
-# Launch VisualDL with: visualdl --logdir output/resnet50/vdl_log --port 8001
+# Launch VisualDL with: visualdl --logdir output/shufflenetv2/vdl_log --port 8001
# Then open http://0.0.0.0:8001 in a browser
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
-model = pdx.cls.ResNet50(num_classes=len(train_dataset.labels))
+model = pdx.cls.ShuffleNetV2(num_classes=len(train_dataset.labels))
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/classification.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
model.train(
num_epochs=10,
train_dataset=train_dataset,
train_batch_size=32,
eval_dataset=eval_dataset,
- optimizer=optimizer,
- save_dir='output/resnet50',
+ lr_decay_epochs=[4, 6, 8],
+ learning_rate=0.025,
+ save_dir='output/shufflenetv2',
use_vdl=True)
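This rename also drops the hand-built `fluid` optimizer in favor of `train()`'s built-in schedule, where `lr_decay_epochs` defines a piecewise decay. A hedged sketch of that schedule, assuming the conventional 0.1 step factor (PaddleX builds this internally; treat the exact constant as an assumption, not something this diff confirms):

```
# Hedged sketch of the piecewise schedule implied by lr_decay_epochs above.
# gamma=0.1 is an assumed default decay factor.
def piecewise_lr(base_lr, lr_decay_epochs, epoch, gamma=0.1):
    """Learning rate in effect at a given epoch."""
    passed = sum(1 for e in lr_decay_epochs if epoch >= e)
    return base_lr * gamma ** passed

# With learning_rate=0.025 and lr_decay_epochs=[4, 6, 8]:
assert piecewise_lr(0.025, [4, 6, 8], 0) == 0.025
assert abs(piecewise_lr(0.025, [4, 6, 8], 5) - 0.0025) < 1e-9
assert abs(piecewise_lr(0.025, [4, 6, 8], 9) - 0.000025) < 1e-12
```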
diff --git a/tutorials/train/detection/mask_rcnn_r50_fpn.py b/tutorials/train/instance_segmentation/mask_rcnn_hrnet_fpn.py
similarity index 63%
rename from tutorials/train/detection/mask_rcnn_r50_fpn.py
rename to tutorials/train/instance_segmentation/mask_rcnn_hrnet_fpn.py
index 15a6b840528fe7948c80f4cf605498cf55b5c918..f78446546cd793f96cb074f0a1701a718f7d84b4 100644
--- a/tutorials/train/detection/mask_rcnn_r50_fpn.py
+++ b/tutorials/train/instance_segmentation/mask_rcnn_hrnet_fpn.py
@@ -1,5 +1,6 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
import os
-# Use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
@@ -10,20 +11,22 @@ xiaoduxiong_dataset = 'https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_de
pdx.utils.download_and_decompress(xiaoduxiong_dataset, path='./')
# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/det_transforms.html
train_transforms = transforms.Compose([
- transforms.RandomHorizontalFlip(),
+ transforms.RandomHorizontalFlip(),
transforms.Normalize(),
- transforms.ResizeByShort(short_size=800, max_size=1333),
+ transforms.ResizeByShort(short_size=800, max_size=1333),
transforms.Padding(coarsest_stride=32)
])
eval_transforms = transforms.Compose([
transforms.Normalize(),
transforms.ResizeByShort(short_size=800, max_size=1333),
- transforms.Padding(coarsest_stride=32)
+ transforms.Padding(coarsest_stride=32),
])
# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-cocodetection
train_dataset = pdx.datasets.CocoDetection(
data_dir='xiaoduxiong_ins_det/JPEGImages',
ann_file='xiaoduxiong_ins_det/train.json',
@@ -41,7 +44,12 @@ eval_dataset = pdx.datasets.CocoDetection(
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
# num_classes must count the background class, i.e. number of object classes + 1
num_classes = len(train_dataset.labels) + 1
-model = pdx.det.MaskRCNN(num_classes=num_classes)
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/instance_segmentation.html#maskrcnn
+model = pdx.det.MaskRCNN(num_classes=num_classes, backbone='HRNet_W18')
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/instance_segmentation.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
model.train(
num_epochs=12,
train_dataset=train_dataset,
@@ -50,5 +58,5 @@ model.train(
learning_rate=0.00125,
warmup_steps=10,
lr_decay_epochs=[8, 11],
- save_dir='output/mask_rcnn_r50_fpn',
+ save_dir='output/mask_rcnn_hrnet_fpn',
use_vdl=True)
diff --git a/tutorials/train/instance_segmentation/mask_rcnn_r18_fpn.py b/tutorials/train/instance_segmentation/mask_rcnn_r18_fpn.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc16b66b3941e0d639fd45dbaa691ec51bc5cfbd
--- /dev/null
+++ b/tutorials/train/instance_segmentation/mask_rcnn_r18_fpn.py
@@ -0,0 +1,62 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.det import transforms
+import paddlex as pdx
+
+# Download and decompress the Xiaoduxiong instance segmentation dataset
+xiaoduxiong_dataset = 'https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_det.tar.gz'
+pdx.utils.download_and_decompress(xiaoduxiong_dataset, path='./')
+
+# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/det_transforms.html
+train_transforms = transforms.Compose([
+ transforms.RandomHorizontalFlip(),
+ transforms.Normalize(),
+ transforms.ResizeByShort(short_size=800, max_size=1333),
+ transforms.Padding(coarsest_stride=32)
+])
+
+eval_transforms = transforms.Compose([
+ transforms.Normalize(),
+ transforms.ResizeByShort(short_size=800, max_size=1333),
+ transforms.Padding(coarsest_stride=32)
+])
+
+# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-cocodetection
+train_dataset = pdx.datasets.CocoDetection(
+ data_dir='xiaoduxiong_ins_det/JPEGImages',
+ ann_file='xiaoduxiong_ins_det/train.json',
+ transforms=train_transforms,
+ shuffle=True)
+eval_dataset = pdx.datasets.CocoDetection(
+ data_dir='xiaoduxiong_ins_det/JPEGImages',
+ ann_file='xiaoduxiong_ins_det/val.json',
+ transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be inspected with VisualDL
+# Launch VisualDL with: visualdl --logdir output/mask_rcnn_r18_fpn/vdl_log --port 8001
+# Then open http://0.0.0.0:8001 in a browser
+# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
+# num_classes must count the background class, i.e. number of object classes + 1
+num_classes = len(train_dataset.labels) + 1
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/instance_segmentation.html#maskrcnn
+model = pdx.det.MaskRCNN(num_classes=num_classes, backbone='ResNet18')
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/instance_segmentation.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
+model.train(
+ num_epochs=12,
+ train_dataset=train_dataset,
+ train_batch_size=1,
+ eval_dataset=eval_dataset,
+ learning_rate=0.00125,
+ warmup_steps=10,
+ lr_decay_epochs=[8, 11],
+ save_dir='output/mask_rcnn_r18_fpn',
+ use_vdl=True)
diff --git a/tutorials/train/instance_segmentation/mask_rcnn_r50_fpn.py b/tutorials/train/instance_segmentation/mask_rcnn_r50_fpn.py
new file mode 100644
index 0000000000000000000000000000000000000000..e87c88e5d8feba36df1bd65430058a4f413ba73c
--- /dev/null
+++ b/tutorials/train/instance_segmentation/mask_rcnn_r50_fpn.py
@@ -0,0 +1,62 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.det import transforms
+import paddlex as pdx
+
+# Download and decompress the Xiaoduxiong instance segmentation dataset
+xiaoduxiong_dataset = 'https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_det.tar.gz'
+pdx.utils.download_and_decompress(xiaoduxiong_dataset, path='./')
+
+# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/det_transforms.html
+train_transforms = transforms.Compose([
+ transforms.RandomHorizontalFlip(),
+ transforms.Normalize(),
+ transforms.ResizeByShort(short_size=800, max_size=1333),
+ transforms.Padding(coarsest_stride=32)
+])
+
+eval_transforms = transforms.Compose([
+ transforms.Normalize(),
+ transforms.ResizeByShort(short_size=800, max_size=1333),
+ transforms.Padding(coarsest_stride=32)
+])
+
+# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-cocodetection
+train_dataset = pdx.datasets.CocoDetection(
+ data_dir='xiaoduxiong_ins_det/JPEGImages',
+ ann_file='xiaoduxiong_ins_det/train.json',
+ transforms=train_transforms,
+ shuffle=True)
+eval_dataset = pdx.datasets.CocoDetection(
+ data_dir='xiaoduxiong_ins_det/JPEGImages',
+ ann_file='xiaoduxiong_ins_det/val.json',
+ transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be inspected with VisualDL
+# Launch VisualDL with: visualdl --logdir output/mask_rcnn_r50_fpn/vdl_log --port 8001
+# Then open http://0.0.0.0:8001 in a browser
+# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
+# num_classes must count the background class, i.e. number of object classes + 1
+num_classes = len(train_dataset.labels) + 1
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/instance_segmentation.html#maskrcnn
+model = pdx.det.MaskRCNN(num_classes=num_classes, backbone='ResNet50')
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/instance_segmentation.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
+model.train(
+ num_epochs=12,
+ train_dataset=train_dataset,
+ train_batch_size=1,
+ eval_dataset=eval_dataset,
+ learning_rate=0.00125,
+ warmup_steps=10,
+ lr_decay_epochs=[8, 11],
+ save_dir='output/mask_rcnn_r50_fpn',
+ use_vdl=True)
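The `train_batch_size=1` / `learning_rate=0.00125` pairing in these R-CNN scripts matches the common linear-scaling heuristic (0.02 for a total batch of 16, as in the Detectron convention). Treat the helper below as an illustration of that rule of thumb, not as PaddleX API:

```
# Hedged sketch: linear learning-rate scaling for the R-CNN configs above.
# Assumed convention: lr = 0.02 for a total batch of 16 images.
def scaled_lr(batch_size_per_gpu, num_gpus, base_lr=0.02, base_batch=16):
    total_batch = batch_size_per_gpu * num_gpus
    return base_lr * total_batch / base_batch

# train_batch_size=1 on a single GPU gives the 0.00125 used above.
assert scaled_lr(1, 1) == 0.00125
```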
diff --git a/tutorials/train/object_detection/faster_rcnn_hrnet_fpn.py b/tutorials/train/object_detection/faster_rcnn_hrnet_fpn.py
new file mode 100644
index 0000000000000000000000000000000000000000..e46d3ae56b57aa90cdcecdcce3ad3ee1ad67d098
--- /dev/null
+++ b/tutorials/train/object_detection/faster_rcnn_hrnet_fpn.py
@@ -0,0 +1,63 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.det import transforms
+import paddlex as pdx
+
+# Download and decompress the insect detection dataset
+insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
+pdx.utils.download_and_decompress(insect_dataset, path='./')
+
+# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/det_transforms.html
+train_transforms = transforms.Compose([
+ transforms.RandomHorizontalFlip(),
+ transforms.Normalize(),
+ transforms.ResizeByShort(short_size=800, max_size=1333),
+ transforms.Padding(coarsest_stride=32)
+])
+
+eval_transforms = transforms.Compose([
+ transforms.Normalize(),
+ transforms.ResizeByShort(short_size=800, max_size=1333),
+ transforms.Padding(coarsest_stride=32)
+])
+
+# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-vocdetection
+train_dataset = pdx.datasets.VOCDetection(
+ data_dir='insect_det',
+ file_list='insect_det/train_list.txt',
+ label_list='insect_det/labels.txt',
+ transforms=train_transforms,
+ shuffle=True)
+eval_dataset = pdx.datasets.VOCDetection(
+ data_dir='insect_det',
+ file_list='insect_det/val_list.txt',
+ label_list='insect_det/labels.txt',
+ transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be inspected with VisualDL
+# Launch VisualDL with: visualdl --logdir output/faster_rcnn_hrnet_fpn/vdl_log --port 8001
+# Then open http://0.0.0.0:8001 in a browser
+# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
+# num_classes must count the background class, i.e. number of object classes + 1
+num_classes = len(train_dataset.labels) + 1
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#paddlex-det-fasterrcnn
+model = pdx.det.FasterRCNN(num_classes=num_classes, backbone='HRNet_W18')
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#id1
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
+model.train(
+ num_epochs=12,
+ train_dataset=train_dataset,
+ train_batch_size=2,
+ eval_dataset=eval_dataset,
+ learning_rate=0.0025,
+ lr_decay_epochs=[8, 11],
+ save_dir='output/faster_rcnn_hrnet_fpn',
+ use_vdl=True)
diff --git a/tutorials/train/object_detection/faster_rcnn_r18_fpn.py b/tutorials/train/object_detection/faster_rcnn_r18_fpn.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ae82d3ec8166159649f09d33b3f2ad094c3c6ee
--- /dev/null
+++ b/tutorials/train/object_detection/faster_rcnn_r18_fpn.py
@@ -0,0 +1,63 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.det import transforms
+import paddlex as pdx
+
+# Download and decompress the insect detection dataset
+insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
+pdx.utils.download_and_decompress(insect_dataset, path='./')
+
+# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/det_transforms.html
+train_transforms = transforms.Compose([
+ transforms.RandomHorizontalFlip(),
+ transforms.Normalize(),
+ transforms.ResizeByShort(short_size=800, max_size=1333),
+ transforms.Padding(coarsest_stride=32)
+])
+
+eval_transforms = transforms.Compose([
+ transforms.Normalize(),
+ transforms.ResizeByShort(short_size=800, max_size=1333),
+ transforms.Padding(coarsest_stride=32),
+])
+
+# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-vocdetection
+train_dataset = pdx.datasets.VOCDetection(
+ data_dir='insect_det',
+ file_list='insect_det/train_list.txt',
+ label_list='insect_det/labels.txt',
+ transforms=train_transforms,
+ shuffle=True)
+eval_dataset = pdx.datasets.VOCDetection(
+ data_dir='insect_det',
+ file_list='insect_det/val_list.txt',
+ label_list='insect_det/labels.txt',
+ transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be inspected with VisualDL
+# Launch VisualDL with: visualdl --logdir output/faster_rcnn_r18_fpn/vdl_log --port 8001
+# Then open http://0.0.0.0:8001 in a browser
+# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
+# num_classes must count the background class, i.e. number of object classes + 1
+num_classes = len(train_dataset.labels) + 1
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#paddlex-det-fasterrcnn
+model = pdx.det.FasterRCNN(num_classes=num_classes, backbone='ResNet18')
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#id1
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
+model.train(
+ num_epochs=12,
+ train_dataset=train_dataset,
+ train_batch_size=2,
+ eval_dataset=eval_dataset,
+ learning_rate=0.0025,
+ lr_decay_epochs=[8, 11],
+ save_dir='output/faster_rcnn_r18_fpn',
+ use_vdl=True)
diff --git a/tutorials/train/detection/faster_rcnn_r50_fpn.py b/tutorials/train/object_detection/faster_rcnn_r50_fpn.py
similarity index 67%
rename from tutorials/train/detection/faster_rcnn_r50_fpn.py
rename to tutorials/train/object_detection/faster_rcnn_r50_fpn.py
index cbe6dabe535b5972418349ac31576b344652e69d..0f26bfa9a5c571419c5b4b2f6e553f383d011399 100644
--- a/tutorials/train/detection/faster_rcnn_r50_fpn.py
+++ b/tutorials/train/object_detection/faster_rcnn_r50_fpn.py
@@ -1,5 +1,6 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
import os
-# Use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
@@ -10,10 +11,11 @@ insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
pdx.utils.download_and_decompress(insect_dataset, path='./')
# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/det_transforms.html
train_transforms = transforms.Compose([
- transforms.RandomHorizontalFlip(),
+ transforms.RandomHorizontalFlip(),
transforms.Normalize(),
- transforms.ResizeByShort(short_size=800, max_size=1333),
+ transforms.ResizeByShort(short_size=800, max_size=1333),
transforms.Padding(coarsest_stride=32)
])
@@ -24,6 +26,7 @@ eval_transforms = transforms.Compose([
])
# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-vocdetection
train_dataset = pdx.datasets.VOCDetection(
data_dir='insect_det',
file_list='insect_det/train_list.txt',
@@ -43,7 +46,12 @@ eval_dataset = pdx.datasets.VOCDetection(
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
# num_classes must count the background class, i.e. number of object classes + 1
num_classes = len(train_dataset.labels) + 1
-model = pdx.det.FasterRCNN(num_classes=num_classes)
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#paddlex-det-fasterrcnn
+model = pdx.det.FasterRCNN(num_classes=num_classes, backbone='ResNet50')
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#id1
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
model.train(
num_epochs=12,
train_dataset=train_dataset,
diff --git a/tutorials/train/object_detection/yolov3_darknet53.py b/tutorials/train/object_detection/yolov3_darknet53.py
new file mode 100644
index 0000000000000000000000000000000000000000..085be4bf7ffa3f9eca31f3b2807d83f00544b455
--- /dev/null
+++ b/tutorials/train/object_detection/yolov3_darknet53.py
@@ -0,0 +1,64 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.det import transforms
+import paddlex as pdx
+
+# Download and decompress the insect detection dataset
+insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
+pdx.utils.download_and_decompress(insect_dataset, path='./')
+
+# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/det_transforms.html
+train_transforms = transforms.Compose([
+ transforms.MixupImage(mixup_epoch=250),
+ transforms.RandomDistort(),
+ transforms.RandomExpand(),
+ transforms.RandomCrop(),
+ transforms.Resize(target_size=608, interp='RANDOM'),
+ transforms.RandomHorizontalFlip(),
+ transforms.Normalize()
+])
+
+eval_transforms = transforms.Compose([
+ transforms.Resize(target_size=608, interp='CUBIC'),
+ transforms.Normalize()
+])
+
+# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-vocdetection
+train_dataset = pdx.datasets.VOCDetection(
+ data_dir='insect_det',
+ file_list='insect_det/train_list.txt',
+ label_list='insect_det/labels.txt',
+ transforms=train_transforms,
+ shuffle=True)
+eval_dataset = pdx.datasets.VOCDetection(
+ data_dir='insect_det',
+ file_list='insect_det/val_list.txt',
+ label_list='insect_det/labels.txt',
+ transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be inspected with VisualDL
+# Launch VisualDL with: visualdl --logdir output/yolov3_darknet53/vdl_log --port 8001
+# Then open http://0.0.0.0:8001 in a browser
+# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
+num_classes = len(train_dataset.labels)
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#paddlex-det-yolov3
+model = pdx.det.YOLOv3(num_classes=num_classes, backbone='DarkNet53')
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
+model.train(
+ num_epochs=270,
+ train_dataset=train_dataset,
+ train_batch_size=8,
+ eval_dataset=eval_dataset,
+ learning_rate=0.000125,
+ lr_decay_epochs=[210, 240],
+ save_dir='output/yolov3_darknet53',
+ use_vdl=True)
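Once trained, the detector can be run and its boxes rendered; a hedged sketch using the PaddleX 1.x prediction and visualization helpers (`insect.jpg` is a placeholder path, not part of the diff):

```
# Hedged sketch: single-image inference with the trained detector above.
# 'insect.jpg' is a placeholder; threshold filters low-confidence boxes.
import paddlex as pdx

model = pdx.load_model('output/yolov3_darknet53/best_model')
result = model.predict('insect.jpg')  # list of {category, bbox, score} dicts
pdx.det.visualize('insect.jpg', result, threshold=0.3, save_dir='./')
```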
diff --git a/tutorials/train/detection/yolov3_darknet53.py b/tutorials/train/object_detection/yolov3_mobilenetv1.py
similarity index 69%
rename from tutorials/train/detection/yolov3_darknet53.py
rename to tutorials/train/object_detection/yolov3_mobilenetv1.py
index c38656b04e9a35cd033dc583811c58aa8baafba2..bfc2bea0716c1bc0b7c27cb8014d6215eed8306c 100644
--- a/tutorials/train/detection/yolov3_darknet53.py
+++ b/tutorials/train/object_detection/yolov3_mobilenetv1.py
@@ -1,5 +1,6 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
import os
-# Use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
@@ -10,6 +11,7 @@ insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
pdx.utils.download_and_decompress(insect_dataset, path='./')
# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/det_transforms.html
train_transforms = transforms.Compose([
transforms.MixupImage(mixup_epoch=250),
transforms.RandomDistort(),
@@ -26,6 +28,7 @@ eval_transforms = transforms.Compose([
])
# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-vocdetection
train_dataset = pdx.datasets.VOCDetection(
data_dir='insect_det',
file_list='insect_det/train_list.txt',
@@ -44,7 +47,12 @@ eval_dataset = pdx.datasets.VOCDetection(
# Then open http://0.0.0.0:8001 in a browser
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
num_classes = len(train_dataset.labels)
-model = pdx.det.YOLOv3(num_classes=num_classes, backbone='DarkNet53')
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#paddlex-det-yolov3
+model = pdx.det.YOLOv3(num_classes=num_classes, backbone='MobileNetV1')
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
model.train(
num_epochs=270,
train_dataset=train_dataset,
@@ -52,5 +60,5 @@ model.train(
eval_dataset=eval_dataset,
learning_rate=0.000125,
lr_decay_epochs=[210, 240],
- save_dir='output/yolov3_darknet53',
+ save_dir='output/yolov3_mobilenetv1',
use_vdl=True)
diff --git a/tutorials/train/object_detection/yolov3_mobilenetv3.py b/tutorials/train/object_detection/yolov3_mobilenetv3.py
new file mode 100644
index 0000000000000000000000000000000000000000..85570781851665a9ab28a718ecf85a0b078508a3
--- /dev/null
+++ b/tutorials/train/object_detection/yolov3_mobilenetv3.py
@@ -0,0 +1,64 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.det import transforms
+import paddlex as pdx
+
+# Download and decompress the insect detection dataset
+insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
+pdx.utils.download_and_decompress(insect_dataset, path='./')
+
+# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/det_transforms.html
+train_transforms = transforms.Compose([
+ transforms.MixupImage(mixup_epoch=250),
+ transforms.RandomDistort(),
+ transforms.RandomExpand(),
+ transforms.RandomCrop(),
+ transforms.Resize(target_size=608, interp='RANDOM'),
+ transforms.RandomHorizontalFlip(),
+ transforms.Normalize()
+])
+
+eval_transforms = transforms.Compose([
+ transforms.Resize(target_size=608, interp='CUBIC'),
+ transforms.Normalize()
+])
+
+# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-vocdetection
+train_dataset = pdx.datasets.VOCDetection(
+ data_dir='insect_det',
+ file_list='insect_det/train_list.txt',
+ label_list='insect_det/labels.txt',
+ transforms=train_transforms,
+ shuffle=True)
+eval_dataset = pdx.datasets.VOCDetection(
+ data_dir='insect_det',
+ file_list='insect_det/val_list.txt',
+ label_list='insect_det/labels.txt',
+ transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be inspected with VisualDL
+# Launch VisualDL with: visualdl --logdir output/yolov3_mobilenetv3/vdl_log --port 8001
+# Then open http://0.0.0.0:8001 in a browser
+# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
+num_classes = len(train_dataset.labels)
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#paddlex-det-yolov3
+model = pdx.det.YOLOv3(num_classes=num_classes, backbone='MobileNetV3_large')
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
+model.train(
+ num_epochs=270,
+ train_dataset=train_dataset,
+ train_batch_size=8,
+ eval_dataset=eval_dataset,
+ learning_rate=0.000125,
+ lr_decay_epochs=[210, 240],
+ save_dir='output/yolov3_mobilenetv3',
+ use_vdl=True)
diff --git a/tutorials/train/semantic_segmentation/deeplabv3p_mobilenetv2.py b/tutorials/train/semantic_segmentation/deeplabv3p_mobilenetv2.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc5b738a0641604f28fd83a47b795313c13bcd39
--- /dev/null
+++ b/tutorials/train/semantic_segmentation/deeplabv3p_mobilenetv2.py
@@ -0,0 +1,62 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+import paddlex as pdx
+from paddlex.seg import transforms
+
+# Download and decompress the optic disc segmentation dataset
+optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
+pdx.utils.download_and_decompress(optic_dataset, path='./')
+
+# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/seg_transforms.html
+train_transforms = transforms.Compose([
+ transforms.RandomHorizontalFlip(),
+ transforms.ResizeRangeScaling(),
+ transforms.RandomPaddingCrop(crop_size=512),
+ transforms.Normalize()
+])
+
+eval_transforms = transforms.Compose([
+ transforms.ResizeByLong(long_size=512),
+ transforms.Padding(target_size=512),
+ transforms.Normalize()
+])
+
+# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-segdataset
+train_dataset = pdx.datasets.SegDataset(
+ data_dir='optic_disc_seg',
+ file_list='optic_disc_seg/train_list.txt',
+ label_list='optic_disc_seg/labels.txt',
+ transforms=train_transforms,
+ shuffle=True)
+eval_dataset = pdx.datasets.SegDataset(
+ data_dir='optic_disc_seg',
+ file_list='optic_disc_seg/val_list.txt',
+ label_list='optic_disc_seg/labels.txt',
+ transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be inspected with VisualDL
+# Launch VisualDL with: visualdl --logdir output/deeplabv3p_mobilenetv2/vdl_log --port 8001
+# Then open http://0.0.0.0:8001 in a browser
+# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
+num_classes = len(train_dataset.labels)
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#paddlex-seg-deeplabv3p
+model = pdx.seg.DeepLabv3p(num_classes=num_classes, backbone='MobileNetV2_x1.0')
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
+model.train(
+ num_epochs=40,
+ train_dataset=train_dataset,
+ train_batch_size=4,
+ eval_dataset=eval_dataset,
+ learning_rate=0.01,
+ save_dir='output/deeplabv3p_mobilenetv2',
+ use_vdl=True)
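For the segmentation scripts, prediction returns a label map that can be blended over the input; a hedged sketch with the PaddleX 1.x segmentation visualizer (`optic_disc.jpg` is a placeholder path, not part of the diff):

```
# Hedged sketch: run the trained segmenter above on one image and overlay
# the predicted mask. weight controls the image/mask blend.
import paddlex as pdx

model = pdx.load_model('output/deeplabv3p_mobilenetv2/best_model')
result = model.predict('optic_disc.jpg')  # {'label_map': ..., 'score_map': ...}
pdx.seg.visualize('optic_disc.jpg', result, weight=0.4, save_dir='./')
```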
diff --git a/tutorials/train/semantic_segmentation/deeplabv3p_mobilenetv2_x0.25.py b/tutorials/train/semantic_segmentation/deeplabv3p_mobilenetv2_x0.25.py
new file mode 100644
index 0000000000000000000000000000000000000000..51bdb3ded85453fd48cce0b4845742421b949f06
--- /dev/null
+++ b/tutorials/train/semantic_segmentation/deeplabv3p_mobilenetv2_x0.25.py
@@ -0,0 +1,61 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+import paddlex as pdx
+from paddlex.seg import transforms
+
+# Download and decompress the optic disc segmentation dataset
+optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
+pdx.utils.download_and_decompress(optic_dataset, path='./')
+
+# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/seg_transforms.html
+train_transforms = transforms.Compose([
+ transforms.RandomHorizontalFlip(),
+ transforms.ResizeRangeScaling(),
+ transforms.RandomPaddingCrop(crop_size=512),
+ transforms.Normalize()
+])
+
+eval_transforms = transforms.Compose([
+ transforms.ResizeByLong(long_size=512),
+ transforms.Padding(target_size=512),
+ transforms.Normalize()
+])
+
+# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-segdataset
+train_dataset = pdx.datasets.SegDataset(
+ data_dir='optic_disc_seg',
+ file_list='optic_disc_seg/train_list.txt',
+ label_list='optic_disc_seg/labels.txt',
+ transforms=train_transforms,
+ shuffle=True)
+eval_dataset = pdx.datasets.SegDataset(
+ data_dir='optic_disc_seg',
+ file_list='optic_disc_seg/val_list.txt',
+ label_list='optic_disc_seg/labels.txt',
+ transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be inspected with VisualDL
+# Launch VisualDL with: visualdl --logdir output/deeplabv3p_mobilenetv2_x0_25/vdl_log --port 8001
+# Then open http://0.0.0.0:8001 in a browser
+# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
+num_classes = len(train_dataset.labels)
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#paddlex-seg-deeplabv3p
+model = pdx.seg.DeepLabv3p(num_classes=num_classes, backbone='MobileNetV2_x0.25')
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
+model.train(
+ num_epochs=40,
+ train_dataset=train_dataset,
+ train_batch_size=4,
+ eval_dataset=eval_dataset,
+ learning_rate=0.01,
+ save_dir='output/deeplabv3p_mobilenetv2_x0_25',
+ use_vdl=True)
diff --git a/tutorials/train/segmentation/deeplabv3p.py b/tutorials/train/semantic_segmentation/deeplabv3p_xception65.py
similarity index 55%
rename from tutorials/train/segmentation/deeplabv3p.py
rename to tutorials/train/semantic_segmentation/deeplabv3p_xception65.py
index 346a229a358a76830112acfd596740c070822874..4101c98f7d0e4f61dd0709d608e052615560fffc 100644
--- a/tutorials/train/segmentation/deeplabv3p.py
+++ b/tutorials/train/semantic_segmentation/deeplabv3p_xception65.py
@@ -1,5 +1,6 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
import os
-# Use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
@@ -10,17 +11,22 @@ optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')
# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/seg_transforms.html
train_transforms = transforms.Compose([
- transforms.RandomHorizontalFlip(),
- transforms.Resize(target_size=512),
- transforms.RandomPaddingCrop(crop_size=500),
+ transforms.RandomHorizontalFlip(),
+ transforms.ResizeRangeScaling(),
+ transforms.RandomPaddingCrop(crop_size=512),
transforms.Normalize()
])
-eval_transforms = transforms.Compose(
- [transforms.Resize(512), transforms.Normalize()])
+eval_transforms = transforms.Compose([
+ transforms.ResizeByLong(long_size=512),
+ transforms.Padding(target_size=512),
+ transforms.Normalize()
+])
# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-segdataset
train_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/train_list.txt',
@@ -39,12 +45,17 @@ eval_dataset = pdx.datasets.SegDataset(
# Then open http://0.0.0.0:8001 in a browser
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
num_classes = len(train_dataset.labels)
-model = pdx.seg.DeepLabv3p(num_classes=num_classes)
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#paddlex-seg-deeplabv3p
+model = pdx.seg.DeepLabv3p(num_classes=num_classes, backbone='Xception65')
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
model.train(
num_epochs=40,
train_dataset=train_dataset,
train_batch_size=4,
eval_dataset=eval_dataset,
learning_rate=0.01,
- save_dir='output/deeplab',
+ save_dir='output/deeplabv3p_xception65',
use_vdl=True)
diff --git a/tutorials/train/segmentation/fast_scnn.py b/tutorials/train/semantic_segmentation/fast_scnn.py
similarity index 56%
rename from tutorials/train/segmentation/fast_scnn.py
rename to tutorials/train/semantic_segmentation/fast_scnn.py
index 9c48d31eda7b612243e65df124b51722c4ea59e4..38fa51a7ab6242795dfd16c322d004b733e62a74 100644
--- a/tutorials/train/segmentation/fast_scnn.py
+++ b/tutorials/train/semantic_segmentation/fast_scnn.py
@@ -1,5 +1,6 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
import os
-# Use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
@@ -10,13 +11,22 @@ optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')
# Define the transforms for training and evaluation
-# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
-train_transforms = transforms.ComposedSegTransforms(
- mode='train', train_crop_size=[769, 769])
-eval_transforms = transforms.ComposedSegTransforms(mode='eval')
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/seg_transforms.html
+train_transforms = transforms.Compose([
+ transforms.RandomHorizontalFlip(),
+ transforms.ResizeRangeScaling(),
+ transforms.RandomPaddingCrop(crop_size=512),
+ transforms.Normalize()
+])
+
+eval_transforms = transforms.Compose([
+ transforms.ResizeByLong(long_size=512),
+ transforms.Padding(target_size=512),
+ transforms.Normalize()
+])
# Define the datasets used for training and evaluation
-# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-segdataset
train_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/train_list.txt',
@@ -35,9 +45,13 @@ eval_dataset = pdx.datasets.SegDataset(
# Then open http://0.0.0.0:8001 in a browser
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
-# https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#fastscnn
num_classes = len(train_dataset.labels)
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#paddlex-seg-fastscnn
model = pdx.seg.FastSCNN(num_classes=num_classes)
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
model.train(
num_epochs=20,
train_dataset=train_dataset,
diff --git a/tutorials/train/segmentation/hrnet.py b/tutorials/train/semantic_segmentation/hrnet.py
similarity index 60%
rename from tutorials/train/segmentation/hrnet.py
rename to tutorials/train/semantic_segmentation/hrnet.py
index f887b78c3ae16ae66235f1965ada8bd2355d62c6..9526e99b352eee73ca3ee4d308ec9fe36250f7d1 100644
--- a/tutorials/train/segmentation/hrnet.py
+++ b/tutorials/train/semantic_segmentation/hrnet.py
@@ -1,5 +1,6 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
import os
-# Use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
@@ -10,17 +11,22 @@ optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')
# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/seg_transforms.html
train_transforms = transforms.Compose([
- transforms.RandomHorizontalFlip(), transforms.ResizeRangeScaling(),
- transforms.RandomPaddingCrop(crop_size=512), transforms.Normalize()
+ transforms.RandomHorizontalFlip(),
+ transforms.ResizeRangeScaling(),
+ transforms.RandomPaddingCrop(crop_size=512),
+ transforms.Normalize()
])
eval_transforms = transforms.Compose([
- transforms.ResizeByLong(long_size=512),
- transforms.Padding(target_size=512), transforms.Normalize()
+ transforms.ResizeByLong(long_size=512),
+ transforms.Padding(target_size=512),
+ transforms.Normalize()
])
# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-segdataset
train_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/train_list.txt',
@@ -39,7 +45,12 @@ eval_dataset = pdx.datasets.SegDataset(
# Then open http://0.0.0.0:8001 in a browser
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
num_classes = len(train_dataset.labels)
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#paddlex-seg-hrnet
model = pdx.seg.HRNet(num_classes=num_classes)
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
model.train(
num_epochs=20,
train_dataset=train_dataset,
diff --git a/tutorials/train/segmentation/unet.py b/tutorials/train/semantic_segmentation/unet.py
similarity index 63%
rename from tutorials/train/segmentation/unet.py
rename to tutorials/train/semantic_segmentation/unet.py
index a683af98322eacb9d0775b3a5256d900f5743bb2..c0ba72666d4b386667cc747077916eaf251675a9 100644
--- a/tutorials/train/segmentation/unet.py
+++ b/tutorials/train/semantic_segmentation/unet.py
@@ -1,5 +1,6 @@
+# Environment variable configuration, used to control whether the GPU is used
+# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
import os
-# Use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
@@ -10,20 +11,21 @@ optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')
# Define the transforms for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/seg_transforms.html
train_transforms = transforms.Compose([
- transforms.RandomHorizontalFlip(),
+ transforms.RandomHorizontalFlip(),
transforms.ResizeRangeScaling(),
- transforms.RandomPaddingCrop(crop_size=512),
+ transforms.RandomPaddingCrop(crop_size=512),
transforms.Normalize()
])
eval_transforms = transforms.Compose([
- transforms.ResizeByLong(long_size=512),
- transforms.Padding(target_size=512),
+ transforms.ResizeByLong(long_size=512), transforms.Padding(target_size=512),
transforms.Normalize()
])
# Define the datasets used for training and evaluation
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-segdataset
train_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/train_list.txt',
@@ -42,7 +44,12 @@ eval_dataset = pdx.datasets.SegDataset(
# Then open http://0.0.0.0:8001 in a browser
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
num_classes = len(train_dataset.labels)
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#paddlex-seg-unet
model = pdx.seg.UNet(num_classes=num_classes)
+
+# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#train
+# Parameter descriptions and tuning guide: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
model.train(
num_epochs=20,
train_dataset=train_dataset,