Commit 4b52b8df authored by Dmitry Kurtaev

Layers for fast-neural-style models: https://github.com/jcjohnson/fast-neural-style

Parent 60cbc46d
......@@ -377,6 +377,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
* starting from the first one. The rest of the dimensions won't
* be padded.
* @param value Value used for padding. Defaults to zero.
* @param type Padding type: 'constant' or 'reflect'.
* @param input_dims Torch's parameter. If @p input_dims is not equal to the
* actual input dimensionality then the `[0]th` dimension
* is considered as a batch dimension and @p paddings are shifted
......
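To illustrate the two values of the new type parameter, here is a minimal NumPy sketch (illustration only, not part of the patch) of constant vs. reflect padding on one axis:

import numpy as np

row = np.array([1, 2, 3, 4], dtype=np.float32)

# 'constant' fills the new border with a fixed value (paddingValue).
print(np.pad(row, (2, 2), mode='constant', constant_values=0))
# -> [0. 0. 1. 2. 3. 4. 0. 0.]

# 'reflect' mirrors the input without repeating the border element,
# which matches BORDER_REFLECT_101 used by the layer implementation below.
print(np.pad(row, (2, 2), mode='reflect'))
# -> [3. 2. 1. 2. 3. 4. 3. 2.]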
......@@ -112,16 +112,12 @@ static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const
static inline Mat getPlane(const Mat &m, int n, int cn)
{
CV_Assert(m.dims > 2);
Range range[CV_MAX_DIM];
int sz[CV_MAX_DIM];
for(int i = 2; i < m.dims; i++)
{
sz[i-2] = m.size.p[i];
range[i] = Range::all();
}
range[0] = Range(n, n+1);
range[1] = Range(cn, cn+1);
return m(range).reshape(1, m.dims-2, sz);
return Mat(m.dims - 2, sz, m.type(), (void*)m.ptr<float>(n, cn));
}
static inline MatShape shape(const int* dims, const int n = 4)
......@@ -191,6 +187,14 @@ inline int clamp(int ax, const MatShape& shape)
return clamp(ax, (int)shape.size());
}
inline Range clamp(const Range& r, int axisSize)
{
Range clamped(std::max(r.start, 0),
r.end > 0 ? std::min(r.end, axisSize) : axisSize + r.end + 1);
CV_Assert(clamped.start < clamped.end, clamped.end <= axisSize);
return clamped;
}
CV__DNN_EXPERIMENTAL_NS_END
}
}
......
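Two notes on the helpers above: getPlane now returns a lightweight view over the (n, cn) plane without copying, analogous to m[n, cn] on a NumPy array, and the new clamp overload resolves a possibly-negative Range end against a concrete axis size. A small Python sketch of the clamp rule, for illustration only:

def clamp_range(start, end, axis_size):
    # Mirrors clamp(const Range&, int): a non-positive end counts from the
    # back of the axis, with end == -1 meaning "through the last element".
    new_start = max(start, 0)
    new_end = min(end, axis_size) if end > 0 else axis_size + end + 1
    assert new_start < new_end <= axis_size
    return new_start, new_end

print(clamp_range(2, -1, 10))  # (2, 10): the whole tail of the axis
print(clamp_range(2, -3, 10))  # (2, 8): drop the last two elements
print(clamp_range(0, 5, 10))   # (0, 5): a positive end is just clamped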
......@@ -10,6 +10,7 @@ Implementation of padding layer, which adds paddings to input blob.
*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "op_halide.hpp"
#include <vector>
......@@ -26,6 +27,7 @@ public:
setParamsFrom(params);
paddingValue = params.get<float>("value", 0);
inputDims = params.get<int>("input_dims", -1);
paddingType = params.get<String>("type", "constant");
CV_Assert(params.has("paddings"));
const DictValue& paddingsParam = params.get("paddings");
......@@ -94,8 +96,45 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
outputs[0].setTo(paddingValue);
inputs[0]->copyTo(outputs[0](dstRanges));
if (paddingType == "constant")
{
outputs[0].setTo(paddingValue);
inputs[0]->copyTo(outputs[0](dstRanges));
}
else if (paddingType == "reflect")
{
CV_Assert(inputs.size() == 1);
CV_Assert(outputs.size() == 1);
CV_Assert(inputs[0]->dims == 4);
CV_Assert(outputs[0].dims == 4);
if (inputs[0]->size[0] != outputs[0].size[0] || inputs[0]->size[1] != outputs[0].size[1])
CV_Error(Error::StsNotImplemented, "Only spatial reflection padding is supported.");
const int inpHeight = inputs[0]->size[2];
const int inpWidth = inputs[0]->size[3];
const int outHeight = outputs[0].size[2];
const int outWidth = outputs[0].size[3];
const int padTop = dstRanges[2].start;
const int padBottom = outHeight - dstRanges[2].end;
const int padLeft = dstRanges[3].start;
const int padRight = outWidth - dstRanges[3].end;
CV_Assert(padTop < inpHeight, padBottom < inpHeight,
padLeft < inpWidth, padRight < inpWidth);
for (size_t n = 0; n < inputs[0]->size[0]; ++n)
{
for (size_t ch = 0; ch < inputs[0]->size[1]; ++ch)
{
copyMakeBorder(getPlane(*inputs[0], n, ch),
getPlane(outputs[0], n, ch),
padTop, padBottom, padLeft, padRight,
BORDER_REFLECT_101);
}
}
}
else
CV_Error(Error::StsNotImplemented, "Unknown padding type: " + paddingType);
}
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
......@@ -124,6 +163,7 @@ private:
std::vector<Range> dstRanges;
int inputDims;
float paddingValue;
std::string paddingType;
};
Ptr<PaddingLayer> PaddingLayer::create(const LayerParams &params)
......
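The reflect branch above delegates each (n, ch) plane to copyMakeBorder with BORDER_REFLECT_101; the same call is available from Python, e.g.:

import cv2 as cv
import numpy as np

plane = np.arange(9, dtype=np.float32).reshape(3, 3)
padded = cv.copyMakeBorder(plane, 1, 1, 1, 1, cv.BORDER_REFLECT_101)
print(padded)
# The border mirrors around the edge without repeating it: the input row
# [3. 4. 5.] produces the padded top row [4. 3. 4. 5. 4.].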
......@@ -58,7 +58,7 @@ public:
axis = params.get<int>("axis", 1);
if (params.has("slice_point"))
{
CV_Assert(!params.has("begin") && !params.has("size"));
CV_Assert(!params.has("begin") && !params.has("size") && !params.has("end"));
const DictValue &indicesValue = params.get("slice_point");
sliceRanges.resize(indicesValue.size() + 1,
std::vector<Range>(axis + 1, Range::all()));
......@@ -71,24 +71,34 @@ public:
}
sliceRanges.back()[axis].start = prevSlice;
}
else if (params.has("begin") && params.has("size"))
else if (params.has("begin"))
{
CV_Assert(params.has("size") ^ params.has("end"));
const DictValue &begins = params.get("begin");
const DictValue &sizes = params.get("size");
CV_Assert(begins.size() == sizes.size());
const DictValue &sizesOrEnds = params.has("size") ? params.get("size") : params.get("end");
CV_Assert(begins.size() == sizesOrEnds.size());
sliceRanges.resize(1);
sliceRanges[0].resize(begins.size(), Range::all());
for (int i = 0; i < begins.size(); ++i)
{
int start = begins.get<int>(i);
int size = sizes.get<int>(i);
int sizeOrEnd = sizesOrEnds.get<int>(i); // It may be negative for indexing from the end.
CV_Assert(start >= 0);
CV_Assert(size == -1 || size > 0); // -1 value means range [start, axis_size).
sliceRanges[0][i].start = start;
if (size > 0)
sliceRanges[0][i].end = start + size;
if (params.has("size"))
{
int size = sizeOrEnd;
CV_Assert(size == -1 || size > 0); // -1 value means range [start, axis_size).
sliceRanges[0][i].end = size > 0 ? start + size : -1; // We'll finalize a negative value later.
}
else
{
int end = sizeOrEnd;
CV_Assert(end < 0 || end > start); // The end index is exclusive.
sliceRanges[0][i].end = end; // We'll finalize a negative value later.
}
}
}
}
......@@ -109,8 +119,7 @@ public:
CV_Assert(sliceRanges[i].size() <= inpShape.size());
for (int j = 0; j < sliceRanges[i].size(); ++j)
{
outputs[i][j] = std::min(sliceRanges[i][j].end, inpShape[j]) -
std::max(sliceRanges[i][j].start, 0);
outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size();
}
}
}
......@@ -152,8 +161,7 @@ public:
// Clamp.
for (int j = 0; j < sliceRanges[i].size(); ++j)
{
sliceRanges[i][j].start = std::max(0, sliceRanges[i][j].start);
sliceRanges[i][j].end = std::min(sliceRanges[i][j].end, inpShape[j]);
sliceRanges[i][j] = clamp(sliceRanges[i][j], inpShape[j]);
}
// Fill the rest of ranges.
for (int j = sliceRanges[i].size(); j < inpShape[-1]; ++j)
......
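The new begin/end form gives NumPy-like slicing, but note that the negative-end convention differs from Python's own by one: after clamp(), end = -1 runs through the last element. A sketch of the intended semantics (plain NumPy, not the OpenCV API):

import numpy as np

x = np.arange(10)

# begin=2, size=3 -> range [2, 5)
print(x[2:2 + 3])       # [2 3 4]

# begin=2, size=-1 -> range [2, axis_size)
print(x[2:])            # [2 3 4 5 6 7 8 9]

# begin=2, end=-3 -> clamp() yields end = 10 + (-3) + 1 = 8
print(x[2:8])           # [2 3 4 5 6 7]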
......@@ -617,7 +617,7 @@ struct TorchImporter : public ::cv::dnn::Importer
curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "Sigmoid")));
readObject();
}
else if (nnName == "SpatialBatchNormalization")
else if (nnName == "SpatialBatchNormalization" || nnName == "InstanceNormalization")
{
newModule->apiType = "BatchNorm";
readTorchTable(scalarParams, tensorParams);
......@@ -626,19 +626,31 @@ struct TorchImporter : public ::cv::dnn::Importer
float eps = float(scalarParams.get<double>("eps"));
layerParams.set("eps", eps);
CV_Assert((tensorParams.count("running_var") || tensorParams.count("running_std")) &&
tensorParams.count("running_mean"));
layerParams.blobs.push_back(tensorParams["running_mean"].second);
if (tensorParams.count("running_mean"))
{
layerParams.blobs.push_back(tensorParams["running_mean"].second);
}
else
{
CV_Assert(scalarParams.has("nOutput"));
layerParams.blobs.push_back(Mat::zeros(1, scalarParams.get<int>("nOutput"), CV_32F));
}
if (tensorParams.count("running_var"))
{
layerParams.blobs.push_back(tensorParams["running_var"].second);
}
else
else if (tensorParams.count("running_std"))
{
layerParams.blobs.push_back(tensorParams["running_std"].second);
pow(layerParams.blobs.back(), -2, layerParams.blobs.back());
subtract(layerParams.blobs.back(), eps, layerParams.blobs.back());
}
else
{
CV_Assert(scalarParams.has("nOutput"));
layerParams.blobs.push_back(Mat::ones(1, scalarParams.get<int>("nOutput"), CV_32F));
}
if (tensorParams.count("weight"))
{
......@@ -652,6 +664,16 @@ struct TorchImporter : public ::cv::dnn::Importer
layerParams.blobs.push_back(tensorParams["bias"].second);
}
if (nnName == "InstanceNormalization")
{
cv::Ptr<Module> mvnModule(new Module(nnName));
mvnModule->apiType = "MVN";
curModule->modules.push_back(mvnModule);
layerParams.blobs[0].setTo(0); // batch norm's mean
layerParams.blobs[1].setTo(1); // batch norm's std
}
curModule->modules.push_back(newModule);
}
else if (nnName == "PReLU")
......@@ -691,7 +713,9 @@ struct TorchImporter : public ::cv::dnn::Importer
layerParams.set("scale", scale);
curModule->modules.push_back(newModule);
}
else if (nnName == "Identity")
// TotalVariation layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style
// It's a loss function that has an Identity forward.
else if (nnName == "Identity" || nnName == "TotalVariation")
{
readTorchTable(scalarParams, tensorParams);
newModule->apiType = "Identity";
......@@ -866,7 +890,7 @@ struct TorchImporter : public ::cv::dnn::Importer
layerParams.set("scale", scalarParams.get<float>("constant_scalar"));
curModule->modules.push_back(newModule);
}
else if (nnName == "SpatialZeroPadding")
else if (nnName == "SpatialZeroPadding" || nnName == "SpatialReflectionPadding")
{
readTorchTable(scalarParams, tensorParams);
CV_Assert(scalarParams.has("pad_l"), scalarParams.has("pad_r"),
......@@ -889,6 +913,26 @@ struct TorchImporter : public ::cv::dnn::Importer
paddings[5] = padRight;
layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));
layerParams.set("input_dims", 3);
if (nnName == "SpatialReflectionPadding")
layerParams.set("type", "reflect");
curModule->modules.push_back(newModule);
}
else if (nnName == "ShaveImage")
{
// ShaveImage layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style
// It may be mapped to Slice layer.
readTorchTable(scalarParams, tensorParams);
CV_Assert(scalarParams.has("size"));
int size = scalarParams.get<int>("size");
int begins[] = {0, 0, size, size};
int ends[] = {-1, -1, -size - 1, -size - 1};
newModule->apiType = "Slice";
layerParams.set("begin", DictValue::arrayInt<int*>(&begins[0], 4));
layerParams.set("end", DictValue::arrayInt<int*>(&ends[0], 4));
curModule->modules.push_back(newModule);
}
else
......
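InstanceNormalization is expressed above as an MVN layer (per-plane spatial normalization) followed by a BatchNorm whose running mean and std are forced to 0 and 1, so the BatchNorm only applies the learned scale and bias. A NumPy sketch of that composition, assuming an NCHW blob (illustration only):

import numpy as np

def instance_norm(x, weight, bias, eps=1e-5):
    # MVN step: normalize every (n, c) plane by its own spatial statistics.
    mean = x.mean(axis=(2, 3), keepdims=True)
    var = x.var(axis=(2, 3), keepdims=True)
    y = (x - mean) / np.sqrt(var + eps)
    # BatchNorm step with running mean 0 and variance 1: just scale and shift.
    return y * weight.reshape(1, -1, 1, 1) + bias.reshape(1, -1, 1, 1)

x = np.random.randn(2, 3, 8, 8).astype(np.float32)
y = instance_norm(x, np.ones(3, np.float32), np.zeros(3, np.float32))
print(y.mean(axis=(2, 3)))  # ~0 for every plane
print(y.std(axis=(2, 3)))   # ~1 for every plane

ShaveImage likewise reduces to a crop: with the begin/end values above it is equivalent to x[:, :, size:-size, size:-size], since end = -size - 1 clamps to axis_size - size, while end = -1 keeps the batch and channel axes whole.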
......@@ -231,6 +231,7 @@ TEST(Torch_Importer, net_padding)
{
runTorchNet("net_padding", DNN_TARGET_CPU, "", false, true);
runTorchNet("net_spatial_zero_padding", DNN_TARGET_CPU, "", false, true);
runTorchNet("net_spatial_reflection_padding", DNN_TARGET_CPU, "", false, true);
}
TEST(Torch_Importer, ENet_accuracy)
......@@ -338,6 +339,49 @@ OCL_TEST(Torch_Importer, ENet_accuracy)
}
}
// Check accuracy of style transfer models from https://github.com/jcjohnson/fast-neural-style
// th fast_neural_style.lua \
// -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \
// -output_image lena.png \
// -median_filter 0 \
// -image_size 0 \
// -model models/eccv16/starry_night.t7
// th fast_neural_style.lua \
// -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \
// -output_image lena.png \
// -median_filter 0 \
// -image_size 0 \
// -model models/instance_norm/feathers.t7
TEST(Torch_Importer, FastNeuralStyle_accuracy)
{
std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",
"dnn/fast_neural_style_instance_norm_feathers.t7"};
std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"};
for (int i = 0; i < 2; ++i)
{
const string model = findDataFile(models[i], false);
Net net = readNetFromTorch(model);
Mat img = imread(findDataFile("dnn/googlenet_1.png", false));
Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(103.939, 116.779, 123.68), false);
net.setInput(inputBlob);
Mat out = net.forward();
// Deprocessing.
getPlane(out, 0, 0) += 103.939;
getPlane(out, 0, 1) += 116.779;
getPlane(out, 0, 2) += 123.68;
out = cv::min(cv::max(0, out), 255);
Mat ref = imread(findDataFile(targets[i]));
Mat refBlob = blobFromImage(ref, 1.0, Size(), Scalar(), false);
normAssert(out, refBlob, "", 0.5, 1.1);
}
}
}
#endif
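The deprocessing in the test undoes what blobFromImage did at input time: the Caffe-style BGR mean (103.939, 116.779, 123.68) is added back per channel and the result is clipped to [0, 255]. A round-trip sketch in Python (values are illustrative):

import cv2 as cv
import numpy as np

img = np.full((4, 4, 3), 128, dtype=np.uint8)  # stand-in for any BGR image
mean = (103.939, 116.779, 123.68)
blob = cv.dnn.blobFromImage(img, 1.0, (4, 4), mean, swapRB=False, crop=False)

# Deprocess: add the mean back per channel, clip, restore HWC layout.
restored = blob[0] + np.array(mean, np.float32).reshape(3, 1, 1)
restored = np.clip(restored, 0, 255).astype(np.uint8).transpose(1, 2, 0)
print(np.array_equal(restored, img))  # True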
import cv2 as cv
import numpy as np
import argparse
parser = argparse.ArgumentParser(
description='This script is used to run style transfer models from '
'https://github.com/jcjohnson/fast-neural-style using OpenCV')
parser.add_argument('--input', help='Path to image or video. Skip to capture frames from camera')
parser.add_argument('--model', help='Path to .t7 model')
parser.add_argument('--width', default=-1, type=int, help='Resize input to specific width.')
parser.add_argument('--height', default=-1, type=int, help='Resize input to specific height.')
parser.add_argument('--median_filter', default=0, type=int, help='Kernel size of postprocessing blurring.')
args = parser.parse_args()
net = cv.dnn.readNetFromTorch(args.model)
if args.input:
cap = cv.VideoCapture(args.input)
else:
cap = cv.VideoCapture(0)
cv.namedWindow('Styled image', cv.WINDOW_NORMAL)
while cv.waitKey(1) < 0:
hasFrame, frame = cap.read()
if not hasFrame:
cv.waitKey()
break
inWidth = args.width if args.width != -1 else frame.shape[1]
inHeight = args.height if args.height != -1 else frame.shape[0]
inp = cv.dnn.blobFromImage(frame, 1.0, (inWidth, inHeight),
(103.939, 116.779, 123.68), swapRB=False, crop=False)
net.setInput(inp)
out = net.forward()
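# Deprocess the NCHW output: drop the batch dim, add the subtracted mean
# back per channel, and rescale to [0, 1] for display.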
out = out.reshape(3, out.shape[2], out.shape[3])
out[0] += 103.939
out[1] += 116.779
out[2] += 123.68
out /= 255
out = out.transpose(1, 2, 0)
t, _ = net.getPerfProfile()
freq = cv.getTickFrequency() / 1000
print(t / freq, 'ms')
if args.median_filter:
out = cv.medianBlur(out, args.median_filter)
cv.imshow('Styled image', out)