提交 0be1e09f 编写于 作者: D dzhwinter

"fix ci"

上级 5447046a
...@@ -84,12 +84,11 @@ class SequenceExpandOp : public framework::OperatorWithKernel { ...@@ -84,12 +84,11 @@ class SequenceExpandOp : public framework::OperatorWithKernel {
} }
} }
out_dims[0] = out_first_dim; out_dims[0] = out_first_dim;
ctx->SetOutputDim("Out", out_dims);
} else { } else {
out_dims[0] = -1; out_dims[0] = -1;
ctx->SetOutputDim("Out", out_dims);
ctx->ShareLoD("X", /*->*/ "Out");
} }
ctx->SetOutputDim("Out", out_dims);
ctx->ShareLoD("X", /*->*/ "Out");
} }
}; };
......
...@@ -24,123 +24,128 @@ namespace operators { ...@@ -24,123 +24,128 @@ namespace operators {
using LoDTensor = framework::LoDTensor; using LoDTensor = framework::LoDTensor;
template <typename T> template <typename T>
__global__ void sequence_expand_kernel(const T* x_data, T* out_data, __global__ void sequence_expand_kernel(const T* x_data, const size_t* x_lod,
const size_t* lod, const size_t* ref_lod,
const size_t* out_offset, const size_t lod_size,
size_t lod_size, size_t element_len, /* default=1,
size_t x_size) { the instance length*/
int bid_x = blockIdx.x; const int x_item_length, T* out_data) {
if (bid_x > lod_size) return; constexpr int N = 1024;
int repeats = lod[bid_x]; __shared__ int mem[N];
int offset = out_offset[bid_x]; int offset = 0;
for (int tid_y = threadIdx.y; tid_y < repeats; tid_y += blockDim.y) { for (int i = 0; i < lod_size; ++i) {
for (int tid_x = threadIdx.x; tid_x < element_len; tid_x += blockDim.x) { mem[i] = offset;
out_data[(offset + tid_y) * element_len + tid_x] = if (i < lod_size - 1) {
x_data[bid_x * element_len + tid_x]; offset += (ref_lod[i + 1] - ref_lod[i]) * (x_lod[i + 1] - x_lod[i]);
} }
} }
} __syncthreads();
template <typename T> int bid = blockIdx.x;
__global__ void sequence_expand_grad_kernel(const T* dout_data, T* dx_data, if (bid >= lod_size - 1) return;
const size_t* lod,
const size_t* out_offset, int x_item_count = x_lod[bid + 1] - x_lod[bid];
size_t lod_size, size_t element_len, int repeats = ref_lod[bid + 1] - ref_lod[bid];
size_t dout_size, size_t dx_size) { int out_offset = mem[bid];
// reduce visit memory time. int x_offset = x_lod[bid];
// dout_shm = [0 - dout_size-1], dx_shm = [dout_size-1, dout_size + dx_size-1] for (int tid_z = threadIdx.z; tid_z < repeats; tid_z += blockDim.z) {
if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0 && for (int tid_y = threadIdx.y; tid_y < x_item_count; tid_y += blockDim.y) {
threadIdx.y == 0) { for (int tid_x = threadIdx.x; tid_x < x_item_length;
printf("lod_size=%ld, element_size=%ld, dout_size=%ld, dx_size=%ld\n", tid_x += blockDim.x) {
lod_size, element_len, dout_size, dx_size); out_data[(out_offset + tid_z * x_item_count + tid_y) * x_item_length +
} tid_x] = x_data[(x_offset + tid_y) * x_item_length + tid_x];
extern __shared__ T shm[]; }
T* dout_shm = shm;
T* dx_shm = &shm[dout_size];
// int idx = threadIdx.x + blockIdx.x * blockDim.x;
for (int idx = 0; idx < dout_size; ++idx) {
if (idx < dx_size) {
dx_shm[idx] = 0.0;
}
if (idx < dout_size) {
dout_shm[idx] = dout_data[idx];
} }
} }
}
int bid_x = blockIdx.x; template <typename T>
if (bid_x > lod_size) return; __global__ void sequence_expand_grad_kernel(const T* dout_data,
int repeats = lod[bid_x]; const size_t* ref_lod,
int offset = out_offset[bid_x]; const size_t* dx_lod,
if (threadIdx.x == 0) { const size_t lod_size,
printf("repeats=%d, offset=%ld\n", repeats, offset); /* default=1,
} the instance length*/
for (int tid_y = threadIdx.y; tid_y < repeats; tid_y += blockDim.y) { const int x_item_length,
for (int tid_x = threadIdx.x; tid_x < element_len; tid_x += blockDim.x) { T* dx_data) {
T val = dout_shm[(offset + tid_y) * element_len + tid_x]; // TODO(dzhwinter) : too many atomicAdd
platform::CudaAtomicAdd(&dx_shm[bid_x * element_len + tid_x], val); // use shared memory to reduce memory visits
int dx_idx = bid_x * element_len + tid_x; constexpr int N = 1024;
int dout_idx = (offset + tid_y) * element_len + tid_x; __shared__ int mem[N];
printf("dx_idx=%d, dout_idx=%d, dx_data=%f, dout_data=%f, val=%f \n", int offset = 0;
dx_idx, dout_idx, dx_shm[dx_idx], dout_shm[dout_idx], val); for (int i = 0; i < lod_size; ++i) {
mem[i] = offset;
if (i < lod_size - 1) {
offset += (ref_lod[i + 1] - ref_lod[i]) * (dx_lod[i + 1] - dx_lod[i]);
} }
} }
__syncthreads(); __syncthreads();
// copy shared memory back to dx
for (int idx = threadIdx.x + blockIdx.x * blockDim.x; idx < dx_size; int bid = blockIdx.x;
idx += blockDim.x * gridDim.x) { if (bid >= lod_size - 1) return;
dx_data[idx] = dx_shm[idx]; int x_item_count = dx_lod[bid + 1] - dx_lod[bid];
int repeats = ref_lod[bid + 1] - ref_lod[bid];
int out_offset = mem[bid];
int x_offset = dx_lod[bid];
for (int tid_z = threadIdx.z; tid_z < repeats; tid_z += blockDim.z) {
for (int tid_y = threadIdx.y; tid_y < x_item_count; tid_y += blockDim.y) {
for (int tid_x = threadIdx.x; tid_x < x_item_length;
tid_x += blockDim.x) {
platform::CudaAtomicAdd(
&dx_data[(x_offset + tid_y) * x_item_length + tid_x],
dout_data[(out_offset + tid_z * x_item_count + tid_y) *
x_item_length +
tid_x]);
}
}
} }
} }
template <typename T> template <typename T>
struct SequenceExpandFunctor<platform::CUDADeviceContext, T> { struct SequenceExpandFunctor<platform::CUDADeviceContext, T> {
void operator()(const platform::CUDADeviceContext& context, void operator()(
const LoDTensor& x, LoDTensor* out) { const platform::CUDADeviceContext& context, const LoDTensor& x,
auto x_dims = x.dims(); const framework::Vector<size_t>& x_lod, /*expand source lod*/
size_t element_len = framework::product(x_dims) / x_dims[0]; const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
auto lod = out->lod().back(); LoDTensor* out) {
framework::Vector<size_t> out_lod; int x_item_length = 1;
for (size_t i = 0; i < lod.size() - 1; ++i) { x_item_length = x.numel() / x.dims()[0];
out_lod.push_back(lod[i + 1] - lod[i]); VLOG(0) << "x_item_length" << x_item_length;
} int thread_x = std::max(static_cast<int>(ref_lod.size()), 32);
int thread_y = std::max(1024 / thread_x, 16);
int thread_x = std::max(static_cast<int>(element_len), 32); int thread_z = std::min(1024 / thread_x / thread_y, 16);
int block_x = static_cast<int>(out_lod.size()); int block_x = static_cast<int>(ref_lod.size());
dim3 block_size(thread_x, 1024 / thread_x); dim3 block_size(thread_x, thread_y, thread_z);
dim3 grid_size(block_x, 1); dim3 grid_size(block_x, 1);
sequence_expand_kernel<<<grid_size, block_size, 0, context.stream()>>>( sequence_expand_kernel<<<grid_size, block_size, 0, context.stream()>>>(
x.data<T>(), out->mutable_data<T>(context.GetPlace()), x.data<T>(), x_lod.CUDAData(context.GetPlace()),
out_lod.CUDAData(context.GetPlace()), lod.CUDAData(context.GetPlace()), ref_lod.CUDAData(context.GetPlace()), x_lod.size(), x_item_length,
out_lod.size(), element_len, framework::product(x_dims)); out->mutable_data<T>(context.GetPlace()));
} }
}; };
template <typename T> template <typename T>
struct SequenceExpandGradFunctor<platform::CUDADeviceContext, T> { struct SequenceExpandGradFunctor<platform::CUDADeviceContext, T> {
void operator()(const platform::CUDADeviceContext& context, void operator()(const platform::CUDADeviceContext& context,
const LoDTensor& x, const LoDTensor& out, const LoDTensor& dout,
const LoDTensor& dout, LoDTensor* dx) { const framework::Vector<size_t>& x_lod, /*expand source lod*/
auto x_dims = x.dims(); const framework::Vector<size_t>& ref_lod, /*expand based lod*/
size_t element_len = framework::product(x_dims) / x_dims[0]; LoDTensor* dx) {
auto lod = out.lod().back(); int x_item_length = 1;
framework::Vector<size_t> out_lod; x_item_length = framework::product(dx->dims()) / dx->dims()[0];
for (size_t i = 0; i < lod.size() - 1; ++i) {
out_lod.push_back(lod[i + 1] - lod[i]); int thread_x = std::max(static_cast<int>(ref_lod.size()), 32);
} int thread_y = std::max(1024 / thread_x, 16);
size_t dout_size = framework::product(dout.dims()); int thread_z = std::min(1024 / thread_x / thread_y, 16);
size_t dx_size = framework::product(dx->dims()); int block_x = static_cast<int>(ref_lod.size());
dim3 block_size(thread_x, thread_y, thread_z);
int thread_x = std::max(static_cast<int>(element_len), 32);
dim3 block_size(thread_x, 1024 / thread_x);
int block_x = static_cast<int>(out_lod.size());
dim3 grid_size(block_x, 1); dim3 grid_size(block_x, 1);
sequence_expand_grad_kernel<<<grid_size, block_size, sequence_expand_grad_kernel<<<grid_size, block_size, 0, context.stream()>>>(
(dout_size + dx_size) * sizeof(T), dout.data<T>(), ref_lod.CUDAData(context.GetPlace()),
context.stream()>>>( x_lod.CUDAData(context.GetPlace()), ref_lod.size(), x_item_length,
dout.data<T>(), dx->mutable_data<T>(context.GetPlace()), dx->mutable_data<T>(context.GetPlace()));
out_lod.CUDAData(context.GetPlace()), lod.CUDAData(context.GetPlace()),
out_lod.size(), element_len, dout_size, dx_size);
} }
}; };
......
...@@ -13,8 +13,10 @@ See the License for the specific language governing permissions and ...@@ -13,8 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <numeric> // std::itoa #include <numeric> // std::iota
#include <glog/logging.h>
#include <sstream>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/math_function.h"
...@@ -29,40 +31,42 @@ using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>; ...@@ -29,40 +31,42 @@ using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
struct SequenceExpandFunctor { struct SequenceExpandFunctor {
void operator()(const DeviceContext& ctx, const LoDTensor& x, LoDTensor* out); void operator()(
const DeviceContext& ctx, const LoDTensor& x,
const framework::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
LoDTensor* out);
}; };
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
struct SequenceExpandGradFunctor { struct SequenceExpandGradFunctor {
void operator()(const DeviceContext& ctx, const LoDTensor& x, void operator()(
const LoDTensor& out, const LoDTensor& dout, LoDTensor* dx); const DeviceContext& ctx, const LoDTensor& dout,
const framework::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
LoDTensor* dx);
}; };
template <typename T> template <typename T>
struct SequenceExpandFunctor<platform::CPUDeviceContext, T> { struct SequenceExpandFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext& context, const LoDTensor& x, void operator()(
LoDTensor* out) { const platform::CPUDeviceContext& context, const LoDTensor& x,
auto& out_lod = out->lod()[0]; const framework::Vector<size_t>& x_lod, /*expand source lod*/
framework::Vector<size_t> x_lod; const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
if (x.lod() == 1) { LoDTensor* out) {
x_lod = x.lod()[0];
} else {
x_lod.reserve(out_lod.size());
std::itoa(x_lod.begin(), x_lod.end(), 0); // fill 0 ~ out_lod.size()-1
}
int out_offset = 0; int out_offset = 0;
auto& eigen_place = *context.eigen_device(); auto& eigen_place = *context.eigen_device();
for (size_t i = 1; i < out_lod.size(); ++i) { for (size_t i = 1; i < ref_lod.size(); ++i) {
int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1]; int repeat_num = ref_lod[i] - ref_lod[i - 1];
int x_start = x_lod[i - 1]; int x_start = x_lod[i - 1];
int x_end = x_lod[i]; int x_end = x_lod[i];
int x_seq_len = x_end - x_start; int x_seq_len = x_end - x_start;
if (repeat_num > 0) { if (repeat_num > 0) {
auto x_sub_tensor = x->Slice(x_start, x_end); auto x_sub_tensor = x.Slice(x_start, x_end);
x_sub_tensor.Resize({1, x_sub_tensor.numel()}); x_sub_tensor.Resize({1, x_sub_tensor.numel()});
int out_start = out_offset; int out_start = out_offset;
if (x_lod.size() == 1) { if (out->lod().size() == 1) {
out_start = out_lod[0][out_offset]; out_start = out->lod()[0][out_offset];
} }
auto out_sub_tensor = auto out_sub_tensor =
out->Slice(out_start, out_start + x_seq_len * repeat_num); out->Slice(out_start, out_start + x_seq_len * repeat_num);
...@@ -71,6 +75,7 @@ struct SequenceExpandFunctor<platform::CPUDeviceContext, T> { ...@@ -71,6 +75,7 @@ struct SequenceExpandFunctor<platform::CPUDeviceContext, T> {
EigenMatrix<T>::From(x_sub_tensor) EigenMatrix<T>::From(x_sub_tensor)
.broadcast(Eigen::array<int, 2>({{repeat_num, 1}})); .broadcast(Eigen::array<int, 2>({{repeat_num, 1}}));
} }
out_offset += repeat_num;
} }
} }
}; };
...@@ -96,13 +101,10 @@ class SequenceExpandKernel : public framework::OpKernel<T> { ...@@ -96,13 +101,10 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
return; return;
} }
auto& out_lod = *out->mutable_lod();
// x lod level is at most 1. // x lod level is at most 1.
if (x_lod.size() == 0) { framework::Vector<size_t> out_lod;
out_lod = y_lod[ref_level]; if (x_lod.size() == 1) {
} else if (x_lod.size() == 1) { out_lod.push_back(0);
out_lod.resize(1);
out_lod[0] = {0};
int out_offset = 0; int out_offset = 0;
for (size_t i = 1; i < y_lod[ref_level].size(); ++i) { for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1]; int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
...@@ -110,14 +112,25 @@ class SequenceExpandKernel : public framework::OpKernel<T> { ...@@ -110,14 +112,25 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
int x_end = x_lod[0][i]; int x_end = x_lod[0][i];
int x_seq_len = x_end - x_start; int x_seq_len = x_end - x_start;
for (int j = 0; j < repeat_num; ++j) { for (int j = 0; j < repeat_num; ++j) {
out_lod[0].push_back(out_lod[0].back() + x_seq_len); out_lod.push_back(out_lod.back() + x_seq_len);
out_offset++; out_offset++;
} }
} }
// write lod to out if x has lod
auto& ref_lod = *out->mutable_lod();
ref_lod[0] = out_lod;
}
framework::Vector<size_t> ref_x_lod;
if (x->lod().size() == 1) {
ref_x_lod = x->lod()[0];
} else {
// x_lod doesn't has lod, use fake x lod, level = 0
ref_x_lod.resize(x->dims()[0] + 1);
std::iota(ref_x_lod.begin(), ref_x_lod.end(), 0);
} }
SequenceExpandFunctor<DeviceContext, T> functor; SequenceExpandFunctor<DeviceContext, T> functor;
functor(context.template device_context<DeviceContext>(), *x, out); functor(context.template device_context<DeviceContext>(), *x, ref_x_lod,
y_lod[ref_level], out);
} }
}; };
...@@ -135,32 +148,29 @@ class SequenceExpandKernel : public framework::OpKernel<T> { ...@@ -135,32 +148,29 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
* */ * */
template <typename T> template <typename T>
struct SequenceExpandGradFunctor<platform::CPUDeviceContext, T> { struct SequenceExpandGradFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext& context, const LoDTensor& x, void operator()(
const LoDTensor& out, const LoDTensor& dout, LoDTensor* dx) { const platform::CPUDeviceContext& context, const LoDTensor& dout,
auto& dev_ctx = context.template device_context<DeviceContext>(); const framework::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
math::SetConstant<DeviceContext, T> set_zero; LoDTensor* dx) {
set_zero(dev_ctx, g_x, static_cast<T>(0)); math::SetConstant<platform::CPUDeviceContext, T> set_zero;
set_zero(context, dx, static_cast<T>(0));
int g_out_offset = 0;
for (size_t i = 1; i < y_lod[ref_level].size(); ++i) { int dout_offset = 0;
int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1]; for (size_t i = 1; i < ref_lod.size(); ++i) {
int repeat_num = ref_lod[i] - ref_lod[i - 1];
if (repeat_num > 0) { if (repeat_num > 0) {
int x_start = i - 1; int x_start = x_lod[i - 1];
int x_end = i; int x_end = x_lod[i];
if (x_lod.size() == 1) {
x_start = x_lod[0][i - 1];
x_end = x_lod[0][i];
}
int x_seq_len = x_end - x_start; int x_seq_len = x_end - x_start;
auto g_x_sub = g_x->Slice(x_start, x_end); auto dx_sub = dx->Slice(x_start, x_end);
g_x_sub.Resize(flatten_to_1d(g_x_sub.dims())); dx_sub.Resize(flatten_to_1d(dx_sub.dims()));
int g_out_end = g_out_offset + repeat_num * x_seq_len; int dout_end = dout_offset + repeat_num * x_seq_len;
auto g_out_sub = g_out->Slice(g_out_offset, g_out_end); auto dout_sub = dout.Slice(dout_offset, dout_end);
g_out_sub.Resize({repeat_num, g_x_sub.dims()[0]}); dout_sub.Resize({repeat_num, dx_sub.dims()[0]});
math::ColwiseSum<DeviceContext, T> col_sum; math::ColwiseSum<platform::CPUDeviceContext, T> col_sum;
col_sum(dev_ctx, g_out_sub, &g_x_sub); col_sum(context, dout_sub, &dx_sub);
g_out_offset += repeat_num * x_seq_len; dout_offset += repeat_num * x_seq_len;
} }
} }
} }
...@@ -179,20 +189,26 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> { ...@@ -179,20 +189,26 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
g_x->mutable_data<T>(context.GetPlace()); g_x->mutable_data<T>(context.GetPlace());
g_x->set_lod(x->lod()); g_x->set_lod(x->lod());
auto& x_lod = x->lod();
auto& y_lod = y->lod(); auto& y_lod = y->lod();
if (ref_level == -1) ref_level = y_lod.size() - 1; if (ref_level == -1) ref_level = y_lod.size() - 1;
// just copy the gradient // just copy the gradient
if (y_lod[ref_level].size() <= 1) { if (y_lod[ref_level].size() <= 1) {
framework::TensorCopy(*g_out, context.GetPlace(), g_x); framework::TensorCopy(*g_out, context.GetPlace(), g_x);
return; return;
} }
framework::Vector<size_t> ref_x_lod;
framework::Vector<size_t> ref_lod = y_lod[ref_level];
if (x->lod().size() == 1) {
ref_x_lod = x->lod()[0];
} else {
// x_lod doesn't has lod, use fake x lod, level = 0
ref_x_lod.resize(x->dims()[0] + 1);
std::iota(ref_x_lod.begin(), ref_x_lod.end(), 0);
}
SequenceExpandGradFunctor<DeviceContext, T> functor; SequenceExpandGradFunctor<DeviceContext, T> functor;
functor(context.template device_context<DeviceContext>(), *x, *y, *g_out, functor(context.template device_context<DeviceContext>(), *g_out, ref_x_lod,
g_x); ref_lod, g_x);
} }
}; };
......
...@@ -19,14 +19,8 @@ from op_test import OpTest ...@@ -19,14 +19,8 @@ from op_test import OpTest
class TestSequenceExpand(OpTest): class TestSequenceExpand(OpTest):
def set_data(self): def set_data(self):
x = [i / 10.0 for i in range(3)] x_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32')
y = [i / 10.0 for i in range(8)] y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32')
x_data = np.array(x).reshape(3, 1).astype('float32')
y_data = np.array(y).reshape(8, 1).astype('float32')
print(x_data)
print(y_data)
# x_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32')
# y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32')
y_lod = [[0, 1, 4, 8]] y_lod = [[0, 1, 4, 8]]
self.inputs = {'X': x_data, 'Y': (y_data, y_lod)} self.inputs = {'X': x_data, 'Y': (y_data, y_lod)}
...@@ -53,8 +47,10 @@ class TestSequenceExpand(OpTest): ...@@ -53,8 +47,10 @@ class TestSequenceExpand(OpTest):
x_len = x_idx[i] - x_idx[i - 1] x_len = x_idx[i] - x_idx[i - 1]
if repeat_num > 0: if repeat_num > 0:
x_sub = x_data[x_idx[i - 1]:x_idx[i], :] x_sub = x_data[x_idx[i - 1]:x_idx[i], :]
x_sub = np.repeat(x_sub, repeat_num, axis=0) stacked_x_sub = x_sub
out = np.vstack((out, x_sub)) for r in range(repeat_num - 1):
stacked_x_sub = np.vstack((stacked_x_sub, x_sub))
out = np.vstack((out, stacked_x_sub))
if x_lod is not None: if x_lod is not None:
for j in xrange(repeat_num): for j in xrange(repeat_num):
out_lod[0].append(out_lod[0][-1] + x_len) out_lod[0].append(out_lod[0][-1] + x_len)
...@@ -107,11 +103,11 @@ class TestSequenceExpandCase3(TestSequenceExpand): ...@@ -107,11 +103,11 @@ class TestSequenceExpandCase3(TestSequenceExpand):
class TestSequenceExpandCase4(TestSequenceExpand): class TestSequenceExpandCase4(TestSequenceExpand):
def set_data(self): def set_data(self):
data = [0.1, 0.3, 0.2, 0.15, 0.25, 0.2, 0.15, 0.25, 0.1, 0.3] data = np.random.uniform(0.1, 1, [5 * 2, 1])
x_data = np.array(data).reshape([5, 2]).astype('float32') x_data = np.array(data).reshape([5, 2]).astype('float32')
x_lod = [[0, 2, 5]] x_lod = [[0, 2, 5]]
y_data = np.random.uniform(0.1, 1, [2, 1]).astype('float32') y_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32')
y_lod = [[0, 1, 2], [0, 1, 2]] y_lod = [[0, 1, 3], [0, 1, 3]]
self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)} self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册