diff --git a/paddle/testing/TestUtil.cpp b/paddle/testing/TestUtil.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c691fe26255914811c8861cff80495c821990179 --- /dev/null +++ b/paddle/testing/TestUtil.cpp @@ -0,0 +1,219 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "TestUtil.h" +#include +#include "paddle/math/SparseMatrix.h" + +DEFINE_int32(fixed_seq_length, 0, "Produce some sequence of fixed length"); + +namespace paddle { + +std::string randStr(const int len) { + std::string str = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + std::string s = ""; + for (int i = 0; i < len; ++i) s += str[(rand() % 62)]; // NOLINT + return s; +} + +MatrixPtr makeRandomSparseMatrix(size_t height, + size_t width, + bool withValue, + bool useGpu, + bool equalNnzPerSample) { + std::vector ids(height); + std::vector indices(height + 1); + indices[0] = 0; + + std::function randomer = [] { return uniformRandom(10); }; + if (equalNnzPerSample) { + size_t n = 0; + do { + n = uniformRandom(10); + } while (!n); + randomer = [=] { return n; }; + } + for (size_t i = 0; i < height; ++i) { + indices[i + 1] = indices[i] + std::min(randomer(), width); + ids[i] = i; + } + + if (!withValue) { + std::vector data; + data.resize(indices[height] - indices[0]); + for (size_t i = 0; i < data.size(); ++i) { + data[i].col = uniformRandom(width); + } + auto mat = Matrix::createSparseMatrix( + height, width, data.size(), NO_VALUE, SPARSE_CSR, false, useGpu); + if (useGpu) { + std::dynamic_pointer_cast(mat)->copyFrom( + ids.data(), indices.data(), data.data(), HPPL_STREAM_DEFAULT); + } else { + std::dynamic_pointer_cast(mat)->copyFrom( + ids.data(), indices.data(), data.data()); + } + return mat; + } else { + std::vector data; + data.resize(indices[height] - indices[0]); + for (size_t i = 0; i < data.size(); ++i) { + data[i].col = uniformRandom(width); + data[i].value = rand() / static_cast(RAND_MAX); // NOLINT + } + auto mat = Matrix::createSparseMatrix( + height, width, data.size(), FLOAT_VALUE, SPARSE_CSR, false, useGpu); + if (useGpu) { + std::dynamic_pointer_cast(mat)->copyFrom( + ids.data(), indices.data(), data.data(), HPPL_STREAM_DEFAULT); + } else { + std::dynamic_pointer_cast(mat)->copyFrom( + ids.data(), indices.data(), data.data()); + } + return mat; + } +} + +void generateSequenceStartPositions(size_t batchSize, + IVectorPtr& sequenceStartPositions) { + ICpuGpuVectorPtr gpuCpuVec; + generateSequenceStartPositions(batchSize, gpuCpuVec); + sequenceStartPositions = gpuCpuVec->getMutableVector(false); +} + +void generateSequenceStartPositions(size_t batchSize, + ICpuGpuVectorPtr& sequenceStartPositions) { + int numSeqs; + if (FLAGS_fixed_seq_length != 0) { + numSeqs = std::ceil((float)batchSize / (float)FLAGS_fixed_seq_length); + } else { + numSeqs = batchSize / 10 + 1; + } + sequenceStartPositions = + ICpuGpuVector::create(numSeqs + 1, /* useGpu= */ false); + int* buf = sequenceStartPositions->getMutableData(false); + int64_t pos = 0; + int len = FLAGS_fixed_seq_length; + int maxLen = 2 * batchSize / numSeqs; + for (int i = 0; i < numSeqs; ++i) { + if (FLAGS_fixed_seq_length == 0) { + len = uniformRandom( + std::min(maxLen, batchSize - pos - numSeqs + i)) + + 1; + } + buf[i] = pos; + pos += len; + VLOG(1) << " len=" << len; + } + buf[numSeqs] = batchSize; +} + +void generateSubSequenceStartPositions( + const ICpuGpuVectorPtr& sequenceStartPositions, + ICpuGpuVectorPtr& subSequenceStartPositions) { + int numSeqs = sequenceStartPositions->getSize() - 1; + const int* buf = sequenceStartPositions->getData(false); + int numOnes = 0; + for (int i = 0; i < numSeqs; ++i) { + if (buf[i + 1] - buf[i] == 1) { + ++numOnes; + } + } + // each seq has two sub-seq except length 1 + int numSubSeqs = numSeqs * 2 - numOnes; + subSequenceStartPositions = + ICpuGpuVector::create(numSubSeqs + 1, /* useGpu= */ false); + int* subBuf = subSequenceStartPositions->getMutableData(false); + int j = 0; + for (int i = 0; i < numSeqs; ++i) { + if (buf[i + 1] - buf[i] == 1) { + subBuf[j++] = buf[i]; + } else { + int len = uniformRandom(buf[i + 1] - buf[i] - 1) + 1; + subBuf[j++] = buf[i]; + subBuf[j++] = buf[i] + len; + } + } + subBuf[j] = buf[numSeqs]; +} + +void generateMDimSequenceData(const IVectorPtr& sequenceStartPositions, + IVectorPtr& cpuSequenceDims) { + /* generate sequences with 2 dims */ + int numSeqs = sequenceStartPositions->getSize() - 1; + int numDims = 2; + + cpuSequenceDims = IVector::create(numSeqs * numDims, /* useGpu= */ false); + int* bufStarts = sequenceStartPositions->getData(); + int* bufDims = cpuSequenceDims->getData(); + + for (int i = 0; i < numSeqs; i++) { + int len = bufStarts[i + 1] - bufStarts[i]; + /* get width and height randomly */ + std::vector dimVec; + for (int j = 0; j < len; j++) { + if (len % (j + 1) == 0) { + dimVec.push_back(1); + } + } + int idx = rand() % dimVec.size(); // NOLINT use rand_r + bufDims[i * numDims] = dimVec[idx]; + bufDims[i * numDims + 1] = len / dimVec[idx]; + } +} + +void generateMDimSequenceData(const ICpuGpuVectorPtr& sequenceStartPositions, + IVectorPtr& cpuSequenceDims) { + /* generate sequences with 2 dims */ + int numSeqs = sequenceStartPositions->getSize() - 1; + int numDims = 2; + + cpuSequenceDims = IVector::create(numSeqs * numDims, /* useGpu= */ false); + const int* bufStarts = sequenceStartPositions->getData(false); + int* bufDims = cpuSequenceDims->getData(); + + for (int i = 0; i < numSeqs; i++) { + int len = bufStarts[i + 1] - bufStarts[i]; + /* get width and height randomly */ + std::vector dimVec; + for (int j = 0; j < len; j++) { + if (len % (j + 1) == 0) { + dimVec.push_back(1); + } + } + int idx = rand() % dimVec.size(); // NOLINT use rand_r + bufDims[i * numDims] = dimVec[idx]; + bufDims[i * numDims + 1] = len / dimVec[idx]; + } +} + +void checkMatrixEqual(const MatrixPtr& a, const MatrixPtr& b) { + EXPECT_EQ(a->getWidth(), b->getWidth()); + EXPECT_EQ(a->getHeight(), b->getHeight()); + EXPECT_EQ(a->isTransposed(), b->isTransposed()); + for (size_t r = 0; r < a->getHeight(); ++r) { + for (size_t c = 0; c < a->getWidth(); ++c) { + EXPECT_FLOAT_EQ(a->getElement(r, c), b->getElement(r, c)); + } + } +} + +void checkVectorEqual(const IVectorPtr& a, const IVectorPtr& b) { + EXPECT_EQ(a->getSize(), b->getSize()); + for (size_t r = 0; r < a->getSize(); ++r) { + EXPECT_FLOAT_EQ(a->get(r), b->get(r)); + } +} +} // namespace paddle diff --git a/paddle/testing/TestUtil.h b/paddle/testing/TestUtil.h new file mode 100644 index 0000000000000000000000000000000000000000..ec86469aebbafbf5406a21e6825eda6c105a6b9d --- /dev/null +++ b/paddle/testing/TestUtil.h @@ -0,0 +1,78 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/math/Matrix.h" + +namespace paddle { + +std::string randStr(const int len); + +inline int uniformRandom(int n) { return n == 0 ? 0 : rand() % n; } + +inline bool approximatelyEqual(float a, float b, float epsilon) { + return fabs(a - b) <= ((fabs(a) < fabs(b) ? fabs(b) : fabs(a)) * epsilon); +} + +MatrixPtr makeRandomSparseMatrix(size_t height, + size_t width, + bool withValue, + bool useGpu, + bool equalNnzPerSample = false); + +/** + * @brief generate sequenceStartPositions for INPUT_SEQUENCE_DATA, + * INPUT_HASSUB_SEQUENCE_DATA and INPUT_SEQUENCE_LABEL + * + * @param batchSize batchSize + * sequenceStartPositions[out] generation output + */ +void generateSequenceStartPositions(size_t batchSize, + IVectorPtr& sequenceStartPositions); + +void generateSequenceStartPositions(size_t batchSize, + ICpuGpuVectorPtr& sequenceStartPositions); + +/** + * @brief generate subSequenceStartPositions for INPUT_HASSUB_SEQUENCE_DATA + * according to sequenceStartPositions + * + * @param sequenceStartPositions[in] input + * subSequenceStartPositions[out] generation output + */ +void generateSubSequenceStartPositions(const IVectorPtr& sequenceStartPositions, + IVectorPtr& subSequenceStartPositions); + +void generateSubSequenceStartPositions( + const ICpuGpuVectorPtr& sequenceStartPositions, + ICpuGpuVectorPtr& subSequenceStartPositions); + +/** + * @brief generate cpuSequenceDims for INPUT_SEQUENCE_MDIM_DATA according to + * sequenceStartPositions + * + * @param sequenceStartPositions[in] input + * cpuSequenceDims[out] generation output + */ +void generateMDimSequenceData(const IVectorPtr& sequenceStartPositions, + IVectorPtr& cpuSequenceDims); +void generateMDimSequenceData(const ICpuGpuVectorPtr& sequenceStartPositions, + IVectorPtr& cpuSequenceDims); + +void checkMatrixEqual(const MatrixPtr& a, const MatrixPtr& b); + +void checkVectorEqual(const IVectorPtr& a, const IVectorPtr& b); +} // namespace paddle