提交 80c68d38 编写于 作者: L Luo Tao

clang format .cc .h .cpp .c and .hpp file

上级 ad0c144e
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "PaddleAPI.h" #include "PaddleAPI.h"
#include "PaddleAPIPrivate.h" #include "PaddleAPIPrivate.h"
...@@ -112,7 +111,7 @@ void Arguments::setSlotSequenceStartPositions(size_t idx, ...@@ -112,7 +111,7 @@ void Arguments::setSlotSequenceStartPositions(size_t idx,
} }
void Arguments::setSlotSubSequenceStartPositions( void Arguments::setSlotSubSequenceStartPositions(
size_t idx, IVector *vec) throw(RangeError) { size_t idx, IVector* vec) throw(RangeError) {
auto& a = m->getArg(idx); auto& a = m->getArg(idx);
auto& v = m->cast<paddle::IVector>(vec->getSharedPtr()); auto& v = m->cast<paddle::IVector>(vec->getSharedPtr());
a.subSequenceStartPositions = std::make_shared<paddle::ICpuGpuVector>(v); a.subSequenceStartPositions = std::make_shared<paddle::ICpuGpuVector>(v);
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "PaddleAPI.h" #include "PaddleAPI.h"
#include "PaddleAPIPrivate.h" #include "PaddleAPIPrivate.h"
#include "paddle/trainer/Trainer.h" #include "paddle/trainer/Trainer.h"
...@@ -44,8 +43,7 @@ TrainerConfig* TrainerConfig::createFromTrainerConfigFile( ...@@ -44,8 +43,7 @@ TrainerConfig* TrainerConfig::createFromTrainerConfigFile(
return retv; return retv;
} }
TrainerConfig* TrainerConfig::createFromProtoString( TrainerConfig* TrainerConfig::createFromProtoString(const std::string& str) {
const std::string& str) {
auto retv = new TrainerConfig(); auto retv = new TrainerConfig();
paddle::TrainerConfig trainerConfigProto; paddle::TrainerConfig trainerConfigProto;
auto conf = std::make_shared<paddle::TrainerConfigHelper>(trainerConfigProto); auto conf = std::make_shared<paddle::TrainerConfigHelper>(trainerConfigProto);
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "PaddleAPI.h" #include "PaddleAPI.h"
#include "PaddleAPIPrivate.h" #include "PaddleAPIPrivate.h"
...@@ -27,7 +26,8 @@ GradientMachine::GradientMachine() : m(new GradientMachinePrivate()) {} ...@@ -27,7 +26,8 @@ GradientMachine::GradientMachine() : m(new GradientMachinePrivate()) {}
GradientMachine::~GradientMachine() { delete m; } GradientMachine::~GradientMachine() { delete m; }
GradientMachine* GradientMachine::createFromPaddleModelPtr( GradientMachine* GradientMachine::createFromPaddleModelPtr(
const void* confPtr, GradientMatchineCreateMode mode, const void* confPtr,
GradientMatchineCreateMode mode,
const std::vector<int>& types) { const std::vector<int>& types) {
auto& conf = *(const paddle::ModelConfig*)(confPtr); auto& conf = *(const paddle::ModelConfig*)(confPtr);
std::vector<ParameterType> realTypes; std::vector<ParameterType> realTypes;
...@@ -44,7 +44,8 @@ GradientMachine* GradientMachine::createFromPaddleModelPtr( ...@@ -44,7 +44,8 @@ GradientMachine* GradientMachine::createFromPaddleModelPtr(
} }
GradientMachine* GradientMachine::createByConfigProtoStr( GradientMachine* GradientMachine::createByConfigProtoStr(
const std::string& protoStr, GradientMatchineCreateMode mode, const std::string& protoStr,
GradientMatchineCreateMode mode,
const std::vector<int>& types) { const std::vector<int>& types) {
paddle::ModelConfig conf; paddle::ModelConfig conf;
conf.ParseFromString(protoStr); conf.ParseFromString(protoStr);
...@@ -56,13 +57,15 @@ GradientMachine* GradientMachine::createByConfigProtoStr( ...@@ -56,13 +57,15 @@ GradientMachine* GradientMachine::createByConfigProtoStr(
} }
GradientMachine* GradientMachine::createByModelConfig( GradientMachine* GradientMachine::createByModelConfig(
ModelConfig* conf, GradientMatchineCreateMode mode, ModelConfig* conf,
GradientMatchineCreateMode mode,
const std::vector<int>& types) { const std::vector<int>& types) {
auto confPtr = &conf->m->conf->getModelConfig(); auto confPtr = &conf->m->conf->getModelConfig();
return GradientMachine::createFromPaddleModelPtr(confPtr, mode, types); return GradientMachine::createFromPaddleModelPtr(confPtr, mode, types);
} }
void GradientMachine::forward(const Arguments& inArgs, Arguments* outArgs, void GradientMachine::forward(const Arguments& inArgs,
Arguments* outArgs,
PassType passType) { PassType passType) {
auto& in = auto& in =
m->cast<std::vector<paddle::Argument>>(inArgs.getInternalArgumentsPtr()); m->cast<std::vector<paddle::Argument>>(inArgs.getInternalArgumentsPtr());
...@@ -99,7 +102,8 @@ void GradientMachine::backward(const UpdateCallback& callback) { ...@@ -99,7 +102,8 @@ void GradientMachine::backward(const UpdateCallback& callback) {
} }
void GradientMachine::forwardBackward(const Arguments& inArgs, void GradientMachine::forwardBackward(const Arguments& inArgs,
Arguments* outArgs, PassType passType, Arguments* outArgs,
PassType passType,
const UpdateCallback& callback) { const UpdateCallback& callback) {
auto& in = auto& in =
m->cast<std::vector<paddle::Argument>>(inArgs.getInternalArgumentsPtr()); m->cast<std::vector<paddle::Argument>>(inArgs.getInternalArgumentsPtr());
...@@ -129,7 +133,7 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) { ...@@ -129,7 +133,7 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) {
void GradientMachine::randParameters() { m->machine->randParameters(); } void GradientMachine::randParameters() { m->machine->randParameters(); }
Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const
throw(UnsupportError) { throw(UnsupportError) {
auto nn = std::dynamic_pointer_cast<paddle::NeuralNetwork>(m->machine); auto nn = std::dynamic_pointer_cast<paddle::NeuralNetwork>(m->machine);
if (nn) { if (nn) {
auto mat = nn->getLayerOutput(layerName); auto mat = nn->getLayerOutput(layerName);
...@@ -140,8 +144,11 @@ Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const ...@@ -140,8 +144,11 @@ Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const
} }
SequenceGenerator* GradientMachine::asSequenceGenerator( SequenceGenerator* GradientMachine::asSequenceGenerator(
const std::vector<std::string>& dict, size_t begin_id, size_t end_id, const std::vector<std::string>& dict,
size_t max_length, size_t beam_size) { size_t begin_id,
size_t end_id,
size_t max_length,
size_t beam_size) {
SequenceGenerator* r = SequenceGenerator* r =
SequenceGenerator::createByGradientMachineSharedPtr(&m->machine); SequenceGenerator::createByGradientMachineSharedPtr(&m->machine);
r->setDict(dict); r->setDict(dict);
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "PaddleAPI.h" #include "PaddleAPI.h"
...@@ -23,7 +22,8 @@ limitations under the License. */ ...@@ -23,7 +22,8 @@ limitations under the License. */
template <typename T1, typename T2> template <typename T1, typename T2>
void staticCastVector(std::vector<T2>* dest, const std::vector<T1>& src) { void staticCastVector(std::vector<T2>* dest, const std::vector<T1>& src) {
dest->resize(src.size()); dest->resize(src.size());
std::transform(src.begin(), src.end(), dest->begin(), [](T1 t){ std::transform(src.begin(),
return static_cast<T2>(t); src.end(),
}); dest->begin(),
[](T1 t) { return static_cast<T2>(t); });
} }
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "PaddleAPI.h" #include "PaddleAPI.h"
#include "paddle/math/Matrix.h" #include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h" #include "paddle/math/SparseMatrix.h"
...@@ -44,17 +43,21 @@ Matrix* Matrix::createZero(size_t height, size_t width, bool useGpu) { ...@@ -44,17 +43,21 @@ Matrix* Matrix::createZero(size_t height, size_t width, bool useGpu) {
return m; return m;
} }
Matrix* Matrix::createDense(const std::vector<float>& data, size_t height, Matrix* Matrix::createDense(const std::vector<float>& data,
size_t width, bool useGpu) { size_t height,
size_t width,
bool useGpu) {
auto m = new Matrix(); auto m = new Matrix();
m->m->mat = paddle::Matrix::create(height, width, useGpu); m->m->mat = paddle::Matrix::create(height, width, useGpu);
m->m->mat->copyFrom(data.data(), data.size()); m->m->mat->copyFrom(data.data(), data.size());
return m; return m;
} }
Matrix* Matrix::createDenseFromNumpy(float* data, int dim1, int dim2, Matrix* Matrix::createDenseFromNumpy(float* data,
bool copy, bool useGpu) int dim1,
throw (UnsupportError) { int dim2,
bool copy,
bool useGpu) throw(UnsupportError) {
if (useGpu) { if (useGpu) {
/// Gpu mode only supports copy=True /// Gpu mode only supports copy=True
if (!copy) { if (!copy) {
...@@ -66,7 +69,9 @@ Matrix* Matrix::createDenseFromNumpy(float* data, int dim1, int dim2, ...@@ -66,7 +69,9 @@ Matrix* Matrix::createDenseFromNumpy(float* data, int dim1, int dim2,
} }
} }
Matrix* Matrix::createCpuDenseFromNumpy(float* data, int dim1, int dim2, Matrix* Matrix::createCpuDenseFromNumpy(float* data,
int dim1,
int dim2,
bool copy) { bool copy) {
auto m = new Matrix(); auto m = new Matrix();
if (copy) { if (copy) {
...@@ -85,12 +90,20 @@ Matrix* Matrix::createGpuDenseFromNumpy(float* data, int dim1, int dim2) { ...@@ -85,12 +90,20 @@ Matrix* Matrix::createGpuDenseFromNumpy(float* data, int dim1, int dim2) {
return m; return m;
} }
Matrix* Matrix::createSparse(size_t height, size_t width, size_t nnz, Matrix* Matrix::createSparse(size_t height,
bool isNonVal, bool isTrans, bool useGpu) { size_t width,
size_t nnz,
bool isNonVal,
bool isTrans,
bool useGpu) {
auto m = new Matrix(); auto m = new Matrix();
m->m->mat = paddle::Matrix::createSparseMatrix( m->m->mat = paddle::Matrix::createSparseMatrix(
height, width, nnz, isNonVal ? paddle::NO_VALUE : paddle::FLOAT_VALUE, height,
isTrans, useGpu); width,
nnz,
isNonVal ? paddle::NO_VALUE : paddle::FLOAT_VALUE,
isTrans,
useGpu);
return m; return m;
} }
...@@ -221,7 +234,8 @@ FloatArray Matrix::getData() const { ...@@ -221,7 +234,8 @@ FloatArray Matrix::getData() const {
} }
void Matrix::sparseCopyFrom( void Matrix::sparseCopyFrom(
const std::vector<int>& rows, const std::vector<int>& cols, const std::vector<int>& rows,
const std::vector<int>& cols,
const std::vector<float>& vals) throw(UnsupportError) { const std::vector<float>& vals) throw(UnsupportError) {
auto cpuSparseMat = auto cpuSparseMat =
std::dynamic_pointer_cast<paddle::CpuSparseMatrix>(m->mat); std::dynamic_pointer_cast<paddle::CpuSparseMatrix>(m->mat);
...@@ -240,7 +254,8 @@ void Matrix::sparseCopyFrom( ...@@ -240,7 +254,8 @@ void Matrix::sparseCopyFrom(
void* Matrix::getSharedPtr() const { return &m->mat; } void* Matrix::getSharedPtr() const { return &m->mat; }
void Matrix::toNumpyMatInplace(float** view_data, int* dim1, void Matrix::toNumpyMatInplace(float** view_data,
int* dim1,
int* dim2) throw(UnsupportError) { int* dim2) throw(UnsupportError) {
auto cpuMat = std::dynamic_pointer_cast<paddle::CpuMatrix>(m->mat); auto cpuMat = std::dynamic_pointer_cast<paddle::CpuMatrix>(m->mat);
if (cpuMat) { if (cpuMat) {
...@@ -251,7 +266,8 @@ void Matrix::toNumpyMatInplace(float** view_data, int* dim1, ...@@ -251,7 +266,8 @@ void Matrix::toNumpyMatInplace(float** view_data, int* dim1,
throw UnsupportError(); throw UnsupportError();
} }
} }
void Matrix::copyToNumpyMat(float** view_m_data, int* dim1, void Matrix::copyToNumpyMat(float** view_m_data,
int* dim1,
int* dim2) throw(UnsupportError) { int* dim2) throw(UnsupportError) {
static_assert(sizeof(paddle::real) == sizeof(float), static_assert(sizeof(paddle::real) == sizeof(float),
"Currently PaddleAPI only support for single " "Currently PaddleAPI only support for single "
...@@ -269,8 +285,8 @@ void Matrix::copyToNumpyMat(float** view_m_data, int* dim1, ...@@ -269,8 +285,8 @@ void Matrix::copyToNumpyMat(float** view_m_data, int* dim1,
} else if (auto gpuMat = dynamic_cast<paddle::GpuMatrix*>(m->mat.get())) { } else if (auto gpuMat = dynamic_cast<paddle::GpuMatrix*>(m->mat.get())) {
auto src = gpuMat->getData(); auto src = gpuMat->getData();
auto dest = *view_m_data; auto dest = *view_m_data;
hl_memcpy_device2host(dest, src, hl_memcpy_device2host(
sizeof(paddle::real) * (*dim1) * (*dim2)); dest, src, sizeof(paddle::real) * (*dim1) * (*dim2));
} else { } else {
LOG(WARNING) << "Unexpected Situation"; LOG(WARNING) << "Unexpected Situation";
throw UnsupportError(); throw UnsupportError();
...@@ -278,7 +294,8 @@ void Matrix::copyToNumpyMat(float** view_m_data, int* dim1, ...@@ -278,7 +294,8 @@ void Matrix::copyToNumpyMat(float** view_m_data, int* dim1,
} }
} }
void Matrix::copyFromNumpyMat(float* data, int dim1, void Matrix::copyFromNumpyMat(float* data,
int dim1,
int dim2) throw(UnsupportError, RangeError) { int dim2) throw(UnsupportError, RangeError) {
if (isSparse()) { if (isSparse()) {
throw UnsupportError(); throw UnsupportError();
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <stddef.h> #include <stddef.h>
...@@ -61,8 +60,8 @@ class RangeError {}; ...@@ -61,8 +60,8 @@ class RangeError {};
/// Not support Error, such as access GPU memory directly, etc. /// Not support Error, such as access GPU memory directly, etc.
class UnsupportError : public std::runtime_error { class UnsupportError : public std::runtime_error {
public: public:
UnsupportError() : std::runtime_error(" ") {}; UnsupportError() : std::runtime_error(" "){};
UnsupportError(const std::string& message) : std::runtime_error(message) {}; UnsupportError(const std::string& message) : std::runtime_error(message){};
}; };
/// This type will map to python's list of float. /// This type will map to python's list of float.
...@@ -112,7 +111,8 @@ public: ...@@ -112,7 +111,8 @@ public:
/** /**
* Create A Matrix with height,width, which is filled by zero. * Create A Matrix with height,width, which is filled by zero.
*/ */
static Matrix* createZero(size_t height, size_t width, static Matrix* createZero(size_t height,
size_t width,
bool useGpu = isUsingGpu()); bool useGpu = isUsingGpu());
/** /**
...@@ -124,8 +124,11 @@ public: ...@@ -124,8 +124,11 @@ public:
* *
* @note the default sparse type is SPARSE_CSR. * @note the default sparse type is SPARSE_CSR.
*/ */
static Matrix* createSparse(size_t height, size_t width, size_t nnz, static Matrix* createSparse(size_t height,
bool isNonVal = true, bool trans = false, size_t width,
size_t nnz,
bool isNonVal = true,
bool trans = false,
bool useGpu = isUsingGpu()); bool useGpu = isUsingGpu());
/** /**
...@@ -134,13 +137,17 @@ public: ...@@ -134,13 +137,17 @@ public:
* @param data list of float should be passed in python. * @param data list of float should be passed in python.
* @note the value will be copy into a new matrix. * @note the value will be copy into a new matrix.
*/ */
static Matrix* createDense(const std::vector<float>& data, size_t height, static Matrix* createDense(const std::vector<float>& data,
size_t width, bool useGpu = isUsingGpu()); size_t height,
size_t width,
static Matrix* createDenseFromNumpy(float* data, int dim1, int dim2, bool useGpu = isUsingGpu());
bool copy = true,
bool useGpu = isUsingGpu()) static Matrix* createDenseFromNumpy(
throw (UnsupportError); float* data,
int dim1,
int dim2,
bool copy = true,
bool useGpu = isUsingGpu()) throw(UnsupportError);
/** /**
* Create Cpu Dense Matrix from numpy matrix, dtype=float32 * Create Cpu Dense Matrix from numpy matrix, dtype=float32
...@@ -151,7 +158,9 @@ public: ...@@ -151,7 +158,9 @@ public:
* @param copy true if copy into a new matrix, false will create * @param copy true if copy into a new matrix, false will create
* matrix inplace. * matrix inplace.
*/ */
static Matrix* createCpuDenseFromNumpy(float* data, int dim1, int dim2, static Matrix* createCpuDenseFromNumpy(float* data,
int dim1,
int dim2,
bool copy = false); bool copy = false);
/// Create Gpu Dense Matrix from numpy matrix, dtype=float32 /// Create Gpu Dense Matrix from numpy matrix, dtype=float32
...@@ -171,11 +180,13 @@ public: ...@@ -171,11 +180,13 @@ public:
* numpy_mat = m.toNumpyMat() * numpy_mat = m.toNumpyMat()
* @endcode * @endcode
*/ */
void toNumpyMatInplace(float** view_data, int* dim1, void toNumpyMatInplace(float** view_data,
int* dim1,
int* dim2) throw(UnsupportError); int* dim2) throw(UnsupportError);
/// Copy To numpy mat. /// Copy To numpy mat.
void copyToNumpyMat(float** view_m_data, int* dim1, void copyToNumpyMat(float** view_m_data,
int* dim1,
int* dim2) throw(UnsupportError); int* dim2) throw(UnsupportError);
/// Copy From Numpy Mat /// Copy From Numpy Mat
...@@ -248,15 +259,18 @@ public: ...@@ -248,15 +259,18 @@ public:
static Vector* create(const std::vector<float>& data, static Vector* create(const std::vector<float>& data,
bool useGpu = isUsingGpu()); bool useGpu = isUsingGpu());
static Vector* createVectorFromNumpy(float* data, int dim, bool copy = true, static Vector* createVectorFromNumpy(
bool useGpu = isUsingGpu()) float* data,
throw (UnsupportError); int dim,
bool copy = true,
bool useGpu = isUsingGpu()) throw(UnsupportError);
/** /**
* Create Cpu Vector from numpy array, which dtype=float32 * Create Cpu Vector from numpy array, which dtype=float32
* *
* If copy is false, it will create vector inplace. * If copy is false, it will create vector inplace.
*/ */
static Vector* createCpuVectorFromNumpy(float* data, int dim, static Vector* createCpuVectorFromNumpy(float* data,
int dim,
bool copy = false); bool copy = false);
/// Create Gpu Vector from numpy array, which dtype=float32 /// Create Gpu Vector from numpy array, which dtype=float32
...@@ -312,16 +326,19 @@ public: ...@@ -312,16 +326,19 @@ public:
static IVector* create(const std::vector<int>& data, static IVector* create(const std::vector<int>& data,
bool useGpu = isUsingGpu()); bool useGpu = isUsingGpu());
static IVector* createVectorFromNumpy(int* data, int dim, bool copy = true, static IVector* createVectorFromNumpy(
bool useGpu = isUsingGpu()) int* data,
throw (UnsupportError); int dim,
bool copy = true,
bool useGpu = isUsingGpu()) throw(UnsupportError);
/** /**
* Create Cpu IVector from numpy array, which dtype=int32 * Create Cpu IVector from numpy array, which dtype=int32
* *
* If copy is false, it will create vector inplace * If copy is false, it will create vector inplace
*/ */
static IVector* createCpuVectorFromNumpy(int* data, int dim, static IVector* createCpuVectorFromNumpy(int* data,
int dim,
bool copy = false); bool copy = false);
/** /**
* Create Gpu IVector from numpy array, which dtype=int32 * Create Gpu IVector from numpy array, which dtype=int32
...@@ -605,7 +622,8 @@ class ParameterTraverseCallback { ...@@ -605,7 +622,8 @@ class ParameterTraverseCallback {
public: public:
~ParameterTraverseCallback(); ~ParameterTraverseCallback();
void apply(const std::vector<Vector*>& vecs, const ParameterConfig& config, void apply(const std::vector<Vector*>& vecs,
const ParameterConfig& config,
size_t sparseId); size_t sparseId);
private: private:
...@@ -638,7 +656,8 @@ public: ...@@ -638,7 +656,8 @@ public:
void finishBatch(); void finishBatch();
void update(const std::vector<Vector*>& vecs, const ParameterConfig& conf, void update(const std::vector<Vector*>& vecs,
const ParameterConfig& conf,
size_t sparseId = NO_SPARSE_ID); size_t sparseId = NO_SPARSE_ID);
std::vector<int> getParameterTypes() const; std::vector<int> getParameterTypes() const;
...@@ -678,7 +697,8 @@ public: ...@@ -678,7 +697,8 @@ public:
* model config by TrainerConfig * model config by TrainerConfig
*/ */
static GradientMachine* createByModelConfig( static GradientMachine* createByModelConfig(
ModelConfig* conf, GradientMatchineCreateMode mode = CREATE_MODE_NORMAL, ModelConfig* conf,
GradientMatchineCreateMode mode = CREATE_MODE_NORMAL,
const std::vector<int>& parameterTypes = defaultParamTypes); const std::vector<int>& parameterTypes = defaultParamTypes);
/** /**
...@@ -701,7 +721,8 @@ public: ...@@ -701,7 +721,8 @@ public:
/** /**
* Combine forward/backward * Combine forward/backward
*/ */
void forwardBackward(const Arguments& inArgs, Arguments* outArgs, void forwardBackward(const Arguments& inArgs,
Arguments* outArgs,
PassType passType, PassType passType,
const UpdateCallback& callback = UpdateCallback()); const UpdateCallback& callback = UpdateCallback());
...@@ -722,14 +743,17 @@ public: ...@@ -722,14 +743,17 @@ public:
*/ */
SequenceGenerator* asSequenceGenerator( SequenceGenerator* asSequenceGenerator(
const std::vector<std::string>& dict = std::vector<std::string>(), const std::vector<std::string>& dict = std::vector<std::string>(),
size_t begin_id = 0UL, size_t end_id = 0UL, size_t max_length = 100UL, size_t begin_id = 0UL,
size_t end_id = 0UL,
size_t max_length = 100UL,
size_t beam_size = -1UL); size_t beam_size = -1UL);
private: private:
GradientMachinePrivate* m; GradientMachinePrivate* m;
static GradientMachine* createFromPaddleModelPtr( static GradientMachine* createFromPaddleModelPtr(
const void* confPtr, GradientMatchineCreateMode mode, const void* confPtr,
GradientMatchineCreateMode mode,
const std::vector<int>& types); const std::vector<int>& types);
// Not to use c++ 11 init-list, so we use static var as function default arg. // Not to use c++ 11 init-list, so we use static var as function default arg.
...@@ -751,8 +775,8 @@ public: ...@@ -751,8 +775,8 @@ public:
/// Create A Trainer By TrainerConfig. using paddle command line. /// Create A Trainer By TrainerConfig. using paddle command line.
static Trainer* createByCommandLine() throw(IOError); static Trainer* createByCommandLine() throw(IOError);
static Trainer* create(TrainerConfig* optConfig, GradientMachine* gm) static Trainer* create(TrainerConfig* optConfig,
throw(IOError); GradientMachine* gm) throw(IOError);
/// Start training /// Start training
void startTrain(); void startTrain();
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "PaddleAPI.h" #include "PaddleAPI.h"
#include "paddle/parameter/Parameter.h" #include "paddle/parameter/Parameter.h"
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "PaddleAPI.h" #include "PaddleAPI.h"
#include "PaddleAPIPrivate.h" #include "PaddleAPIPrivate.h"
#include "paddle/parameter/ParameterOptimizer.h" #include "paddle/parameter/ParameterOptimizer.h"
...@@ -32,17 +31,21 @@ struct ParameterTraverseCallbackPrivate { ...@@ -32,17 +31,21 @@ struct ParameterTraverseCallbackPrivate {
const paddle::ParameterOptimizer::TraverseCallback& callback) const paddle::ParameterOptimizer::TraverseCallback& callback)
: callback(callback) {} : callback(callback) {}
void apply(const std::vector<Vector*>& vecs, const ParameterConfig& conf, void apply(const std::vector<Vector*>& vecs,
const ParameterConfig& conf,
size_t sparseId) { size_t sparseId) {
std::vector<paddle::VectorPtr> real_vecs; std::vector<paddle::VectorPtr> real_vecs;
real_vecs.resize(vecs.size()); real_vecs.resize(vecs.size());
std::transform(vecs.begin(), vecs.end(), real_vecs.begin(), [](Vector* v) { std::transform(vecs.begin(),
if (v) { vecs.end(),
return *(paddle::VectorPtr*)(v->getSharedPtr()); real_vecs.begin(),
} else { [](Vector* v) {
return paddle::VectorPtr(); if (v) {
} return *(paddle::VectorPtr*)(v->getSharedPtr());
}); } else {
return paddle::VectorPtr();
}
});
paddle::ParameterConfig& real_conf = paddle::ParameterConfig& real_conf =
*(paddle::ParameterConfig*)(const_cast<ParameterConfig&>(conf) *(paddle::ParameterConfig*)(const_cast<ParameterConfig&>(conf)
...@@ -86,10 +89,12 @@ void ParameterOptimizer::startBatch(size_t numSamplesProcessed) { ...@@ -86,10 +89,12 @@ void ParameterOptimizer::startBatch(size_t numSamplesProcessed) {
void ParameterOptimizer::finishBatch() { m->optimizer->finishBatch(); } void ParameterOptimizer::finishBatch() { m->optimizer->finishBatch(); }
void ParameterOptimizer::update(const std::vector<Vector*>& vecs, void ParameterOptimizer::update(const std::vector<Vector*>& vecs,
const ParameterConfig& conf, size_t sparseId) { const ParameterConfig& conf,
ParameterTraverseCallbackPrivate invoker([&]( size_t sparseId) {
const paddle::VectorPtr _vecs[], const paddle::ParameterConfig& config, ParameterTraverseCallbackPrivate invoker(
size_t sid = -1UL) { m->optimizer->update(_vecs, config, sid); }); [&](const paddle::VectorPtr _vecs[],
const paddle::ParameterConfig& config,
size_t sid = -1UL) { m->optimizer->update(_vecs, config, sid); });
invoker.apply(vecs, conf, sparseId); invoker.apply(vecs, conf, sparseId);
} }
...@@ -116,8 +121,9 @@ void ParameterTraverseCallback::apply(const std::vector<Vector*>& vecs, ...@@ -116,8 +121,9 @@ void ParameterTraverseCallback::apply(const std::vector<Vector*>& vecs,
ParameterTraverseCallback* ParameterOptimizer::needSpecialTraversal( ParameterTraverseCallback* ParameterOptimizer::needSpecialTraversal(
const ParameterConfig& config) const { const ParameterConfig& config) const {
auto& param_config = *(paddle::ParameterConfig*)const_cast<ParameterConfig&>( auto& param_config =
config).getRawPtr(); *(paddle::ParameterConfig*)const_cast<ParameterConfig&>(config)
.getRawPtr();
auto callback = m->optimizer->needSpecialTraversal(param_config); auto callback = m->optimizer->needSpecialTraversal(param_config);
if (callback) { if (callback) {
auto retCallback = new ParameterTraverseCallback(); auto retCallback = new ParameterTraverseCallback();
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "PaddleAPI.h" #include "PaddleAPI.h"
#include "paddle/gserver/gradientmachines/GradientMachine.h" #include "paddle/gserver/gradientmachines/GradientMachine.h"
#include "paddle/parameter/Argument.h" #include "paddle/parameter/Argument.h"
...@@ -42,8 +41,10 @@ struct Path { ...@@ -42,8 +41,10 @@ struct Path {
// position // position
static void findNBest(paddle::GradientMachine* gradMachine, static void findNBest(paddle::GradientMachine* gradMachine,
std::vector<paddle::Argument>& inArgs, std::vector<paddle::Argument>& inArgs,
std::vector<Path>& finalPaths, size_t bos_id, std::vector<Path>& finalPaths,
size_t eos_id, size_t max_length) { size_t bos_id,
size_t eos_id,
size_t max_length) {
std::vector<Path> paths; std::vector<Path> paths;
Path emptyPath; Path emptyPath;
paths.push_back(emptyPath); paths.push_back(emptyPath);
...@@ -166,7 +167,8 @@ public: ...@@ -166,7 +167,8 @@ public:
if (id < getSize()) { if (id < getSize()) {
Path& p = (*path_)[id]; Path& p = (*path_)[id];
std::ostringstream sout; std::ostringstream sout;
std::transform(p.ids.begin(), p.ids.end(), std::transform(p.ids.begin(),
p.ids.end(),
std::ostream_iterator<std::string>(sout, split ? " " : ""), std::ostream_iterator<std::string>(sout, split ? " " : ""),
[&](int id) { return (*dict_)[id]; }); [&](int id) { return (*dict_)[id]; });
return sout.str(); return sout.str();
......
...@@ -64,12 +64,11 @@ Trainer* Trainer::createByCommandLine() throw(IOError) { ...@@ -64,12 +64,11 @@ Trainer* Trainer::createByCommandLine() throw(IOError) {
Trainer::Trainer(TrainerConfig* config, GradientMachine* gm) Trainer::Trainer(TrainerConfig* config, GradientMachine* gm)
: m(new TrainerPrivate()) { : m(new TrainerPrivate()) {
m->init(config->m->conf, /* testing= */false, gm ? gm->m->machine : nullptr); m->init(config->m->conf, /* testing= */ false, gm ? gm->m->machine : nullptr);
} }
Trainer* Trainer::create(TrainerConfig* config, GradientMachine* gm) Trainer* Trainer::create(TrainerConfig* config,
throw(IOError) GradientMachine* gm) throw(IOError) {
{
auto retv = new Trainer(config, gm); auto retv = new Trainer(config, gm);
if (retv->m->getConfig().IsInitialized()) { if (retv->m->getConfig().IsInitialized()) {
return retv; return retv;
...@@ -134,15 +133,17 @@ void Trainer::finishTestPeriod() { m->finishTestPeriod(); } ...@@ -134,15 +133,17 @@ void Trainer::finishTestPeriod() { m->finishTestPeriod(); }
Matrix* Trainer::getLayerOutput(const std::string& layerName) { Matrix* Trainer::getLayerOutput(const std::string& layerName) {
auto nn = std::dynamic_pointer_cast<paddle::NeuralNetwork>( auto nn = std::dynamic_pointer_cast<paddle::NeuralNetwork>(
this->m->getGradientMachine()); this->m->getGradientMachine());
CHECK(nn) << "trainerInternal_.getGradientMachine() is not NeuralNetwork"; CHECK(nn) << "trainerInternal_.getGradientMachine() is not NeuralNetwork";
auto m = nn->getLayerOutput(layerName); auto m = nn->getLayerOutput(layerName);
return Matrix::createByPaddleMatrixPtr(&m); return Matrix::createByPaddleMatrixPtr(&m);
} }
void Trainer::forwardOneBatch(size_t batchSize) { m->forwardOneBatch(batchSize); } void Trainer::forwardOneBatch(size_t batchSize) {
m->forwardOneBatch(batchSize);
}
bool TrainerPrivate::forwardOneBatch(size_t batchSize) { bool TrainerPrivate::forwardOneBatch(size_t batchSize) {
CHECK(dataProvider_) << "data_provider is not specified"; CHECK(dataProvider_) << "data_provider is not specified";
paddle::DataBatch dataBatch; paddle::DataBatch dataBatch;
int num = dataProvider_->getNextBatch(batchSize, &dataBatch); int num = dataProvider_->getNextBatch(batchSize, &dataBatch);
...@@ -156,7 +157,6 @@ bool TrainerPrivate::forwardOneBatch(size_t batchSize) { ...@@ -156,7 +157,6 @@ bool TrainerPrivate::forwardOneBatch(size_t batchSize) {
void TrainerPrivate::forwardOneDataBatch( void TrainerPrivate::forwardOneDataBatch(
const std::vector<paddle::Argument>& inArgs) { const std::vector<paddle::Argument>& inArgs) {
std::vector<paddle::Argument>& outArgs = forwardOutput_; std::vector<paddle::Argument>& outArgs = forwardOutput_;
if (config_->getOptConfig().use_sparse_remote_updater()) { if (config_->getOptConfig().use_sparse_remote_updater()) {
......
...@@ -37,13 +37,15 @@ FloatArray::FloatArray(const float* b, const size_t l) ...@@ -37,13 +37,15 @@ FloatArray::FloatArray(const float* b, const size_t l)
IntArray::IntArray(const int* b, const size_t l, bool f) IntArray::IntArray(const int* b, const size_t l, bool f)
: buf(b), length(l), needFree(f) {} : buf(b), length(l), needFree(f) {}
IntWithFloatArray::IntWithFloatArray(const float* v, const int* i, size_t l, IntWithFloatArray::IntWithFloatArray(const float* v,
const int* i,
size_t l,
bool f) bool f)
: valBuf(v), idxBuf(i), length(l), needFree(f) {} : valBuf(v), idxBuf(i), length(l), needFree(f) {}
bool isUsingGpu() {return FLAGS_use_gpu;} bool isUsingGpu() { return FLAGS_use_gpu; }
void setUseGpu(bool useGpu) {FLAGS_use_gpu = useGpu;} void setUseGpu(bool useGpu) { FLAGS_use_gpu = useGpu; }
bool isGpuVersion() { bool isGpuVersion() {
#ifdef PADDLE_ONLY_CPU #ifdef PADDLE_ONLY_CPU
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "PaddleAPI.h" #include "PaddleAPI.h"
#include "paddle/math/Vector.h" #include "paddle/math/Vector.h"
...@@ -39,8 +38,10 @@ IVector* IVector::create(const std::vector<int>& data, bool useGpu) { ...@@ -39,8 +38,10 @@ IVector* IVector::create(const std::vector<int>& data, bool useGpu) {
return v; return v;
} }
IVector* IVector::createVectorFromNumpy(int* data, int dim, bool copy, IVector* IVector::createVectorFromNumpy(int* data,
bool useGpu) throw (UnsupportError){ int dim,
bool copy,
bool useGpu) throw(UnsupportError) {
if (useGpu) { if (useGpu) {
/// if use gpu only copy=true is supported /// if use gpu only copy=true is supported
if (!copy) { if (!copy) {
...@@ -137,8 +138,8 @@ void IVector::copyToNumpyArray(int** view_m_data, int* dim1) { ...@@ -137,8 +138,8 @@ void IVector::copyToNumpyArray(int** view_m_data, int* dim1) {
if (auto cpuVec = dynamic_cast<paddle::CpuIVector*>(m->vec.get())) { if (auto cpuVec = dynamic_cast<paddle::CpuIVector*>(m->vec.get())) {
std::memcpy(*view_m_data, cpuVec->getData(), sizeof(int) * (*dim1)); std::memcpy(*view_m_data, cpuVec->getData(), sizeof(int) * (*dim1));
} else if (auto gpuVec = dynamic_cast<paddle::GpuIVector*>(m->vec.get())) { } else if (auto gpuVec = dynamic_cast<paddle::GpuIVector*>(m->vec.get())) {
hl_memcpy_device2host(*view_m_data, gpuVec->getData(), hl_memcpy_device2host(
sizeof(int) * (*dim1)); *view_m_data, gpuVec->getData(), sizeof(int) * (*dim1));
} else { } else {
LOG(INFO) << "Unexpected situation"; LOG(INFO) << "Unexpected situation";
} }
...@@ -201,8 +202,10 @@ Vector* Vector::createByPaddleVectorPtr(void* ptr) { ...@@ -201,8 +202,10 @@ Vector* Vector::createByPaddleVectorPtr(void* ptr) {
} }
} }
Vector* Vector::createVectorFromNumpy(float* data, int dim, bool copy, Vector* Vector::createVectorFromNumpy(float* data,
bool useGpu) throw (UnsupportError){ int dim,
bool copy,
bool useGpu) throw(UnsupportError) {
if (useGpu) { if (useGpu) {
/// if use gpu only copy=True is supported /// if use gpu only copy=True is supported
if (!copy) { if (!copy) {
...@@ -251,8 +254,8 @@ void Vector::copyToNumpyArray(float** view_m_data, int* dim1) { ...@@ -251,8 +254,8 @@ void Vector::copyToNumpyArray(float** view_m_data, int* dim1) {
if (auto cpuVec = dynamic_cast<paddle::CpuVector*>(m->vec.get())) { if (auto cpuVec = dynamic_cast<paddle::CpuVector*>(m->vec.get())) {
std::memcpy(*view_m_data, cpuVec->getData(), sizeof(float) * (*dim1)); std::memcpy(*view_m_data, cpuVec->getData(), sizeof(float) * (*dim1));
} else if (auto gpuVec = dynamic_cast<paddle::CpuVector*>(m->vec.get())) { } else if (auto gpuVec = dynamic_cast<paddle::CpuVector*>(m->vec.get())) {
hl_memcpy_device2host(*view_m_data, gpuVec->getData(), hl_memcpy_device2host(
sizeof(float) * (*dim1)); *view_m_data, gpuVec->getData(), sizeof(float) * (*dim1));
} else { } else {
LOG(INFO) << "Unexpected situation"; LOG(INFO) << "Unexpected situation";
} }
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_ACTIVATION_FUNCTIONS_H_ #ifndef HL_ACTIVATION_FUNCTIONS_H_
#define HL_ACTIVATION_FUNCTIONS_H_ #define HL_ACTIVATION_FUNCTIONS_H_
...@@ -21,11 +20,8 @@ limitations under the License. */ ...@@ -21,11 +20,8 @@ limitations under the License. */
/** /**
* Active functions: sigmoid, relu, tanh and linear. * Active functions: sigmoid, relu, tanh and linear.
*/ */
#define HPPL_ACTIVE_FUNCTION {hppl::sigmoid, \ #define HPPL_ACTIVE_FUNCTION \
hppl::relu, \ { hppl::sigmoid, hppl::relu, hppl::tanh, hppl::linear }
hppl::tanh, \
hppl::linear \
}
namespace hppl { namespace hppl {
...@@ -42,18 +38,18 @@ public: ...@@ -42,18 +38,18 @@ public:
#ifdef __NVCC__ #ifdef __NVCC__
namespace gpu { namespace gpu {
static __device__ Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION; static __device__ Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;
static __device__ Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION; static __device__ Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION;
} }
#else #else
namespace cpu { namespace cpu {
static Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION; static Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;
static Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION; static Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION;
} }
#ifdef __AVX__ #ifdef __AVX__
namespace avx { namespace avx {
static Active<__m256>::forward forward[] = HPPL_ACTIVE_FUNCTION; static Active<__m256>::forward forward[] = HPPL_ACTIVE_FUNCTION;
static Active<__m256>::backward backward[] = HPPL_ACTIVE_FUNCTION; static Active<__m256>::backward backward[] = HPPL_ACTIVE_FUNCTION;
} }
#endif #endif
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_AGGREGATE_H_ #ifndef HL_AGGREGATE_H_
#define HL_AGGREGATE_H_ #define HL_AGGREGATE_H_
......
...@@ -12,22 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,22 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_AVX_FUNCTIONS_H_ #ifndef HL_AVX_FUNCTIONS_H_
#define HL_AVX_FUNCTIONS_H_ #define HL_AVX_FUNCTIONS_H_
#include <immintrin.h> #include <immintrin.h>
namespace hppl { namespace hppl {
__m256 relu(const __m256 a); __m256 relu(const __m256 a);
__m256 sigmoid(const __m256 a); __m256 sigmoid(const __m256 a);
__m256 tanh(const __m256 a); __m256 tanh(const __m256 a);
__m256 linear(const __m256 a); __m256 linear(const __m256 a);
__m256 relu(const __m256 a, const __m256 b); __m256 relu(const __m256 a, const __m256 b);
__m256 sigmoid(const __m256 a, const __m256 b); __m256 sigmoid(const __m256 a, const __m256 b);
__m256 tanh(const __m256 a, const __m256 b); __m256 tanh(const __m256 a, const __m256 b);
__m256 linear(const __m256 a, const __m256 b); __m256 linear(const __m256 a, const __m256 b);
} // namespace hppl } // namespace hppl
#endif // HL_AVX_FUNCTIONS_H_ #endif // HL_AVX_FUNCTIONS_H_
...@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_BASE_H_ #ifndef HL_BASE_H_
#define HL_BASE_H_ #define HL_BASE_H_
...@@ -33,36 +31,36 @@ limitations under the License. */ ...@@ -33,36 +31,36 @@ limitations under the License. */
* HPPL_STREAM_DEFAULT is HPPL default stream. * HPPL_STREAM_DEFAULT is HPPL default stream.
*/ */
typedef enum { typedef enum {
HPPL_STREAM_DEFAULT = 0, /* Thread Default Stream*/ HPPL_STREAM_DEFAULT = 0, /* Thread Default Stream*/
HPPL_STREAM_1 = 1, HPPL_STREAM_1 = 1,
HPPL_STREAM_2 = 2, HPPL_STREAM_2 = 2,
HPPL_STREAM_3 = 3, HPPL_STREAM_3 = 3,
HPPL_STREAM_4 = 4, HPPL_STREAM_4 = 4,
HPPL_THREAD_STREAM_1 = 5, HPPL_THREAD_STREAM_1 = 5,
HPPL_THREAD_STREAM_2 = 6, HPPL_THREAD_STREAM_2 = 6,
HPPL_THREAD_STREAM_3 = 7, HPPL_THREAD_STREAM_3 = 7,
HPPL_THREAD_STREAM_4 = 8, HPPL_THREAD_STREAM_4 = 8,
HPPL_STREAM_END HPPL_STREAM_END
} hl_stream_t; } hl_stream_t;
/** /**
* @brief HPPL activation mode. * @brief HPPL activation mode.
*/ */
typedef enum { typedef enum {
HL_ACTIVATION_SIGMOID = 0, HL_ACTIVATION_SIGMOID = 0,
HL_ACTIVATION_RELU = 1, HL_ACTIVATION_RELU = 1,
HL_ACTIVATION_TANH = 2, HL_ACTIVATION_TANH = 2,
HL_ACTIVATION_LINEAR = 3, HL_ACTIVATION_LINEAR = 3,
HL_ACTIVATION_END HL_ACTIVATION_END
} hl_activation_mode_t; } hl_activation_mode_t;
/** /**
* @brief Transpose type. * @brief Transpose type.
*/ */
typedef enum { typedef enum {
HPPL_OP_N = 0, /* non transpose */ HPPL_OP_N = 0, /* non transpose */
HPPL_OP_T = 1, /* transpose */ HPPL_OP_T = 1, /* transpose */
HPPL_OP_END HPPL_OP_END
} hl_trans_op_t; } hl_trans_op_t;
/** /**
...@@ -148,23 +146,21 @@ typedef struct { ...@@ -148,23 +146,21 @@ typedef struct {
* @brief Sparse matrix value type. * @brief Sparse matrix value type.
*/ */
typedef enum { typedef enum {
HL_NO_VALUE = 0, /* matrix values only 0 or 1 */ HL_NO_VALUE = 0, /* matrix values only 0 or 1 */
HL_FLOAT_VALUE = 1, HL_FLOAT_VALUE = 1,
HL_VALUE_END HL_VALUE_END
} hl_matrix_value_t; } hl_matrix_value_t;
/** /**
* @brief HPPL matrix format. * @brief HPPL matrix format.
*/ */
typedef enum { typedef enum {
HL_SPARSE_CSR = 0, HL_SPARSE_CSR = 0,
HL_SPARSE_CSC = 1, HL_SPARSE_CSC = 1,
HL_SPARSE_END HL_SPARSE_END
} hl_matrix_format_t; } hl_matrix_format_t;
typedef struct _hl_matrix_s *hl_matrix_s;
typedef struct _hl_matrix_s * hl_matrix_s;
/** /**
* @brief HPPL sparse matrix. * @brief HPPL sparse matrix.
...@@ -177,12 +173,12 @@ typedef struct _hl_matrix_s * hl_matrix_s; ...@@ -177,12 +173,12 @@ typedef struct _hl_matrix_s * hl_matrix_s;
* @param nnz nonzero values of sparse matrix. * @param nnz nonzero values of sparse matrix.
*/ */
typedef struct { typedef struct {
hl_matrix_s matrix; hl_matrix_s matrix;
hl_matrix_format_t format; hl_matrix_format_t format;
hl_matrix_value_t type; hl_matrix_value_t type;
int rows; int rows;
int cols; int cols;
size_t nnz; size_t nnz;
} _hl_sparse_matrix_s, *hl_sparse_matrix_s; } _hl_sparse_matrix_s, *hl_sparse_matrix_s;
#ifndef PADDLE_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
...@@ -195,7 +191,7 @@ typedef struct { ...@@ -195,7 +191,7 @@ typedef struct {
* *
* HL_FLOAT_MIN: 1.17549435e-38F * HL_FLOAT_MIN: 1.17549435e-38F
*/ */
#define HL_FLOAT_MAX 3.40282347e+38F #define HL_FLOAT_MAX 3.40282347e+38F
/** /**
* if real == double * if real == double
* *
...@@ -203,20 +199,18 @@ typedef struct { ...@@ -203,20 +199,18 @@ typedef struct {
* *
* HL_FLOAT_MIN: 2.2250738585072014e-308 * HL_FLOAT_MIN: 2.2250738585072014e-308
*/ */
#define HL_FLOAT_MIN 1.17549435e-38F #define HL_FLOAT_MIN 1.17549435e-38F
#else #else
#define HL_FLOAT_MAX 1.7976931348623157e+308 #define HL_FLOAT_MAX 1.7976931348623157e+308
#define HL_FLOAT_MIN 2.2250738585072014e-308 #define HL_FLOAT_MIN 2.2250738585072014e-308
#endif #endif
/** /**
* The maximum input value for exp, used to avoid overflow problem. * The maximum input value for exp, used to avoid overflow problem.
* *
* Currently only used for tanh function. * Currently only used for tanh function.
*/ */
#define EXP_MAX_INPUT 40.0 #define EXP_MAX_INPUT 40.0
/** /**
* @brief DIVUP(x, y) is similar to ceil(x / y). * @brief DIVUP(x, y) is similar to ceil(x / y).
...@@ -224,7 +218,7 @@ typedef struct { ...@@ -224,7 +218,7 @@ typedef struct {
* the size of blockDim. * the size of blockDim.
*/ */
#ifndef DIVUP #ifndef DIVUP
#define DIVUP(x, y) (((x) + (y) - 1) / (y)) #define DIVUP(x, y) (((x) + (y)-1) / (y))
#endif #endif
#ifdef __NVCC__ #ifdef __NVCC__
...@@ -233,7 +227,7 @@ typedef struct { ...@@ -233,7 +227,7 @@ typedef struct {
#include "hl_cuda.h" #include "hl_cuda.h"
#include "cuda_runtime.h" #include "cuda_runtime.h"
extern __thread bool g_sync_flag; extern __thread bool g_sync_flag;
extern __thread cudaStream_t default_stream; extern __thread cudaStream_t default_stream;
#define STREAM_DEFAULT default_stream #define STREAM_DEFAULT default_stream
...@@ -241,16 +235,15 @@ extern __thread cudaStream_t default_stream; ...@@ -241,16 +235,15 @@ extern __thread cudaStream_t default_stream;
* @brief Check cuda kernel execution. * @brief Check cuda kernel execution.
* @param msg error string * @param msg error string
*/ */
#define CHECK_SYNC(msg) \ #define CHECK_SYNC(msg) \
if (true == g_sync_flag) { \ if (true == g_sync_flag) { \
hl_stream_synchronize(HPPL_STREAM_DEFAULT); \ hl_stream_synchronize(HPPL_STREAM_DEFAULT); \
cudaError_t err \ cudaError_t err = (cudaError_t)hl_get_device_last_error(); \
= (cudaError_t)hl_get_device_last_error(); \ CHECK_EQ(cudaSuccess, err) \
CHECK_EQ(cudaSuccess, err) << "[" << msg << "] " \ << "[" << msg << "] " \
<< "CUDA error: " \ << "CUDA error: " << hl_get_device_error_string((size_t)err); \
<< hl_get_device_error_string((size_t)err); \
} }
#endif /* __NVCC__ */ #endif /* __NVCC__ */
#endif /* HL_BASE_H_ */ #endif /* HL_BASE_H_ */
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_BATCH_TRANSPOSE_H_ #ifndef HL_BATCH_TRANSPOSE_H_
#define HL_BATCH_TRANSPOSE_H_ #define HL_BATCH_TRANSPOSE_H_
...@@ -31,10 +30,7 @@ limitations under the License. */ ...@@ -31,10 +30,7 @@ limitations under the License. */
* order. Each batch has height * width data, which are * order. Each batch has height * width data, which are
* arranged in height-first (or row-first) manner. * arranged in height-first (or row-first) manner.
*/ */
extern void batchTranspose(const real* input, extern void batchTranspose(
real* output, const real* input, real* output, int width, int height, int batchSize);
int width,
int height,
int batchSize);
#endif // HL_BATCH_TRANSPOSE_H_ #endif // HL_BATCH_TRANSPOSE_H_
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_CNN_H_ #ifndef HL_CNN_H_
#define HL_CNN_H_ #define HL_CNN_H_
...@@ -37,15 +36,21 @@ limitations under the License. */ ...@@ -37,15 +36,21 @@ limitations under the License. */
* @param[in] alpha * @param[in] alpha
* @param[in] beta * @param[in] beta
*/ */
extern void hl_shrink_col2feature( extern void hl_shrink_col2feature(const real* dataCol,
const real * dataCol, size_t channels, size_t channels,
size_t height, size_t width, size_t height,
size_t blockH, size_t blockW, size_t width,
size_t strideH, size_t strideW, size_t blockH,
size_t paddingH, size_t paddingW, size_t blockW,
size_t outputH, size_t outputW, size_t strideH,
real* dataIm, size_t strideW,
real alpha = 1.0f, real beta = 0.0f); size_t paddingH,
size_t paddingW,
size_t outputH,
size_t outputW,
real* dataIm,
real alpha = 1.0f,
real beta = 0.0f);
/** /**
* @brief Expand feature to column. * @brief Expand feature to column.
...@@ -65,14 +70,19 @@ extern void hl_shrink_col2feature( ...@@ -65,14 +70,19 @@ extern void hl_shrink_col2feature(
* @param[out] dataCol expand data. * @param[out] dataCol expand data.
* *
*/ */
extern void hl_expand_feature2col( extern void hl_expand_feature2col(const real* dataIm,
const real* dataIm, size_t channels, size_t channels,
size_t height, size_t width, size_t height,
size_t blockH, size_t blockW, size_t width,
size_t strideH, size_t strideW, size_t blockH,
size_t paddingH, size_t paddingW, size_t blockW,
size_t outputH, size_t outputW, size_t strideH,
real* dataCol); size_t strideW,
size_t paddingH,
size_t paddingW,
size_t outputH,
size_t outputW,
real* dataCol);
/** /**
* @brief Maximum pool forward. * @brief Maximum pool forward.
...@@ -94,15 +104,21 @@ extern void hl_expand_feature2col( ...@@ -94,15 +104,21 @@ extern void hl_expand_feature2col(
* @param[in] tgtStride stride between output data samples. * @param[in] tgtStride stride between output data samples.
* *
*/ */
extern void hl_maxpool_forward( extern void hl_maxpool_forward(const int frameCnt,
const int frameCnt, const real* inputData, const real* inputData,
const int channels, const int channels,
const int height, const int width, const int height,
const int pooledH, const int pooledW, const int width,
const int sizeX, const int sizeY, const int pooledH,
const int strideH, const int strideW, const int pooledW,
const int paddingH, const int paddingW, const int sizeX,
real* tgtData, const int tgtStride); const int sizeY,
const int strideH,
const int strideW,
const int paddingH,
const int paddingW,
real* tgtData,
const int tgtStride);
/** /**
* @brief Maximum pool backward. * @brief Maximum pool backward.
...@@ -125,20 +141,28 @@ extern void hl_maxpool_forward( ...@@ -125,20 +141,28 @@ extern void hl_maxpool_forward(
* @param[in] paddingH padding height. * @param[in] paddingH padding height.
* @param[in] paddingW padding width. * @param[in] paddingW padding width.
* @param[out] targetGrad output grad. * @param[out] targetGrad output grad.
* @param[in] outStride stride between output data samples. * @param[in] outStride stride between output data samples.
* *
*/ */
extern void hl_maxpool_backward( extern void hl_maxpool_backward(const int frameCnt,
const int frameCnt, const real* inputData, const real* inputData,
const real* outData, const real* outGrad, const real* outData,
const int channels, const int height, const real* outGrad,
const int width, const int channels,
const int pooledH, const int pooledW, const int height,
const int sizeX, const int sizeY, const int width,
const int strideH, const int strideW, const int pooledH,
const int paddingH, const int paddingW, const int pooledW,
real scaleA, real scaleB, const int sizeX,
real* targetGrad, const int outStride); const int sizeY,
const int strideH,
const int strideW,
const int paddingH,
const int paddingW,
real scaleA,
real scaleB,
real* targetGrad,
const int outStride);
/** /**
* @brief Average pool forward. * @brief Average pool forward.
...@@ -160,15 +184,21 @@ extern void hl_maxpool_backward( ...@@ -160,15 +184,21 @@ extern void hl_maxpool_backward(
* @param[in] tgtStride stride between output data samples. * @param[in] tgtStride stride between output data samples.
* *
*/ */
extern void hl_avgpool_forward( extern void hl_avgpool_forward(const int frameCnt,
const int frameCnt, const real* inputData, const real* inputData,
const int channels, const int channels,
const int height, const int width, const int height,
const int pooledH, const int pooledW, const int width,
const int sizeX, const int sizeY, const int pooledH,
const int strideH, const int strideW, const int pooledW,
const int paddingH, const int paddingW, const int sizeX,
real* tgtData, const int tgtStride); const int sizeY,
const int strideH,
const int strideW,
const int paddingH,
const int paddingW,
real* tgtData,
const int tgtStride);
/** /**
* @brief Average pool backward. * @brief Average pool backward.
...@@ -189,19 +219,26 @@ extern void hl_avgpool_forward( ...@@ -189,19 +219,26 @@ extern void hl_avgpool_forward(
* @param[in] scaleA scale. * @param[in] scaleA scale.
* @param[in] scaleB scale. * @param[in] scaleB scale.
* @param[out] backGrad output grad. * @param[out] backGrad output grad.
* @param[in] outStride stride between output data samples. * @param[in] outStride stride between output data samples.
* *
*/ */
extern void hl_avgpool_backward( extern void hl_avgpool_backward(const int frameCnt,
const int frameCnt, const real* outGrad, const real* outGrad,
const int channels, const int height, const int channels,
const int width, const int height,
const int pooledH, const int pooledW, const int width,
const int sizeX, const int sizeY, const int pooledH,
const int strideH, const int strideW, const int pooledW,
int paddingH, int paddingW, const int sizeX,
real scaleA, real scaleB, const int sizeY,
real* backGrad, const int outStride); const int strideH,
const int strideW,
int paddingH,
int paddingW,
real scaleA,
real scaleB,
real* backGrad,
const int outStride);
/** /**
* @brief Cross-map-response normalize forward. * @brief Cross-map-response normalize forward.
...@@ -218,10 +255,16 @@ extern void hl_avgpool_backward( ...@@ -218,10 +255,16 @@ extern void hl_avgpool_backward(
* @param[in] beta scale. * @param[in] beta scale.
* *
*/ */
extern void hl_CMRNorm_forward( extern void hl_CMRNorm_forward(size_t frameCnt,
size_t frameCnt, const real* in, real* scale, real* out, const real* in,
size_t channels, size_t height, size_t width, size_t sizeX, real* scale,
real alpha, real beta); real* out,
size_t channels,
size_t height,
size_t width,
size_t sizeX,
real alpha,
real beta);
/** /**
* @brief Cross-map-response normalize backward. * @brief Cross-map-response normalize backward.
...@@ -240,11 +283,18 @@ extern void hl_CMRNorm_forward( ...@@ -240,11 +283,18 @@ extern void hl_CMRNorm_forward(
* @param[in] beta scale. * @param[in] beta scale.
* *
*/ */
extern void hl_CMRNorm_backward( extern void hl_CMRNorm_backward(size_t frameCnt,
size_t frameCnt, const real* inV, const real* scale, const real* inV,
const real* outV, const real* outDiff, real *inDiff, const real* scale,
size_t channels, size_t height, size_t width, size_t sizeX, const real* outV,
real alpha, real beta); const real* outDiff,
real* inDiff,
size_t channels,
size_t height,
size_t width,
size_t sizeX,
real alpha,
real beta);
/** /**
* @brief Bilinear interpolation forward. * @brief Bilinear interpolation forward.
...@@ -278,24 +328,24 @@ extern void hl_bilinear_forward(const real* inData, ...@@ -278,24 +328,24 @@ extern void hl_bilinear_forward(const real* inData,
const real ratioH, const real ratioH,
const real ratioW); const real ratioW);
/** /**
* @brief Bilinear interpolation backward. * @brief Bilinear interpolation backward.
* *
* @param[out] inGrad input gradient. * @param[out] inGrad input gradient.
* @param[in] inImgH input image height. * @param[in] inImgH input image height.
* @param[in] inImgW input image width. * @param[in] inImgW input image width.
* @param[in] inputH input batchSize. * @param[in] inputH input batchSize.
* @param[in] inputW input image data dim. * @param[in] inputW input image data dim.
* @param[in] outGrad output gradient. * @param[in] outGrad output gradient.
* @param[in] outImgH output image height. * @param[in] outImgH output image height.
* @param[in] outImgW output image width. * @param[in] outImgW output image width.
* @param[in] outputH output batchSize. * @param[in] outputH output batchSize.
* @param[in] outputW output image data dim. * @param[in] outputW output image data dim.
* @param[in] numChannels number of channels. * @param[in] numChannels number of channels.
* @param[in] ratioH inImgH / outImgH. * @param[in] ratioH inImgH / outImgH.
* @param[in] ratioW inImgW / outImgW. * @param[in] ratioW inImgW / outImgW.
* *
*/ */
extern void hl_bilinear_backward(real* inGrad, extern void hl_bilinear_backward(real* inGrad,
const size_t inImgH, const size_t inImgH,
const size_t inImgW, const size_t inImgW,
...@@ -321,9 +371,13 @@ extern void hl_bilinear_backward(real* inGrad, ...@@ -321,9 +371,13 @@ extern void hl_bilinear_backward(real* inGrad,
* @param[in] featLen feature length = image height * image width. * @param[in] featLen feature length = image height * image width.
* @param[in] groups number of groups. * @param[in] groups number of groups.
*/ */
extern void hl_maxout_forward( extern void hl_maxout_forward(const real* inData,
const real* inData, real* outData, int* idData, real* outData,
size_t batchSize, size_t size, size_t featLen, size_t groups); int* idData,
size_t batchSize,
size_t size,
size_t featLen,
size_t groups);
/** /**
* @brief MaxOut backward. * @brief MaxOut backward.
...@@ -336,8 +390,12 @@ extern void hl_maxout_forward( ...@@ -336,8 +390,12 @@ extern void hl_maxout_forward(
* @param[in] featLen feature length = image height * image width. * @param[in] featLen feature length = image height * image width.
* @param[in] groups number of groups. * @param[in] groups number of groups.
*/ */
extern void hl_maxout_backward( extern void hl_maxout_backward(real* inGrad,
real* inGrad, const real* outGrad, const int* idData, const real* outGrad,
size_t batchSize, size_t size, size_t featLen, size_t groups); const int* idData,
size_t batchSize,
size_t size,
size_t featLen,
size_t groups);
#endif /* HL_CNN_H_ */ #endif /* HL_CNN_H_ */
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_CUDA_H_ #ifndef HL_CUDA_H_
#define HL_CUDA_H_ #define HL_CUDA_H_
...@@ -22,8 +21,7 @@ limitations under the License. */ ...@@ -22,8 +21,7 @@ limitations under the License. */
/** /**
* @brief HPPL event. * @brief HPPL event.
*/ */
typedef struct _hl_event_st * hl_event_t; typedef struct _hl_event_st *hl_event_t;
/** /**
* @brief return cuda runtime api version. * @brief return cuda runtime api version.
...@@ -42,7 +40,7 @@ extern void hl_start(); ...@@ -42,7 +40,7 @@ extern void hl_start();
* if device is NULL, will start all GPU. * if device is NULL, will start all GPU.
* @param[in] number number of devices. * @param[in] number number of devices.
*/ */
extern void hl_specify_devices_start(int* device, int number); extern void hl_specify_devices_start(int *device, int number);
/** /**
* @brief Queries if a device may directly access a peer device's memory. * @brief Queries if a device may directly access a peer device's memory.
...@@ -126,7 +124,7 @@ extern int hl_get_device(); ...@@ -126,7 +124,7 @@ extern int hl_get_device();
* *
* @return dest_d pointer to device memory. * @return dest_d pointer to device memory.
*/ */
extern void* hl_malloc_device(size_t size); extern void *hl_malloc_device(size_t size);
/** /**
* @brief Free device memory. * @brief Free device memory.
...@@ -143,7 +141,7 @@ extern void hl_free_mem_device(void *dest_d); ...@@ -143,7 +141,7 @@ extern void hl_free_mem_device(void *dest_d);
* *
* @return dest_h pointer to host memory. * @return dest_h pointer to host memory.
*/ */
extern void* hl_malloc_host(size_t size); extern void *hl_malloc_host(size_t size);
/** /**
* @brief Free host page-lock memory. * @brief Free host page-lock memory.
...@@ -228,9 +226,9 @@ extern void hl_srand(unsigned int seed); ...@@ -228,9 +226,9 @@ extern void hl_srand(unsigned int seed);
* @param[in] stream stream id. * @param[in] stream stream id.
*/ */
extern void hl_memcpy_async(void *dst, extern void hl_memcpy_async(void *dst,
void *src, void *src,
size_t size, size_t size,
hl_stream_t stream); hl_stream_t stream);
/** /**
* @brief Waits for stream tasks to complete. * @brief Waits for stream tasks to complete.
...@@ -261,8 +259,7 @@ extern void hl_destroy_event(hl_event_t event); ...@@ -261,8 +259,7 @@ extern void hl_destroy_event(hl_event_t event);
* *
* @return time Time between start and end in ms. * @return time Time between start and end in ms.
*/ */
extern float hl_event_elapsed_time(hl_event_t start, extern float hl_event_elapsed_time(hl_event_t start, hl_event_t end);
hl_event_t end);
/** /**
* @brief Records an event. * @brief Records an event.
...@@ -300,7 +297,7 @@ extern void hl_set_device_flags_block(); ...@@ -300,7 +297,7 @@ extern void hl_set_device_flags_block();
/** /**
* @brief Returns the last error string from a cuda runtime call. * @brief Returns the last error string from a cuda runtime call.
*/ */
extern const char* hl_get_device_error_string(); extern const char *hl_get_device_error_string();
/** /**
* @brief Returns the last error string from a cuda runtime call. * @brief Returns the last error string from a cuda runtime call.
...@@ -309,7 +306,7 @@ extern const char* hl_get_device_error_string(); ...@@ -309,7 +306,7 @@ extern const char* hl_get_device_error_string();
* *
* @see hl_get_device_last_error() * @see hl_get_device_last_error()
*/ */
extern const char* hl_get_device_error_string(size_t err); extern const char *hl_get_device_error_string(size_t err);
/** /**
* @brief Returns the last error number. * @brief Returns the last error number.
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_CUDA_CUBLAS_H_ #ifndef HL_CUDA_CUBLAS_H_
#define HL_CUDA_CUBLAS_H_ #define HL_CUDA_CUBLAS_H_
...@@ -29,12 +28,8 @@ limitations under the License. */ ...@@ -29,12 +28,8 @@ limitations under the License. */
* @param[in] ldc the first dimension of C_d. * @param[in] ldc the first dimension of C_d.
* *
*/ */
extern void hl_matrix_transpose(real *A_d, extern void hl_matrix_transpose(
real *C_d, real *A_d, real *C_d, int dimM, int dimN, int lda, int ldc);
int dimM,
int dimN,
int lda,
int ldc);
/* /*
* @brief Matrix transpose, while lda = dimN, ldc = dimM. * @brief Matrix transpose, while lda = dimN, ldc = dimM.
...@@ -45,10 +40,7 @@ extern void hl_matrix_transpose(real *A_d, ...@@ -45,10 +40,7 @@ extern void hl_matrix_transpose(real *A_d,
* @param[in] dimN matrix width. * @param[in] dimN matrix width.
* *
*/ */
extern void hl_matrix_transpose(real *A_d, extern void hl_matrix_transpose(real *A_d, real *C_d, int dimM, int dimN);
real *C_d,
int dimM,
int dimN);
/* /*
* @brief Matrix inverse * @brief Matrix inverse
...@@ -60,11 +52,7 @@ extern void hl_matrix_transpose(real *A_d, ...@@ -60,11 +52,7 @@ extern void hl_matrix_transpose(real *A_d,
* @param[in] ldc the first dimension of C_d * @param[in] ldc the first dimension of C_d
* *
*/ */
extern void hl_matrix_inverse(real *A_d, extern void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc);
real *C_d,
int dimN,
int lda,
int ldc);
/** /**
* @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d * @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d
...@@ -84,12 +72,19 @@ extern void hl_matrix_inverse(real *A_d, ...@@ -84,12 +72,19 @@ extern void hl_matrix_inverse(real *A_d,
* @param[in] ldc the first dimension of C_d. * @param[in] ldc the first dimension of C_d.
* *
*/ */
extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa, extern void hl_matrix_mul(real *A_d,
real *B_d, hl_trans_op_t transb, hl_trans_op_t transa,
real *B_d,
hl_trans_op_t transb,
real *C_d, real *C_d,
int dimM, int dimN, int dimK, int dimM,
real alpha, real beta, int dimN,
int lda, int ldb, int ldc); int dimK,
real alpha,
real beta,
int lda,
int ldb,
int ldc);
/** /**
* @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d * @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d
...@@ -106,11 +101,16 @@ extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa, ...@@ -106,11 +101,16 @@ extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
* @param[in] beta scalar used for multiplication. * @param[in] beta scalar used for multiplication.
* *
*/ */
extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa, extern void hl_matrix_mul(real *A_d,
real *B_d, hl_trans_op_t transb, hl_trans_op_t transa,
real *B_d,
hl_trans_op_t transb,
real *C_d, real *C_d,
int dimM, int dimN, int dimK, int dimM,
real alpha, real beta); int dimN,
int dimK,
real alpha,
real beta);
/** /**
* @brief This function performs the matrix-vector multiplication. * @brief This function performs the matrix-vector multiplication.
...@@ -132,11 +132,17 @@ extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa, ...@@ -132,11 +132,17 @@ extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
* *
*/ */
extern void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans, extern void hl_matrix_mul_vector(real *A_d,
real *B_d, real *C_d, hl_trans_op_t trans,
int dimM, int dimN, real *B_d,
real alpha, real beta, real *C_d,
int lda, int incb, int incc); int dimM,
int dimN,
real alpha,
real beta,
int lda,
int incb,
int incc);
/** /**
* @brief This function performs the matrix-vector multiplication. * @brief This function performs the matrix-vector multiplication.
...@@ -154,9 +160,13 @@ extern void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans, ...@@ -154,9 +160,13 @@ extern void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
* @param[in] beta scalar used for multiplication. * @param[in] beta scalar used for multiplication.
* *
*/ */
extern void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans, extern void hl_matrix_mul_vector(real *A_d,
real *B_d, real *C_d, hl_trans_op_t trans,
int dimM, int dimN, real *B_d,
real alpha, real beta); real *C_d,
int dimM,
int dimN,
real alpha,
real beta);
#endif /* HL_CUDA_CUBLAS_H_ */ #endif /* HL_CUDA_CUBLAS_H_ */
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_CUDA_CUDNN_H_ #ifndef HL_CUDA_CUDNN_H_
#define HL_CUDA_CUDNN_H_ #define HL_CUDA_CUDNN_H_
...@@ -22,7 +21,7 @@ limitations under the License. */ ...@@ -22,7 +21,7 @@ limitations under the License. */
* hppl pooling mode * hppl pooling mode
*/ */
typedef enum { typedef enum {
HL_POOLING_MAX = 0, HL_POOLING_MAX = 0,
// average includes padded values // average includes padded values
HL_POOLING_AVERAGE = 1, HL_POOLING_AVERAGE = 1,
// average does not include padded values // average does not include padded values
...@@ -324,17 +323,16 @@ extern void hl_convolution_forward_add_bias(hl_tensor_descriptor bias, ...@@ -324,17 +323,16 @@ extern void hl_convolution_forward_add_bias(hl_tensor_descriptor bias,
* @param[in] sizeInBytes gpu workspace size (bytes). * @param[in] sizeInBytes gpu workspace size (bytes).
* @param[in] convBwdFilterAlgo backward filter algorithm. * @param[in] convBwdFilterAlgo backward filter algorithm.
*/ */
extern void hl_convolution_backward_filter( extern void hl_convolution_backward_filter(hl_tensor_descriptor input,
hl_tensor_descriptor input, real* input_data,
real* input_data, hl_tensor_descriptor output,
hl_tensor_descriptor output, real* output_grad_data,
real* output_grad_data, hl_filter_descriptor filter,
hl_filter_descriptor filter, real* filter_grad_data,
real* filter_grad_data, hl_convolution_descriptor conv,
hl_convolution_descriptor conv, void* gpuWorkSpace,
void* gpuWorkSpace, size_t sizeInBytes,
size_t sizeInBytes, int convBwdFilterAlgo);
int convBwdFilterAlgo);
/** /**
* @brief convolution backward data(calculate input image grad data). * @brief convolution backward data(calculate input image grad data).
...@@ -350,17 +348,16 @@ extern void hl_convolution_backward_filter( ...@@ -350,17 +348,16 @@ extern void hl_convolution_backward_filter(
* @param[in] sizeInBytes gpu workspace size (bytes). * @param[in] sizeInBytes gpu workspace size (bytes).
* @param[in] convBwdDataAlgo backward data algorithm. * @param[in] convBwdDataAlgo backward data algorithm.
*/ */
extern void hl_convolution_backward_data( extern void hl_convolution_backward_data(hl_tensor_descriptor input,
hl_tensor_descriptor input, real* input_data_grad,
real* input_data_grad, hl_tensor_descriptor output,
hl_tensor_descriptor output, real* output_grad_data,
real* output_grad_data, hl_filter_descriptor filter,
hl_filter_descriptor filter, real* filter_data,
real* filter_data, hl_convolution_descriptor conv,
hl_convolution_descriptor conv, void* gpuWorkSpace,
void* gpuWorkSpace, size_t sizeInBytes,
size_t sizeInBytes, int convBwdDataAlgo);
int convBwdDataAlgo);
/** /**
* @brief convolution backward bias(calculate bias grad data). * @brief convolution backward bias(calculate bias grad data).
...@@ -383,8 +380,8 @@ extern void hl_convolution_backward_bias(hl_tensor_descriptor bias, ...@@ -383,8 +380,8 @@ extern void hl_convolution_backward_bias(hl_tensor_descriptor bias,
* @param[in] height matrix height. * @param[in] height matrix height.
* @param[in] width matrix width. * @param[in] width matrix width.
*/ */
extern void hl_softmax_forward(real *input, extern void hl_softmax_forward(real* input,
real *output, real* output,
int height, int height,
int width); int width);
...@@ -396,8 +393,8 @@ extern void hl_softmax_forward(real *input, ...@@ -396,8 +393,8 @@ extern void hl_softmax_forward(real *input,
* @param[in] height matrix height. * @param[in] height matrix height.
* @param[in] width matrix width. * @param[in] width matrix width.
*/ */
extern void hl_softmax_backward(real *output_value, extern void hl_softmax_backward(real* output_value,
real *output_grad, real* output_grad,
int height, int height,
int width); int width);
...@@ -426,18 +423,18 @@ extern void hl_softmax_backward(real *output_value, ...@@ -426,18 +423,18 @@ extern void hl_softmax_backward(real *output_value,
* *
*/ */
extern void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc, extern void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
real *input, real* input,
hl_tensor_descriptor outputDesc, hl_tensor_descriptor outputDesc,
real *output, real* output,
hl_tensor_descriptor bnParamDesc, hl_tensor_descriptor bnParamDesc,
real *scale, real* scale,
real *bias, real* bias,
double factor, double factor,
real *runningMean, real* runningMean,
real *runningInvVar, real* runningInvVar,
double epsilon, double epsilon,
real *savedMean, real* savedMean,
real *savedVar); real* savedVar);
/** /**
* @brief cudnn batch norm forward. * @brief cudnn batch norm forward.
...@@ -463,14 +460,14 @@ extern void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc, ...@@ -463,14 +460,14 @@ extern void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
* *
*/ */
extern void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, extern void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
real *input, real* input,
hl_tensor_descriptor outputDesc, hl_tensor_descriptor outputDesc,
real *output, real* output,
hl_tensor_descriptor bnParamDesc, hl_tensor_descriptor bnParamDesc,
real *scale, real* scale,
real *bias, real* bias,
real *estimatedMean, real* estimatedMean,
real *estimatedVar, real* estimatedVar,
double epsilon); double epsilon);
/** /**
...@@ -483,7 +480,8 @@ extern void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, ...@@ -483,7 +480,8 @@ extern void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
* @param[in] inGradDesc input tensor descriptor desc. * @param[in] inGradDesc input tensor descriptor desc.
* @param[in] inGrad input data. * @param[in] inGrad input data.
* @param[in] dBnParamDesc tensor descriptor desc. * @param[in] dBnParamDesc tensor descriptor desc.
* bnScale, bnBias, running mean/var, save_mean/var. * bnScale, bnBias, running mean/var,
* save_mean/var.
* @param[in] scale batch normalization scale parameter (in original * @param[in] scale batch normalization scale parameter (in original
* paper scale is referred to as gamma). * paper scale is referred to as gamma).
* @param[in] scaleGrad batch normalization scale parameter (in original * @param[in] scaleGrad batch normalization scale parameter (in original
...@@ -497,17 +495,17 @@ extern void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, ...@@ -497,17 +495,17 @@ extern void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
* *
*/ */
extern void hl_batch_norm_backward(hl_tensor_descriptor inputDesc, extern void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
real *input, real* input,
hl_tensor_descriptor outGradDesc, hl_tensor_descriptor outGradDesc,
real *outGrad, real* outGrad,
hl_tensor_descriptor inGradDesc, hl_tensor_descriptor inGradDesc,
real *inGrad, real* inGrad,
hl_tensor_descriptor dBnParamDesc, hl_tensor_descriptor dBnParamDesc,
real *scale, real* scale,
real *scaleGrad, real* scaleGrad,
real *biasGrad, real* biasGrad,
double epsilon, double epsilon,
real *savedMean, real* savedMean,
real *savedInvVar); real* savedInvVar);
#endif // HL_CUDA_CUDNN_H_ #endif // HL_CUDA_CUDNN_H_
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_DSO_LOADER_H_ #ifndef HL_DSO_LOADER_H_
#define HL_DSO_LOADER_H_ #define HL_DSO_LOADER_H_
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_FUNCTIONS_H_ #ifndef HL_FUNCTIONS_H_
#define HL_FUNCTIONS_H_ #define HL_FUNCTIONS_H_
...@@ -21,30 +20,30 @@ limitations under the License. */ ...@@ -21,30 +20,30 @@ limitations under the License. */
/** /**
* sigmoid threshold minimum * sigmoid threshold minimum
*/ */
#define SIGMOID_THRESHOLD_MIN -40.0 #define SIGMOID_THRESHOLD_MIN -40.0
/** /**
* sigmoid threshold maximum * sigmoid threshold maximum
*/ */
#define SIGMOID_THRESHOLD_MAX 13.0 #define SIGMOID_THRESHOLD_MAX 13.0
#ifndef __NVCC__ #ifndef __NVCC__
namespace hppl { namespace hppl {
/* /*
* forward activation * forward activation
*/ */
real relu(const real a); real relu(const real a);
real sigmoid(const real a); real sigmoid(const real a);
real tanh(const real a); real tanh(const real a);
real linear(const real a); real linear(const real a);
/* /*
* backward activation * backward activation
*/ */
real relu(const real a, const real b); real relu(const real a, const real b);
real sigmoid(const real a, const real b); real sigmoid(const real a, const real b);
real tanh(const real a, const real b); real tanh(const real a, const real b);
real linear(const real a, const real b); real linear(const real a, const real b);
} // namespace hppl } // namespace hppl
#ifdef __AVX__ #ifdef __AVX__
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_GPU_H_ #ifndef HL_GPU_H_
#define HL_GPU_H_ #define HL_GPU_H_
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_LSTM_H_ #ifndef HL_LSTM_H_
#define HL_LSTM_H_ #define HL_LSTM_H_
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_MATRIX_H_ #ifndef HL_MATRIX_H_
#define HL_MATRIX_H_ #define HL_MATRIX_H_
...@@ -30,13 +29,8 @@ limitations under the License. */ ...@@ -30,13 +29,8 @@ limitations under the License. */
* @param[in] beta scalar used for addition. * @param[in] beta scalar used for addition.
* *
*/ */
extern void hl_matrix_add(real* A_d, extern void hl_matrix_add(
real* B_d, real* A_d, real* B_d, real* C_d, int dimM, int dimN, real alpha, real beta);
real* C_d,
int dimM,
int dimN,
real alpha,
real beta);
/** /**
* @brief Matrix Softmax. * @brief Matrix Softmax.
* *
...@@ -46,7 +40,7 @@ extern void hl_matrix_add(real* A_d, ...@@ -46,7 +40,7 @@ extern void hl_matrix_add(real* A_d,
* @param[in] dimN matrix width. * @param[in] dimN matrix width.
* *
*/ */
extern void hl_matrix_softmax(real *A_d, real *C_d, int dimM, int dimN); extern void hl_matrix_softmax(real* A_d, real* C_d, int dimM, int dimN);
/** /**
* @brief Matrix softmax derivative. * @brief Matrix softmax derivative.
...@@ -58,11 +52,8 @@ extern void hl_matrix_softmax(real *A_d, real *C_d, int dimM, int dimN); ...@@ -58,11 +52,8 @@ extern void hl_matrix_softmax(real *A_d, real *C_d, int dimM, int dimN);
* @param[in] dimN matrix width. * @param[in] dimN matrix width.
* *
*/ */
extern void hl_matrix_softmax_derivative(real* grad_d, extern void hl_matrix_softmax_derivative(
real* output_d, real* grad_d, real* output_d, real* sftmaxSum_d, int dimM, int dimN);
real* sftmaxSum_d,
int dimM,
int dimN);
/** /**
* @brief Sequence softmax. * @brief Sequence softmax.
...@@ -73,8 +64,8 @@ extern void hl_matrix_softmax_derivative(real* grad_d, ...@@ -73,8 +64,8 @@ extern void hl_matrix_softmax_derivative(real* grad_d,
* @param[in] numSequence sequence number. * @param[in] numSequence sequence number.
* *
*/ */
extern void hl_sequence_softmax_forward(real *A_d, extern void hl_sequence_softmax_forward(real* A_d,
real *C_d, real* C_d,
const int* index, const int* index,
int numSequence); int numSequence);
...@@ -88,11 +79,8 @@ extern void hl_sequence_softmax_forward(real *A_d, ...@@ -88,11 +79,8 @@ extern void hl_sequence_softmax_forward(real *A_d,
* @param[in] dimN matrix width. * @param[in] dimN matrix width.
* *
*/ */
extern void hl_matrix_classification_error(real* A_d, extern void hl_matrix_classification_error(
int* B_d, real* A_d, int* B_d, real* C_d, int dimM, int dimN);
real* C_d,
int dimM,
int dimN);
/** /**
* @brief Matrix cross entropy. * @brief Matrix cross entropy.
...@@ -104,11 +92,8 @@ extern void hl_matrix_classification_error(real* A_d, ...@@ -104,11 +92,8 @@ extern void hl_matrix_classification_error(real* A_d,
* @param[in] dimN matrix width. * @param[in] dimN matrix width.
* *
*/ */
extern void hl_matrix_cross_entropy(real* A_d, extern void hl_matrix_cross_entropy(
real* C_d, real* A_d, real* C_d, int* label_d, int dimM, int dimN);
int* label_d,
int dimM,
int dimN);
/** /**
* @brief Matrix cross entropy back propagation. * @brief Matrix cross entropy back propagation.
...@@ -120,11 +105,8 @@ extern void hl_matrix_cross_entropy(real* A_d, ...@@ -120,11 +105,8 @@ extern void hl_matrix_cross_entropy(real* A_d,
* @param[in] dimN matrix width. * @param[in] dimN matrix width.
* *
*/ */
extern void hl_matrix_cross_entropy_bp(real* grad_d, extern void hl_matrix_cross_entropy_bp(
real* output_d, real* grad_d, real* output_d, int* label_d, int dimM, int dimN);
int* label_d,
int dimM,
int dimN);
/** /**
* @brief Matrix multi-binary label cross entropy * @brief Matrix multi-binary label cross entropy
...@@ -135,11 +117,8 @@ extern void hl_matrix_cross_entropy_bp(real* grad_d, ...@@ -135,11 +117,8 @@ extern void hl_matrix_cross_entropy_bp(real* grad_d,
* @param[in] dimM matrix height. * @param[in] dimM matrix height.
* @param[in] dimN matrix width. * @param[in] dimN matrix width.
*/ */
extern void hl_matrix_multi_binary_cross_entropy(real* output, extern void hl_matrix_multi_binary_cross_entropy(
real* entropy, real* output, real* entropy, hl_sparse_matrix_s mat, int dimM, int dimN);
hl_sparse_matrix_s mat,
int dimM,
int dimN);
/** /**
* @brief Matrix multi-binary label cross entropy backprop * @brief Matrix multi-binary label cross entropy backprop
...@@ -150,11 +129,8 @@ extern void hl_matrix_multi_binary_cross_entropy(real* output, ...@@ -150,11 +129,8 @@ extern void hl_matrix_multi_binary_cross_entropy(real* output,
* @param[in] dimM matrix height. * @param[in] dimM matrix height.
* @param[in] dimN matrix width. * @param[in] dimN matrix width.
*/ */
extern void hl_matrix_multi_binary_cross_entropy_bp(real* output, extern void hl_matrix_multi_binary_cross_entropy_bp(
real* grad, real* output, real* grad, hl_sparse_matrix_s mat, int dimM, int dimN);
hl_sparse_matrix_s mat,
int dimM,
int dimN);
/** /**
* @brief Matrix zero memory. * @brief Matrix zero memory.
...@@ -176,12 +152,8 @@ extern void hl_matrix_zero_mem(real* data, int num); ...@@ -176,12 +152,8 @@ extern void hl_matrix_zero_mem(real* data, int num);
* @param[in] partial_sum * @param[in] partial_sum
*/ */
extern void hl_param_relu_forward(real* output, extern void hl_param_relu_forward(
real* input, real* output, real* input, real* w, int width, int height, int partial_sum);
real* w,
int width,
int height,
int partial_sum);
/** /**
* @brief parameter relu backward w * @brief parameter relu backward w
* *
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_SEQUENCE_H_ #ifndef HL_SEQUENCE_H_
#define HL_SEQUENCE_H_ #define HL_SEQUENCE_H_
...@@ -32,7 +31,7 @@ limitations under the License. */ ...@@ -32,7 +31,7 @@ limitations under the License. */
extern void hl_max_sequence_forward(real* input, extern void hl_max_sequence_forward(real* input,
const int* sequence, const int* sequence,
real* output, real* output,
int *index, int* index,
int numSequences, int numSequences,
int dim); int dim);
...@@ -46,11 +45,8 @@ extern void hl_max_sequence_forward(real* input, ...@@ -46,11 +45,8 @@ extern void hl_max_sequence_forward(real* input,
* @param[in] dim input dimension. * @param[in] dim input dimension.
* *
*/ */
extern void hl_max_sequence_backward(real* outputGrad, extern void hl_max_sequence_backward(
int *index, real* outputGrad, int* index, real* inputGrad, int numSequences, int dim);
real* inputGrad,
int numSequences,
int dim);
/** /**
* @brief Context projection forward. * @brief Context projection forward.
...@@ -63,7 +59,8 @@ extern void hl_max_sequence_backward(real* outputGrad, ...@@ -63,7 +59,8 @@ extern void hl_max_sequence_backward(real* outputGrad,
* @param[in] inputDim input sequence dimension. * @param[in] inputDim input sequence dimension.
* @param[in] contextLength context length. * @param[in] contextLength context length.
* @param[in] contextStart context start. * @param[in] contextStart context start.
* @param[in] beginPad number of extra timesteps added at the beginning. * @param[in] beginPad number of extra timesteps added at the
* beginning.
* @param[in] isPadding trainable padding. * @param[in] isPadding trainable padding.
* *
*/ */
...@@ -109,7 +106,8 @@ extern void hl_context_projection_backward_data(real* outputGrad, ...@@ -109,7 +106,8 @@ extern void hl_context_projection_backward_data(real* outputGrad,
* @param[in] totalPad number of extra timesteps. * @param[in] totalPad number of extra timesteps.
* @param[in] contextLength context length. * @param[in] contextLength context length.
* @param[in] contextStart context start. * @param[in] contextStart context start.
* @param[in] beginPad number of extra timesteps added at the beginning. * @param[in] beginPad number of extra timesteps added at the
* beginning.
* *
*/ */
extern void hl_context_projection_backward_weight(real* outputGrad, extern void hl_context_projection_backward_weight(real* outputGrad,
...@@ -141,9 +139,9 @@ extern void hl_context_projection_backward_weight(real* outputGrad, ...@@ -141,9 +139,9 @@ extern void hl_context_projection_backward_weight(real* outputGrad,
* @param[in] seq2batch copy direction. * @param[in] seq2batch copy direction.
* *
*/ */
extern void hl_sequence2batch_copy(real *batch, extern void hl_sequence2batch_copy(real* batch,
real *sequence, real* sequence,
const int *batchIndex, const int* batchIndex,
int seqWidth, int seqWidth,
int batchCount, int batchCount,
bool seq2batch); bool seq2batch);
...@@ -167,9 +165,9 @@ extern void hl_sequence2batch_copy(real *batch, ...@@ -167,9 +165,9 @@ extern void hl_sequence2batch_copy(real *batch,
* @param[in] seq2batch copy direction. * @param[in] seq2batch copy direction.
* *
*/ */
extern void hl_sequence2batch_add(real *batch, extern void hl_sequence2batch_add(real* batch,
real *sequence, real* sequence,
int *batchIndex, int* batchIndex,
int seqWidth, int seqWidth,
int batchCount, int batchCount,
bool seq2batch); bool seq2batch);
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_SPARSE_H_ #ifndef HL_SPARSE_H_
#define HL_SPARSE_H_ #define HL_SPARSE_H_
...@@ -31,7 +30,7 @@ limitations under the License. */ ...@@ -31,7 +30,7 @@ limitations under the License. */
*/ */
extern void hl_malloc_sparse_matrix(hl_sparse_matrix_s *A_d, extern void hl_malloc_sparse_matrix(hl_sparse_matrix_s *A_d,
hl_matrix_format_t format, hl_matrix_format_t format,
hl_matrix_value_t value_type, hl_matrix_value_t value_type,
int dimM, int dimM,
int dimN, int dimN,
int nnz); int nnz);
...@@ -60,10 +59,10 @@ extern void hl_free_sparse_matrix(hl_sparse_matrix_s A_d); ...@@ -60,10 +59,10 @@ extern void hl_free_sparse_matrix(hl_sparse_matrix_s A_d);
* *
*/ */
extern void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d, extern void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d,
void * dest_d, void *dest_d,
size_t size, size_t size,
hl_matrix_format_t format, hl_matrix_format_t format,
hl_matrix_value_t value_type, hl_matrix_value_t value_type,
int dimM, int dimM,
int dimN, int dimN,
int nnz); int nnz);
...@@ -94,11 +93,11 @@ extern void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d, ...@@ -94,11 +93,11 @@ extern void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d,
* *
*/ */
extern void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d, extern void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d,
real* value_d, real *value_d,
int* rows_d, int *rows_d,
int* cols_d, int *cols_d,
hl_matrix_format_t format, hl_matrix_format_t format,
hl_matrix_value_t value_type, hl_matrix_value_t value_type,
int dimM, int dimM,
int dimN, int dimN,
int nnz); int nnz);
...@@ -259,10 +258,14 @@ extern void hl_matrix_csr_mul_dense(hl_sparse_matrix_s A_d, ...@@ -259,10 +258,14 @@ extern void hl_matrix_csr_mul_dense(hl_sparse_matrix_s A_d,
*/ */
extern void hl_matrix_csc_mul_dense(hl_sparse_matrix_s A_d, extern void hl_matrix_csc_mul_dense(hl_sparse_matrix_s A_d,
hl_trans_op_t transa, hl_trans_op_t transa,
real *B_d, hl_trans_op_t transb, real *B_d,
hl_trans_op_t transb,
real *C_d, real *C_d,
int dimM, int dimN, int dimK, int dimM,
real alpha, real beta); int dimN,
int dimK,
real alpha,
real beta);
/** /**
* @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d. * @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d.
...@@ -311,11 +314,16 @@ extern void hl_matrix_dense_mul_csc(real *A_d, ...@@ -311,11 +314,16 @@ extern void hl_matrix_dense_mul_csc(real *A_d,
* @note transb does not support HPPL_OP_T. * @note transb does not support HPPL_OP_T.
* *
*/ */
extern void hl_sparse_matrix_mul(real* A_d, hl_trans_op_t transa, extern void hl_sparse_matrix_mul(real *A_d,
real *B_d, hl_trans_op_t transb, hl_trans_op_t transa,
real *B_d,
hl_trans_op_t transb,
hl_sparse_matrix_s C_d, hl_sparse_matrix_s C_d,
int dimM, int dimN, int dimK, int dimM,
real alpha, real beta); int dimN,
int dimK,
real alpha,
real beta);
/** /**
* @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d * @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d
...@@ -336,12 +344,16 @@ extern void hl_sparse_matrix_mul(real* A_d, hl_trans_op_t transa, ...@@ -336,12 +344,16 @@ extern void hl_sparse_matrix_mul(real* A_d, hl_trans_op_t transa,
* @note transa does not support HPPL_OP_T. * @note transa does not support HPPL_OP_T.
* *
*/ */
extern void hl_matrix_dense_mul_csr(real *A_d, hl_trans_op_t transa, extern void hl_matrix_dense_mul_csr(real *A_d,
hl_trans_op_t transa,
hl_sparse_matrix_s B_d, hl_sparse_matrix_s B_d,
hl_trans_op_t transb, hl_trans_op_t transb,
real *C_d, real *C_d,
int dimM, int dimN, int dimK, int dimM,
real alpha, real beta); int dimN,
int dimK,
real alpha,
real beta);
/** /**
* @brief Memcpy csc_matrix to host. * @brief Memcpy csc_matrix to host.
...@@ -412,7 +424,6 @@ extern void hl_memcpy_from_csr_matrix(real *csr_val, ...@@ -412,7 +424,6 @@ extern void hl_memcpy_from_csr_matrix(real *csr_val,
hl_sparse_matrix_s csr_matrix, hl_sparse_matrix_s csr_matrix,
hl_stream_t stream); hl_stream_t stream);
/** /**
* @brief A_d[j] += B_d[i,j] for i in range(height) * @brief A_d[j] += B_d[i,j] for i in range(height)
* *
...@@ -423,19 +434,13 @@ extern void hl_memcpy_from_csr_matrix(real *csr_val, ...@@ -423,19 +434,13 @@ extern void hl_memcpy_from_csr_matrix(real *csr_val,
* @param[in] scale scale of B_d * @param[in] scale scale of B_d
* *
*/ */
extern void hl_sparse_matrix_column_sum(real* A_d, extern void hl_sparse_matrix_column_sum(
hl_sparse_matrix_s B_d, real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale);
int dimM,
int dimN,
real scale);
/** /**
* @brief implementation of csr sparse matrix in hl_sparse_matrix_column_sum * @brief implementation of csr sparse matrix in hl_sparse_matrix_column_sum
*/ */
extern void hl_matrix_csr_column_sum(real* A_d, extern void hl_matrix_csr_column_sum(
hl_sparse_matrix_s B_d, real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale);
int dimM,
int dimN,
real scale);
/** /**
* @brief A_d[i,j] += B_d[j] * @brief A_d[i,j] += B_d[j]
...@@ -446,13 +451,13 @@ extern void hl_matrix_csr_column_sum(real* A_d, ...@@ -446,13 +451,13 @@ extern void hl_matrix_csr_column_sum(real* A_d,
* *
*/ */
extern void hl_sparse_matrix_add_bias(hl_sparse_matrix_s A_d, extern void hl_sparse_matrix_add_bias(hl_sparse_matrix_s A_d,
real* B_d, real *B_d,
real scale); real scale);
/** /**
* @brief implementation of csr sparse matrix in hl_sparse_matrix_add_bias * @brief implementation of csr sparse matrix in hl_sparse_matrix_add_bias
*/ */
extern void hl_matrix_csr_add_bias(hl_sparse_matrix_s A_d, extern void hl_matrix_csr_add_bias(hl_sparse_matrix_s A_d,
real* B_d, real *B_d,
real scale); real scale);
/** /**
...@@ -470,7 +475,7 @@ extern void hl_matrix_csr_add_bias(hl_sparse_matrix_s A_d, ...@@ -470,7 +475,7 @@ extern void hl_matrix_csr_add_bias(hl_sparse_matrix_s A_d,
* *
*/ */
extern void hl_sparse_matrix_add_dense(hl_sparse_matrix_s A_d, extern void hl_sparse_matrix_add_dense(hl_sparse_matrix_s A_d,
real* B_d, real *B_d,
int dimM, int dimM,
int dimN, int dimN,
real alpha, real alpha,
...@@ -479,7 +484,7 @@ extern void hl_sparse_matrix_add_dense(hl_sparse_matrix_s A_d, ...@@ -479,7 +484,7 @@ extern void hl_sparse_matrix_add_dense(hl_sparse_matrix_s A_d,
* @brief implementation of csr sparse matrix in hl_sparse_matrix_add_dense * @brief implementation of csr sparse matrix in hl_sparse_matrix_add_dense
*/ */
extern void hl_matrix_csr_add_dense(hl_sparse_matrix_s A_d, extern void hl_matrix_csr_add_dense(hl_sparse_matrix_s A_d,
real* B_d, real *B_d,
int dimM, int dimM,
int dimN, int dimN,
real alpha, real alpha,
...@@ -493,7 +498,7 @@ extern void hl_matrix_csr_add_dense(hl_sparse_matrix_s A_d, ...@@ -493,7 +498,7 @@ extern void hl_matrix_csr_add_dense(hl_sparse_matrix_s A_d,
* @return return rows pointer, which is gpu address * @return return rows pointer, which is gpu address
* *
*/ */
extern int* hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat); extern int *hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat);
/** /**
* @brief get cols pointer of GpuSparseMatrix * @brief get cols pointer of GpuSparseMatrix
...@@ -503,7 +508,7 @@ extern int* hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat); ...@@ -503,7 +508,7 @@ extern int* hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat);
* @return return cols pointer, which is gpu address * @return return cols pointer, which is gpu address
* *
*/ */
extern int* hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat); extern int *hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat);
/** /**
* @brief get value pointer of GpuSparseMatrix * @brief get value pointer of GpuSparseMatrix
...@@ -513,7 +518,6 @@ extern int* hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat); ...@@ -513,7 +518,6 @@ extern int* hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat);
* @return return value pointer, which is gpu address * @return return value pointer, which is gpu address
* *
*/ */
extern real* hl_sparse_matrix_get_value(hl_sparse_matrix_s sMat); extern real *hl_sparse_matrix_get_value(hl_sparse_matrix_s sMat);
#endif /* HL_SPARSE_H_ */ #endif /* HL_SPARSE_H_ */
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_TABLE_APPLY_H_ #ifndef HL_TABLE_APPLY_H_
#define HL_TABLE_APPLY_H_ #define HL_TABLE_APPLY_H_
...@@ -31,8 +30,10 @@ limitations under the License. */ ...@@ -31,8 +30,10 @@ limitations under the License. */
* @param[in] dim width of table. * @param[in] dim width of table.
* *
*/ */
extern void hl_matrix_select_rows(real* output, int ldo, extern void hl_matrix_select_rows(real* output,
real* table, int ldt, int ldo,
real* table,
int ldt,
int* ids, int* ids,
int numSamples, int numSamples,
int tableSize, int tableSize,
...@@ -53,8 +54,10 @@ extern void hl_matrix_select_rows(real* output, int ldo, ...@@ -53,8 +54,10 @@ extern void hl_matrix_select_rows(real* output, int ldo,
* @param[in] dim width of table. * @param[in] dim width of table.
* *
*/ */
extern void hl_matrix_add_to_rows(real* table, int ldt, extern void hl_matrix_add_to_rows(real* table,
real* input, int ldi, int ldt,
real* input,
int ldi,
int* ids, int* ids,
int numSamples, int numSamples,
int tableSize, int tableSize,
...@@ -72,8 +75,7 @@ extern void hl_matrix_add_to_rows(real* table, int ldt, ...@@ -72,8 +75,7 @@ extern void hl_matrix_add_to_rows(real* table, int ldt,
* *
*/ */
template <class T> template <class T>
extern void hl_vector_select_from(T* dst, int sized, extern void hl_vector_select_from(
const T* src, int sizes, T* dst, int sized, const T* src, int sizes, const int* ids, int sizei);
const int* ids, int sizei);
#endif /* HL_TABLE_APPLY_H_ */ #endif /* HL_TABLE_APPLY_H_ */
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_TIME_H_ #ifndef HL_TIME_H_
#define HL_TIME_H_ #define HL_TIME_H_
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_TOP_K_H_ #ifndef HL_TOP_K_H_
#define HL_TOP_K_H_ #define HL_TOP_K_H_
...@@ -31,9 +30,11 @@ limitations under the License. */ ...@@ -31,9 +30,11 @@ limitations under the License. */
* @param[in] numSamples height of input value. * @param[in] numSamples height of input value.
* *
*/ */
extern void hl_matrix_top_k(real* topVal, int ldv, extern void hl_matrix_top_k(real* topVal,
int * topIds, int ldv,
real* src, int lds, int* topIds,
real* src,
int lds,
int dim, int dim,
int beamSize, int beamSize,
int numSamples); int numSamples);
...@@ -50,8 +51,9 @@ extern void hl_matrix_top_k(real* topVal, int ldv, ...@@ -50,8 +51,9 @@ extern void hl_matrix_top_k(real* topVal, int ldv,
* *
* @note Only support HL_SPARSE_CSR format. * @note Only support HL_SPARSE_CSR format.
*/ */
extern void hl_sparse_matrix_top_k(real* topVal, int ldv, extern void hl_sparse_matrix_top_k(real* topVal,
int * topIds, int ldv,
int* topIds,
hl_sparse_matrix_s src, hl_sparse_matrix_s src,
int beamSize, int beamSize,
int numSamples); int numSamples);
......
...@@ -12,29 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,29 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_AGGREGATE_STUB_H_ #ifndef HL_AGGREGATE_STUB_H_
#define HL_AGGREGATE_STUB_H_ #define HL_AGGREGATE_STUB_H_
#include "hl_aggregate.h" #include "hl_aggregate.h"
inline void hl_matrix_row_sum(real *A_d, real *C_d, inline void hl_matrix_row_sum(real *A_d, real *C_d, int dimM, int dimN) {}
int dimM, int dimN) {}
inline void hl_matrix_row_max(real *A_d, real *C_d, inline void hl_matrix_row_max(real *A_d, real *C_d, int dimM, int dimN) {}
int dimM, int dimN) {}
inline void hl_matrix_row_min(real *A_d, real *C_d, inline void hl_matrix_row_min(real *A_d, real *C_d, int dimM, int dimN) {}
int dimM, int dimN) {}
inline void hl_matrix_column_sum(real *A_d, real *C_d, inline void hl_matrix_column_sum(real *A_d, real *C_d, int dimM, int dimN) {}
int dimM, int dimN) {}
inline void hl_matrix_column_max(real *A_d, real *C_d, inline void hl_matrix_column_max(real *A_d, real *C_d, int dimM, int dimN) {}
int dimM, int dimN) {}
inline void hl_matrix_column_min(real *A_d, real *C_d, inline void hl_matrix_column_min(real *A_d, real *C_d, int dimM, int dimN) {}
int dimM, int dimN) {}
inline void hl_vector_sum(real *A_d, real *C_h, int dimM) {} inline void hl_vector_sum(real *A_d, real *C_h, int dimM) {}
......
...@@ -12,84 +12,134 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,84 +12,134 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_CNN_STUB_H_ #ifndef HL_CNN_STUB_H_
#define HL_CNN_STUB_H_ #define HL_CNN_STUB_H_
#include "hl_cnn.h" #include "hl_cnn.h"
inline void hl_shrink_col2feature( inline void hl_shrink_col2feature(const real* dataCol,
const real * dataCol, size_t channels, size_t channels,
size_t height, size_t width, size_t height,
size_t blockH, size_t blockW, size_t width,
size_t strideH, size_t strideW, size_t blockH,
size_t paddingH, size_t paddingW, size_t blockW,
size_t outputH, size_t outputW, size_t strideH,
real* dataIm, size_t strideW,
real alpha, real beta) {} size_t paddingH,
size_t paddingW,
inline void hl_expand_feature2col( size_t outputH,
const real* dataIm, size_t channels, size_t outputW,
size_t height, size_t width, real* dataIm,
size_t blockH, size_t blockW, real alpha,
size_t strideH, size_t strideW, real beta) {}
size_t paddingH, size_t paddingW,
size_t outputH, size_t outputW, inline void hl_expand_feature2col(const real* dataIm,
real* dataCol) {} size_t channels,
size_t height,
inline void hl_maxpool_forward( size_t width,
const int frameCnt, const real* inputData, size_t blockH,
const int channels, size_t blockW,
const int height, const int width, size_t strideH,
const int pooledH, const int pooledW, size_t strideW,
const int sizeX, const int sizeY, size_t paddingH,
const int strideH, const int strideW, size_t paddingW,
const int paddingH, const int paddingW, size_t outputH,
real* tgtData, const int tgtStride) {} size_t outputW,
real* dataCol) {}
inline void hl_maxpool_backward(
const int frameCnt, const real* inputData, inline void hl_maxpool_forward(const int frameCnt,
const real* outData, const real* outGrad, const real* inputData,
const int channels, const int height, const int channels,
const int width, const int height,
const int pooledH, const int pooledW, const int width,
const int sizeX, const int sizeY, const int pooledH,
const int strideH, const int strideW, const int pooledW,
const int paddingH, const int paddingW, const int sizeX,
real scaleA, real scaleB, const int sizeY,
real* targetGrad, const int outStride) {} const int strideH,
const int strideW,
inline void hl_avgpool_forward( const int paddingH,
const int frameCnt, const real* inputData, const int paddingW,
const int channels, real* tgtData,
const int height, const int width, const int tgtStride) {}
const int pooledH, const int pooledW,
const int sizeX, const int sizeY, inline void hl_maxpool_backward(const int frameCnt,
const int strideH, const int strideW, const real* inputData,
const int paddingH, const int paddingW, const real* outData,
real* tgtData, const int tgtStride) {} const real* outGrad,
const int channels,
inline void hl_avgpool_backward( const int height,
const int frameCnt, const real* outGrad, const int width,
const int channels, const int height, const int pooledH,
const int width, const int pooledW,
const int pooledH, const int pooledW, const int sizeX,
const int sizeX, const int sizeY, const int sizeY,
const int strideH, const int strideW, const int strideH,
int paddingH, int paddingW, const int strideW,
real scaleA, real scaleB, const int paddingH,
real* backGrad, const int outStride) {} const int paddingW,
real scaleA,
inline void hl_CMRNorm_forward( real scaleB,
size_t frameCnt, const real* in, real* scale, real* out, real* targetGrad,
size_t channels, size_t height, size_t width, size_t sizeX, const int outStride) {}
real alpha, real beta) {}
inline void hl_avgpool_forward(const int frameCnt,
inline void hl_CMRNorm_backward( const real* inputData,
size_t frameCnt, const real* inV, const real* scale, const int channels,
const real* outV, const real* outDiff, real *inDiff, const int height,
size_t channels, size_t height, size_t width, size_t sizeX, const int width,
real alpha, real beta) {} const int pooledH,
const int pooledW,
const int sizeX,
const int sizeY,
const int strideH,
const int strideW,
const int paddingH,
const int paddingW,
real* tgtData,
const int tgtStride) {}
inline void hl_avgpool_backward(const int frameCnt,
const real* outGrad,
const int channels,
const int height,
const int width,
const int pooledH,
const int pooledW,
const int sizeX,
const int sizeY,
const int strideH,
const int strideW,
int paddingH,
int paddingW,
real scaleA,
real scaleB,
real* backGrad,
const int outStride) {}
inline void hl_CMRNorm_forward(size_t frameCnt,
const real* in,
real* scale,
real* out,
size_t channels,
size_t height,
size_t width,
size_t sizeX,
real alpha,
real beta) {}
inline void hl_CMRNorm_backward(size_t frameCnt,
const real* inV,
const real* scale,
const real* outV,
const real* outDiff,
real* inDiff,
size_t channels,
size_t height,
size_t width,
size_t sizeX,
real alpha,
real beta) {}
inline void hl_bilinear_forward(const real* inData, inline void hl_bilinear_forward(const real* inData,
const size_t inImgH, const size_t inImgH,
...@@ -106,25 +156,33 @@ inline void hl_bilinear_forward(const real* inData, ...@@ -106,25 +156,33 @@ inline void hl_bilinear_forward(const real* inData,
const real ratioW) {} const real ratioW) {}
inline void hl_bilinear_backward(real* inGrad, inline void hl_bilinear_backward(real* inGrad,
const size_t inImgH, const size_t inImgH,
const size_t inImgW, const size_t inImgW,
const size_t inputH, const size_t inputH,
const size_t inputW, const size_t inputW,
const real* outGrad, const real* outGrad,
const size_t outImgH, const size_t outImgH,
const size_t outImgW, const size_t outImgW,
const size_t outputH, const size_t outputH,
const size_t outputW, const size_t outputW,
const size_t numChannels, const size_t numChannels,
const real ratioH, const real ratioH,
const real ratioW) {} const real ratioW) {}
inline void hl_maxout_forward( inline void hl_maxout_forward(const real* inData,
const real* inData, real* outData, int* idData, real* outData,
size_t batchSize, size_t size, size_t featLen, size_t group) {} int* idData,
size_t batchSize,
inline void hl_maxout_backward( size_t size,
real* inGrad, const real* outGrad, const int* idData, size_t featLen,
size_t batchSize, size_t size, size_t featLen, size_t group) {} size_t group) {}
inline void hl_maxout_backward(real* inGrad,
const real* outGrad,
const int* idData,
size_t batchSize,
size_t size,
size_t featLen,
size_t group) {}
#endif // HL_CNN_STUB_H_ #endif // HL_CNN_STUB_H_
...@@ -12,41 +12,42 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,41 +12,42 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_CUDA_CUBLAS_STUB_H_ #ifndef HL_CUDA_CUBLAS_STUB_H_
#define HL_CUDA_CUBLAS_STUB_H_ #define HL_CUDA_CUBLAS_STUB_H_
#include "hl_cuda_cublas.h" #include "hl_cuda_cublas.h"
inline void hl_matrix_transpose(real *A_d, inline void hl_matrix_transpose(
real *C_d, real *A_d, real *C_d, int dimM, int dimN, int lda, int ldc) {}
int dimM,
int dimN, inline void hl_matrix_transpose(real *A_d, real *C_d, int dimM, int dimN) {}
int lda,
int ldc) {}
inline void hl_matrix_transpose(real *A_d,
real *C_d,
int dimM,
int dimN) {}
inline void hl_matrix_inverse(real *A_d,
real *C_d,
int dimN,
int lda,
int ldc) {}
inline void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
real *B_d, hl_trans_op_t transb,
real *C_d,
int dimM, int dimN, int dimK,
real alpha, real beta,
int lda, int ldb, int ldc) {}
inline void hl_matrix_mul(real *A_d, hl_trans_op_t transa, inline void hl_matrix_inverse(
real *B_d, hl_trans_op_t transb, real *A_d, real *C_d, int dimN, int lda, int ldc) {}
inline void hl_matrix_mul(real *A_d,
hl_trans_op_t transa,
real *B_d,
hl_trans_op_t transb,
real *C_d,
int dimM,
int dimN,
int dimK,
real alpha,
real beta,
int lda,
int ldb,
int ldc) {}
inline void hl_matrix_mul(real *A_d,
hl_trans_op_t transa,
real *B_d,
hl_trans_op_t transb,
real *C_d, real *C_d,
int dimM, int dimN, int dimK, int dimM,
real alpha, real beta) {} int dimN,
int dimK,
real alpha,
real beta) {}
#endif // HL_CUDA_CUBLAS_STUB_H_ #endif // HL_CUDA_CUBLAS_STUB_H_
...@@ -12,15 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,15 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_CUDA_CUDNN_STUB_H_ #ifndef HL_CUDA_CUDNN_STUB_H_
#define HL_CUDA_CUDNN_STUB_H_ #define HL_CUDA_CUDNN_STUB_H_
#include "hl_cuda_cudnn.h" #include "hl_cuda_cudnn.h"
inline int hl_get_cudnn_lib_version() { inline int hl_get_cudnn_lib_version() { return 0; }
return 0;
}
inline void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc) {} inline void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc) {}
...@@ -68,41 +65,41 @@ inline void hl_pooling_backward(hl_tensor_descriptor input, ...@@ -68,41 +65,41 @@ inline void hl_pooling_backward(hl_tensor_descriptor input,
hl_pooling_descriptor pooling) {} hl_pooling_descriptor pooling) {}
inline void hl_create_filter_descriptor(hl_filter_descriptor* filter, inline void hl_create_filter_descriptor(hl_filter_descriptor* filter,
int input_feature_maps, int input_feature_maps,
int output_feature_maps, int output_feature_maps,
int height, int height,
int width) {} int width) {}
inline void hl_destroy_filter_descriptor(hl_filter_descriptor filter) {} inline void hl_destroy_filter_descriptor(hl_filter_descriptor filter) {}
inline void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, inline void hl_create_convolution_descriptor(hl_convolution_descriptor* conv,
hl_tensor_descriptor image, hl_tensor_descriptor image,
hl_filter_descriptor filter, hl_filter_descriptor filter,
int padding_height, int padding_height,
int padding_width, int padding_width,
int stride_height, int stride_height,
int stride_width) {} int stride_width) {}
inline void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, inline void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
hl_tensor_descriptor image, hl_tensor_descriptor image,
hl_filter_descriptor filter, hl_filter_descriptor filter,
int padding_height, int padding_height,
int padding_width, int padding_width,
int stride_height, int stride_height,
int stride_width) {} int stride_width) {}
inline void hl_destroy_convolution_descriptor(hl_convolution_descriptor conv) {} inline void hl_destroy_convolution_descriptor(hl_convolution_descriptor conv) {}
inline void hl_conv_workspace(hl_tensor_descriptor input, inline void hl_conv_workspace(hl_tensor_descriptor input,
hl_tensor_descriptor output, hl_tensor_descriptor output,
hl_filter_descriptor filter, hl_filter_descriptor filter,
hl_convolution_descriptor conv, hl_convolution_descriptor conv,
int* convFwdAlgo, int* convFwdAlgo,
size_t* fwdLimitBytes, size_t* fwdLimitBytes,
int* convBwdDataAlgo, int* convBwdDataAlgo,
size_t* bwdDataLimitBytes, size_t* bwdDataLimitBytes,
int* convBwdFilterAlgo, int* convBwdFilterAlgo,
size_t* bwdFilterLimitBytes) {} size_t* bwdFilterLimitBytes) {}
inline void hl_convolution_forward(hl_tensor_descriptor input, inline void hl_convolution_forward(hl_tensor_descriptor input,
real* input_data, real* input_data,
...@@ -116,86 +113,84 @@ inline void hl_convolution_forward(hl_tensor_descriptor input, ...@@ -116,86 +113,84 @@ inline void hl_convolution_forward(hl_tensor_descriptor input,
int convFwdAlgo) {} int convFwdAlgo) {}
inline void hl_convolution_forward_add_bias(hl_tensor_descriptor bias, inline void hl_convolution_forward_add_bias(hl_tensor_descriptor bias,
real* bias_data, real* bias_data,
hl_tensor_descriptor output, hl_tensor_descriptor output,
real* output_data) {} real* output_data) {}
inline void hl_convolution_backward_filter( inline void hl_convolution_backward_filter(hl_tensor_descriptor input,
hl_tensor_descriptor input, real* input_data,
real* input_data, hl_tensor_descriptor output,
hl_tensor_descriptor output, real* output_grad_data,
real* output_grad_data, hl_filter_descriptor filter,
hl_filter_descriptor filter, real* filter_grad_data,
real* filter_grad_data, hl_convolution_descriptor conv,
hl_convolution_descriptor conv, void* gpuWorkSpace,
void* gpuWorkSpace, size_t sizeInBytes,
size_t sizeInBytes, int convBwdFilterAlgo) {}
int convBwdFilterAlgo) {}
inline void hl_convolution_backward_data(hl_tensor_descriptor input,
inline void hl_convolution_backward_data( real* input_data_grad,
hl_tensor_descriptor input, hl_tensor_descriptor output,
real* input_data_grad, real* output_grad_data,
hl_tensor_descriptor output, hl_filter_descriptor filter,
real* output_grad_data, real* filter_data,
hl_filter_descriptor filter, hl_convolution_descriptor conv,
real* filter_data, void* gpuWorkSpace,
hl_convolution_descriptor conv, size_t sizeInBytes,
void* gpuWorkSpace, int convBwdDataAlgo) {}
size_t sizeInBytes,
int convBwdDataAlgo) {}
inline void hl_convolution_backward_bias(hl_tensor_descriptor bias, inline void hl_convolution_backward_bias(hl_tensor_descriptor bias,
real* bias_grad_data, real* bias_grad_data,
hl_tensor_descriptor output, hl_tensor_descriptor output,
real* output_grad_data) {} real* output_grad_data) {}
inline void hl_softmax_forward(real *input, inline void hl_softmax_forward(real* input,
real *output, real* output,
int height,
int width) {}
inline void hl_softmax_backward(real *output_value,
real *output_grad,
int height, int height,
int width) {} int width) {}
inline void hl_softmax_backward(real* output_value,
real* output_grad,
int height,
int width) {}
inline void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc, inline void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
real *input, real* input,
hl_tensor_descriptor outputDesc, hl_tensor_descriptor outputDesc,
real *output, real* output,
hl_tensor_descriptor bnParamDesc, hl_tensor_descriptor bnParamDesc,
real *scale, real* scale,
real *bias, real* bias,
double factor, double factor,
real *runningMean, real* runningMean,
real *runningInvVar, real* runningInvVar,
double epsilon, double epsilon,
real *savedMean, real* savedMean,
real *savedVar) {} real* savedVar) {}
inline void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, inline void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
real *input, real* input,
hl_tensor_descriptor outputDesc, hl_tensor_descriptor outputDesc,
real *output, real* output,
hl_tensor_descriptor bnParamDesc, hl_tensor_descriptor bnParamDesc,
real *scale, real* scale,
real *bias, real* bias,
real *estimatedMean, real* estimatedMean,
real *estimatedVar, real* estimatedVar,
double epsilon) {} double epsilon) {}
inline void hl_batch_norm_backward(hl_tensor_descriptor inputDesc, inline void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
real *input, real* input,
hl_tensor_descriptor outGradDesc, hl_tensor_descriptor outGradDesc,
real *outGrad, real* outGrad,
hl_tensor_descriptor inGradDesc, hl_tensor_descriptor inGradDesc,
real *inGrad, real* inGrad,
hl_tensor_descriptor dBnParamDesc, hl_tensor_descriptor dBnParamDesc,
real *scale, real* scale,
real *scaleGrad, real* scaleGrad,
real *biasGrad, real* biasGrad,
double epsilon, double epsilon,
real *savedMean, real* savedMean,
real *savedInvVar) {} real* savedInvVar) {}
#endif // HL_CUDA_CUDNN_STUB_H_ #endif // HL_CUDA_CUDNN_STUB_H_
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_CUDA_STUB_H_ #ifndef HL_CUDA_STUB_H_
#define HL_CUDA_STUB_H_ #define HL_CUDA_STUB_H_
...@@ -24,29 +23,25 @@ inline void hl_specify_devices_start(int *device, int number) {} ...@@ -24,29 +23,25 @@ inline void hl_specify_devices_start(int *device, int number) {}
inline void hl_init(int device) {} inline void hl_init(int device) {}
inline int hl_get_cuda_lib_version(int device) { inline int hl_get_cuda_lib_version(int device) { return 0; }
return 0;
}
inline void hl_fini() {} inline void hl_fini() {}
inline void hl_set_sync_flag(bool flag) {} inline void hl_set_sync_flag(bool flag) {}
inline bool hl_get_sync_flag() { inline bool hl_get_sync_flag() { return false; }
return false;
}
inline int hl_get_device_count() { return 0; } inline int hl_get_device_count() { return 0; }
inline void hl_set_device(int device) {} inline void hl_set_device(int device) {}
inline int hl_get_device() { return 0; } inline int hl_get_device() { return 0; }
inline void* hl_malloc_device(size_t size) { return NULL; } inline void *hl_malloc_device(size_t size) { return NULL; }
inline void hl_free_mem_device(void *dest_d) {} inline void hl_free_mem_device(void *dest_d) {}
inline void* hl_malloc_host(size_t size) { return NULL; } inline void *hl_malloc_host(size_t size) { return NULL; }
inline void hl_free_mem_host(void *dest_h) {} inline void hl_free_mem_host(void *dest_h) {}
...@@ -64,7 +59,9 @@ inline void hl_rand(real *dest_d, size_t num) {} ...@@ -64,7 +59,9 @@ inline void hl_rand(real *dest_d, size_t num) {}
inline void hl_srand(unsigned int seed) {} inline void hl_srand(unsigned int seed) {}
inline void hl_memcpy_async(void *dst, void *src, size_t size, inline void hl_memcpy_async(void *dst,
void *src,
size_t size,
hl_stream_t stream) {} hl_stream_t stream) {}
inline void hl_stream_synchronize(hl_stream_t stream) {} inline void hl_stream_synchronize(hl_stream_t stream) {}
...@@ -83,11 +80,11 @@ inline void hl_stream_wait_event(hl_stream_t stream, hl_event_t event) {} ...@@ -83,11 +80,11 @@ inline void hl_stream_wait_event(hl_stream_t stream, hl_event_t event) {}
inline void hl_event_synchronize(hl_event_t event) {} inline void hl_event_synchronize(hl_event_t event) {}
inline int hl_get_device_last_error() { return 0; } inline int hl_get_device_last_error() { return 0; }
inline const char* hl_get_device_error_string() { return NULL; } inline const char *hl_get_device_error_string() { return NULL; }
inline const char* hl_get_device_error_string(size_t err) { return NULL; } inline const char *hl_get_device_error_string(size_t err) { return NULL; }
inline bool hl_cuda_event_is_ready(hl_event_t event) { return true; } inline bool hl_cuda_event_is_ready(hl_event_t event) { return true; }
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_LSTM_STUB_H_ #ifndef HL_LSTM_STUB_H_
#define HL_LSTM_STUB_H_ #define HL_LSTM_STUB_H_
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_MATRIX_STUB_H_ #ifndef HL_MATRIX_STUB_H_
#define HL_MATRIX_STUB_H_ #define HL_MATRIX_STUB_H_
...@@ -26,48 +25,30 @@ inline void hl_matrix_add(real* A_d, ...@@ -26,48 +25,30 @@ inline void hl_matrix_add(real* A_d,
real alpha, real alpha,
real beta) {} real beta) {}
inline void hl_matrix_softmax(real *A_d, real *C_d, int dimM, int dimN) {} inline void hl_matrix_softmax(real* A_d, real* C_d, int dimM, int dimN) {}
inline void hl_sequence_softmax_forward(real *A_d, inline void hl_sequence_softmax_forward(real* A_d,
real *C_d, real* C_d,
const int* index, const int* index,
int numSequence) {} int numSequence) {}
inline void hl_matrix_softmax_derivative(real* grad_d, inline void hl_matrix_softmax_derivative(
real* output_d, real* grad_d, real* output_d, real* sftmaxSum_d, int dimM, int dimN) {}
real* sftmaxSum_d,
int dimM, inline void hl_matrix_classification_error(
int dimN) {} real* A_d, int* B_d, real* C_d, int dimM, int dimN) {}
inline void hl_matrix_classification_error(real* A_d, inline void hl_matrix_cross_entropy(
int* B_d, real* A_d, real* C_d, int* label_d, int dimM, int dimN) {}
real* C_d,
int dimM, inline void hl_matrix_cross_entropy_bp(
int dimN) {} real* grad_d, real* output_d, int* label_d, int dimM, int dimN) {}
inline void hl_matrix_cross_entropy(real* A_d, inline void hl_matrix_multi_binary_cross_entropy(
real* C_d, real* output, real* entropy, hl_sparse_matrix_s mat, int dimM, int dimN) {}
int* label_d,
int dimM, inline void hl_matrix_multi_binary_cross_entropy_bp(
int dimN) {} real* output, real* grad, hl_sparse_matrix_s mat, int dimM, int dimN) {}
inline void hl_matrix_cross_entropy_bp(real* grad_d,
real* output_d,
int* label_d,
int dimM,
int dimN) {}
inline void hl_matrix_multi_binary_cross_entropy(real* output,
real* entropy,
hl_sparse_matrix_s mat,
int dimM,
int dimN) {}
inline void hl_matrix_multi_binary_cross_entropy_bp(real* output,
real* grad,
hl_sparse_matrix_s mat,
int dimM,
int dimN) {}
inline void hl_matrix_zero_mem(real* data, int num) {} inline void hl_matrix_zero_mem(real* data, int num) {}
...@@ -101,7 +82,6 @@ inline void hl_cossim(real* output, ...@@ -101,7 +82,6 @@ inline void hl_cossim(real* output,
int input2_height, int input2_height,
real scale) {} real scale) {}
inline void hl_cossim_derivative(real* grad, inline void hl_cossim_derivative(real* grad,
real* output, real* output,
real* prevOutX, real* prevOutX,
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_SEQUENCE_STUB_H_ #ifndef HL_SEQUENCE_STUB_H_
#define HL_SEQUENCE_STUB_H_ #define HL_SEQUENCE_STUB_H_
...@@ -21,15 +20,12 @@ limitations under the License. */ ...@@ -21,15 +20,12 @@ limitations under the License. */
inline void hl_max_sequence_forward(real* input, inline void hl_max_sequence_forward(real* input,
const int* sequence, const int* sequence,
real* output, real* output,
int *index, int* index,
int numSequences, int numSequences,
int dim) {} int dim) {}
inline void hl_max_sequence_backward(real* outputGrad, inline void hl_max_sequence_backward(
int *index, real* outputGrad, int* index, real* inputGrad, int numSequences, int dim) {}
real* inputGrad,
int numSequences,
int dim) {}
inline void hl_context_projection_forward(real* input, inline void hl_context_projection_forward(real* input,
const int* sequence, const int* sequence,
...@@ -60,16 +56,16 @@ inline void hl_context_projection_backward_weight(real* outputGrad, ...@@ -60,16 +56,16 @@ inline void hl_context_projection_backward_weight(real* outputGrad,
int contextStart, int contextStart,
int beginPad) {} int beginPad) {}
inline void hl_sequence2batch_copy(real *batch, inline void hl_sequence2batch_copy(real* batch,
real *sequence, real* sequence,
const int *batchIndex, const int* batchIndex,
int seqWidth, int seqWidth,
int batchCount, int batchCount,
bool seq2batch) {} bool seq2batch) {}
inline void hl_sequence2batch_add(real *batch, inline void hl_sequence2batch_add(real* batch,
real *sequence, real* sequence,
int *batchIndex, int* batchIndex,
int seqWidth, int seqWidth,
int batchCount, int batchCount,
bool seq2batch) {} bool seq2batch) {}
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_SPARSE_STUB_H_ #ifndef HL_SPARSE_STUB_H_
#define HL_SPARSE_STUB_H_ #define HL_SPARSE_STUB_H_
...@@ -20,7 +19,7 @@ limitations under the License. */ ...@@ -20,7 +19,7 @@ limitations under the License. */
inline void hl_malloc_sparse_matrix(hl_sparse_matrix_s *A_d, inline void hl_malloc_sparse_matrix(hl_sparse_matrix_s *A_d,
hl_matrix_format_t format, hl_matrix_format_t format,
hl_matrix_value_t value_type, hl_matrix_value_t value_type,
int dimM, int dimM,
int dimN, int dimN,
int nnz) {} int nnz) {}
...@@ -28,20 +27,20 @@ inline void hl_malloc_sparse_matrix(hl_sparse_matrix_s *A_d, ...@@ -28,20 +27,20 @@ inline void hl_malloc_sparse_matrix(hl_sparse_matrix_s *A_d,
inline void hl_free_sparse_matrix(hl_sparse_matrix_s A_d) {} inline void hl_free_sparse_matrix(hl_sparse_matrix_s A_d) {}
inline void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d, inline void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d,
void * dest_d, void *dest_d,
size_t size, size_t size,
hl_matrix_format_t format, hl_matrix_format_t format,
hl_matrix_value_t value_type, hl_matrix_value_t value_type,
int dimM, int dimM,
int dimN, int dimN,
int nnz) {} int nnz) {}
inline void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d, inline void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d,
real* value_d, real *value_d,
int* rows_d, int *rows_d,
int* cols_d, int *cols_d,
hl_matrix_format_t format, hl_matrix_format_t format,
hl_matrix_value_t value_type, hl_matrix_value_t value_type,
int dimM, int dimM,
int dimN, int dimN,
int nnz) {} int nnz) {}
...@@ -87,10 +86,14 @@ inline void hl_matrix_csr_mul_dense(hl_sparse_matrix_s A_d, ...@@ -87,10 +86,14 @@ inline void hl_matrix_csr_mul_dense(hl_sparse_matrix_s A_d,
inline void hl_matrix_csc_mul_dense(hl_sparse_matrix_s A_d, inline void hl_matrix_csc_mul_dense(hl_sparse_matrix_s A_d,
hl_trans_op_t transa, hl_trans_op_t transa,
real *B_d, hl_trans_op_t transb, real *B_d,
hl_trans_op_t transb,
real *C_d, real *C_d,
int dimM, int dimN, int dimK, int dimM,
real alpha, real beta) {} int dimN,
int dimK,
real alpha,
real beta) {}
inline void hl_matrix_dense_mul_csc(real *A_d, inline void hl_matrix_dense_mul_csc(real *A_d,
hl_trans_op_t transa, hl_trans_op_t transa,
...@@ -103,18 +106,27 @@ inline void hl_matrix_dense_mul_csc(real *A_d, ...@@ -103,18 +106,27 @@ inline void hl_matrix_dense_mul_csc(real *A_d,
real alpha, real alpha,
real beta) {} real beta) {}
inline void hl_sparse_matrix_mul(real* A_d, hl_trans_op_t transa, inline void hl_sparse_matrix_mul(real *A_d,
real *B_d, hl_trans_op_t transb, hl_trans_op_t transa,
real *B_d,
hl_trans_op_t transb,
hl_sparse_matrix_s C_d, hl_sparse_matrix_s C_d,
int dimM, int dimN, int dimK, int dimM,
real alpha, real beta) {} int dimN,
int dimK,
real alpha,
real beta) {}
inline void hl_matrix_dense_mul_csr(real *A_d, hl_trans_op_t transa, inline void hl_matrix_dense_mul_csr(real *A_d,
hl_trans_op_t transa,
hl_sparse_matrix_s B_d, hl_sparse_matrix_s B_d,
hl_trans_op_t transb, hl_trans_op_t transb,
real *C_d, real *C_d,
int dimM, int dimN, int dimK, int dimM,
real alpha, real beta) {} int dimN,
int dimK,
real alpha,
real beta) {}
inline void hl_memcpy_from_csc_matrix(real *csc_val, inline void hl_memcpy_from_csc_matrix(real *csc_val,
size_t val_size, size_t val_size,
...@@ -134,49 +146,39 @@ inline void hl_memcpy_from_csr_matrix(real *csr_val, ...@@ -134,49 +146,39 @@ inline void hl_memcpy_from_csr_matrix(real *csr_val,
hl_sparse_matrix_s csr_matrix, hl_sparse_matrix_s csr_matrix,
hl_stream_t stream) {} hl_stream_t stream) {}
inline void hl_sparse_matrix_column_sum(real* A_d, inline void hl_sparse_matrix_column_sum(
hl_sparse_matrix_s B_d, real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale) {}
int dimM,
int dimN,
real scale) {}
inline void hl_matrix_csr_column_sum(real* A_d, inline void hl_matrix_csr_column_sum(
hl_sparse_matrix_s B_d, real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale) {}
int dimM,
int dimN,
real scale) {}
inline void hl_sparse_matrix_add_bias(hl_sparse_matrix_s A_d, inline void hl_sparse_matrix_add_bias(hl_sparse_matrix_s A_d,
real* B_d, real *B_d,
real scale) {} real scale) {}
inline void hl_matrix_csr_add_bias(hl_sparse_matrix_s A_d, inline void hl_matrix_csr_add_bias(hl_sparse_matrix_s A_d,
real* B_d, real *B_d,
real scale) {} real scale) {}
inline void hl_sparse_matrix_add_dense(hl_sparse_matrix_s A_d, inline void hl_sparse_matrix_add_dense(hl_sparse_matrix_s A_d,
real* B_d, real *B_d,
int dimM, int dimM,
int dimN, int dimN,
real alpha, real alpha,
real beta) {} real beta) {}
inline void hl_matrix_csr_add_dense(hl_sparse_matrix_s A_d, inline void hl_matrix_csr_add_dense(hl_sparse_matrix_s A_d,
real* B_d, real *B_d,
int dimM, int dimM,
int dimN, int dimN,
real alpha, real alpha,
real beta) {} real beta) {}
inline int* hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat) { inline int *hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat) { return NULL; }
return NULL;
}
inline int* hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat) { inline int *hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat) { return NULL; }
return NULL;
}
inline real* hl_sparse_matrix_get_value(hl_sparse_matrix_s sMat) { inline real *hl_sparse_matrix_get_value(hl_sparse_matrix_s sMat) {
return NULL; return NULL;
} }
......
此差异已折叠。
...@@ -12,62 +12,58 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,62 +12,58 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <immintrin.h> #include <immintrin.h>
#include "hl_functions.h" #include "hl_functions.h"
namespace hppl { namespace hppl {
extern __m256 exp(__m256 a); extern __m256 exp(__m256 a);
__m256 relu(const __m256 a) { __m256 relu(const __m256 a) {
__m256 tmp = _mm256_set1_ps(0.0f); __m256 tmp = _mm256_set1_ps(0.0f);
return _mm256_max_ps(a, tmp); return _mm256_max_ps(a, tmp);
} }
__m256 sigmoid(const __m256 a) { __m256 sigmoid(const __m256 a) {
__m256 max = _mm256_set1_ps(SIGMOID_THRESHOLD_MAX); __m256 max = _mm256_set1_ps(SIGMOID_THRESHOLD_MAX);
__m256 min = _mm256_set1_ps(SIGMOID_THRESHOLD_MIN); __m256 min = _mm256_set1_ps(SIGMOID_THRESHOLD_MIN);
__m256 tmp = _mm256_max_ps(a, min); __m256 tmp = _mm256_max_ps(a, min);
tmp = _mm256_min_ps(tmp, max); tmp = _mm256_min_ps(tmp, max);
tmp = _mm256_sub_ps(_mm256_set1_ps(0.0f), tmp); tmp = _mm256_sub_ps(_mm256_set1_ps(0.0f), tmp);
tmp = exp(tmp); tmp = exp(tmp);
tmp = _mm256_add_ps(_mm256_set1_ps(1.0f), tmp); tmp = _mm256_add_ps(_mm256_set1_ps(1.0f), tmp);
tmp = _mm256_div_ps(_mm256_set1_ps(1.0f), tmp); tmp = _mm256_div_ps(_mm256_set1_ps(1.0f), tmp);
return tmp; return tmp;
} }
__m256 tanh(const __m256 a) { __m256 tanh(const __m256 a) {
__m256 max = _mm256_set1_ps(EXP_MAX_INPUT); __m256 max = _mm256_set1_ps(EXP_MAX_INPUT);
__m256 tmp = _mm256_mul_ps(_mm256_set1_ps(-2.0f), a); __m256 tmp = _mm256_mul_ps(_mm256_set1_ps(-2.0f), a);
tmp = _mm256_min_ps(tmp, max); tmp = _mm256_min_ps(tmp, max);
tmp = exp(tmp); tmp = exp(tmp);
return _mm256_sub_ps( return _mm256_sub_ps(_mm256_div_ps(_mm256_set1_ps(2.0f),
_mm256_div_ps(_mm256_set1_ps(2.0f), _mm256_add_ps(_mm256_set1_ps(1.0f), tmp)),
_mm256_add_ps(_mm256_set1_ps(1.0f), tmp)), _mm256_set1_ps(1.0f)); _mm256_set1_ps(1.0f));
} }
__m256 linear(const __m256 a) { __m256 linear(const __m256 a) { return a; }
return a;
}
__m256 relu(const __m256 a, const __m256 b) { __m256 relu(const __m256 a, const __m256 b) {
return _mm256_mul_ps(a, return _mm256_mul_ps(
a,
_mm256_and_ps(_mm256_cmp_ps(b, _mm256_set1_ps(0.0f), _CMP_GT_OS), _mm256_and_ps(_mm256_cmp_ps(b, _mm256_set1_ps(0.0f), _CMP_GT_OS),
_mm256_set1_ps(1.0f))); _mm256_set1_ps(1.0f)));
} }
__m256 sigmoid(const __m256 a, const __m256 b) { __m256 sigmoid(const __m256 a, const __m256 b) {
return _mm256_mul_ps(_mm256_mul_ps(a, b), return _mm256_mul_ps(_mm256_mul_ps(a, b),
_mm256_sub_ps(_mm256_set1_ps(1.0f), b)); _mm256_sub_ps(_mm256_set1_ps(1.0f), b));
} }
__m256 tanh(const __m256 a, const __m256 b) { __m256 tanh(const __m256 a, const __m256 b) {
return _mm256_mul_ps(a, return _mm256_mul_ps(
_mm256_sub_ps(_mm256_set1_ps(1.0f), _mm256_mul_ps(b, b))); a, _mm256_sub_ps(_mm256_set1_ps(1.0f), _mm256_mul_ps(b, b)));
} }
__m256 linear(const __m256 a, const __m256 b) { __m256 linear(const __m256 a, const __m256 b) { return a; }
return a;
}
} // namespace hppl } // namespace hppl
...@@ -12,46 +12,33 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,46 +12,33 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <math.h> #include <math.h>
#include "hl_functions.h" #include "hl_functions.h"
namespace hppl { namespace hppl {
real relu(const real a) { real relu(const real a) { return a > 0.0f ? a : 0.0f; }
return a > 0.0f ? a : 0.0f;
} real sigmoid(const real a) {
const real min = SIGMOID_THRESHOLD_MIN;
real sigmoid(const real a) { const real max = SIGMOID_THRESHOLD_MAX;
const real min = SIGMOID_THRESHOLD_MIN; real tmp = (a < min) ? min : ((a > max) ? max : a);
const real max = SIGMOID_THRESHOLD_MAX; return 1.0 / (1.0 + exp(-tmp));
real tmp = (a < min) ? min : ((a > max) ? max : a); }
return 1.0 / (1.0 + exp(-tmp));
} real tanh(const real a) {
real tmp = -2.0 * a;
real tanh(const real a) { tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
real tmp = -2.0 * a; return (2.0 / (1.0 + exp(tmp))) - 1.0;
tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp; }
return (2.0 / (1.0 + exp(tmp))) - 1.0;
} real linear(const real a) { return a; }
real linear(const real a) { real relu(const real a, const real b) { return a * (b > 0.0f ? 1.0f : 0.0f); }
return a;
} real sigmoid(const real a, const real b) { return a * b * (1 - b); }
real relu(const real a, const real b) { real tanh(const real a, const real b) { return a * (1.0f - b * b); }
return a * (b > 0.0f ? 1.0f : 0.0f);
} real linear(const real a, const real b) { return a; }
real sigmoid(const real a, const real b) {
return a * b * (1 - b);
}
real tanh(const real a, const real b) {
return a * (1.0f - b * b);
}
real linear(const real a, const real b) {
return a;
}
} // namespace hppl } // namespace hppl
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <sys/time.h> #include <sys/time.h>
#include <mutex> #include <mutex>
#include "hl_cuda.h" #include "hl_cuda.h"
...@@ -24,7 +23,7 @@ limitations under the License. */ ...@@ -24,7 +23,7 @@ limitations under the License. */
namespace dynload { namespace dynload {
std::once_flag cublas_dso_flag; std::once_flag cublas_dso_flag;
void* cublas_dso_handle = nullptr; void *cublas_dso_handle = nullptr;
/** /**
* The following macro definition can generate structs * The following macro definition can generate structs
...@@ -34,38 +33,32 @@ void* cublas_dso_handle = nullptr; ...@@ -34,38 +33,32 @@ void* cublas_dso_handle = nullptr;
* note: default dynamic linked libs * note: default dynamic linked libs
*/ */
#ifdef PADDLE_USE_DSO #ifdef PADDLE_USE_DSO
#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ #define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
struct DynLoad__##__name { \ struct DynLoad__##__name { \
template <typename... Args> \ template <typename... Args> \
cublasStatus_t operator()(Args... args) { \ cublasStatus_t operator()(Args... args) { \
typedef cublasStatus_t (*cublasFunc)(Args...); \ typedef cublasStatus_t (*cublasFunc)(Args...); \
std::call_once(cublas_dso_flag, GetCublasDsoHandle, \ std::call_once(cublas_dso_flag, GetCublasDsoHandle, &cublas_dso_handle); \
&cublas_dso_handle); \ void *p_##__name = dlsym(cublas_dso_handle, #__name); \
void* p_##__name = dlsym(cublas_dso_handle, #__name); \ return reinterpret_cast<cublasFunc>(p_##__name)(args...); \
return reinterpret_cast<cublasFunc>(p_##__name)(args...); \ } \
} \
} __name; // struct DynLoad__##__name } __name; // struct DynLoad__##__name
#else #else
#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ #define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
struct DynLoad__##__name { \ struct DynLoad__##__name { \
template <typename... Args> \ template <typename... Args> \
cublasStatus_t operator()(Args... args) { \ cublasStatus_t operator()(Args... args) { \
return __name(args...); \ return __name(args...); \
} \ } \
} __name; // struct DynLoad__##__name } __name; // struct DynLoad__##__name
#endif #endif
#define DYNAMIC_LOAD_CUBLAS_V2_WRAP(__name) \ #define DYNAMIC_LOAD_CUBLAS_V2_WRAP(__name) DYNAMIC_LOAD_CUBLAS_WRAP(__name)
DYNAMIC_LOAD_CUBLAS_WRAP(__name)
// include all needed cublas functions in HPPL // include all needed cublas functions in HPPL
#define CUBLAS_BLAS_ROUTINE_EACH(__macro) \ #define CUBLAS_BLAS_ROUTINE_EACH(__macro) \
__macro(cublasSgemv) \ __macro(cublasSgemv) __macro(cublasDgemv) __macro(cublasSgemm) \
__macro(cublasDgemv) \ __macro(cublasDgemm) __macro(cublasSgeam) __macro(cublasDgeam)
__macro(cublasSgemm) \
__macro(cublasDgemm) \
__macro(cublasSgeam) \
__macro(cublasDgeam) \
DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasCreate) DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasCreate)
DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasDestroy) DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasDestroy)
...@@ -88,41 +81,40 @@ CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP) ...@@ -88,41 +81,40 @@ CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP)
} /* namespace dynload */ } /* namespace dynload */
#ifndef PADDLE_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
#define CUBLAS_GEAM dynload::cublasSgeam #define CUBLAS_GEAM dynload::cublasSgeam
#define CUBLAS_GEMV dynload::cublasSgemv #define CUBLAS_GEMV dynload::cublasSgemv
#define CUBLAS_GEMM dynload::cublasSgemm #define CUBLAS_GEMM dynload::cublasSgemm
#define CUBLAS_GETRF dynload::cublasSgetrfBatched #define CUBLAS_GETRF dynload::cublasSgetrfBatched
#define CUBLAS_GETRI dynload::cublasSgetriBatched #define CUBLAS_GETRI dynload::cublasSgetriBatched
#else #else
#define CUBLAS_GEAM dynload::cublasDgeam #define CUBLAS_GEAM dynload::cublasDgeam
#define CUBLAS_GEMV dynload::cublasDgemv #define CUBLAS_GEMV dynload::cublasDgemv
#define CUBLAS_GEMM dynload::cublasDgemm #define CUBLAS_GEMM dynload::cublasDgemm
#define CUBLAS_GETRF dynload::cublasDgetrfBatched #define CUBLAS_GETRF dynload::cublasDgetrfBatched
#define CUBLAS_GETRI dynload::cublasDgetriBatched #define CUBLAS_GETRI dynload::cublasDgetriBatched
#endif #endif
const char* hl_cublas_get_error_string(cublasStatus_t status) { const char *hl_cublas_get_error_string(cublasStatus_t status) {
switch (status) { switch (status) {
case CUBLAS_STATUS_NOT_INITIALIZED: case CUBLAS_STATUS_NOT_INITIALIZED:
return "[cublas status]: not initialized"; return "[cublas status]: not initialized";
case CUBLAS_STATUS_ALLOC_FAILED: case CUBLAS_STATUS_ALLOC_FAILED:
return "[cublas status]: allocate failed"; return "[cublas status]: allocate failed";
case CUBLAS_STATUS_INVALID_VALUE: case CUBLAS_STATUS_INVALID_VALUE:
return "[cublas status]: invalid value"; return "[cublas status]: invalid value";
case CUBLAS_STATUS_ARCH_MISMATCH: case CUBLAS_STATUS_ARCH_MISMATCH:
return "[cublas status]: arch mismatch"; return "[cublas status]: arch mismatch";
case CUBLAS_STATUS_MAPPING_ERROR: case CUBLAS_STATUS_MAPPING_ERROR:
return "[cublas status]: mapping error"; return "[cublas status]: mapping error";
case CUBLAS_STATUS_EXECUTION_FAILED: case CUBLAS_STATUS_EXECUTION_FAILED:
return "[cublas status]: execution failed"; return "[cublas status]: execution failed";
case CUBLAS_STATUS_INTERNAL_ERROR: case CUBLAS_STATUS_INTERNAL_ERROR:
return "[cublas status]: internal error"; return "[cublas status]: internal error";
case CUBLAS_STATUS_SUCCESS: case CUBLAS_STATUS_SUCCESS:
return "[cublas status]: success"; return "[cublas status]: success";
default: default:
return "[cublas status]: unknown error"; return "[cublas status]: unknown error";
} }
} }
...@@ -131,27 +123,21 @@ const char* hl_cublas_get_error_string(cublasStatus_t status) { ...@@ -131,27 +123,21 @@ const char* hl_cublas_get_error_string(cublasStatus_t status) {
* support << operator for more details error info. * support << operator for more details error info.
*/ */
cublasStatus_t g_cublasStat; cublasStatus_t g_cublasStat;
#define CHECK_CUBLAS(cublas_func) \ #define CHECK_CUBLAS(cublas_func) \
g_cublasStat = cublas_func; \ g_cublasStat = cublas_func; \
CHECK_EQ(CUBLAS_STATUS_SUCCESS, g_cublasStat) \ CHECK_EQ(CUBLAS_STATUS_SUCCESS, g_cublasStat) \
<< "Cublas Error: " \ << "Cublas Error: " << hl_cublas_get_error_string(g_cublasStat) << " "
<< hl_cublas_get_error_string(g_cublasStat) \
<< " "
void hl_cublas_init(cublasHandle_t *cublas_handle, cudaStream_t stream) { void hl_cublas_init(cublasHandle_t *cublas_handle, cudaStream_t stream) {
CHECK_CUBLAS(dynload::cublasCreate(cublas_handle)) CHECK_CUBLAS(dynload::cublasCreate(cublas_handle))
<< "[cublas init] Cublas create handle faild!"; << "[cublas init] Cublas create handle faild!";
CHECK_CUBLAS(dynload::cublasSetStream(*cublas_handle, stream)) CHECK_CUBLAS(dynload::cublasSetStream(*cublas_handle, stream))
<< "[cublas init] Cublas set stream faild!"; << "[cublas init] Cublas set stream faild!";
} }
void hl_matrix_transpose(real *A_d, void hl_matrix_transpose(
real *C_d, real *A_d, real *C_d, int dimM, int dimN, int lda, int ldc) {
int dimM,
int dimN,
int lda,
int ldc) {
real alpha = 1.0; real alpha = 1.0;
real beta = 0.0; real beta = 0.0;
...@@ -159,11 +145,18 @@ void hl_matrix_transpose(real *A_d, ...@@ -159,11 +145,18 @@ void hl_matrix_transpose(real *A_d,
CHECK_NOTNULL(C_d); CHECK_NOTNULL(C_d);
CHECK_CUBLAS(CUBLAS_GEAM(t_resource.handle, CHECK_CUBLAS(CUBLAS_GEAM(t_resource.handle,
CUBLAS_OP_T, CUBLAS_OP_N, CUBLAS_OP_T,
dimM, dimN, CUBLAS_OP_N,
&alpha, A_d, lda, dimM,
&beta, nullptr, dimM, dimN,
C_d, ldc)); &alpha,
A_d,
lda,
&beta,
nullptr,
dimM,
C_d,
ldc));
CHECK_SYNC("hl_matrix_transpose failed"); CHECK_SYNC("hl_matrix_transpose failed");
} }
...@@ -188,13 +181,13 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) { ...@@ -188,13 +181,13 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
small-sized matrices. There may be a better way to reconstruct small-sized matrices. There may be a better way to reconstruct
the API for better performance. the API for better performance.
*/ */
CHECK_CUBLAS(CUBLAS_GETRF(t_resource.handle, CHECK_CUBLAS(
dimN, inout_d, lda, pivot_d, info_d, 1)); CUBLAS_GETRF(t_resource.handle, dimN, inout_d, lda, pivot_d, info_d, 1));
int info_h; int info_h;
hl_memcpy(&info_h, info_d, sizeof(int)); hl_memcpy(&info_h, info_d, sizeof(int));
if (info_h != 0) { if (info_h != 0) {
LOG(FATAL) << "Factorization of matrix failed: matrix may be singular.\n"; LOG(FATAL) << "Factorization of matrix failed: matrix may be singular.\n";
} }
/* Step 2: Compute the inverse of the matrix given its LU decomposition */ /* Step 2: Compute the inverse of the matrix given its LU decomposition */
...@@ -203,12 +196,18 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) { ...@@ -203,12 +196,18 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
hl_memcpy(out_d, out_h, sizeof(real *)); hl_memcpy(out_d, out_h, sizeof(real *));
CHECK_CUBLAS(CUBLAS_GETRI(t_resource.handle, CHECK_CUBLAS(CUBLAS_GETRI(t_resource.handle,
dimN, (const real **)inout_d, lda, pivot_d, dimN,
out_d, ldc, info_d, 1)); (const real **)inout_d,
lda,
pivot_d,
out_d,
ldc,
info_d,
1));
hl_memcpy(&info_h, info_d, sizeof(int)); hl_memcpy(&info_h, info_d, sizeof(int));
if (info_h != 0) { if (info_h != 0) {
LOG(FATAL) << "Inversion of matrix failed: matrix may be singular.\n"; LOG(FATAL) << "Inversion of matrix failed: matrix may be singular.\n";
} }
hl_free_mem_device(inout_d); hl_free_mem_device(inout_d);
...@@ -218,12 +217,19 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) { ...@@ -218,12 +217,19 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
CHECK_SYNC("hl_matrix_inverse failed"); CHECK_SYNC("hl_matrix_inverse failed");
} }
void hl_matrix_mul(real *A_d, hl_trans_op_t transa, void hl_matrix_mul(real *A_d,
real *B_d, hl_trans_op_t transb, hl_trans_op_t transa,
real *B_d,
hl_trans_op_t transb,
real *C_d, real *C_d,
int dimM, int dimN, int dimK, int dimM,
real alpha, real beta, int dimN,
int lda, int ldb, int ldc) { int dimK,
real alpha,
real beta,
int lda,
int ldb,
int ldc) {
CHECK_NOTNULL(A_d); CHECK_NOTNULL(A_d);
CHECK_NOTNULL(B_d); CHECK_NOTNULL(B_d);
CHECK_NOTNULL(C_d); CHECK_NOTNULL(C_d);
...@@ -231,8 +237,8 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa, ...@@ -231,8 +237,8 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
if (dimN == 1 && dimM != 1 && dimK != 1 && transb == HPPL_OP_N) { if (dimN == 1 && dimM != 1 && dimK != 1 && transb == HPPL_OP_N) {
int m = (transa == HPPL_OP_N) ? dimM : dimK; int m = (transa == HPPL_OP_N) ? dimM : dimK;
int n = (transa == HPPL_OP_N) ? dimK : dimM; int n = (transa == HPPL_OP_N) ? dimK : dimM;
hl_matrix_mul_vector(A_d, transa, B_d, C_d, m, n, hl_matrix_mul_vector(
alpha, beta, lda, ldb, ldc); A_d, transa, B_d, C_d, m, n, alpha, beta, lda, ldb, ldc);
return; return;
} }
...@@ -240,8 +246,7 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa, ...@@ -240,8 +246,7 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
int m = (transb == HPPL_OP_N) ? dimK : dimN; int m = (transb == HPPL_OP_N) ? dimK : dimN;
int n = (transb == HPPL_OP_N) ? dimN : dimK; int n = (transb == HPPL_OP_N) ? dimN : dimK;
hl_trans_op_t trans = (transb == HPPL_OP_N) ? HPPL_OP_T : HPPL_OP_N; hl_trans_op_t trans = (transb == HPPL_OP_N) ? HPPL_OP_T : HPPL_OP_N;
hl_matrix_mul_vector(B_d, trans, A_d, C_d, m, n, hl_matrix_mul_vector(B_d, trans, A_d, C_d, m, n, alpha, beta, ldb, 1, 1);
alpha, beta, ldb, 1, 1);
return; return;
} }
...@@ -250,26 +255,47 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa, ...@@ -250,26 +255,47 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
stat = CUBLAS_GEMM(t_resource.handle, stat = CUBLAS_GEMM(t_resource.handle,
CUBLAS_OP_N, CUBLAS_OP_N,
CUBLAS_OP_N, CUBLAS_OP_N,
dimN, dimM, dimK, dimN,
&alpha, B_d, ldb, dimM,
A_d, lda, dimK,
&beta, C_d, ldc); &alpha,
B_d,
ldb,
A_d,
lda,
&beta,
C_d,
ldc);
} else if ((HPPL_OP_T == transa) && (HPPL_OP_N == transb)) { } else if ((HPPL_OP_T == transa) && (HPPL_OP_N == transb)) {
stat = CUBLAS_GEMM(t_resource.handle, stat = CUBLAS_GEMM(t_resource.handle,
CUBLAS_OP_N, CUBLAS_OP_N,
CUBLAS_OP_T, CUBLAS_OP_T,
dimN, dimM, dimK, dimN,
&alpha, B_d, ldb, dimM,
A_d, lda, dimK,
&beta, C_d, ldc); &alpha,
B_d,
ldb,
A_d,
lda,
&beta,
C_d,
ldc);
} else if ((HPPL_OP_N == transa) && (HPPL_OP_T == transb)) { } else if ((HPPL_OP_N == transa) && (HPPL_OP_T == transb)) {
stat = CUBLAS_GEMM(t_resource.handle, stat = CUBLAS_GEMM(t_resource.handle,
CUBLAS_OP_T, CUBLAS_OP_T,
CUBLAS_OP_N, CUBLAS_OP_N,
dimN, dimM, dimK, dimN,
&alpha, B_d, ldb, dimM,
A_d, lda, dimK,
&beta, C_d, ldc); &alpha,
B_d,
ldb,
A_d,
lda,
&beta,
C_d,
ldc);
} else { } else {
LOG(FATAL) << "parameter transa error!"; LOG(FATAL) << "parameter transa error!";
} }
...@@ -277,24 +303,46 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa, ...@@ -277,24 +303,46 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
CHECK_SYNC("hl_matrix_mul failed"); CHECK_SYNC("hl_matrix_mul failed");
} }
void hl_matrix_mul(real *A_d, hl_trans_op_t transa, void hl_matrix_mul(real *A_d,
real *B_d, hl_trans_op_t transb, hl_trans_op_t transa,
real *B_d,
hl_trans_op_t transb,
real *C_d, real *C_d,
int dimM, int dimN, int dimK, int dimM,
real alpha, real beta) { int dimN,
int dimK,
real alpha,
real beta) {
int lda = (HPPL_OP_N == transa) ? dimK : dimM; int lda = (HPPL_OP_N == transa) ? dimK : dimM;
int ldb = (HPPL_OP_N == transb) ? dimN : dimK; int ldb = (HPPL_OP_N == transb) ? dimN : dimK;
int ldc = dimN; int ldc = dimN;
hl_matrix_mul(A_d, transa, B_d, transb, C_d, dimM, dimN, hl_matrix_mul(A_d,
dimK, alpha, beta, lda, ldb, ldc); transa,
B_d,
transb,
C_d,
dimM,
dimN,
dimK,
alpha,
beta,
lda,
ldb,
ldc);
} }
void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans, void hl_matrix_mul_vector(real *A_d,
real *B_d, real *C_d, hl_trans_op_t trans,
int dimM, int dimN, real *B_d,
real alpha, real beta, real *C_d,
int lda, int incb, int incc) { int dimM,
int dimN,
real alpha,
real beta,
int lda,
int incb,
int incc) {
CHECK_NOTNULL(A_d); CHECK_NOTNULL(A_d);
CHECK_NOTNULL(B_d); CHECK_NOTNULL(B_d);
CHECK_NOTNULL(C_d); CHECK_NOTNULL(C_d);
...@@ -303,21 +351,29 @@ void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans, ...@@ -303,21 +351,29 @@ void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
if (HPPL_OP_N == trans) { if (HPPL_OP_N == trans) {
stat = CUBLAS_GEMV(t_resource.handle, stat = CUBLAS_GEMV(t_resource.handle,
CUBLAS_OP_T, CUBLAS_OP_T,
dimN, dimM, dimN,
dimM,
&alpha, &alpha,
A_d, lda, A_d,
B_d, incb, lda,
B_d,
incb,
&beta, &beta,
C_d, incc); C_d,
incc);
} else if (HPPL_OP_T == trans) { } else if (HPPL_OP_T == trans) {
stat = CUBLAS_GEMV(t_resource.handle, stat = CUBLAS_GEMV(t_resource.handle,
CUBLAS_OP_N, CUBLAS_OP_N,
dimN, dimM, dimN,
dimM,
&alpha, &alpha,
A_d, lda, A_d,
B_d, incb, lda,
B_d,
incb,
&beta, &beta,
C_d, incc); C_d,
incc);
} else { } else {
LOG(FATAL) << "parameter transa error!"; LOG(FATAL) << "parameter transa error!";
} }
...@@ -326,10 +382,14 @@ void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans, ...@@ -326,10 +382,14 @@ void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
CHECK_SYNC("hl_matrix_mul_vector"); CHECK_SYNC("hl_matrix_mul_vector");
} }
void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans, void hl_matrix_mul_vector(real *A_d,
real *B_d, real *C_d, hl_trans_op_t trans,
int dimM, int dimN, real *B_d,
real alpha, real beta) { real *C_d,
hl_matrix_mul_vector(A_d, trans, B_d, C_d, dimM, dimN, int dimM,
alpha, beta, dimN, 1, 1); int dimN,
real alpha,
real beta) {
hl_matrix_mul_vector(
A_d, trans, B_d, C_d, dimM, dimN, alpha, beta, dimN, 1, 1);
} }
此差异已折叠。
此差异已折叠。
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifdef PADDLE_USE_DSO #ifdef PADDLE_USE_DSO
#include <mutex> #include <mutex>
...@@ -29,48 +28,46 @@ limitations under the License. */ ...@@ -29,48 +28,46 @@ limitations under the License. */
namespace dynload { namespace dynload {
extern std::once_flag cudart_dso_flag; extern std::once_flag cudart_dso_flag;
extern void* cudart_dso_handle; extern void *cudart_dso_handle;
/** /**
* The following macro definition can generate structs * The following macro definition can generate structs
* (for each function) to dynamic load cuda routine * (for each function) to dynamic load cuda routine
* via operator overloading. * via operator overloading.
**/ **/
#define DYNAMIC_LOAD_CUDART_WRAP(__name, __type) \ #define DYNAMIC_LOAD_CUDART_WRAP(__name, __type) \
struct DynLoad__##__name { \ struct DynLoad__##__name { \
template <typename... Args> \ template <typename... Args> \
__type operator()(Args... args) { \ __type operator()(Args... args) { \
typedef __type (*cudartFunc)(Args...); \ typedef __type (*cudartFunc)(Args...); \
std::call_once(cudart_dso_flag, GetCudartDsoHandle, \ std::call_once(cudart_dso_flag, GetCudartDsoHandle, &cudart_dso_handle); \
&cudart_dso_handle); \ void *p_##__name = dlsym(cudart_dso_handle, #__name); \
void* p_##__name = dlsym(cudart_dso_handle, #__name); \ return reinterpret_cast<cudartFunc>(p_##__name)(args...); \
return reinterpret_cast<cudartFunc>(p_##__name)(args...); \ } \
} \ } __name; /* struct DynLoad__##__name */
} __name; /* struct DynLoad__##__name */
/* include all needed cuda functions in HPPL */ /* include all needed cuda functions in HPPL */
#define CUDA_ROUTINE_EACH(__macro) \ #define CUDA_ROUTINE_EACH(__macro) \
__macro(cudaLaunch, cudaError_t) \ __macro(cudaLaunch, cudaError_t) __macro(cudaSetupArgument, cudaError_t) \
__macro(cudaSetupArgument, cudaError_t) \ __macro(cudaConfigureCall, cudaError_t) \
__macro(cudaConfigureCall, cudaError_t) \ __macro(__cudaRegisterFatBinary, void **) \
__macro(__cudaRegisterFatBinary, void**) \ __macro(__cudaUnregisterFatBinary, void) \
__macro(__cudaUnregisterFatBinary, void) \ __macro(__cudaRegisterFunction, void) \
__macro(__cudaRegisterFunction, void) \ __macro(__cudaRegisterVar, void) \
__macro(__cudaRegisterVar, void) \ __macro(__cudaRegisterManagedVar, void) \
__macro(__cudaRegisterManagedVar, void) \ __macro(__cudaInitModule, char) \
__macro(__cudaInitModule, char) \ __macro(__cudaRegisterTexture, void) \
__macro(__cudaRegisterTexture, void) \ __macro(__cudaRegisterSurface, void)
__macro(__cudaRegisterSurface, void)
CUDA_ROUTINE_EACH(DYNAMIC_LOAD_CUDART_WRAP) CUDA_ROUTINE_EACH(DYNAMIC_LOAD_CUDART_WRAP)
#if CUDART_VERSION >= 7000 #if CUDART_VERSION >= 7000
DYNAMIC_LOAD_CUDART_WRAP(cudaLaunchKernel, cudaError_t) DYNAMIC_LOAD_CUDART_WRAP(cudaLaunchKernel, cudaError_t)
#endif #endif
#undef CUDA_ROUNTINE_EACH #undef CUDA_ROUNTINE_EACH
} /* namespace dynload */ } /* namespace dynload */
#if CUDART_VERSION >= 7000 #if CUDART_VERSION >= 7000
__host__ cudaError_t CUDARTAPI cudaLaunchKernel(const void *func, __host__ cudaError_t CUDARTAPI cudaLaunchKernel(const void *func,
...@@ -79,12 +76,11 @@ __host__ cudaError_t CUDARTAPI cudaLaunchKernel(const void *func, ...@@ -79,12 +76,11 @@ __host__ cudaError_t CUDARTAPI cudaLaunchKernel(const void *func,
void **args, void **args,
size_t sharedMem, size_t sharedMem,
cudaStream_t stream) { cudaStream_t stream) {
return dynload::cudaLaunchKernel(func, gridDim, blockDim, return dynload::cudaLaunchKernel(
args, sharedMem, stream); func, gridDim, blockDim, args, sharedMem, stream);
} }
#endif /* CUDART_VERSION >= 7000 */ #endif /* CUDART_VERSION >= 7000 */
__host__ cudaError_t CUDARTAPI cudaLaunch(const void *func) { __host__ cudaError_t CUDARTAPI cudaLaunch(const void *func) {
return dynload::cudaLaunch(func); return dynload::cudaLaunch(func);
} }
...@@ -99,13 +95,12 @@ __host__ cudaError_t CUDARTAPI cudaConfigureCall(dim3 gridDim, ...@@ -99,13 +95,12 @@ __host__ cudaError_t CUDARTAPI cudaConfigureCall(dim3 gridDim,
dim3 blockDim, dim3 blockDim,
size_t sharedMem, size_t sharedMem,
cudaStream_t stream) { cudaStream_t stream) {
return dynload::cudaConfigureCall(gridDim, blockDim, return dynload::cudaConfigureCall(gridDim, blockDim, sharedMem, stream);
sharedMem, stream);
} }
extern "C" { extern "C" {
void** CUDARTAPI __cudaRegisterFatBinary(void *fatCubin) { void **CUDARTAPI __cudaRegisterFatBinary(void *fatCubin) {
return dynload::__cudaRegisterFatBinary(fatCubin); return dynload::__cudaRegisterFatBinary(fatCubin);
} }
...@@ -113,86 +108,87 @@ void CUDARTAPI __cudaUnregisterFatBinary(void **fatCubinHandle) { ...@@ -113,86 +108,87 @@ void CUDARTAPI __cudaUnregisterFatBinary(void **fatCubinHandle) {
return dynload::__cudaUnregisterFatBinary(fatCubinHandle); return dynload::__cudaUnregisterFatBinary(fatCubinHandle);
} }
void CUDARTAPI __cudaRegisterFunction( void CUDARTAPI __cudaRegisterFunction(void **fatCubinHandle,
void **fatCubinHandle, const char *hostFun,
const char *hostFun, char *deviceFun,
char *deviceFun, const char *deviceName,
const char *deviceName, int thread_limit,
int thread_limit, uint3 *tid,
uint3 *tid, uint3 *bid,
uint3 *bid, dim3 *bDim,
dim3 *bDim, dim3 *gDim,
dim3 *gDim, int *wSize) {
int *wSize return dynload::__cudaRegisterFunction(fatCubinHandle,
) { hostFun,
return dynload::__cudaRegisterFunction( deviceFun,
fatCubinHandle, hostFun, deviceFun, deviceName, deviceName,
thread_limit, tid, bid, bDim, gDim, wSize); thread_limit,
tid,
bid,
bDim,
gDim,
wSize);
} }
void CUDARTAPI __cudaRegisterVar( void CUDARTAPI __cudaRegisterVar(void **fatCubinHandle,
void **fatCubinHandle, char *hostVar,
char *hostVar, char *deviceAddress,
char *deviceAddress, const char *deviceName,
const char *deviceName, int ext,
int ext, int size,
int size, int constant,
int constant, int global) {
int global return dynload::__cudaRegisterVar(fatCubinHandle,
) { hostVar,
return dynload::__cudaRegisterVar( deviceAddress,
fatCubinHandle, hostVar, deviceAddress, deviceName,
deviceName, ext, size, constant, global); ext,
size,
constant,
global);
} }
extern void CUDARTAPI __cudaRegisterManagedVar(void **fatCubinHandle,
void **hostVarPtrAddress,
extern void CUDARTAPI __cudaRegisterManagedVar( char *deviceAddress,
void **fatCubinHandle, const char *deviceName,
void **hostVarPtrAddress, int ext,
char *deviceAddress, int size,
const char *deviceName, int constant,
int ext, int global) {
int size, return dynload::__cudaRegisterManagedVar(fatCubinHandle,
int constant, hostVarPtrAddress,
int global deviceAddress,
) { deviceName,
return dynload::__cudaRegisterManagedVar( ext,
fatCubinHandle, hostVarPtrAddress, deviceAddress, size,
deviceName, ext, size, constant, global); constant,
global);
} }
char CUDARTAPI __cudaInitModule( char CUDARTAPI __cudaInitModule(void **fatCubinHandle) {
void **fatCubinHandle
) {
return dynload::__cudaInitModule(fatCubinHandle); return dynload::__cudaInitModule(fatCubinHandle);
} }
void CUDARTAPI __cudaRegisterTexture( void CUDARTAPI __cudaRegisterTexture(void **fatCubinHandle,
void **fatCubinHandle, const struct textureReference *hostVar,
const struct textureReference *hostVar, const void **deviceAddress,
const void **deviceAddress, const char *deviceName,
const char *deviceName, int dim,
int dim, int norm,
int norm, int ext) {
int ext
) {
return dynload::__cudaRegisterTexture( return dynload::__cudaRegisterTexture(
fatCubinHandle, hostVar, deviceAddress, fatCubinHandle, hostVar, deviceAddress, deviceName, dim, norm, ext);
deviceName, dim, norm, ext);
} }
void CUDARTAPI __cudaRegisterSurface( void CUDARTAPI __cudaRegisterSurface(void **fatCubinHandle,
void **fatCubinHandle, const struct surfaceReference *hostVar,
const struct surfaceReference *hostVar, const void **deviceAddress,
const void **deviceAddress, const char *deviceName,
const char *deviceName, int dim,
int dim, int ext) {
int ext
) {
return dynload::__cudaRegisterSurface( return dynload::__cudaRegisterSurface(
fatCubinHandle, hostVar, deviceAddress, fatCubinHandle, hostVar, deviceAddress, deviceName, dim, ext);
deviceName, dim, ext);
} }
} /* extern "C" */ } /* extern "C" */
......
...@@ -12,24 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,24 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "avx_mathfun.h" #include "avx_mathfun.h"
namespace hppl { namespace hppl {
__m256 exp(__m256 a) { __m256 exp(__m256 a) { return exp256_ps(a); }
return exp256_ps(a);
}
__m256 log(__m256 a) { __m256 log(__m256 a) { return log256_ps(a); }
return log256_ps(a);
}
__m256 sin(__m256 a) { __m256 sin(__m256 a) { return sin256_ps(a); }
return sin256_ps(a);
}
__m256 cos(__m256 a) { __m256 cos(__m256 a) { return cos256_ps(a); }
return cos256_ps(a);
}
} // namespace hppl } // namespace hppl
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <chrono> #include <chrono>
#include <stdlib.h> #include <stdlib.h>
#include <iostream> #include <iostream>
...@@ -21,8 +20,7 @@ limitations under the License. */ ...@@ -21,8 +20,7 @@ limitations under the License. */
using std::chrono::high_resolution_clock; using std::chrono::high_resolution_clock;
int64_t getCurrentTimeStick() { int64_t getCurrentTimeStick() {
high_resolution_clock::time_point tp = high_resolution_clock::now(); high_resolution_clock::time_point tp = high_resolution_clock::now();
high_resolution_clock::duration dtn = tp.time_since_epoch(); high_resolution_clock::duration dtn = tp.time_since_epoch();
return dtn.count(); return dtn.count();
} }
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <string> #include <string>
#include <vector> #include <vector>
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "DataProvider.h" #include "DataProvider.h"
#include "paddle/utils/Util.h" #include "paddle/utils/Util.h"
...@@ -57,7 +56,7 @@ void BufferBatch::clone(DataBatch* srcBatch, bool useGpu) { ...@@ -57,7 +56,7 @@ void BufferBatch::clone(DataBatch* srcBatch, bool useGpu) {
} }
} }
DoubleBuffer::DoubleBuffer(DataProvider *dataPool, DoubleBuffer::DoubleBuffer(DataProvider* dataPool,
bool useGpu, bool useGpu,
int64_t batchSize) { int64_t batchSize) {
batchSize_ = batchSize; batchSize_ = batchSize;
...@@ -155,7 +154,7 @@ void DoubleBuffer::startAsyncLoad() { ...@@ -155,7 +154,7 @@ void DoubleBuffer::startAsyncLoad() {
} }
ClassRegistrar<DataProvider, DataConfig, ModelConfig, bool> ClassRegistrar<DataProvider, DataConfig, ModelConfig, bool>
DataProvider::registrar_; DataProvider::registrar_;
DataProvider* DataProvider::create(const DataConfig& config, DataProvider* DataProvider::create(const DataConfig& config,
const ModelConfig& modelConfig, const ModelConfig& modelConfig,
...@@ -182,7 +181,8 @@ int64_t DataProvider::getNextBatch(int64_t size, DataBatch* batch) { ...@@ -182,7 +181,8 @@ int64_t DataProvider::getNextBatch(int64_t size, DataBatch* batch) {
for (int i = 0; i < config_.constant_slots_size(); ++i) { for (int i = 0; i < config_.constant_slots_size(); ++i) {
MemoryHandlePtr handle = MemoryHandlePtr handle =
constantSlots[i] ? constantSlots[i]->getMemoryHandle() : nullptr; constantSlots[i] ? constantSlots[i]->getMemoryHandle() : nullptr;
Matrix::resizeOrCreate(constantSlots[i], batchSize, Matrix::resizeOrCreate(constantSlots[i],
batchSize,
1, // = width 1, // = width
false, // = trans false, // = trans
useGpu_); // = useGpu useGpu_); // = useGpu
...@@ -216,7 +216,8 @@ void DataProvider::initAsyncLoader() { ...@@ -216,7 +216,8 @@ void DataProvider::initAsyncLoader() {
} }
SimpleDataProviderBase::SimpleDataProviderBase(const DataConfig& config, SimpleDataProviderBase::SimpleDataProviderBase(const DataConfig& config,
bool useGpu, bool withInfo) bool useGpu,
bool withInfo)
: DataProvider(config, useGpu) { : DataProvider(config, useGpu) {
/* initialize the size of a sample, and the buffer */ /* initialize the size of a sample, and the buffer */
sampleDim_ = config_.feat_dim() * (2 * config_.context_len() + 1); sampleDim_ = config_.feat_dim() * (2 * config_.context_len() + 1);
...@@ -337,7 +338,8 @@ int64_t SimpleDataProviderBase::fillBuffer() { ...@@ -337,7 +338,8 @@ int64_t SimpleDataProviderBase::fillBuffer() {
sampleNumInBuf_ = sampleNumInBuf_ =
n + fillBufferImp(hInputDataBuf_->getData() + n * sampleDim_, n + fillBufferImp(hInputDataBuf_->getData() + n * sampleDim_,
hInputLabelBuf_->getData() + n, hInputLabelBuf_->getData() + n,
hInputInfoBuf_->getData() + n, bufferCapacity_ - n); hInputInfoBuf_->getData() + n,
bufferCapacity_ - n);
/* for stochastic gradient training */ /* for stochastic gradient training */
if (!skipShuffle_) { if (!skipShuffle_) {
...@@ -357,11 +359,14 @@ SimpleDataProvider::SimpleDataProvider(const DataConfig& config, bool useGpu) ...@@ -357,11 +359,14 @@ SimpleDataProvider::SimpleDataProvider(const DataConfig& config, bool useGpu)
SimpleDataProvider::~SimpleDataProvider() {} SimpleDataProvider::~SimpleDataProvider() {}
int64_t SimpleDataProvider::fillBufferImp(real* data, int* label, int* info, int64_t SimpleDataProvider::fillBufferImp(real* data,
int* label,
int* info,
int64_t size) { int64_t size) {
(void)info; (void)info;
int64_t n = std::min<int64_t>(labels_.size() - currentSampleIndex_, size); int64_t n = std::min<int64_t>(labels_.size() - currentSampleIndex_, size);
memcpy(data, &data_[currentSampleIndex_ * sampleDim_], memcpy(data,
&data_[currentSampleIndex_ * sampleDim_],
n * sampleDim_ * sizeof(real)); n * sampleDim_ * sizeof(real));
memcpy(label, &labels_[currentSampleIndex_], sizeof(int) * n); memcpy(label, &labels_[currentSampleIndex_], sizeof(int) * n);
currentSampleIndex_ += n; currentSampleIndex_ += n;
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "DataProvider.h" #include "DataProvider.h"
...@@ -65,8 +64,8 @@ void DataProviderGroup<T>::reset() { ...@@ -65,8 +64,8 @@ void DataProviderGroup<T>::reset() {
provider_ = nullptr; provider_ = nullptr;
// shuffle file list // shuffle file list
std::shuffle(fileList_.begin(), fileList_.end(), std::shuffle(
ThreadLocalRandomEngine::get()); fileList_.begin(), fileList_.end(), ThreadLocalRandomEngine::get());
startLoader(); startLoader();
DataProvider::reset(); DataProvider::reset();
...@@ -113,8 +112,9 @@ void DataProviderGroup<T>::startLoader() { ...@@ -113,8 +112,9 @@ void DataProviderGroup<T>::startLoader() {
size_t endPos = std::min(fileList_.size(), startPos + loadFileCount); size_t endPos = std::min(fileList_.size(), startPos + loadFileCount);
std::vector<std::string> fileVec(fileList_.begin() + startPos, std::vector<std::string> fileVec(fileList_.begin() + startPos,
fileList_.begin() + endPos); fileList_.begin() + endPos);
loader_->addJob([this, fileVec]() loader_->addJob([this, fileVec]() -> ProviderPtrType {
-> ProviderPtrType { return this->loadFile(fileVec); }); return this->loadFile(fileVec);
});
} }
loader_->stopAddJob(); loader_->stopAddJob();
} }
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册