Commit 0a0e4b60 authored by Megvii Engine Team

Merge branch 'master' into release-1.0

......@@ -53,9 +53,11 @@ option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON)
option(MGE_BUILD_IMPERATIVE_RT "Build _imperative_rt Python Module " ON)
option(MGE_BUILD_SDK "Build load_and_run" ON)
option(MGE_INFERENCE_ONLY "Build inference only library." OFF)
option(MGE_WITH_PYTHON_MODULE "Build MegEngine legacy Python Module." OFF)
option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support" ON)
option(MGE_WITH_ROCM "Enable ROCM support" OFF)
if(NOT ${MGE_BIN_REDUCE} STREQUAL "")
message("build with BIN REDUCE")
if(MGE_WITH_MINIMUM_SIZE)
......@@ -152,6 +154,14 @@ if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} S
endif()
if(MSVC OR WIN32)
# for cmake after 3.15.2
cmake_policy(SET CMP0091 NEW)
if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebug")
else()
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
endif()
add_compile_definitions(NOMINMAX=1 _USE_MATH_DEFINES=1 WIN32=1)
message("-- into windows build...")
message("-- CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
......@@ -285,7 +295,6 @@ if(MGE_WITH_TEST)
endif()
if(MGE_BUILD_IMPERATIVE_RT)
add_compile_definitions(MGB_ENABLE_IMPERATIVE_RUNTIME)
set(CMAKE_CXX_STANDARD 17)
endif()
......@@ -701,7 +710,8 @@ endif()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MARCH}")
set(MGB_ENABLE_IMPERATIVE ${MGE_BUILD_IMPERATIVE_RT})
set(MGE_VERSION_SCRIPT ${PROJECT_SOURCE_DIR}/src/version.ld CACHE INTERNAL "Path to linker version script")
# Write out megbrain_build_config.h
# It defines macros needed by both megbrain and dnn
configure_file(src/megbrain_build_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h)
......@@ -831,3 +841,8 @@ if(MSVC OR WIN32)
endif()
endforeach()
endif()
if(MGE_WITH_JIT_MLIR)
add_subdirectory(tools/mlir/mgb-opt)
add_subdirectory(tools/mlir/mgb-file-check)
endif()
......@@ -682,6 +682,53 @@ protected:
size_t workspace_in_bytes);
};
/**
* \brief base class for AdaptivePooling
*/
class AdaptivePoolingBase : public OperatorBase {
DEF_OPR_IMPL_CTOR(AdaptivePoolingBase, OperatorBase);
DEF_OPR_PARAM(AdaptivePooling);
protected:
param::Pooling deduce_pooling_param(const TensorLayout& src,
const TensorLayout& dst);
};
class AdaptivePoolingForward : public AdaptivePoolingBase {
DEF_OPR_IMPL(AdaptivePoolingForward, AdaptivePoolingBase, 1, 1);
public:
/**
* \param[in] src input tensor
* \param[out] dst output tensor
*/
virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
_megdnn_workspace workspace) = 0;
virtual size_t get_workspace_in_bytes(const TensorLayout& src,
const TensorLayout& dst) = 0;
};
using AdaptivePooling = AdaptivePoolingForward;
class AdaptivePoolingBackward : public AdaptivePoolingBase {
DEF_OPR_IMPL(AdaptivePoolingBackward, AdaptivePoolingBase, 3, 1);
public:
/**
* \param[in] src the `src' parameter in AdaptivePoolingForward::exec
* \param[in] dst the `dst' parameter in AdaptivePoolingForward::exec
* \param[in] diff the backpropagated gradient wrt. dst
* \param[out] grad the backpropagated gradient wrt. src
*/
virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
_megdnn_tensor_in diff, _megdnn_tensor_out grad,
_megdnn_workspace workspace) = 0;
virtual size_t get_workspace_in_bytes(const TensorLayout& src,
const TensorLayout& dst,
const TensorLayout& diff,
const TensorLayout& grad) = 0;
};
/**
* \brief base class for Local
*/
......
......@@ -179,6 +179,11 @@ pdef('Axis').add_fields('int32', 'axis', 0)
add_enum_alias('Format', 'ConvolutionV0')
)
(pdef('AdaptivePooling').
add_enum_alias('Mode', 'Pooling').
add_enum_alias('Format', 'ConvolutionV0')
)
(pdef('LRN',
'see ImageNet Classification with Deep Convolutional Neural Networks for'
' meaning of the fields').
......
......@@ -55,8 +55,12 @@ void AtlasComputingContext::memcpy(void* dst, const void* src,
default:
megdnn_throw("bad atlas memcpy kind");
}
#if MGB_USE_ATLAS_ASYNC_API
acl_check(aclrtMemcpyAsync(dst, size_in_bytes, src, size_in_bytes,
atlas_kind, m_ctx.stream));
#else
acl_check(aclrtMemcpy(dst, size_in_bytes, src, size_in_bytes, atlas_kind));
#endif
}
void AtlasComputingContext::memset(void* dst, int value, size_t size_in_bytes) {
......@@ -65,7 +69,11 @@ void AtlasComputingContext::memset(void* dst, int value, size_t size_in_bytes) {
}
void AtlasComputingContext::synchronize() {
#if MGB_USE_ATLAS_ASYNC_API
acl_check(aclrtSynchronizeStream(m_ctx.stream));
#else
return;
#endif
}
// vim: syntax=cpp.doxygen
/**
* \file dnn/src/common/adaptive_pooling.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "megdnn/opr_param_defs.h"
#include "megdnn/oprs.h"
#include "src/common/utils.h"
namespace megdnn {
param::Pooling AdaptivePoolingBase::deduce_pooling_param(
const TensorLayout& src, const TensorLayout& dst) {
megdnn_assert(param().format == param::AdaptivePooling::Format::NCHW);
size_t IH = src.shape[2], IW = src.shape[3], OH = dst.shape[2],
OW = dst.shape[3];
param::Pooling ret;
ret.mode = param().mode;
ret.format = param().format;
ret.pad_h = ret.pad_w = 0;
ret.stride_h = floor(IH / OH);
ret.stride_w = floor(IW / OW);
ret.window_h = IH - (OH - 1) * ret.stride_h;
ret.window_w = IW - (OW - 1) * ret.stride_w;
return ret;
}
} // namespace megdnn
// vim: syntax=cpp.doxygen
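As a quick illustration of the deduction above (a standalone sketch, not part of this patch; the helper name is made up), the stride is the integer quotient of the input and output extents, and the window is whatever remains so that the last window ends exactly at the input border:

```python
# Sketch mirroring AdaptivePoolingBase::deduce_pooling_param for NCHW layouts.
def deduce_pooling_param(ih, iw, oh, ow):
    stride_h, stride_w = ih // oh, iw // ow   # floor(IH / OH), floor(IW / OW)
    window_h = ih - (oh - 1) * stride_h       # extent covered by the last window
    window_w = iw - (ow - 1) * stride_w
    return {"pad": (0, 0), "stride": (stride_h, stride_w), "window": (window_h, window_w)}

# e.g. pooling a 7x9 feature map down to 3x4:
print(deduce_pooling_param(7, 9, 3, 4))  # stride (2, 2), window (3, 3)
```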
......@@ -392,8 +392,6 @@ TensorLayout TensorLayout::broadcast(const TensorShape& tshape) const {
TensorLayout result{dtype, format};
result.ndim = tshape.ndim;
for (size_t i = 0; i < tshape.ndim; i++) {
megdnn_throw_if(!tshape.shape[i], tensor_reshape_error,
megdnn_mangle("target shape is 0"));
result.shape[i] = tshape.shape[i];
result.stride[i] = (tshape.shape[i] == 1);
}
......@@ -409,8 +407,6 @@ TensorLayout TensorLayout::broadcast(const TensorShape& tshape) const {
for (size_t i = 0; i < tshape.ndim; ++i) {
int target_idx = tshape.ndim - i - 1;
int cur_idx = ndim - i - 1;
megdnn_throw_if(!tshape.shape[target_idx], tensor_reshape_error,
megdnn_mangle("target shape is 0"));
size_t cur_shape = (cur_idx >= 0 ? shape[cur_idx] : 1),
cur_stride = (cur_idx >= 0 ? stride[cur_idx] : 0);
if (tshape.shape[target_idx] != cur_shape) {
......@@ -434,10 +430,16 @@ TensorLayout TensorLayout::broadcast(const TensorShape& tshape) const {
bool TensorLayout::try_reshape(TensorLayout& result,
const TensorShape& tshp) const {
megdnn_assert(tshp.ndim);
bool is_empty_shape = false;
for (size_t i = 0; i < tshp.ndim; ++i) {
megdnn_throw_if(!tshp.shape[i], tensor_reshape_error,
megdnn_mangle(ssprintf("bad target tshp: %s",
tshp.to_string().c_str())));
if (!tshp.shape[i]) {
megdnn_throw_if(!format.is_default(), tensor_reshape_error,
megdnn_mangle(ssprintf("bad target tshp: %s",
tshp.to_string().c_str())));
is_empty_shape = true;
break;
}
}
megdnn_throw_if(
......@@ -454,6 +456,11 @@ bool TensorLayout::try_reshape(TensorLayout& result,
result.format = this->format;
result.TensorShape::operator=(tshp);
if (is_empty_shape) {
result.init_contiguous_stride();
return true;
}
size_t sdim = 0, prod = 1, cont_sdim = 0;
for (size_t i = 0; i < tshp.ndim; ++i) {
megdnn_assert(cont_sdim < cont.ndim);
......
......@@ -199,6 +199,8 @@ private:
cb(Remap) \
cb(RemapBackwardData) \
cb(RemapBackwardMat) \
cb(AdaptivePoolingForward) \
cb(AdaptivePoolingBackward) \
/*!
* \brief specialize HandleImpl::create_operator for a single opr type;
......
/**
* \file dnn/src/cuda/adaptive_pooling/opr_impl.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/cuda/adaptive_pooling/opr_impl.h"
#include "src/cuda/utils.h"
namespace megdnn {
namespace cuda {
void AdaptivePoolingForwardImpl::exec(_megdnn_tensor_in src,
_megdnn_tensor_out dst,
_megdnn_workspace workspace) {
auto opr = handle()->create_operator<PoolingForward>();
opr->param() = deduce_pooling_param(src.layout, dst.layout);
opr->exec(src, dst, workspace);
}
size_t AdaptivePoolingForwardImpl::get_workspace_in_bytes(
const TensorLayout& src, const TensorLayout& dst) {
auto opr = handle()->create_operator<PoolingForward>();
opr->param() = deduce_pooling_param(src, dst);
return opr->get_workspace_in_bytes(src, dst);
}
void AdaptivePoolingBackwardImpl::exec(_megdnn_tensor_in src,
_megdnn_tensor_in dst,
_megdnn_tensor_in diff,
_megdnn_tensor_out grad,
_megdnn_workspace workspace) {
auto opr = handle()->create_operator<PoolingBackward>();
opr->param() = deduce_pooling_param(src.layout, dst.layout);
opr->exec(src, dst, diff, grad, workspace);
}
size_t AdaptivePoolingBackwardImpl::get_workspace_in_bytes(
const TensorLayout& src, const TensorLayout& dst,
const TensorLayout& diff, const TensorLayout& grad) {
auto opr = handle()->create_operator<PoolingBackward>();
opr->param() = deduce_pooling_param(src, dst);
return opr->get_workspace_in_bytes(src, dst, diff, grad);
}
} // namespace cuda
} // namespace megdnn
// vim: syntax=cpp.doxygen
/**
* \file dnn/src/cuda/adaptive_pooling/opr_impl.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "megdnn/oprs.h"
#include "src/cuda/cudnn_wrapper.h"
#include "src/cuda/utils.h"
namespace megdnn {
namespace cuda {
class AdaptivePoolingForwardImpl final : public AdaptivePoolingForward {
public:
using AdaptivePoolingForward::AdaptivePoolingForward;
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
_megdnn_workspace workspace) override;
size_t get_workspace_in_bytes(const TensorLayout& src,
const TensorLayout& dst) override;
};
class AdaptivePoolingBackwardImpl final : public AdaptivePoolingBackward {
public:
using AdaptivePoolingBackward::AdaptivePoolingBackward;
void exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
_megdnn_tensor_in diff, _megdnn_tensor_out grad,
_megdnn_workspace workspace) override;
size_t get_workspace_in_bytes(const TensorLayout& src,
const TensorLayout& dst,
const TensorLayout& diff,
const TensorLayout& grad) override;
};
} // namespace cuda
} // namespace megdnn
// vim: syntax=cpp.doxygen
......@@ -11,6 +11,7 @@
#include "src/common/handle_impl.h"
#include "src/cuda/adaptive_pooling/opr_impl.h"
#include "src/cuda/add_update/opr_impl.h"
#include "src/cuda/argmxx/opr_impl.h"
#include "src/cuda/argsort/opr_impl.h"
......
......@@ -72,6 +72,7 @@ namespace indexing_multi_axis_vec {
#define cb0(_dtype) \
MEGDNN_FOREACH_TENSOR_NDIM(INST, DTypeTrait<_dtype>::ctype)
MEGDNN_FOREACH_COMPUTING_DTYPE(cb0)
cb0(::megdnn::dtype::Bool)
#undef cb0
#undef INST
......
......@@ -39,6 +39,11 @@ __device__ void atomicAdd(megdnn::dt_int16 *, megdnn::dt_int16) {
((int*)0)[0] = 1;
}
__device__ void atomicAdd(megdnn::dt_bool *, megdnn::dt_bool) {
__trap();
((int*)0)[0] = 1;
}
#define KERN_APPLY_OPR_OPR \
::megdnn::cuda::indexing_multi_axis_vec::OprAtomicIncr
#include "./kern_apply_opr_impl.cuinl"
......
......@@ -120,6 +120,7 @@ void ExecImpl<Opr>::dispatch_exec() {
case DTypeTrait<_dtype>::enumv: \
return dispatch_exec_ctype<DTypeTrait<_dtype>::ctype>();
MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
cb(::megdnn::dtype::Bool)
#undef cb
default:
megdnn_throw("bad dtype");
......
/**
* \file dnn/src/naive/adaptive_pooling/opr_impl.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/naive/adaptive_pooling/opr_impl.h"
#include "src/common/opr_delegate.h"
#include "src/common/utils.h"
#include "src/naive/handle.h"
namespace megdnn {
namespace naive {
void AdaptivePoolingForwardImpl::exec(_megdnn_tensor_in src,
_megdnn_tensor_out dst,
_megdnn_workspace workspace) {
MEGDNN_DISPATCH_CPU_KERN(static_cast<naive::HandleImpl*>(handle()), {
auto opr = inplace_cpu_handle()->create_operator<PoolingForward>();
opr->param() = deduce_pooling_param(src.layout, dst.layout);
opr->exec(src, dst, workspace);
});
}
void AdaptivePoolingBackwardImpl::exec(_megdnn_tensor_in src,
_megdnn_tensor_in dst,
_megdnn_tensor_in diff,
_megdnn_tensor_out grad,
_megdnn_workspace workspace) {
MEGDNN_DISPATCH_CPU_KERN(static_cast<naive::HandleImpl*>(handle()), {
auto opr = inplace_cpu_handle()->create_operator<PoolingBackward>();
opr->param() = deduce_pooling_param(src.layout, dst.layout);
opr->exec(src, dst, diff, grad, workspace);
});
}
size_t AdaptivePoolingBackwardImpl::get_workspace_in_bytes(
const TensorLayout& src, const TensorLayout& dst,
const TensorLayout& diff, const TensorLayout& grad) {
auto opr = inplace_cpu_handle()->create_operator<PoolingBackward>();
opr->param() = deduce_pooling_param(src, dst);
return opr->get_workspace_in_bytes(src, dst, diff, grad);
}
} // namespace naive
} // namespace megdnn
// vim: syntax=cpp.doxygen
/**
* \file dnn/src/naive/adaptive_pooling/opr_impl.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "megdnn/oprs.h"
#include "src/common/utils.h"
namespace megdnn {
namespace naive {
class AdaptivePoolingForwardImpl : public AdaptivePoolingForward {
public:
using AdaptivePoolingForward::AdaptivePoolingForward;
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
_megdnn_workspace workspace) override;
size_t get_workspace_in_bytes(const TensorLayout&,
const TensorLayout&) override {
return 0;
}
};
class AdaptivePoolingBackwardImpl : public AdaptivePoolingBackward {
public:
using AdaptivePoolingBackward::AdaptivePoolingBackward;
void exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
_megdnn_tensor_in diff, _megdnn_tensor_out grad,
_megdnn_workspace workspace) override;
size_t get_workspace_in_bytes(const TensorLayout& src,
const TensorLayout& dst,
const TensorLayout& diff,
const TensorLayout& grad) override;
};
} // namespace naive
} // namespace megdnn
// vim: syntax=cpp.doxygen
......@@ -13,6 +13,7 @@
#include "src/common/handle_impl.h"
#include "src/naive/adaptive_pooling/opr_impl.h"
#include "src/naive/add_update/opr_impl.h"
#include "src/naive/argmxx/opr_impl.h"
#include "src/naive/argsort/opr_impl.h"
......
......@@ -88,6 +88,7 @@ void dispatch_exec(HandleImpl *handle,
}
switch (data.layout.dtype.enumv()) {
MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
cb(::megdnn::dtype::Bool)
default:
megdnn_throw(megdnn_mangle("bad dtype"));
}
......
/**
* \file dnn/test/common/adaptive_pooling.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include <cstddef>
#include "megdnn/basic_types.h"
#include "megdnn/opr_param_defs.h"
namespace megdnn {
namespace test {
namespace adaptive_pooling {
struct TestArg {
param::AdaptivePooling param;
TensorShape ishape;
TensorShape oshape;
TestArg(param::AdaptivePooling param, TensorShape ishape,
TensorShape oshape)
: param(param), ishape(ishape), oshape(oshape) {}
};
inline std::vector<TestArg> get_args() {
std::vector<TestArg> args;
using Param = param::AdaptivePooling;
using Mode = param::AdaptivePooling::Mode;
for (size_t i = 36; i < 40; ++i) {
args.emplace_back(Param{Mode::AVERAGE}, TensorShape{2, 3, i, i + 1},
TensorShape{2, 3, i - 4, i - 2});
args.emplace_back(Param{Mode::MAX}, TensorShape{2, 3, i, i + 1},
TensorShape{2, 3, i - 4, i - 2});
}
for (size_t i = 5; i < 10; ++i) {
args.emplace_back(Param{Mode::AVERAGE}, TensorShape{2, 3, i, i + 1},
TensorShape{2, 3, i - 3, i - 2});
args.emplace_back(Param{Mode::MAX}, TensorShape{2, 3, i, i + 1},
TensorShape{2, 3, i - 3, i - 2});
}
return args;
}
} // namespace adaptive_pooling
} // namespace test
} // namespace megdnn
// vim: syntax=cpp.doxygen
......@@ -41,6 +41,8 @@ DEF(Images2NeibsForward, 2, true, true);
DEF(Images2NeibsBackward, 2, true, false);
DEF(PoolingForward, 2, true, true);
DEF(PoolingBackward, 4, true, false);
DEF(AdaptivePoolingForward, 2, true, false);
DEF(AdaptivePoolingBackward, 4, true, false);
DEF(LocalForward, 3, true, true);
DEF(LocalBackwardData, 3, true, false);
DEF(LocalBackwardFilter, 3, true, false);
......
/**
* \file dnn/test/cuda/adaptive_pooling.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "test/cuda/fixture.h"
#include "megdnn/tensor_iter.h"
#include "test/common/adaptive_pooling.h"
#include "test/common/checker.h"
#include "src/common/utils.h"
#include "test/cuda/utils.h"
#include <cudnn.h>
#include "test/cuda/benchmark.h"
namespace megdnn {
namespace test {
TEST_F(CUDA, ADAPTIVE_POOLING_FORWARD) {
auto args = adaptive_pooling::get_args();
using Format = param::AdaptivePooling::Format;
DType dtype = dtype::Float32();
for (auto&& arg : args) {
auto param = arg.param;
auto src = arg.ishape;
auto dst = arg.oshape;
param.format = Format::NCHW;
Checker<AdaptivePooling> checker(handle_cuda());
checker.set_epsilon(1e-2);
checker.set_param(param).set_dtype(0, dtype).set_dtype(1, dtype).exec(
TensorShapeArray{src, dst, {}});
}
}
TEST_F(CUDA, ADAPTIVE_POOLING_BACKWARD) {
auto args = adaptive_pooling::get_args();
for (auto&& arg : args) {
Checker<AdaptivePoolingBackward> checker(handle_cuda());
TensorLayout ilayout = TensorLayout(arg.ishape, dtype::Float32());
TensorLayout olayout = TensorLayout(arg.oshape, dtype::Float32());
auto constraint = [this,
arg](CheckerHelper::TensorValueArray& tensors_orig) {
megdnn_assert(tensors_orig.size() == 4);
auto opr = handle_cuda()->create_operator<AdaptivePoolingForward>();
opr->param() = arg.param;
auto tensors_cuda_storage = CheckerHelper::alloc_tensors(
handle_cuda(),
{tensors_orig[0].layout, tensors_orig[1].layout}, 0);
auto&& tensors_cuda = *tensors_cuda_storage;
auto span = tensors_cuda[0].layout.span();
auto dst = static_cast<dt_byte*>(tensors_cuda[0].raw_ptr) +
span.low_byte;
auto src = static_cast<const dt_byte*>(tensors_orig[0].raw_ptr) +
span.low_byte;
megdnn_memcpy_H2D(handle_cuda(), dst, src, span.dist_byte());
auto workspace_size = opr->get_workspace_in_bytes(
tensors_cuda[0].layout, tensors_cuda[1].layout);
auto workspace_cuda = megdnn_malloc(handle_cuda(), workspace_size);
Workspace workspace{static_cast<dt_byte*>(workspace_cuda),
workspace_size};
opr->exec(tensors_cuda[0], tensors_cuda[1], workspace);
megdnn_free(handle_cuda(), workspace_cuda);
span = tensors_cuda[1].layout.span();
dst = static_cast<dt_byte*>(tensors_orig[1].raw_ptr) +
span.low_byte;
src = static_cast<const dt_byte*>(tensors_cuda[1].raw_ptr) +
span.low_byte;
megdnn_memcpy_D2H(handle_cuda(), dst, src, span.dist_byte());
};
DType dtype = dtype::Float32();
checker.set_tensors_constraint(constraint)
.set_dtype(0, dtype)
.set_dtype(1, dtype)
.set_dtype(2, dtype)
.set_dtype(3, dtype)
.set_param(arg.param)
.exec(TensorShapeArray{ilayout, olayout, olayout, ilayout});
}
}
} // namespace test
} // namespace megdnn
// vim: syntax=cpp.doxygen
......@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "megdnn/oprs/nn.h"
......@@ -37,7 +38,7 @@ std::vector<BenchArgs> get_resnet50_bench_args(size_t batch = 64) {
args.emplace_back(BenchArgs{batch, 256, 56, 56, 32, 3, 1});
args.emplace_back(BenchArgs{batch, 256, 56, 56, 32, 3, 2});
args.emplace_back(BenchArgs{batch, 4, 256, 256, 32, 7, 2});
args.emplace_back(BenchArgs{batch, 256, 56, 56, 64, 1, 1});
args.emplace_back(BenchArgs{batch, 64, 56, 56, 64, 1, 1});
args.emplace_back(BenchArgs{batch, 64, 56, 56, 64, 3, 1});
......@@ -614,11 +615,8 @@ TEST_F(CUDA, CONV_BIAS_INT8_CHWN4_HSWISH) {
param.stride_h = param.stride_w = 1;
param.format = param::ConvBias::Format::CHWN4;
param.nonlineMode = param::ConvBias::NonlineMode::H_SWISH;
checker.set_param(param).execs({{4, 12, 12, 32, 4},
{4, 3, 3, 16, 4},
{4, 1, 1, 1, 4},
{},
{}});
checker.set_param(param).execs(
{{4, 12, 12, 32, 4}, {4, 3, 3, 16, 4}, {4, 1, 1, 1, 4}, {}, {}});
}
TEST_F(CUDA, CONV_BIAS_INT8_CHWN4_CHECK_BOUNDS) {
......@@ -1076,7 +1074,6 @@ TEST_F(CUDA, CONV_BIAS_INT8_CHWN4_UNROLL_WIDTH_TENSORCORE_1x1_ALGO_2) {
}
#if CUDA_VERSION >= 10020
/// \note: we only check several cases and block sizes in megdnn_test, the full
/// testcases are written in cutlass repository
......@@ -1234,8 +1231,7 @@ TEST_F(CUDA, BENCHMARK_CUTLASS_CONV_BIAS_INT8_NCHW4) {
handle_cuda(), get_resnet50_bench_args(64),
dtype::QuantizedS8{1.2f}, dtype::QuantizedS8{1.3f},
dtype::QuantizedS32{1.2f * 1.3f}, dtype::QuantizedS8{1.0f},
"INT8_NCHW4_DOTPROD_IMPLICIT_GEMM",
param::ConvBias::Format::NCHW4);
"INT8_NCHW4_DOTPROD_IMPLICIT_GEMM", param::ConvBias::Format::NCHW4);
}
#endif
} // namespace test
......
......@@ -47,8 +47,7 @@ add_custom_target(gen_opr_py DEPENDS ${GEN_OPS_FILE})
##################### end of opdef generation #########################
set(VERSION_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/src/version.ld)
add_custom_target(_version_ld SOURCES ${VERSION_SCRIPT})
add_custom_target(_version_ld SOURCES ${MGE_VERSION_SCRIPT})
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/pybind11 ${PROJECT_BINARY_DIR}/third_party/pybind11)
pybind11_add_module(${MODULE_NAME} NO_EXTRAS ${SRCS})
......@@ -57,8 +56,21 @@ if (APPLE)
elseif (MSVC OR WIN32)
# Windows does not support implicitly importing data members from DLL.
target_link_libraries(${MODULE_NAME} PRIVATE megbrain megdnn)
message("-- CMAKE_MSVC_RUNTIME_LIBRARY: ${CMAKE_MSVC_RUNTIME_LIBRARY}")
set_target_properties(${MODULE_NAME} PROPERTIES MSVC_RUNTIME_LIBRARY "${CMAKE_MSVC_RUNTIME_LIBRARY}")
else()
target_link_libraries(${MODULE_NAME} PRIVATE megengine_export -Wl,--version-script=${VERSION_SCRIPT})
if (MGE_WITH_PYTHON_MODULE)
# used to fix a runtime crash when building both mgb (MGE_WITH_PYTHON_MODULE) and imperative (MGE_BUILD_IMPERATIVE_RT)
target_link_libraries(${MODULE_NAME} PRIVATE megengine_export -Wl,--version-script=${MGE_VERSION_SCRIPT})
else()
# used to reduce the whl size by depending on megbrain/dnn directly; otherwise cmake creates two cuda fatbin
# elf sections, one in megengine_export and one in the target that depends on megengine_export
target_link_libraries(${MODULE_NAME} PRIVATE megbrain megdnn -Wl,--version-script=${MGE_VERSION_SCRIPT})
if (MGE_WITH_DISTRIBUTED)
message("-- Imperative configured to link megray")
target_link_libraries(${MODULE_NAME} PRIVATE megray)
endif()
endif()
endif()
target_include_directories(${MODULE_NAME} PUBLIC src/include PRIVATE ${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR})
......
......@@ -76,7 +76,7 @@ from .logger import enable_debug_log, get_logger, set_log_file, set_log_level
from .serialization import load, save
from .tensor import Parameter, Tensor, tensor
from .version import __version__
from .core import cgtools
from .utils import comp_graph_tools as cgtools
_set_fork_exec_path_for_timed_func(
sys.executable,
......
......@@ -20,7 +20,7 @@ class GradManager:
the forward operations start and when all resources should be released. A typical usage of
GradManager is as follows:
.. codeblock::
.. code-block::
gm = GradManager()
gm.attach(model.parameters())
......@@ -32,7 +32,7 @@ class GradManager:
You can also use `record()` and `release()` method instead of `with` context:
.. codeblock::
.. code-block::
gm = GradManager()
gm.attach(model.parameters())
......@@ -50,7 +50,7 @@ class GradManager:
processes. Users will finally get the averaged gradients if an "AllReduce"
callback is registered as follows:
.. codeblock::
.. code-block::
import megengine.distributed as dist
......@@ -71,7 +71,7 @@ class GradManager:
r"""Registers parameters that gradients should be calculated with respect to.
Callback Functions should have a signature like this:
.. codeblock::
.. code-block::
def cb(param: Tensor, grad: Tensor) -> Tensor:
# do something
......@@ -100,6 +100,8 @@ class GradManager:
:param ys: outputs of forward operators, e.g., the loss tensor
:param dys: derivatives of ys
"""
from ..functional import ones_like
global backwarding_grad_manager
cache = backwarding_grad_manager
backwarding_grad_manager = self
......@@ -113,7 +115,7 @@ class GradManager:
if not isinstance(ys, (tuple, list)):
ys = [ys]
if dys is None:
dys = [tensor(1.0).broadcast(y.shape) for y in ys]
dys = [ones_like(y) for y in ys]
if not isinstance(dys, (tuple, list)):
dys = [dys]
try:
......
......@@ -11,4 +11,3 @@ import sys
from .tensor import Tensor
from .tensor.megbrain_graph import Graph
from .utils import comp_graph_tools as cgtools
......@@ -22,11 +22,13 @@ class Device:
else:
self._cn = CompNode(device)
self.logical_name = self._cn.logical_name
def to_c(self):
return self._cn
def __repr__(self):
return "{}({})".format(type(self).__qualname__, self)
return "{}({})".format(type(self).__qualname__, repr(self._cn))
def __str__(self):
return str(self._cn)
......
......@@ -160,7 +160,7 @@ def subtensor_grad_fn(op, inputs, outputs, input_requires_grad):
def make_grad(grad_op, dy):
grad = (
TensorWrapper(0, dtype=dy.dtype, device=dy.device)
.broadcast(TensorWrapper(input_shape))
._broadcast(TensorWrapper(input_shape))
.__wrapped__
)
(dx,) = apply(grad_op, grad, dy, *params)
......@@ -186,7 +186,7 @@ def indexingMultiAxisVec_grad_fn(op, inputs, outputs, input_requires_grad):
def make_grad(grad_op, dy):
grad = (
TensorWrapper(0, dtype=dy.dtype, device=dy.device)
.broadcast(TensorWrapper(input_shape))
._broadcast(TensorWrapper(input_shape))
.__wrapped__
)
(dx,) = apply(grad_op, grad, dy, *params)
......
......@@ -50,8 +50,8 @@ class Function:
"""
Applies operations to ``inputs`` and returns results. It must be overriden by all subclasses.
:param input: Input tensors.
:return: A tuple of Tensor or a single Tensor.
:param input: input tensors.
:return: a tuple of Tensor or a single Tensor.
.. note::
......@@ -64,12 +64,12 @@ class Function:
"""
Compute the gradient of the forward function. It must be overriden by all subclasses.
:param output_grads: gradients of outputs that are returned by :meth:`~.function.Function.forward`
:param output_grads: gradients of outputs that are returned by :meth:`~.function.Function.forward`.
.. note::
.. note::
In case when some tensors of outputs are not related to loss function, the corresponding
values in ``output_grads`` would be ``None``.
In case when some tensors of outputs are not related to loss function, the corresponding
values in ``output_grads`` would be ``None``.
.. note::
......
......@@ -173,7 +173,7 @@ def unpack_getitem(inp, tuple_val, *, allow_newaxis=True):
item.append(True)
v = get_index(v)
assert np.issubdtype(v.dtype, np.integer) or np.issubdtype(
v.dtype, np.bool
v.dtype, np.bool_
), "var type in the subscript must be int or bool"
tensors.append(v)
......@@ -267,7 +267,7 @@ def setitem(tensor, index, value):
value.shape, tmp_result.shape
)
)
value = value.broadcast(tmp_result.shape)
value = value._broadcast(tmp_result.shape)
if use_subtensor:
op = builtin.SetSubtensor(items=items)
else:
......
......@@ -8,6 +8,7 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import collections
import json
import os
import threading
import weakref
from concurrent.futures import Future, ThreadPoolExecutor
......@@ -49,7 +50,16 @@ class Graph(_imperative_rt.ComputingGraph):
def execute(self, *args):
assert self._future is None
self._future = self._executor.submit(self._function.execute, *args)
def wrapped(*args):
try:
self._function.execute(*args)
except Exception as exc:
for i in self._function._all_rendezvous:
i.set_exception(str(exc))
raise exc
self._future = self._executor.submit(wrapped, *args)
def wait(self):
assert self._future is not None
......@@ -275,6 +285,7 @@ def dump_graph(
keep_param_name: bool = False,
keep_opr_priority: bool = False,
strip_info_file=None,
append_json=False
):
"""serialize the computing graph of `output_vars` and get byte result.
......@@ -295,6 +306,9 @@ def dump_graph(
:param keep_opr_priority: whether to keep priority setting for operators
:param strip_info_file: a string for path or a file handler. If it is not None,
then the dump information for code strip would be written to ``strip_info_file``
:param append_json: only checked when `strip_info_file` is not None. If set to
True, the information for code strip will be appended to strip_info_file;
if set to False, strip_info_file will be overwritten
:return: dump result as byte string, and an instance of namedtuple
:class:`CompGraphDumpResult`, whose fields are:
......@@ -342,10 +356,25 @@ def dump_graph(
if strip_info_file is not None:
if isinstance(strip_info_file, str):
strip_info_file = open(strip_info_file, "w")
strip_info = json.loads(_imperative_rt.get_info_for_strip(ov))
strip_info["hash"] = dump_info.content_hash
json.dump(strip_info, strip_info_file)
if not os.path.exists(strip_info_file):
os.mknod(strip_info_file)
strip_info_file = open(strip_info_file, "r+")
new_strip_dict = json.loads(_imperative_rt.get_info_for_strip(ov))
ori_strip_dict = new_strip_dict
json_content = strip_info_file.read()
if append_json and len(json_content) != 0:
# if the json file already has contents, read them first and then append the new information
ori_strip_dict = json.loads(json_content)
for k in ori_strip_dict:
new_strip_dict_v = new_strip_dict.get(k)
if new_strip_dict_v is not None:
for value in new_strip_dict_v:
if not value in ori_strip_dict[k]:
ori_strip_dict[k].append(value)
ori_strip_dict["hash"] = dump_info.content_hash
strip_info_file.seek(0)
strip_info_file.truncate()
json.dump(ori_strip_dict, strip_info_file)
return dump_content, dump_info
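The append path above can be summarized by the following standalone sketch (the helper name and sample keys are illustrative only): existing entries are kept, values from the new dump are appended only if they are not already present, and the content hash is refreshed.

```python
import json

# Hypothetical helper mirroring the append_json branch of dump_graph above.
def merge_strip_info(existing_json: str, new_strip: dict, content_hash: str) -> dict:
    merged = new_strip
    if existing_json:
        merged = json.loads(existing_json)
        for k in merged:                      # only keys already present are extended
            for v in new_strip.get(k) or []:
                if v not in merged[k]:
                    merged[k].append(v)
    merged["hash"] = content_hash
    return merged

print(merge_strip_info('{"oprs": ["Elemwise"]}', {"oprs": ["Elemwise", "Pooling"]}, "abc"))
# {'oprs': ['Elemwise', 'Pooling'], 'hash': 'abc'}
```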
......@@ -358,7 +387,7 @@ CompGraphLoadResult = collections.namedtuple(
def load_graph(fpath):
"""Load a serialized computing graph from file.
:parma fpath: Path or Handle for the output file
:param fpath: Path or Handle of the input file
:return: An instance of namedtuple :class:`CompGraphLoadResult`,
whose fields are:
......
......@@ -40,6 +40,8 @@
# All Megvii Modifications are Copyright (C) 2014-2020 Megvii Inc. All rights reserved.
# --------------------------------------------------------------------------------------
from collections import OrderedDict
from .utils import _toposort, groupby
from .variadic import isvariadic
......@@ -159,5 +161,5 @@ def ordering(signatures):
for s in signatures:
if s not in edges:
edges[s] = []
edges = dict((k, [b for a, b in v]) for k, v in edges.items())
edges = OrderedDict((k, [b for a, b in v]) for k, v in edges.items())
return _toposort(edges)
......@@ -100,6 +100,8 @@ def _(data: DeviceTensorND):
@as_raw_tensor.register(np.ndarray)
def _(array: np.ndarray, dtype=None, device=None):
device = None if device is None else as_device(device).to_c()
if 0 in array.strides:
array = array.squeeze().reshape(array.shape)
return RawTensor(put(array, dtype=dtype, device=device))
......
......@@ -57,7 +57,29 @@ def _transpose(data, axes):
def _broadcast(inp, shape):
def valid_broadcast(src, tar):
def failed():
raise ValueError(
"the input shape {} can not be broadcasted to target shape {}".format(
src, tar
)
)
if isinstance(src, (TensorBase, TensorWrapperBase)):
src = src.numpy()
if isinstance(tar, (TensorBase, TensorWrapperBase)):
tar = tar.numpy()
if len(src) > len(tar):
failed()
for i in range(min(len(src), len(tar))):
if src[-i - 1] != 1 and src[-i - 1] != tar[-i - 1]:
failed()
shape = utils.astensor1d(shape, inp, dtype="int32", device=inp.device)
valid_broadcast(inp.shape, shape)
(result,) = apply(builtin.Broadcast(), inp, shape)
return result
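The rule enforced by `valid_broadcast` can be summarized in a standalone sketch (an illustrative helper, not part of the module): shapes are aligned from the right, the source may not have more dimensions than the target, and every aligned source dimension must be 1 or equal to the target dimension.

```python
# Sketch of the broadcast validity check added above.
def can_broadcast(src, tar):
    if len(src) > len(tar):
        return False
    return all(s == 1 or s == t for s, t in zip(reversed(src), reversed(tar)))

assert can_broadcast((1, 3), (2, 4, 3))   # size-1 dims expand, trailing dims match
assert not can_broadcast((2, 3), (4, 3))  # 2 is neither 1 nor equal to 4
```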
......@@ -158,6 +180,10 @@ def _reduce(mode):
def f(self, axis=None, keepdims: bool = False):
data = self
(data,) = utils.convert_inputs(data)
if mode == "MEAN":
data = data.astype("float32")
elif self.dtype == np.bool_:
data = data.astype("int32")
if axis is None:
data = data.reshape(-1)
assert not keepdims, "can not set axis=None and keepdims=True"
......@@ -180,6 +206,9 @@ def _reduce(mode):
if not keepdims:
result = _remove_axis(result, axis)
if self.dtype == np.bool_:
if mode in ["MIN", "MAX"]:
result = result.astype("bool")
return result
return f
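A standalone numpy sketch of the dtype handling added to `_reduce` above (names are illustrative): MEAN is computed in float32, boolean inputs are reduced as int32, and MIN/MAX results on boolean inputs are cast back to bool.

```python
import numpy as np

# Illustrative re-implementation of the dtype rules in _reduce above.
def reduce_with_dtype_rules(data, mode):
    was_bool = data.dtype == np.bool_
    if mode == "MEAN":
        data = data.astype("float32")
    elif was_bool:
        data = data.astype("int32")
    out = {"SUM": np.sum, "MEAN": np.mean, "MIN": np.min, "MAX": np.max}[mode](data)
    if was_bool and mode in ("MIN", "MAX"):
        out = out.astype("bool")
    return out

a = np.array([False, True, True])
print(reduce_with_dtype_rules(a, "SUM"), reduce_with_dtype_rules(a, "MAX"))  # 2 True
```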
......@@ -203,7 +232,8 @@ def _todo(*_):
def _expand_args(args):
if len(args) == 1:
if isinstance(
args[0], (collections.abc.Sequence, TensorBase, TensorWrapperBase)
args[0],
(collections.abc.Sequence, TensorBase, TensorWrapperBase, np.ndarray),
):
args = args[0]
return args
......@@ -366,7 +396,8 @@ class ArrayMethodMixin(abc.ABC):
def reshape(self, *args):
return _reshape(self, _expand_args(args))
def broadcast(self, *args):
# FIXME: remove this method
def _broadcast(self, *args):
return _broadcast(self, _expand_args(args))
def transpose(self, *args):
......@@ -377,7 +408,38 @@ class ArrayMethodMixin(abc.ABC):
def flatten(self):
return self.reshape(-1)
sum = _reduce("SUM")
def sum(self, axis=None, keepdims: bool = False):
r"""Returns the sum of each row of the input tensor in the given dimension ``axis``.
If ``axis`` is a list of axes, reduce over all of them.
If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor, except in the dimension(s) ``axis`` where it is of size 1. Otherwise, ``axis`` is squeezed(see :meth:`~.functional.tensor.squeeze`).
Same for prod/mean/max/min.
:param axis: the dimension or dimensions to reduce.
:param keepdims: whether the output tensor has ndim retained or not.
:return: output tensor.
Examples:
.. testcode::
from megengine import tensor
a = tensor([False, True, True, False])
b = tensor([1.0, 2.0, 3.0, 4.0])
print(a.sum().numpy())
print(b.sum().numpy())
Outputs:
.. testoutput::
[2]
[10.]
"""
return _reduce("SUM")(self, axis, keepdims)
prod = _reduce("PRODUCT")
min = _reduce("MIN")
max = _reduce("MAX")
......
......@@ -16,39 +16,74 @@ from ..ops.special import Const
from ..tensor.core import OpBase, TensorBase, TensorWrapperBase, apply
def dtype_promotion(raw_inputs):
def add_dtype(i):
if type(i) == int:
return np.array(i, dtype=np.int32)
if type(i) == float:
return np.array(i, dtype=np.float32)
if type(i) == bool:
return np.array(i, dtype=np.bool_)
return None
scalar_inputs = [
add_dtype(i) for i in raw_inputs if not hasattr(i, "dtype") and add_dtype(i)
]
inputs = [i for i in raw_inputs if hasattr(i, "dtype")]
assert len(scalar_inputs + inputs) > 0
dtype = None
if len(inputs) > 0:
dtype = np.result_type(*inputs)
dtype_all = np.result_type(*(inputs + scalar_inputs))
assert (
dtype != np.float64 and dtype != np.int64
), "unsupport dtype {} by dtype_promotion, please use explict type convert".format(
dtype
)
if dtype_all == np.bool_:
for i in raw_inputs:
if not hasattr(i, "dtype") or i.dtype != np.bool_:
raise TypeError(
"bool dtype can not be operated with an element without bool dtype"
)
if dtype_all == np.float64:
dtype_all = np.float32
return dtype_all
def dtype_promotion(inputs):
"""
Returns the dtype that would result from performing an arithmetic
operation on the provided input tensors and scalars.
"""
# map numpy.dtype.kind to priority
category_priority = {
"f": 3, # floating-point
"i": 2, # signed integer
"u": 2, # unsigned integer
"b": 1, # boolean
}
def scalar2dtype(x):
"""
For scalar `x`, returns its corresponding type. A floating point scalar
has dtype 'float32'. An integral non-boolean scalar has dtype 'int32'.
A boolean scalar has dtype 'bool'.
"""
if isinstance(x, bool):
return np.bool_
if isinstance(x, int):
return np.int32
if isinstance(x, float):
return np.float32
def promote_types(types, cat):
"""
Returns the data type with sufficient size to hold all types of
category `cat` in the list `types`.
"""
used_types = [
i for i in types if category_priority.get(np.dtype(i).kind, 0) == cat
]
assert len(used_types) > 0
res = used_types[0]
for i in used_types:
res = np.promote_types(res, i)
return res
def max_priority(types):
"""
Returns the maximum value of the priority of each type in the list
`types`.
"""
if not types:
return 0
else:
return max([category_priority.get(np.dtype(i).kind, 0) for i in types])
scalars = []
tensors = []
for data in inputs:
if hasattr(data, "dtype"):
tensors.append(data.dtype)
elif isinstance(data, (float, int, bool)):
scalars.append(scalar2dtype(data))
max_pri_scalars = max_priority(scalars)
max_pri_tensors = max_priority(tensors)
assert max_pri_scalars > 0 or max_pri_tensors > 0
if max_pri_scalars > max_pri_tensors:
return promote_types(scalars, max_pri_scalars)
else:
return promote_types(tensors, max_pri_tensors)
def get_device(inputs):
......
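The priority scheme above can be exercised with a small standalone sketch (a simplified re-implementation for illustration, not the module's API): scalar dtypes only win when their category, float > int > bool, outranks every tensor dtype, and promotion then happens only among dtypes of that top category.

```python
import numpy as np

PRIORITY = {"f": 3, "i": 2, "u": 2, "b": 1}  # floating > integer > boolean

def promote(tensor_dtypes, scalar_dtypes):
    def pri(ds):
        return max((PRIORITY.get(np.dtype(d).kind, 0) for d in ds), default=0)
    chosen = scalar_dtypes if pri(scalar_dtypes) > pri(tensor_dtypes) else tensor_dtypes
    top = pri(chosen)
    used = [d for d in chosen if PRIORITY.get(np.dtype(d).kind, 0) == top]
    res = used[0]
    for d in used:
        res = np.promote_types(res, d)
    return res

print(promote([np.int32], [np.float32]))  # float32: a float scalar outranks integer tensors
print(promote([np.float16], [np.int32]))  # float16: an int scalar cannot widen float tensors
```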
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from .comp_graph_tools import *
......@@ -26,7 +26,7 @@ def _clear_plasma_store():
# `_PlasmaStoreManager.__del__` will not be called automatically in subprocess,
# so this function should be called explicitly
global MGE_PLASMA_STORE_MANAGER
if MGE_PLASMA_STORE_MANAGER is not None:
if MGE_PLASMA_STORE_MANAGER is not None and MGE_PLASMA_STORE_MANAGER.refcount == 0:
del MGE_PLASMA_STORE_MANAGER
MGE_PLASMA_STORE_MANAGER = None
......@@ -50,6 +50,7 @@ class _PlasmaStoreManager:
stderr=None if debug_flag else subprocess.DEVNULL,
)
self.__initialized = True
self.refcount = 1
def __del__(self):
if self.__initialized and self.plasma_store.returncode is None:
......@@ -83,6 +84,8 @@ class PlasmaShmQueue:
"Exception happened in starting plasma_store: {}\n"
"Tips: {}".format(str(e), err_info)
)
else:
MGE_PLASMA_STORE_MANAGER.refcount += 1
self.socket_name = MGE_PLASMA_STORE_MANAGER.socket_name
......@@ -133,6 +136,8 @@ class PlasmaShmQueue:
def close(self):
self.queue.close()
self.disconnect_client()
global MGE_PLASMA_STORE_MANAGER
MGE_PLASMA_STORE_MANAGER.refcount -= 1
_clear_plasma_store()
def cancel_join_thread(self):
......
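A minimal sketch of the reference-counting pattern introduced above for the shared plasma store (names are made up for illustration): the global manager is created once, every queue that connects bumps the count, and teardown only happens when the count returns to zero.

```python
# Hypothetical stand-in for _PlasmaStoreManager / _clear_plasma_store above.
_MANAGER = None

class _Manager:
    def __init__(self):
        self.refcount = 1          # the creator holds the first reference

def acquire():
    global _MANAGER
    if _MANAGER is None:
        _MANAGER = _Manager()
    else:
        _MANAGER.refcount += 1
    return _MANAGER

def release():
    global _MANAGER
    _MANAGER.refcount -= 1
    if _MANAGER.refcount == 0:     # mirrors the guard added to _clear_plasma_store
        _MANAGER = None

acquire(); acquire()
release(); assert _MANAGER is not None   # still one user left
release(); assert _MANAGER is None       # last user gone, store torn down
```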
......@@ -34,14 +34,14 @@ default_collate_err_msg_format = (
class Collator:
r"""
Used for merge a list of samples to form a mini-batch of Tenor(s). Used when using batched loading from a dataset.
modified from https://github.com/pytorch/pytorch/blob/master/torch/utils/data/_utils/collate.py
Used for merging a list of samples to form a mini-batch of Tensor(s). Used when using batched loading from a dataset.
Modified from https://github.com/pytorch/pytorch/blob/master/torch/utils/data/_utils/collate.py
"""
def apply(self, inputs):
"""
input : sequence_N(tuple(CHW, C, CK))
output : tuple(NCHW, NC, NCK)
:param input: sequence_N(tuple(CHW, C, CK)).
:return: tuple(NCHW, NC, NCK).
"""
elem = inputs[0]
elem_type = type(elem)
......
......@@ -43,7 +43,7 @@ class DataLoader:
):
r"""Provides a convenient way to iterate on a given dataset.
`DataLoader` combines a dataset with sampler, transform and collator,
`DataLoader` combines a dataset with `sampler`, `transform` and `collator`,
make it flexible to get minibatch continually from a dataset.
:type dataset: Dataset
......@@ -53,21 +53,21 @@ class DataLoader:
If specified, :attr:`shuffle` must be ``False``.
:type transform: Transform
:param transform: defined the transforming strategy for a sampled batch.
(default: ``None``)
Default: None
:type collator: Collator
:param collator: defined the merging strategy for a transformed batch.
(default: ``None``)
Default: None
:type num_workers: int
:param num_workers: the number of sub-process to load, transform and collate
the batch. ``0`` means using single-process. (default: ``0``)
the batch. ``0`` means using single-process. Default: 0
:type timeout: int
:param timeout: if positive, means the timeout value(second) for collecting a
batch from workers. (default: 0)
batch from workers. Default: 0
:type divide: bool
:param divide: define the parallelism strategy in multi-processing mode.
``True`` means one batch is divided into :attr:`num_workers` pieces, and
the workers will process these pieces parallelly. ``False`` means
different sub-process will process different batch. (default: ``False``)
different sub-process will process different batch. Default: False
"""
......
......@@ -12,7 +12,7 @@ from typing import Tuple
class Dataset(ABC):
r"""
An abstract class for all Datasets
An abstract class for all Datasets.
"""
@abstractmethod
......@@ -22,8 +22,8 @@ class Dataset(ABC):
class MapDataset(Dataset):
r"""
An abstract class for map data
__getitem__ and __len__ method are aditionally needed
An abstract class for map data.
__getitem__ and __len__ methods are additionally needed.
"""
@abstractmethod
......@@ -41,8 +41,8 @@ class MapDataset(Dataset):
class StreamDataset(Dataset):
r"""
An abstract class for stream data
__iter__ method is aditionally needed
An abstract class for stream data.
__iter__ method is additionally needed.
"""
@abstractmethod
......
......@@ -21,7 +21,7 @@ logger = get_logger(__name__)
class CIFAR10(VisionDataset):
r""" ``Dataset`` for CIFAR10 meta data
r""" ``Dataset`` for CIFAR10 meta data.
"""
url_path = "http://www.cs.utoronto.ca/~kriz/"
......
......@@ -118,7 +118,7 @@ class COCO(VisionDataset):
self.ids = ids
self.json_category_id_to_contiguous_id = {
v: i + 1 for i, v in enumerate(self.cats.keys())
v: i + 1 for i, v in enumerate(sorted(self.cats.keys()))
}
self.contiguous_category_id_to_json_id = {
......
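The effect of sorting the category keys above can be seen in a tiny standalone example (the category table is hypothetical): enumerating sorted ids yields a deterministic contiguous-id mapping regardless of dict iteration order.

```python
cats = {18: "dog", 1: "person", 3: "car"}  # hypothetical COCO-style category table
json_to_contiguous = {v: i + 1 for i, v in enumerate(sorted(cats.keys()))}
print(json_to_contiguous)  # {1: 1, 3: 2, 18: 3}
```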
......@@ -30,19 +30,18 @@ class ImageFolder(VisionDataset):
r"""
ImageFolder is a class for loading image data and labels from an organized folder.
the folder is expected to be organized as followed
root/cls/xxx.img_ext
The folder is expected to be organized as follows: root/cls/xxx.img_ext
labels are indices of sorted classes in the root directory
Labels are indices of sorted classes in the root directory.
:param root: root directory of an image folder
:param root: root directory of an image folder.
:param loader: a function used to load image from path,
if ``None``, default function that loads
images with PILwill be called
images with PIL will be called.
:param check_valid_func: a function used to check if files in folder are
expected image files, if ``None``, default function
that checks file extensions will be called
:param class_name: if ``True``, return class name instead of class index
that checks file extensions will be called.
:param class_name: if ``True``, return class name instead of class index.
"""
super().__init__(root, order=("image", "image_category"))
......
......@@ -31,7 +31,7 @@ logger = get_logger(__name__)
class ImageNet(ImageFolder):
r"""
Load ImageNet from raw files or folder, expected folder looks like
Load ImageNet from raw files or folder. Expected folder looks like:
.. code-block:: bash
......@@ -60,25 +60,25 @@ class ImageNet(ImageFolder):
def __init__(self, root: str = None, train: bool = True, **kwargs):
r"""
initialization:
Initialization:
* if ``root`` contains ``self.target_folder`` depent on ``train``:
* if ``root`` contains ``self.target_folder`` depending on ``train``:
* initialize ImageFolder with target_folder
* initialize ImageFolder with target_folder.
* else:
* if all raw files are in ``root``:
* parse ``self.target_folder`` from raw files
* initialize ImageFolder with ``self.target_folder``
* parse ``self.target_folder`` from raw files.
* initialize ImageFolder with ``self.target_folder``.
* else:
* raise error
* raise error.
:param root: root directory of imagenet data, if root is ``None``, used default_dataset_root
:param train: if ``True``, load the train split, otherwise load the validation split
:param root: root directory of imagenet data, if root is ``None``, use default_dataset_root.
:param train: if ``True``, load the train split, otherwise load the validation split.
"""
# process the root path
......
......@@ -22,12 +22,12 @@ logger = get_logger(__name__)
class MNIST(VisionDataset):
r""" ``Dataset`` for MNIST meta data
r""" ``Dataset`` for MNIST meta data.
"""
url_path = "http://yann.lecun.com/exdb/mnist/"
"""
url prefix for downloading raw file
Url prefix for downloading raw file.
"""
raw_file_name = [
"train-images-idx3-ubyte.gz",
......@@ -36,7 +36,7 @@ class MNIST(VisionDataset):
"t10k-labels-idx1-ubyte.gz",
]
"""
raw file names of both training set and test set (10k)
Raw file names of both training set and test set (10k).
"""
raw_file_md5 = [
"f68b3c2dcbeaaa9fbdd348bbdeb94873",
......@@ -45,7 +45,7 @@ class MNIST(VisionDataset):
"ec29112dd5afa0611ce80d1b7f02629c",
]
"""
md5 for checking raw files
Md5 for checking raw files.
"""
def __init__(
......@@ -57,10 +57,10 @@ class MNIST(VisionDataset):
):
r"""
:param root: path for mnist dataset downloading or loading, if ``None``,
set ``root`` to the ``_default_root``
:param train: if ``True``, loading trainingset, else loading test set
set ``root`` to the ``_default_root``.
:param train: if ``True``, loading trainingset, else loading test set.
:param download: if raw files do not exist and ``download`` is set to ``True``,
download raw files and process, otherwise raise ValueError, default is True
download and process the raw files, otherwise raise ValueError. Default: True.
"""
super().__init__(root, order=("image", "image_category"))
......
......@@ -81,7 +81,7 @@ class Objects365(VisionDataset):
self.ids = ids
self.json_category_id_to_contiguous_id = {
v: i + 1 for i, v in enumerate(self.cats.keys())
v: i + 1 for i, v in enumerate(sorted(self.cats.keys()))
}
self.contiguous_category_id_to_json_id = {
......
......@@ -75,6 +75,8 @@ class PascalVOC(VisionDataset):
else:
raise NotImplementedError
self.img_infos = dict()
def __getitem__(self, index):
target = []
for k in self.order:
......@@ -107,9 +109,8 @@ class PascalVOC(VisionDataset):
mask = mask[:, :, np.newaxis]
target.append(mask)
elif k == "info":
if image is None:
image = cv2.imread(self.images[index], cv2.IMREAD_COLOR)
info = [image.shape[0], image.shape[1], self.file_names[index]]
info = self.get_img_info(index, image)
info = [info["height"], info["width"], info["file_name"]]
target.append(info)
else:
raise NotImplementedError
......@@ -119,6 +120,17 @@ class PascalVOC(VisionDataset):
def __len__(self):
return len(self.images)
def get_img_info(self, index, image=None):
if index not in self.img_infos:
if image is None:
image = cv2.imread(self.images[index], cv2.IMREAD_COLOR)
self.img_infos[index] = dict(
height=image.shape[0],
width=image.shape[1],
file_name=self.file_names[index],
)
return self.img_infos[index]
def _trans_mask(self, mask):
label = np.ones(mask.shape[:2]) * 255
for i in range(len(self.class_colors)):
......@@ -171,25 +183,3 @@ class PascalVOC(VisionDataset):
"train",
"tvmonitor",
)
class_colors = [
[0, 0, 128],
[0, 128, 0],
[0, 128, 128],
[128, 0, 0],
[128, 0, 128],
[128, 128, 0],
[128, 128, 128],
[0, 0, 64],
[0, 0, 192],
[0, 128, 64],
[0, 128, 192],
[128, 0, 64],
[128, 0, 192],
[128, 128, 64],
[128, 128, 192],
[0, 64, 0],
[0, 64, 128],
[0, 192, 0],
[0, 192, 128],
[128, 64, 0],
]
......@@ -28,25 +28,25 @@ class Sampler(ABC):
seed=None,
):
r"""
An abstract class for all sampler
An abstract class for all samplers.
:type dataset: `dataset`
:param dataset: dataset to sample from
:param dataset: dataset to sample from.
:type batch_size: positive integer
:param batch_size: batch size for batch method
:param batch_size: batch size for batch method.
:type drop_last: bool
:param drop_last: set ``True`` to drop the last incomplete batch,
if the dataset size is not divisible by the batch size. If ``False`` and
the size of dataset is not divisible by the batch_size, then the last batch will
be smaller. (default: ``False``)
be smaller. Default: False
:type num_samples: positive integer
:param num_samples: number of samples assigned to one rank
:param num_samples: number of samples assigned to one rank.
:type world_size: positive integer
:param world_size: number of ranks
:param world_size: number of ranks.
:type rank: non-negative integer within 0 and world_size
:param rank: rank id, non-negative interger within 0 and ``world_size``
:param rank: rank id, non-negative integer within 0 and ``world_size``.
:type seed: non-negative integer
:param seed: seed for random operators
:param seed: seed for random operators.
"""
if (
not isinstance(batch_size, int)
......@@ -103,15 +103,15 @@ class Sampler(ABC):
def sample(self):
"""
return a list contains all sample indices
Return a list contains all sample indices.
"""
raise NotImplementedError
def scatter(self, indices) -> List:
r"""
scatter method is used for splitting indices into subset, each subset
Scatter method is used for splitting indices into subsets; each subset
will be assigned to a rank. Indices are evenly split by default.
If customized indices assignment method is needed, please rewrite this method
If customized indices assignment method is needed, please rewrite this method.
"""
total_size = self.num_samples * self.world_size
......@@ -127,7 +127,7 @@ class Sampler(ABC):
def batch(self) -> Iterator[List[Any]]:
r"""
batch method provides a batch indices generator
Batch method provides a batch indices generator.
"""
indices = list(self.sample())
......@@ -156,7 +156,7 @@ class SequentialSampler(Sampler):
rank=None,
):
r"""
Sample elements sequentially
Sample elements sequentially.
"""
super().__init__(dataset, batch_size, drop_last, None, world_size, rank)
if indices is not None and not isinstance(indices, collections.abc.Sequence):
......@@ -168,7 +168,7 @@ class SequentialSampler(Sampler):
def sample(self) -> Iterator[Any]:
r"""
return a generator
Return a generator.
"""
if self.indices is None:
return iter(range(len(self.dataset)))
......@@ -188,7 +188,7 @@ class RandomSampler(Sampler):
seed=None,
):
r"""
Sample elements randomly without replacement
Sample elements randomly without replacement.
"""
super().__init__(dataset, batch_size, drop_last, None, world_size, rank, seed)
if indices is not None and not isinstance(indices, collections.abc.Sequence):
......@@ -218,10 +218,10 @@ class ReplacementSampler(Sampler):
seed=None,
):
r"""
Sample elements randomly with replacement
Sample elements randomly with replacement.
:type weights: List
:param weights: weights for sampling indices, it could be unnormalized weights
:param weights: weights for sampling indices, it could be unnormalized weights.
"""
super().__init__(
dataset, batch_size, drop_last, num_samples, world_size, rank, seed
......@@ -250,7 +250,7 @@ class ReplacementSampler(Sampler):
class Infinite(Sampler):
r"""Infinite Sampler warper for basic sampler"""
r"""Infinite Sampler warper for basic sampler."""
def sample(self):
raise NotImplementedError("sample method not supported in Infinite")
......
......@@ -12,7 +12,7 @@ from typing import Sequence, Tuple
class Transform(ABC):
"""
rewrite apply method in subclass
Rewrite apply method in subclass.
"""
def apply_batch(self, inputs: Sequence[Tuple]):
......
......@@ -15,7 +15,7 @@ import numpy as np
def wrap_keepdims(func):
"""Wraper to keep the dimension of input images unchanged"""
"""Wraper to keep the dimension of input images unchanged."""
@functools.wraps(func)
def wrapper(image, *args, **kwargs):
......@@ -34,10 +34,10 @@ def wrap_keepdims(func):
@wrap_keepdims
def to_gray(image):
r"""
Change BGR format image's color space to gray
Change BGR format image's color space to gray.
:param image: Input BGR format image, with (H, W, C) shape
:return: Gray format image, with (H, W, C) shape
:param image: input BGR format image, with `(H, W, C)` shape.
:return: gray format image, with `(H, W, C)` shape.
"""
return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
......@@ -45,10 +45,10 @@ def to_gray(image):
@wrap_keepdims
def to_bgr(image):
r"""
Change gray format image's color space to BGR
Change gray format image's color space to BGR.
:param image: input Gray format image, with (H, W, C) shape
:return: BGR format image, with (H, W, C) shape
:param image: input Gray format image, with `(H, W, C)` shape.
:return: BGR format image, with `(H, W, C)` shape.
"""
return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
......@@ -56,18 +56,18 @@ def to_bgr(image):
@wrap_keepdims
def pad(input, size, value):
r"""
Pad input data with *value* and given *size*
Pad input data with *value* and given *size*.
:param input: Input data, with (H, W, C) shape
:param size: Padding size of input data, it could be integer or sequence.
If it's an integer, the input data will be padded in four directions.
If it's a sequence contains two integer, the bottom and right side
:param input: input data, with `(H, W, C)` shape.
:param size: padding size of input data, it could be integer or sequence.
If it is an integer, the input data will be padded in four directions.
If it is a sequence contains two integer, the bottom and right side
of input data will be padded.
If it's a sequence contains four integer, the top, bottom, left, right
If it is a sequence contains four integer, the top, bottom, left, right
side of input data will be padded with given size.
:param value: Padding value of data, could be a sequence of int or float.
if it's float value, the dtype of image will be casted to float32 also.
:return: Padded image
:param value: padding value of data, could be a sequence of int or float.
If it is float value, the dtype of image will be casted to float32 also.
:return: padded image.
"""
if isinstance(size, int):
size = (size, size, size, size)
......@@ -81,14 +81,18 @@ def pad(input, size, value):
@wrap_keepdims
def flip(image, flipCode):
r"""
Accordding to the flipCode (the type of flip), flip the input image
According to the flipCode (the type of flip), flip the input image.
:param image: Input image, with (H, W, C) shape
:param image: input image, with `(H, W, C)` shape.
:param flipCode: code that indicates the type of flip.
1 : Flip horizontally
0 : Flip vertically
-1 : Flip horizontally and vertically
:return: BGR format image, with (H, W, C) shape
* 1 : Flip horizontally
* 0 : Flip vertically
* -1: Flip horizontally and vertically
:return: BGR format image, with `(H, W, C)` shape.
"""
return cv2.flip(image, flipCode=flipCode)
......@@ -96,12 +100,12 @@ def flip(image, flipCode):
@wrap_keepdims
def resize(input, size, interpolation=cv2.INTER_LINEAR):
r"""
resize the input data to given size
Resize the input data to given size.
:param input: Input data, could be image or masks, with (H, W, C) shape
:param size: Target size of input data, with (height, width) shape.
:param interpolation: Interpolation method.
:return: Resized data, with (H, W, C) shape
:param input: input data, could be image or masks, with `(H, W, C)` shape.
:param size: target size of input data, with (height, width) shape.
:param interpolation: interpolation method.
:return: resized data, with `(H, W, C)` shape.
"""
if len(size) != 2:
raise ValueError("resize needs (h, w), but got {}".format(size))
......
......@@ -44,26 +44,26 @@ __all__ = [
class VisionTransform(Transform):
r"""
Base class of all transforms used in computer vision.
calling logic: apply_batch() -> apply() -> _apply_image() and other _apply_*()
Calling logic: apply_batch() -> apply() -> _apply_image() and other _apply_*()
method. If you want to implement a self-defined transform method for image,
rewrite _apply_image method in subclass.
:param order: Input type order. Input is a tuple contains different structures,
:param order: input type order. Input is a tuple containing different structures,
order is used to specify the order of structures. For example, if your input
is (image, boxes) type, then the order should be ("image", "boxes").
Current available strings & data type are describe below:
is (image, boxes) type, then the ``order`` should be ("image", "boxes").
Currently available strings and data types are described below:
* "image": input image, with shape of (H, W, C)
* "coords": coordinates, with shape of (N, 2)
* "boxes": bounding boxes, with shape of (N, 4), "xyxy" format,
* "image": input image, with shape of `(H, W, C)`.
* "coords": coordinates, with shape of `(N, 2)`.
* "boxes": bounding boxes, with shape of `(N, 4)`, "xyxy" format,
the 1st "xy" represents top left point of a box,
the 2nd "xy" represents right bottom point.
* "mask": map used for segmentation, with shape of (H, W, 1)
* "keypoints": keypoints with shape of (N, K, 3), N for number of instances,
* "mask": map used for segmentation, with shape of `(H, W, 1)`.
* "keypoints": keypoints with shape of `(N, K, 3)`, N for number of instances,
and K for number of keypoints in one instance. The first two elements of
the last axis are the coordinates of keypoints and the 3rd element is
the label of keypoints.
* "polygons": A sequence contains numpy array, its length is number of instances.
* "polygons": a sequence containing numpy arrays, its length is the number of instances.
Each numpy array represents polygon coordinate of one instance.
* "category": categories for some data type. For example, "image_category"
means category of the input image and "boxes_category" means categories of
......@@ -94,11 +94,11 @@ class VisionTransform(Transform):
self.order = order
def apply_batch(self, inputs: Sequence[Tuple]):
r"""Apply transform on batch input data"""
r"""Apply transform on batch input data."""
return tuple(self.apply(input) for input in inputs)
def apply(self, input: Tuple):
r"""Apply transform on single input data"""
r"""Apply transform on single input data."""
if not isinstance(input, tuple):
input = (input,)
......@@ -156,10 +156,10 @@ class VisionTransform(Transform):
class ToMode(VisionTransform):
r"""Change input data to a target mode.
For example, most transforms use HWC mode image,
while the Neural Network might use CHW mode input tensor
while the neural network might use CHW mode input tensor.
:param mode: Output mode of input. Use "CHW" mode by default.
:param order: The same with :class:`VisionTransform`
:param mode: output mode of input. Default: "CHW"
:param order: the same with :class:`VisionTransform`
"""
def __init__(self, mode="CHW", *, order=None):
......@@ -185,14 +185,14 @@ class Compose(VisionTransform):
r"""
Composes several transforms together.
:param transforms: List of :class:`VisionTransform` to compose.
:param batch_compose: Whether use shuffle_indices for batch data or not.
:param transforms: list of :class:`VisionTransform` to compose.
:param batch_compose: whether use shuffle_indices for batch data or not.
If True, use original input sequence.
Otherwise, the shuffle_indices will be used for transforms.
:param shuffle_indices: Indices used for random shuffle, start at 1.
:param shuffle_indices: indices used for random shuffle, start at 1.
For example, if shuffle_indices is [(1, 3), (2, 4)], then the 1st and 3rd transform
will be randomly shuffled, and the 2nd and 4th transforms will also be shuffled.
:param order: The same with :class:`VisionTransform`
:param order: the same with :class:`VisionTransform`
Examples:
......@@ -264,8 +264,8 @@ class TorchTransformCompose(VisionTransform):
some tensor-based transforms in torchvision are not supported,
such as Normalize and ToTensor.
:param transforms: The same with ``Compose``
:param order: The same with :class:`VisionTransform`
:param transforms: the same with ``Compose``.
:param order: the same with :class:`VisionTransform`.
"""
def __init__(self, transforms, *, order=None):
......@@ -303,16 +303,16 @@ class TorchTransformCompose(VisionTransform):
class Pad(VisionTransform):
r"""Pad the input data.
:param size: Padding size of input image, it could be integer or sequence.
If it's an integer, the input image will be padded in four directions.
If it's a sequence contains two integer, the bottom and right side
:param size: padding size of input image, it could be integer or sequence.
If it is an integer, the input image will be padded in four directions.
If it is a sequence containing two integers, the bottom and right side
of image will be padded.
If it's a sequence contains four integer, the top, bottom, left, right
If it is a sequence containing four integers, the top, bottom, left, right
side of image will be padded with given size.
:param value: Padding value of image, could be a sequence of int or float.
if it's float value, the dtype of image will be casted to float32 also.
:param mask_value: Padding value of segmentation map.
:param order: The same with :class:`VisionTransform`
:param value: padding value of image, could be a sequence of int or float.
If it is a float value, the dtype of image will be cast to float32 as well.
:param mask_value: padding value of segmentation map.
:param order: the same with :class:`VisionTransform`.
"""
def __init__(self, size=0, value=0, mask_value=0, *, order=None):
......@@ -350,15 +350,15 @@ class Pad(VisionTransform):
class Resize(VisionTransform):
r"""Resize the input data.
:param output_size: Target size of image, with (height, width) shape.
:param interpolation: Interpolation method. All methods are listed below:
:param output_size: target size of image, with (height, width) shape.
:param interpolation: interpolation method. All methods are listed below:
* cv2.INTER_NEAREST – a nearest-neighbor interpolation.
* cv2.INTER_LINEAR – a bilinear interpolation (used by default).
* cv2.INTER_AREA – resampling using pixel area relation.
* cv2.INTER_CUBIC – a bicubic interpolation over 4×4 pixel neighborhood.
* cv2.INTER_LANCZOS4 – a Lanczos interpolation over 8×8 pixel neighborhood.
:param order: The same with :class:`VisionTransform`
:param order: the same with :class:`VisionTransform`.
"""
def __init__(self, output_size, interpolation=cv2.INTER_LINEAR, *, order=None):
......@@ -476,8 +476,8 @@ class ShortestEdgeResize(VisionTransform):
class RandomResize(VisionTransform):
r"""Resize the input data randomly.
:param scale_range: .
:param order: The same with :class:`VisionTransform`
:param scale_range: range of scaling.
:param order: the same with :class:`VisionTransform`.
"""
def __init__(self, scale_range, interpolation=cv2.INTER_LINEAR, *, order=None):
......@@ -519,13 +519,13 @@ class RandomResize(VisionTransform):
class RandomCrop(VisionTransform):
r"""Crop the input data randomly. Before applying the crop transform,
pad the image first. And if target size is still bigger than the size of
pad the image first. If target size is still bigger than the size of
padded image, pad the image size to target size.
:param output_size: Target size of output image, with (height, width) shape.
:param padding_size: The same with `size` in ``Pad``
:param padding_value: The same with `value` in ``Pad``
:param order: The same with :class:`VisionTransform`
:param output_size: target size of output image, with (height, width) shape.
:param padding_size: the same with `size` in ``Pad``.
:param padding_value: the same with `value` in ``Pad``.
:param order: the same with :class:`VisionTransform`.
"""
def __init__(
......@@ -580,10 +580,10 @@ class RandomResizedCrop(VisionTransform):
aspect ratio (default: 3/4 to 1.33) of the original aspect ratio is made.
After applying the crop transform, the input data will be resized to the given size.
:param output_size: Target size of output image, with (height, width) shape.
:param scale_range: Range of size of the origin size cropped. Default: (0.08, 1.0)
:param ratio_range: Range of aspect ratio of the origin aspect ratio cropped. Default: (0.75, 1.33)
:param order: The same with :class:`VisionTransform`
:param output_size: target size of output image, with (height, width) shape.
:param scale_range: range of size of the origin size cropped. Default: (0.08, 1.0)
:param ratio_range: range of aspect ratio of the origin aspect ratio cropped. Default: (0.75, 1.33)
:param order: the same with :class:`VisionTransform`.
"""
def __init__(
......@@ -666,8 +666,8 @@ class RandomResizedCrop(VisionTransform):
class CenterCrop(VisionTransform):
r"""Crops the given the input data at the center.
:param output_size: Target size of output image, with (height, width) shape.
:param order: The same with :class:`VisionTransform`
:param output_size: target size of output image, with (height, width) shape.
:param order: the same with :class:`VisionTransform`.
"""
def __init__(self, output_size, *, order=None):
......@@ -710,7 +710,7 @@ class RandomHorizontalFlip(VisionTransform):
r"""Horizontally flip the input data randomly with a given probability.
:param p: probability of the input data being flipped. Default: 0.5
:param order: The same with :class:`VisionTransform`
:param order: the same with :class:`VisionTransform`.
"""
def __init__(self, prob: float = 0.5, *, order=None):
......@@ -742,7 +742,7 @@ class RandomVerticalFlip(VisionTransform):
r"""Vertically flip the input data randomly with a given probability.
:param p: probability of the input data being flipped. Default: 0.5
:param order: The same with :class:`VisionTransform`
:param order: the same with :class:`VisionTransform`.
"""
def __init__(self, prob: float = 0.5, *, order=None):
......@@ -776,9 +776,9 @@ class Normalize(VisionTransform):
this transform will normalize each channel of the input data.
``output[channel] = (input[channel] - mean[channel]) / std[channel]``
:param mean: Sequence of means for each channel.
:param std: Sequence of standard deviations for each channel.
:param order: The same with :class:`VisionTransform`
:param mean: sequence of means for each channel.
:param std: sequence of standard deviations for each channel.
:param order: the same with :class:`VisionTransform`.
"""
def __init__(self, mean=0.0, std=1.0, *, order=None):
......@@ -802,7 +802,7 @@ class GaussianNoise(VisionTransform):
:param mean: Gaussian mean used to generate noise.
:param std: Gaussian standard deviation used to generate noise.
:param order: The same with :class:`VisionTransform`
:param order: the same with :class:`VisionTransform`
"""
def __init__(self, mean=0.0, std=1.0, *, order=None):
......@@ -826,9 +826,9 @@ class GaussianNoise(VisionTransform):
class BrightnessTransform(VisionTransform):
r"""Adjust brightness of the input data.
:param value: How much to adjust the brightness. Can be any
non negative number. 0 gives the original image
:param order: The same with :class:`VisionTransform`
:param value: how much to adjust the brightness. Can be any
non negative number. 0 gives the original image.
:param order: the same with :class:`VisionTransform`.
"""
def __init__(self, value, *, order=None):
......@@ -857,9 +857,9 @@ class BrightnessTransform(VisionTransform):
class ContrastTransform(VisionTransform):
r"""Adjust contrast of the input data.
:param value: How much to adjust the contrast. Can be any
non negative number. 0 gives the original image
:param order: The same with :class:`VisionTransform`
:param value: how much to adjust the contrast. Can be any
non negative number. 0 gives the original image.
:param order: the same with :class:`VisionTransform`.
"""
def __init__(self, value, *, order=None):
......@@ -888,9 +888,9 @@ class ContrastTransform(VisionTransform):
class SaturationTransform(VisionTransform):
r"""Adjust saturation of the input data.
:param value: How much to adjust the saturation. Can be any
non negative number. 0 gives the original image
:param order: The same with :class:`VisionTransform`
:param value: how much to adjust the saturation. Can be any
non negative number. 0 gives the original image.
:param order: the same with :class:`VisionTransform`.
"""
def __init__(self, value, *, order=None):
......@@ -919,9 +919,9 @@ class SaturationTransform(VisionTransform):
class HueTransform(VisionTransform):
r"""Adjust hue of the input data.
:param value: How much to adjust the hue. Can be any number
between 0 and 0.5, 0 gives the original image
:param order: The same with :class:`VisionTransform`
:param value: how much to adjust the hue. Can be any number
between 0 and 0.5; 0 gives the original image.
:param order: the same with :class:`VisionTransform`.
"""
def __init__(self, value, *, order=None):
......@@ -957,19 +957,19 @@ class HueTransform(VisionTransform):
class ColorJitter(VisionTransform):
r"""Randomly change the brightness, contrast, saturation and hue of an image.
:param brightness: How much to jitter brightness.
:param brightness: how much to jitter brightness.
Chosen uniformly from [max(0, 1 - brightness), 1 + brightness]
or the given [min, max]. Should be non negative numbers.
:param contrast: How much to jitter contrast.
:param contrast: how much to jitter contrast.
Chosen uniformly from [max(0, 1 - contrast), 1 + contrast]
or the given [min, max]. Should be non negative numbers.
:param saturation: How much to jitter saturation.
:param saturation: how much to jitter saturation.
Chosen uniformly from [max(0, 1 - saturation), 1 + saturation]
or the given [min, max]. Should be non negative numbers.
:param hue: How much to jitter hue.
:param hue: how much to jitter hue.
Chosen uniformly from [-hue, hue] or the given [min, max].
Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
:param order: The same with :class:`VisionTransform`
:param order: the same with :class:`VisionTransform`.
"""
def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, *, order=None):
......
......@@ -7,6 +7,7 @@
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import os
import re
from .core._imperative_rt.common import CompNode, DeviceType
from .core._imperative_rt.common import set_prealloc_config as _set_prealloc_config
......@@ -22,10 +23,8 @@ __all__ = [
def _valid_device(inp):
if isinstance(inp, str) and len(inp) == 4:
if inp[0] in {"x", "c", "g"} and inp[1:3] == "pu":
if inp[3] == "x" or inp[3].isdigit():
return True
if isinstance(inp, str) and re.match("^[cxg]pu(\d+|\d+:\d+|x)$", inp):
return True
return False
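A quick standalone check of the new regex, showing which device names it accepts (the pattern is copied from the code above):

import re

pattern = re.compile(r"^[cxg]pu(\d+|\d+:\d+|x)$")
for name in ["cpu0", "gpu3", "xpux", "gpu0:1", "cpu", "tpu0"]:
    print(name, bool(pattern.match(name)))
# cpu0, gpu3, xpux and gpu0:1 match; "cpu" (missing index) and "tpu0" do not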
......@@ -71,11 +70,11 @@ def set_default_device(device: str = "xpux"):
'multithread' device type is available for inference, which implements
multi-threading parallelism at the operator level. For example,
'multithread4' will compute with 4 threads. which implements
'multithread4' will compute with 4 threads.
The default value is 'xpux' to specify any device available. The priority of using gpu is higher when both gpu and cpu are available.
It can also be set by environmental variable `MGE_DEFAULT_DEVICE`.
It can also be set by environment variable `MGE_DEFAULT_DEVICE`.
"""
assert _valid_device(device), "Invalid device name {}".format(device)
CompNode._set_default_device(device)
......@@ -99,13 +98,13 @@ def set_prealloc_config(
growth_factor=2.0,
device_type=DeviceType.CUDA,
):
"""specifies how to pre-allocate from raw dev allocator
"""Specifies how to pre-allocate from raw device allocator.
:param alignment: specifies the alignment in bytes.
:param min_req: min request size in bytes.
:param max_overhead: max overhead above required size in bytes.
:growth_factor: request size / cur allocated
:device_type: the device type
:param growth_factor: `request size / cur allocated`
:param device_type: the device type
"""
assert alignment > 0
......
......@@ -102,7 +102,7 @@ def _(op: RemoteRecv):
def collective_comm(inp, mode, group, device):
"""Helper function for applying collective communication functions"""
"""Helper function for applying collective communication functions."""
assert isinstance(group, Group)
if group is None:
return inp
......@@ -123,11 +123,11 @@ def collective_comm(inp, mode, group, device):
def reduce_sum(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor:
"""Create reduce_sum operator for collective communication
"""Create reduce_sum operator for collective communication.
:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
"""
mode = CollectiveCommMode.REDUCE_SUM
return collective_comm(inp, mode, group, device)
......@@ -136,11 +136,11 @@ def reduce_sum(
def broadcast(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor:
"""Create broadcast operator for collective communication
"""Create broadcast operator for collective communication.
:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
"""
mode = CollectiveCommMode.BROADCAST
return collective_comm(inp, mode, group, device)
......@@ -149,11 +149,11 @@ def broadcast(
def all_gather(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor:
"""Create all_gather operator for collective communication
"""Create all_gather operator for collective communication.
:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
"""
mode = CollectiveCommMode.ALL_GATHER
return collective_comm(inp, mode, group, device)
......@@ -162,11 +162,11 @@ def all_gather(
def reduce_scatter_sum(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor:
"""Create reduce_scatter_sum operator for collective communication
"""Create reduce_scatter_sum operator for collective communication.
:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
"""
mode = CollectiveCommMode.REDUCE_SCATTER_SUM
return collective_comm(inp, mode, group, device)
......@@ -175,11 +175,11 @@ def reduce_scatter_sum(
def all_reduce_sum(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor:
"""Create all_reduce_sum operator for collective communication
"""Create all_reduce_sum operator for collective communication.
:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
"""
mode = CollectiveCommMode.ALL_REDUCE_SUM
return collective_comm(inp, mode, group, device)
......@@ -188,11 +188,11 @@ def all_reduce_sum(
def all_reduce_max(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor:
"""Create all_reduce_max operator for collective communication
"""Create all_reduce_max operator for collective communication.
:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
"""
mode = CollectiveCommMode.ALL_REDUCE_MAX
return collective_comm(inp, mode, group, device)
......@@ -201,11 +201,11 @@ def all_reduce_max(
def all_reduce_min(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor:
"""Create all_reduce_min operator for collective communication
"""Create all_reduce_min operator for collective communication.
:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
"""
mode = CollectiveCommMode.ALL_REDUCE_MIN
return collective_comm(inp, mode, group, device)
......@@ -214,11 +214,11 @@ def all_reduce_min(
def gather(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor:
"""Create gather operator for collective communication
"""Create gather operator for collective communication.
:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
"""
mode = CollectiveCommMode.GATHER
return collective_comm(inp, mode, group, device)
......@@ -227,11 +227,11 @@ def gather(
def scatter(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor:
"""Create scatter operator for collective communication
"""Create scatter operator for collective communication.
:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
"""
mode = CollectiveCommMode.SCATTER
return collective_comm(inp, mode, group, device)
......@@ -240,21 +240,21 @@ def scatter(
def all_to_all(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor:
"""Create all_to_all operator for collective communication
"""Create all_to_all operator for collective communication.
:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
"""
mode = CollectiveCommMode.ALL_TO_ALL
return collective_comm(inp, mode, group, device)
def remote_send(inp: Tensor, dest_rank: int) -> Tensor:
"""Send a Tensor to a remote process
"""Send a Tensor to a remote process.
:param inp: tensor to send
:param dest_rank: destination process rank
:param inp: tensor to send.
:param dest_rank: destination process rank.
"""
op = RemoteSend()
op.key = "{}->{}".format(get_rank(), dest_rank)
......@@ -266,12 +266,12 @@ def remote_send(inp: Tensor, dest_rank: int) -> Tensor:
def remote_recv(
src_rank: int, shape: Tuple[int], dtype: type, device: Optional[str] = None
) -> Tensor:
"""Receive a Tensor from a remote process
"""Receive a Tensor from a remote process.
:param src_rank: source process rank
:param shape: the shape of the tensor to receive
:param dtype: the data type of the tensor to receive
:param device: the device to place the received tensor
:param src_rank: source process rank.
:param shape: the shape of the tensor to receive.
:param dtype: the data type of the tensor to receive.
:param device: the device to place the received tensor.
"""
key = "{}->{}".format(src_rank, get_rank())
......
......@@ -83,12 +83,12 @@ def init_process_group(
) -> None:
"""Initialize the distributed process group and specify the device used in the current process
:param master_ip: IP address of the master node
:param port: Port available for all processes to communicate
:param world_size: Total number of processes participating in the job
:param rank: Rank of the current process
:param device: The GPU device id to bind this process to
:param backend: Communicator backend, currently support 'nccl' and 'ucx'
:param master_ip: ip address of the master node.
:param port: port available for all processes to communicate.
:param world_size: total number of processes participating in the job.
:param rank: rank of the current process.
:param device: the GPU device id to bind this process to.
:param backend: communicator backend, currently support 'nccl' and 'ucx'.
"""
if not isinstance(master_ip, str):
raise TypeError("Expect type str but got {}".format(type(master_ip)))
......@@ -127,50 +127,50 @@ def init_process_group(
def is_distributed() -> bool:
"""Return True if the distributed process group has been initialized"""
"""Return True if the distributed process group has been initialized."""
return _sd is not None
def get_rank() -> int:
"""Get the rank of the current process"""
"""Get the rank of the current process."""
return _sd.proc_rank if _sd is not None else 0
def get_world_size() -> int:
"""Get the total number of processes participating in the job"""
"""Get the total number of processes participating in the job."""
return _sd.world_size if _sd is not None else 1
def get_backend() -> str:
"""Get the backend str"""
"""Get the backend str."""
assert _sd is not None, "please call init_process_group first"
return _sd.backend if _sd is not None else None
def get_py_server_addr() -> Tuple[str, int]:
"""Get master_ip and port of python XML RPC server"""
"""Get master_ip and port of python XML RPC server."""
assert _sd is not None, "please call init_process_group first"
return _sd.master_ip, _sd.py_server_port
def get_mm_server_addr() -> Tuple[str, int]:
"""Get master_ip and port of C++ mm_server"""
"""Get master_ip and port of C++ mm_server."""
assert _sd is not None, "please call init_process_group first"
return _sd.master_ip, _sd.mm_server_port
def get_client() -> Client:
"""Get client of python XML RPC server"""
"""Get client of python XML RPC server."""
assert _sd is not None, "please call init_process_group first"
return _sd.client
def new_group(proc_ranks: List[int]) -> Group:
"""Build a subgroup containing certain ranks"""
"""Build a subgroup containing certain ranks."""
return Group(proc_ranks)
def group_barrier(group: Optional[Group] = WORLD) -> None:
"""Block until all ranks in the group reach this barrier"""
"""Block until all ranks in the group reach this barrier."""
assert isinstance(group, Group)
_sd.client.group_barrier(group.key, group.size)
......@@ -17,11 +17,112 @@ import numpy as np
from megengine.autodiff.grad_manager import GradManager, get_backwarding_grad_manager
from megengine.device import get_default_device, get_device_count
from ..functional.param_pack import get_offsets, pack_allreduce_split
from ..core.ops.builtin import ParamPackConcat, ParamPackSplit
from ..core.tensor.core import apply
from ..functional.utils import copy
from ..tensor import Tensor
from ..utils.future import Future
from .functional import all_reduce_sum, broadcast
from .group import WORLD, group_barrier, is_distributed
from .group import WORLD, Group, group_barrier, is_distributed
def param_pack_split(inp: Tensor, offsets: list, shapes: list):
r"""
Splits the input tensor into a list of tensors as the given offsets and shapes describe,
only used for ``parampack``.
:param inp: input tensor.
:param offsets: offsets of outputs, length of `2 * n`,
where n is the number of tensors you want to split,
format `[begin0, end0, begin1, end1]`.
:param shapes: tensor shapes of outputs.
:return: split tensors.
Examples:
.. testcode::
import numpy as np
from megengine import tensor
from megengine.distributed.helper import param_pack_split
a = tensor(np.ones((10,), np.int32))
b, c = param_pack_split(a, [0, 1, 1, 10], [(1,), (3, 3)])
print(b.numpy())
print(c.numpy())
Outputs:
.. testoutput::
[1]
[[1 1 1]
[1 1 1]
[1 1 1]]
"""
op = ParamPackSplit()
op.offsets = offsets
op.shapes = shapes
return apply(op, inp)
def param_pack_concat(inps: list, offsets: Tensor, offsets_val: list):
r"""
Returns concatenated tensor, only used for ``parampack``.
:param inps: input tensors.
:param offsets: device value of offsets.
:param offsets_val: offsets of inputs, length of `2 * n`,
format `[begin0, end0, begin1, end1]`.
:return: concatenated tensor.
Examples:
.. testcode::
import numpy as np
from megengine import tensor
from megengine.distributed.helper import param_pack_concat
a = tensor(np.ones((1,), np.int32))
b = tensor(np.ones((3, 3), np.int32))
offsets_val = [0, 1, 1, 10]
offsets = tensor(offsets_val, np.int32)
c = param_pack_concat([a, b], offsets, offsets_val)
print(c.numpy())
Outputs:
.. testoutput::
[1 1 1 1 1 1 1 1 1 1]
"""
op = ParamPackConcat()
op.offsets = offsets_val
return apply(op, *inps, offsets)[0]
def get_offsets(shapes):
offsets = []
offset = 0
for shape in shapes:
offsets.append(offset)
offset += int(np.prod(shape))
offsets.append(offset)
return offsets
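For reference, the accumulation done by get_offsets, repeated as a standalone snippet for the shapes used in the param_pack examples above:

import numpy as np

shapes = [(1,), (3, 3)]
offsets, offset = [], 0
for shape in shapes:
    offsets.append(offset)           # begin of this tensor in the packed buffer
    offset += int(np.prod(shape))
    offsets.append(offset)           # end of this tensor in the packed buffer
print(offsets)  # [0, 1, 1, 10], matching offsets_val in the examples above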
def pack_allreduce_split(pack_list, shapes, group, reduce_method):
offsets_val = get_offsets(shapes)
offsets = Tensor(offsets_val)
packed_grads = param_pack_concat(pack_list, offsets, offsets_val)
packed_grads = all_reduce_sum(packed_grads, group, group.comp_node)
if reduce_method == "mean":
packed_grads /= group.size
grads = param_pack_split(packed_grads, offsets_val, shapes)
return grads
class TensorFuture(Future):
......@@ -54,28 +155,43 @@ def synchronized(func: Callable):
return wrapper
def get_device_count_by_fork(device_type: str):
q = mp.Queue()
def _get_device_count_worker(queue, device_type):
num = get_device_count(device_type)
queue.put(num)
def worker(queue):
num = get_device_count(device_type)
queue.put(num)
p = mp.Process(target=worker, args=(q,))
def get_device_count_by_fork(device_type: str):
"""Get device count in fork thread.
See https://stackoverflow.com/questions/22950047/cuda-initialization-error-after-fork
for more information.
"""
q = mp.Queue()
p = mp.Process(target=_get_device_count_worker, args=(q, device_type))
p.start()
p.join()
return q.get()
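A usage sketch; the import path is an assumption inferred from this diff (the helper sits next to the other functions shown here) and may differ in your build:

from megengine.distributed.helper import get_device_count_by_fork  # assumed path

# the count is queried in a forked child process, so the parent process
# never initializes CUDA (see the Stack Overflow link cited above)
n_gpus = get_device_count_by_fork("gpu")
print("visible GPUs:", n_gpus)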
def bcast_list_(params, group):
for p in params:
p._reset(broadcast(p, group))
def bcast_list_(inps: list, group: Group = WORLD):
"""Broadcast tensors between given group.
:param inps: input tensors.
:param group: communication group.
"""
for inp in inps:
inp._reset(broadcast(inp, group))
class AllreduceCallback:
def __init__(self, reduce_method, group=WORLD):
"""Allreduce Callback with tensor fusion optimization.
:param reduce_method: the method to reduce gradients.
:param group: communication group.
"""
def __init__(self, reduce_method: str, group: Group = WORLD):
reduce_method = reduce_method.lower()
assert reduce_method in ["sum", "mean"]
assert reduce_method in ["sum", "mean"], "reduce_method should be sum or mean"
self._reduce_method = reduce_method
self._group = group
self._marked_gm = WeakSet()
......@@ -88,6 +204,7 @@ class AllreduceCallback:
self._futures_dict = dict()
self._packing_list = defaultdict(list)
self._packing_size = defaultdict(int)
self._grad_origin_device = dict()
def _pack(self, dtype):
grad_list = [self._gradients_dict[p] for p in self._packing_list[dtype]]
......@@ -109,6 +226,7 @@ class AllreduceCallback:
self._params.append(param)
self._futures_dict[param] = TensorFuture(ack=False)
self._gradients_dict[param] = grad
self._grad_origin_device[param] = str(grad.device)
dtype_str = str(np.dtype(param.dtype))
dtype_size = np.dtype(param.dtype).itemsize
......@@ -123,6 +241,7 @@ class AllreduceCallback:
self._pack(dtype)
for param in self._params:
grad = self._gradients_dict[param]
grad = copy(grad, self._grad_origin_device[param])
self._futures_dict[param].set(grad)
self._reset()
......
......@@ -15,7 +15,7 @@ from .util import get_free_ports
def _run_wrapped(func, master_ip, port, world_size, rank, dev, args, kwargs):
"""init distributed process group and run wrapped function"""
"""Init distributed process group and run wrapped function."""
init_process_group(
master_ip=master_ip, port=port, world_size=world_size, rank=rank, device=dev
)
......@@ -23,7 +23,7 @@ def _run_wrapped(func, master_ip, port, world_size, rank, dev, args, kwargs):
def launcher(func):
"""decorator for launching multiple processes in single-machine multi-gpu training"""
"""Decorator for launching multiple processes in single-machine multi-gpu training."""
n_gpus = get_device_count_by_fork("gpu")
......
......@@ -21,6 +21,12 @@ from .util import get_free_ports
class Methods:
"""Distributed Server Method.
Used for exchanging information between distributed nodes.
:param mm_server_port: multiple machine rpc server port.
"""
def __init__(self, mm_server_port):
self.lock = threading.Lock()
self.mm_server_port = mm_server_port
......@@ -31,51 +37,65 @@ class Methods:
self.dict_barrier_event = defaultdict(threading.Event)
def connect(self):
"""Method for checking connection success."""
return True
def get_mm_server_port(self):
"""Get multiple machine rpc server port."""
return self.mm_server_port
def set_is_grad(self, rank_peer, is_grad):
def set_is_grad(self, key, is_grad):
"""Mark send/recv need gradiants by key.
:param key: key to match send/recv op.
:param is_grad: whether this op need grad.
"""
with self.lock:
future = self.dict_is_grad[rank_peer]
future = self.dict_is_grad[key]
future.set(is_grad)
return True
def check_is_grad(self, rank_peer):
def check_is_grad(self, key):
"""Check whether send/recv need gradiants.
:param key: key to match send/recv op.
"""
with self.lock:
future = self.dict_is_grad[rank_peer]
future = self.dict_is_grad[key]
ret = future.get()
with self.lock:
del self.dict_is_grad[rank_peer]
del self.dict_is_grad[key]
return ret
def set_remote_tracer(self, rank_peer, tracer_set):
def set_remote_tracer(self, key, tracer_set):
"""Set tracer dict for tracing send/recv op.
:param key: key to match send/recv op.
:param tracer_set: valid tracer set.
"""
with self.lock:
future = self.dict_remote_tracer[rank_peer]
future = self.dict_remote_tracer[key]
future.set(tracer_set)
return True
def check_remote_tracer(self, rank_peer):
def check_remote_tracer(self, key):
"""Get tracer dict for send/recv op.
:param key: key to match send/recv op.
"""
with self.lock:
future = self.dict_remote_tracer[rank_peer]
future = self.dict_remote_tracer[key]
ret = future.get()
with self.lock:
del self.dict_remote_tracer[rank_peer]
del self.dict_remote_tracer[key]
return ret
def set_pack_list(self, key, pack_list):
with self.lock:
future = self.dict_pack_list[key]
future.set(pack_list)
return True
def get_pack_list(self, key):
with self.lock:
future = self.dict_pack_list[key]
return future.get()
def group_barrier(self, key, size):
"""A barrier wait for all group member.
:param key: group key to match each other.
:param size: group size.
"""
with self.lock:
self.dict_barrier_counter[key] += 1
counter = self.dict_barrier_counter[key]
......@@ -94,12 +114,23 @@ class ThreadXMLRPCServer(ThreadingMixIn, SimpleXMLRPCServer):
def start_server(py_server_port, mm_server_port):
"""Start python distributed server and multiple machine server.
:param py_server_port: python server port.
:param mm_server_port: multiple machine server port.
"""
server = ThreadXMLRPCServer(("0.0.0.0", py_server_port), logRequests=False)
server.register_instance(Methods(mm_server_port))
server.serve_forever()
class Server:
"""Distributed Server for distributed training.
It should be running on the master node.
:param port: python server port.
"""
def __init__(self, port):
self.py_server_port = get_free_ports(1)[0] if port == 0 else port
self.mm_server_port = create_mm_server("0.0.0.0", 0)
......@@ -112,12 +143,19 @@ class Server:
class Client:
"""Distributed Client for distributed training.
:param master_ip: ip address of master node.
:param port: port of server at master node.
"""
def __init__(self, master_ip, port):
self.master_ip = master_ip
self.port = port
self.connect()
def connect(self):
"""Check connection success."""
while True:
try:
self.proxy = ServerProxy(
......@@ -129,25 +167,43 @@ class Client:
time.sleep(1)
def get_mm_server_port(self):
"""Get multiple machine server port."""
return self.proxy.get_mm_server_port()
def set_is_grad(self, rank_peer, is_grad):
self.proxy.set_is_grad(rank_peer, is_grad)
def check_is_grad(self, rank_peer):
return self.proxy.check_is_grad(rank_peer)
def set_remote_tracer(self, rank_peer, tracer_set):
self.proxy.set_remote_tracer(rank_peer, tracer_set)
def check_remote_tracer(self, rank_peer):
return self.proxy.check_remote_tracer(rank_peer)
def set_pack_list(self, key, pack_list):
self.proxy.set_pack_list(key, pack_list)
def get_pack_list(self, key):
return self.proxy.get_pack_list(key)
def set_is_grad(self, key, is_grad):
"""Mark send/recv need gradiants by key.
:param key: key to match send/recv op.
:param is_grad: whether this op need grad.
"""
self.proxy.set_is_grad(key, is_grad)
def check_is_grad(self, key):
"""Check whether send/recv need gradiants.
:param key: key to match send/recv op.
"""
return self.proxy.check_is_grad(key)
def set_remote_tracer(self, key, tracer_set):
"""Set tracer dict for tracing send/recv op.
:param key: key to match send/recv op.
:param tracer_set: valid tracer set.
"""
self.proxy.set_remote_tracer(key, tracer_set)
def check_remote_tracer(self, key):
"""Get tracer dict for send/recv op.
:param key: key to match send/recv op.
"""
return self.proxy.check_remote_tracer(key)
def group_barrier(self, key, size):
"""A barrier wait for all group member.
:param key: group key to match each other.
:param size: group size.
"""
self.proxy.group_barrier(key, size)
......@@ -8,13 +8,10 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# pylint: disable=redefined-builtin
from .elemwise import *
from .graph import add_update
from .loss import *
from .math import *
from .nn import *
from .quantized import conv_bias_activation
from .tensor import *
from .utils import accuracy, copy
from .utils import *
from . import distributed # isort:skip
......
......@@ -26,14 +26,14 @@ def set_conv_execution_strategy(option: str):
Available values:
* 'HEURISTIC' uses heuristic to choose the fastest algorithm.
* 'PROFILE' runs possible algorithms on real device to find the best.
* 'PROFILE_HEURISTIC' uses profile result and heuristic to choose the fastest algorithm.
* 'PROFILE_REPRODUCIBLE' uses the fastest of profile result that is also reproducible.
* 'PROFILE' runs possible algorithms on real device to find the best one.
* 'PROFILE_HEURISTIC' uses profiling result and heuristic to choose the fastest algorithm.
* 'PROFILE_REPRODUCIBLE' uses the fastest of profiling result that is also reproducible.
* 'HEURISTIC_REPRODUCIBLE' uses heuristic to choose the fastest algorithm that is also reproducible.
The default strategy is 'HEURISTIC'.
It can also be set through the environmental variable 'MEGENGINE_CONV_EXECUTION_STRATEGY'.
It can also be set through the environment variable 'MEGENGINE_CONV_EXECUTION_STRATEGY'.
"""
valid_option = (
"HEURISTIC",
......
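Besides calling the function, the docstring above notes that the strategy can also come from the environment; a minimal sketch of that route:

import os

# value must be one of the strings listed in the docstring above
os.environ["MEGENGINE_CONV_EXECUTION_STRATEGY"] = "PROFILE_HEURISTIC"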
......@@ -26,23 +26,22 @@ __all__ = [
"acosh",
"atanh",
"ceil",
"clamp",
"clip",
"cos",
"cosh",
"div",
"eq",
"equal",
"exp",
"expm1",
"fast_tanh",
"floor",
"floor_div",
"gt",
"ge",
"greater",
"greater_equal",
"hswish",
"hsigmoid",
"left_shift",
"lt",
"le",
"less",
"less_equal",
"log",
"log1p",
"logical_and",
......@@ -54,7 +53,7 @@ __all__ = [
"mod",
"mul",
"neg",
"ne",
"not_equal",
"pow",
"relu",
"relu6",
......@@ -88,13 +87,6 @@ def _elwise(*args, mode):
return result
def _logical(*args, mode):
op = builtin.CondExecPredLogical(mode=mode)
args = utils.convert_inputs(*args)
(result,) = apply(op, *args)
return result
def _elemwise_multi_type(*args, mode, **kwargs):
op = builtin.ElemwiseMultiType(mode=mode, **kwargs)
args = utils.convert_inputs(*args)
......@@ -106,9 +98,10 @@ def _elemwise_multi_type(*args, mode, **kwargs):
def add(x, y):
"""Element-wise addition.
"""Element-wise `addition`.
At least one operand should be tensor.
Same for sub/mul/div/floor_div/pow/mod/atan2/eq/ne/lt/le/gt/ge/maximum/minmium.
Same for sub/mul/div/floor_div/pow/mod/atan2/equal/not_equal/less/less_equal/greater/greater_equal/maximum/minimum.
:param x: input tensor.
:return: computed tensor.
......@@ -138,68 +131,68 @@ def add(x, y):
def sub(x, y):
"""Element-wise subtraction."""
"""Element-wise `subtraction`."""
return _elwise(x, y, mode="sub")
def mul(x, y):
"""Element-wise multiplication."""
"""Element-wise `multiplication`."""
return _elwise(x, y, mode="mul")
def div(x, y):
"""Element-wise (x / y)."""
"""Element-wise `(x / y)`."""
return _elwise(x, y, mode="true_div")
def floor_div(x, y):
"""Element-wise floor(x / y)."""
"""Element-wise `floor(x / y)`."""
return _elwise(x, y, mode="floor_divide")
def neg(x):
"""Element-wise negation."""
"""Element-wise `negation`."""
return _elwise(x, mode="negate")
def pow(x, y):
"""Element-wise power."""
"""Element-wise `power`."""
return _elwise(x, y, mode="pow")
def mod(x, y):
"""Element-wise remainder of division."""
"""Element-wise `remainder of division`."""
return _elwise(x, y, mode="mod")
def abs(x):
"""Element-wise absolute value."""
"""Element-wise `absolute value`."""
return _elwise(x, mode="abs")
def exp(x):
"""Element-wise exponential."""
"""Element-wise `exponential`."""
return _elwise(x, mode="exp")
def expm1(x):
"""Element-wise exp(x)-1."""
"""Element-wise `exp(x)-1`."""
return _elwise(x, mode="expm1")
def log(x):
"""Element-wise logarithm (base `e`)."""
"""Element-wise `logarithm (base e)`."""
return _elwise(x, mode="log")
def log1p(x):
"""Element-wise log(x+1) (base `e`)."""
"""Element-wise `log(x+1) (base e)`."""
return _elwise(x, mode="log1p")
def sqrt(x: Tensor) -> Tensor:
"""Element-wise sqrt.
For negative input value, return ``NaN``.
"""Element-wise `sqrt`.
Returns ``NaN`` for negative input value.
:param x: input tensor.
:return: computed tensor.
......@@ -229,10 +222,10 @@ def sqrt(x: Tensor) -> Tensor:
def square(x: Tensor) -> Tensor:
"""
Return a new tensor with the square of the elements of input tensor.
Returns a new tensor with the square of the elements of input tensor.
:param inp: The input tensor
:return: The computed tensor
:param inp: input tensor.
:return: computed tensor.
Examples:
......@@ -258,27 +251,27 @@ def square(x: Tensor) -> Tensor:
def round(x):
"""Element-wise rounding to int."""
"""Element-wise `rounding to int`."""
return _elwise(x, mode="round")
def ceil(x):
"""Element-wise ceiling."""
"""Element-wise `ceiling`."""
return _elwise(x, mode="ceil")
def floor(x):
"""Element-wise floor."""
"""Element-wise `floor`."""
return _elwise(x, mode="floor")
def maximum(x, y):
"""Element-wise maximum of array elements."""
"""Element-wise `maximum of array elements`."""
return _elwise(x, y, mode="max")
def minimum(x, y):
"""Element-wise minimum of array elements."""
"""Element-wise `minimum of array elements`."""
return _elwise(x, y, mode="min")
......@@ -286,7 +279,7 @@ def minimum(x, y):
def cos(x):
"""Element-wise cosine.
"""Element-wise `cosine`.
:param x: input tensor.
:return: computed tensor.
......@@ -315,80 +308,71 @@ def cos(x):
def sin(x):
"""Element-wise sine."""
"""Element-wise `sine`."""
return _elwise(x, mode="sin")
def tan(x):
"""Element-wise tangent."""
"""Element-wise `tangent`."""
return sin(x) / cos(x)
def acos(x):
"""Element-wise inverse cosine."""
"""Element-wise `inverse cosine`."""
return _elwise(x, mode="acos")
def asin(x):
"""Element-wise inverse sine."""
"""Element-wise `inverse sine`."""
return _elwise(x, mode="asin")
def atan(x):
"""Element-wise inverse tangent."""
"""Element-wise `inverse tangent`."""
return _elwise(x, 1, mode="atan2")
def atan2(y, x):
"""Element-wise 2-argument arctangent."""
"""Element-wise `2-argument arctangent`."""
return _elwise(y, x, mode="atan2")
def cosh(x):
r"""Element-wise hyperbolic cosine."""
r"""Element-wise `hyperbolic cosine`."""
return 0.5 * (exp(x) + exp(-x))
def sinh(x):
r"""Element-wise hyperbolic sine."""
r"""Element-wise `hyperbolic sine`."""
u = expm1(x)
return 0.5 * u / (u + 1) * (u + 2)
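The expm1-based formula above is an exact rewrite of sinh: with u = exp(x) - 1, 0.5 * u / (u + 1) * (u + 2) = 0.5 * (exp(x) - exp(-x)). A quick standalone check:

import math

x = 0.3
u = math.expm1(x)                      # u = exp(x) - 1
approx = 0.5 * u / (u + 1) * (u + 2)   # equals 0.5 * (exp(x) - exp(-x))
print(approx, math.sinh(x))            # both print ~0.30452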
def tanh(x):
r"""Element-wise hyperbolic tangent."""
r"""Element-wise `hyperbolic tangent`."""
return _elwise(x, mode="tanh")
def asinh(x):
r"""Element-wise inverse hyperbolic sine."""
r"""Element-wise `inverse hyperbolic sine`."""
return log(x + (x ** 2 + 1) ** 0.5)
def acosh(x):
r"""Element-wise inverse hyperbolic cosine."""
r"""Element-wise `inverse hyperbolic cosine`."""
return log(x + (x ** 2 - 1) ** 0.5)
def atanh(x):
r"""Element-wise inverse hyperbolic tangent."""
r"""Element-wise `inverse hyperbolic tangent`."""
return log1p(2 * x / (1 - x)) / 2
def fast_tanh(x):
r"""Element-wise fast tanh; this is an approximation:
.. math::
\text{fast_tanh}(x) = x * (27. + x * x) / (27. + 9. * x * x)
"""
return _elwise(x, mode="fast_tanh")
# bit-twiddling functions
def left_shift(x, y):
"""Element-wise bitwise binary: x << y.
"""Element-wise `bitwise binary: x << y`.
:param x: input tensor, should be int.
:param y: how many bits to be left-shifted.
......@@ -418,7 +402,7 @@ def left_shift(x, y):
def right_shift(x, y):
"""Element-wise bitwise binary: x >> y."""
"""Element-wise `bitwise binary: x >> y`."""
return _elwise(x, y, mode="shr")
......@@ -426,30 +410,30 @@ def right_shift(x, y):
def logical_and(x, y):
"""Element-wise logical and: x && y."""
"""Element-wise `logical and: x && y`."""
return _elwise(x, y, mode="AND")
def logical_not(x):
"""Element-wise logical not: ~x."""
"""Element-wise `logical not: ~x`."""
return _elwise(x, mode="NOT")
def logical_or(x, y):
"""Element-wise logical or: x || y."""
"""Element-wise `logical or: x || y`."""
return _elwise(x, y, mode="OR")
def logical_xor(x, y):
"""Element-wise logical xor: x ^ y."""
"""Element-wise `logical xor: x ^ y`."""
return _elwise(x, y, mode="XOR")
# comparison functions
def eq(x, y):
"""Element-wise (x == y).
def equal(x, y):
"""Element-wise `(x == y)`.
:param x: input tensor 1.
:param y: input tensor 2.
......@@ -465,7 +449,7 @@ def eq(x, y):
x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3))
y = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3))
out = F.eq(x, y)
out = F.equal(x, y)
print(out.numpy())
Outputs:
......@@ -479,28 +463,28 @@ def eq(x, y):
return _elwise(x, y, mode="eq")
def ne(x, y):
"""Element-wise (x != y)."""
def not_equal(x, y):
"""Element-wise `(x != y)`."""
return x != y
def lt(x, y):
"""Element-wise (x < y)."""
def less(x, y):
"""Element-wise `(x < y)`."""
return _elwise(x, y, mode="lt")
def le(x, y):
"""Element-wise (x <= y)."""
def less_equal(x, y):
"""Element-wise `(x <= y)`."""
return _elwise(x, y, mode="leq")
def gt(x, y):
"""Element-wise (x > y)."""
def greater(x, y):
"""Element-wise `(x > y)`."""
return _elwise(y, x, mode="lt")
def ge(x, y):
"""Element-wise (x >= y)."""
def greater_equal(x, y):
"""Element-wise `(x >= y)`."""
return _elwise(y, x, mode="leq")
......@@ -508,7 +492,7 @@ def ge(x, y):
def hswish(x):
"""Element-wise x * relu6(x + 3) / 6.
"""Element-wise `x * relu6(x + 3) / 6`.
:param x: input tensor.
:return: computed tensor.
......@@ -534,7 +518,7 @@ def hswish(x):
def hsigmoid(x):
"""Element-wise relu6(x + 3) / 6."""
"""Element-wise `relu6(x + 3) / 6`."""
return relu6(x + 3) / 6
......@@ -544,16 +528,16 @@ def relu(x):
def relu6(x):
"""Element-wise min(max(x, 0), 6)."""
"""Element-wise `min(max(x, 0), 6)`."""
return minimum(maximum(x, 0), 6)
def sigmoid(x):
"""Element-wise 1 / ( 1 + exp( -x ) )."""
"""Element-wise `1 / ( 1 + exp( -x ) )`."""
return _elwise(x, mode="sigmoid")
def clamp(x: Tensor, lower=None, upper=None) -> Tensor:
def clip(x: Tensor, lower=None, upper=None) -> Tensor:
r"""Clamps all elements in input tensor into the range `[` :attr:`lower`, :attr:`upper` `]` and returns
a resulting tensor:
......@@ -578,9 +562,9 @@ def clamp(x: Tensor, lower=None, upper=None) -> Tensor:
import megengine.functional as F
a = tensor(np.arange(5).astype(np.int32))
print(F.clamp(a, 2, 4).numpy())
print(F.clamp(a, lower=3).numpy())
print(F.clamp(a, upper=3).numpy())
print(F.clip(a, 2, 4).numpy())
print(F.clip(a, lower=3).numpy())
print(F.clip(a, upper=3).numpy())
Outputs:
......@@ -596,7 +580,7 @@ def clamp(x: Tensor, lower=None, upper=None) -> Tensor:
), "At least one of 'lower' or 'upper' must not be None"
if lower is not None:
if upper is not None:
assert lower <= upper, "clamp lower bound is bigger that upper bound"
assert lower <= upper, "clip lower bound is bigger that upper bound"
return minimum(maximum(x, lower), upper)
else:
return maximum(x, lower)
......
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# pylint: disable=too-many-lines
from typing import List
from ..tensor import Tensor
def cambricon_subgraph(
inputs: List[Tensor], data: bytes, symbol: str, tensor_dim_mutable: bool,
) -> List[Tensor]:
"""Loads a serialized Cambricon subgraph (i.e. cnrtModel_t) and
execute the operations defined in the subgraph.
:param inputs: list of input tensors of the subgraph.
:param data: the serialized subgraph.
:param symbol: the name of the function in the subgraph.
The function corresponds to a cnmlFusionOp
which is added to the cnmlModel_t/cnrtModel_t.
:param tensor_dim_mutable: whether the input tensors' shapes are mutable
in cnrtModel_t.
"""
raise NotImplementedError
def extern_opr_subgraph(
inputs, output_shapes: List[tuple], dump_name: str, dump_data: bytes,
) -> List[Tensor]:
"""Loads a serialized extern opr subgraph and fake execute the operator.
:param inputs: tensor or list of input tensors.
:param output_shapes: the output shapes.
:param dump_name: the serialized subgraph name.
:param dump_data: the serialized subgraph.
:return: list of tensors.
"""
raise NotImplementedError
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import collections
from typing import Iterable, Optional, Union
from ..tensor import Tensor
def add_update(
dest: Tensor,
delta: Tensor,
*,
alpha: Union[Tensor, float, int] = 1.0,
beta: Union[Tensor, float, int] = 1.0,
bias: Union[Tensor, float, int] = 0.0
):
r"""Modify ``dest`` inplace as follows:
.. math::
dest = alpha * dest + beta * delta + bias
:param dest: input data that will be inplace modified.
:param delta: update value that will be added to ``dest``.
:param alpha: weight ratio of ``dest``. Default: 1.0
:param beta: weight ratio of ``delta``. Default: 1.0
:param bias: bias value appended to the result. Default: 0.0
"""
if beta is not None and beta != 1.0:
delta = delta * beta
if bias is not None and bias != 0.0:
delta = delta + bias
if alpha is not None and alpha != 1.0:
dest *= alpha
dest += delta
return dest
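A small usage sketch of add_update as defined above; the import path is an assumption based on the file shown in this diff (functional/graph.py) and may not be re-exported after this change:

import numpy as np
from megengine import tensor
from megengine.functional.graph import add_update  # assumed path

dest = tensor(np.ones(3, dtype=np.float32))
delta = tensor(np.full(3, 2.0, dtype=np.float32))
add_update(dest, delta, alpha=0.9, beta=1.0, bias=0.1)
print(dest.numpy())  # 0.9 * 1 + 1.0 * 2 + 0.1 = 3.0 for every element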
......@@ -10,14 +10,14 @@ import numpy as np
from ..core.tensor.utils import make_shape_tuple
from ..tensor import Tensor
from .elemwise import abs, eq, exp, log, maximum, pow, relu
from .nn import indexing_one_hot
from .elemwise import abs, equal, exp, log, maximum, pow, relu
from .nn import indexing_one_hot, logsigmoid, logsumexp
from .tensor import where
__all__ = [
"l1_loss",
"square_loss",
"cross_entropy_with_softmax",
"cross_entropy",
"binary_cross_entropy",
"hinge_loss",
]
......@@ -55,7 +55,7 @@ def l1_loss(pred: Tensor, label: Tensor) -> Tensor:
ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32))
tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32))
loss = F.l1_loss(ipt, tgt)
loss = F.nn.l1_loss(ipt, tgt)
print(loss.numpy())
Outputs:
......@@ -106,7 +106,7 @@ def square_loss(pred: Tensor, label: Tensor) -> Tensor:
ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32))
tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32))
loss = F.square_loss(ipt, tgt)
loss = F.nn.square_loss(ipt, tgt)
print(loss.numpy())
Outputs:
......@@ -120,10 +120,16 @@ def square_loss(pred: Tensor, label: Tensor) -> Tensor:
return (diff ** 2).mean()
def cross_entropy_with_softmax(
pred: Tensor, label: Tensor, axis: int = 1, label_smooth: float = 0
def cross_entropy(
pred: Tensor,
label: Tensor,
axis: int = 1,
with_logits: bool = True,
label_smooth: float = 0,
) -> Tensor:
r"""Returns loss after applying :func:`~.softmax` + :func:`~.cross_entropy`.
r"""Compute the multi-class cross entropy loss (using logits by default).
By default, prediction is assumed to be logits, whose softmax gives probabilities.
It has better numerical stability compared with sequential calls to :func:`~.softmax` and :func:`~.cross_entropy`.
......@@ -132,11 +138,12 @@ def cross_entropy_with_softmax(
.. math:: y^{LS}_{k}=y_{k}\left(1-\alpha\right)+\alpha/K
where :math:`y^{LS}` and :math:`y` are new label distribution and origin label distribution respectively.
k is the index of label distribution. :math:`\alpha` is label_smooth and :math:`K` is the number of classes.
k is the index of label distribution. :math:`\alpha` is ``label_smooth`` and :math:`K` is the number of classes.
:param pred: input tensor representing the predicted probability.
:param label: input tensor representing the classification label.
:param axis: an axis along which softmax will be applied. Default: 1
:param with_logits: whether to apply softmax first. Default: True
:param label_smooth: a label smoothing of parameter that can re-distribute target distribution. Default: 0
:return: loss value.
......@@ -150,9 +157,9 @@ def cross_entropy_with_softmax(
data_shape = (1, 2)
label_shape = (1, )
pred = tensor(np.array([0.5, 0.5], dtype=np.float32).reshape(data_shape))
pred = tensor(np.array([0, 0], dtype=np.float32).reshape(data_shape))
label = tensor(np.ones(label_shape, dtype=np.int32))
loss = F.cross_entropy_with_softmax(pred, label)
loss = F.nn.cross_entropy(pred, label)
print(loss.numpy())
Outputs:
......@@ -170,26 +177,41 @@ def cross_entropy_with_softmax(
)
num_classes = pred.shape[axis]
no_label_smooth = (
label_smooth is None or type(label_smooth) in (int, float) and label_smooth == 0
)
if not with_logits:
if no_label_smooth:
return -log(indexing_one_hot(pred, label, axis)).mean()
pred = log(pred)
return (
label_smooth * pred.mean()
- (1 - label_smooth) * indexing_one_hot(pred, label, axis).mean()
)
# Denominator of the softmax
offset = pred.max(axis=axis, keepdims=True).detach()
pred = pred - offset
down = exp(pred).sum(axis=axis, keepdims=True)
down = logsumexp(pred, axis=axis, keepdims=True)
up = indexing_one_hot(pred, label, axis)
if label_smooth != 0:
if not no_label_smooth:
factor = label_smooth / num_classes
up = up * (1 - label_smooth) + pred.sum(axis=axis, keepdims=True) * factor
return (log(down) - up).mean()
return (down - up).mean()
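A standalone numpy sketch of the logits path above: the loss reduces to mean(logsumexp(pred) - pred[label]), with the per-row max subtracted for numerical stability (the helper name here is hypothetical, not MegEngine API):

import numpy as np

def cross_entropy_logits(pred, label):           # pred: (N, C) logits, label: (N,) ints
    m = pred.max(axis=1, keepdims=True)          # the `offset` in the code above
    lse = np.log(np.exp(pred - m).sum(axis=1)) + m[:, 0]   # logsumexp over classes
    picked = pred[np.arange(len(label)), label]  # what indexing_one_hot selects
    return (lse - picked).mean()

pred = np.array([[0.0, 0.0]], dtype=np.float32)
label = np.array([1])
print(cross_entropy_logits(pred, label))         # log(2) ~ 0.6931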
def binary_cross_entropy(pred: Tensor, label: Tensor) -> Tensor:
r"""Function that measures the Binary Cross Entropy between the target and the prediction.
def binary_cross_entropy(
pred: Tensor, label: Tensor, with_logits: bool = True
) -> Tensor:
r"""Compute the binary cross entropy loss (using logits by default).
By default, prediction is assumed to be logits, whose sigmoid gives probabilities.
:param pred: `(N, *)` where `*` means any number of additional dimensions.
:param pred: `(N, *)`, where `*` means any number of additional dimensions.
:param label: `(N, *)`, same shape as the input.
:param with_logits: bool, whether to apply sigmoid first. Default: True
:return: loss value.
Examples:
......@@ -200,9 +222,9 @@ def binary_cross_entropy(pred: Tensor, label: Tensor) -> Tensor:
from megengine import tensor
import megengine.functional as F
pred = tensor(np.array([0.5, 0.5], dtype=np.float32).reshape(1, 2))
pred = tensor(np.array([0, 0], dtype=np.float32).reshape(1, 2))
label = tensor(np.ones((1, 2), dtype=np.float32))
loss = F.binary_cross_entropy(pred, label)
loss = F.nn.binary_cross_entropy(pred, label)
print(loss.numpy())
Outputs:
......@@ -212,11 +234,15 @@ def binary_cross_entropy(pred: Tensor, label: Tensor) -> Tensor:
[0.6931]
"""
return -1.0 * (label * log(pred) + (1.0 - label) * log(1 - pred)).mean()
if not with_logits:
return -(label * log(pred) + (1 - label) * log(1 - pred)).mean()
# logsigmoid(pred) and logsigmoid(-pred) have a common sub-expression
# hopefully the backend would optimize this
return -(label * logsigmoid(pred) + (1 - label) * logsigmoid(-pred)).mean()
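The logits branch above relies on log(sigmoid(x)) = -log1p(exp(-x)); a standalone numpy sketch with a hypothetical helper name:

import numpy as np

def bce_with_logits(pred, label):
    log_sig_pos = -np.log1p(np.exp(-pred))   # log(sigmoid(pred))
    log_sig_neg = -np.log1p(np.exp(pred))    # log(sigmoid(-pred)) = log(1 - sigmoid(pred))
    return -(label * log_sig_pos + (1 - label) * log_sig_neg).mean()

pred = np.zeros((1, 2), dtype=np.float32)
label = np.ones((1, 2), dtype=np.float32)
print(bce_with_logits(pred, label))          # log(2) ~ 0.6931, matching the [0.6931] output above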
def hinge_loss(pred: Tensor, label: Tensor, norm: str = "L1") -> Tensor:
r"""Caculate the hinge loss which is often used in SVMs.
r"""Caculates the hinge loss which is often used in SVM.
The hinge loss can be described as:
......@@ -236,7 +262,7 @@ def hinge_loss(pred: Tensor, label: Tensor, norm: str = "L1") -> Tensor:
pred = tensor([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]], dtype="float32")
label = tensor([[1, -1, -1], [-1, 1, 1]], dtype="float32")
loss = F.hinge_loss(pred, label)
loss = F.nn.hinge_loss(pred, label)
print(loss.numpy())
Outputs:
......
......@@ -14,11 +14,12 @@ from typing import Optional, Sequence, Tuple, Union
from ..core.ops import builtin
from ..core.ops._internal import param_defs as P
from ..core.ops.special import Const
from ..core.tensor import utils
from ..core.tensor.core import apply
from ..core.tensor.core import TensorBase, TensorWrapperBase, apply
from ..tensor import Tensor
from .elemwise import clamp, exp, log, log1p
from .tensor import add_axis, remove_axis, reshape
from .elemwise import clip, exp, log, log1p
from .tensor import reshape, squeeze
__all__ = [
"argmax",
......@@ -45,7 +46,7 @@ def isnan(inp: Tensor) -> Tensor:
r"""Returns a new tensor representing if each element is ``NaN`` or not.
:param inp: input tensor.
:return: a new tensor representing if each element in inp is NaN or not.
:return: result tensor.
Examples:
......@@ -71,7 +72,7 @@ def isinf(inp: Tensor) -> Tensor:
r"""Returns a new tensor representing if each element is ``Inf`` or not.
:param inp: input tensor.
:return: a new tensor representing if each element in inp is Inf or not.
:return: result tensor.
Examples:
......@@ -84,7 +85,7 @@ def isinf(inp: Tensor) -> Tensor:
print(F.isinf(x).numpy())
Outputs:
.. testoutput::
[False True False]
......@@ -108,7 +109,7 @@ def sign(inp: Tensor):
x = tensor([1, -1, 0])
print(F.sign(x).numpy())
Outputs:
.. testoutput::
......@@ -128,7 +129,7 @@ def sum(
reduce over all of them.
:param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced.
:param axis: dimension to reduce. If None, all dimensions will be reduced.
Default: None
:param keepdims: whether the output tensor has axis retained or not.
Default: False
......@@ -163,7 +164,7 @@ def prod(
reduce over all of them.
:param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor.
......@@ -199,7 +200,7 @@ def mean(
reduce over all of them.
:param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor.
......@@ -235,7 +236,7 @@ def var(
reduce over all of them.
:param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor.
......@@ -275,7 +276,7 @@ def std(
reduce over all of them.
:param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor.
......@@ -310,7 +311,7 @@ def min(
reduce over all of them.
:param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor.
......@@ -346,7 +347,7 @@ def max(
reduce over all of them.
:param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor.
......@@ -373,18 +374,14 @@ def max(
def norm(
inp: Tensor,
p: int = 2,
axis: Optional[Union[int, Sequence[int]]] = None,
keepdims=False,
inp: Tensor, ord: float = None, axis: int = None, keepdims=False,
):
"""Calculates ``p``-norm of input tensor along
given axis. If axis is a list of dimensions,
reduce over all of them.
given axis.
:param inp: input tensor.
:param p: power of value applied to inp. Default: 2
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param ord: order of the norm (the power applied to the absolute values). Default: 2
:param axis: dimension to reduce. If None, input must be a vector. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor.
......@@ -396,7 +393,7 @@ def norm(
from megengine import tensor
import megengine.functional as F
x = tensor(np.arange(-3, 3, dtype=np.float32).reshape(2,3))
x = tensor(np.arange(-3, 3, dtype=np.float32))
out = F.norm(x)
print(out.numpy())
......@@ -407,13 +404,18 @@ def norm(
[4.3589]
"""
if p == 0:
if axis is None:
if inp.ndim != 1:
raise TypeError("axis is required unless input is a vector")
if ord is None:
ord = 2
if ord == 0:
return sum(inp != 0, axis=axis, keepdims=keepdims)
if p == math.inf:
if ord == math.inf:
return max(abs(inp))
if p == -math.inf:
if ord == -math.inf:
return min(abs(inp))
return sum(abs(inp) ** p, axis=axis, keepdims=keepdims) ** (1.0 / p)
return sum(abs(inp) ** ord, axis=axis, keepdims=keepdims) ** (1.0 / ord)
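The dispatch on ord above mirrors the usual vector-norm definitions. A NumPy sketch under the same conventions (illustrative helper, not the MegEngine implementation):

import numpy as np

def vector_norm(x, ord=None, axis=None, keepdims=False):
    if ord is None:
        ord = 2
    if ord == 0:        # count of non-zero entries
        return (x != 0).sum(axis=axis, keepdims=keepdims)
    if ord == np.inf:   # largest absolute value
        return np.abs(x).max()
    if ord == -np.inf:  # smallest absolute value
        return np.abs(x).min()
    return (np.abs(x) ** ord).sum(axis=axis, keepdims=keepdims) ** (1.0 / ord)

x = np.arange(-3, 3, dtype=np.float32)
print(vector_norm(x))      # ~4.3589, as in the docstring example
print(np.linalg.norm(x))   # reference value from NumPy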
def argmin(
......@@ -426,7 +428,7 @@ def argmin(
reduce over all of them.
:param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor.
......@@ -458,7 +460,7 @@ def argmin(
(inp,) = apply(op, inp)
if not keepdims:
inp = remove_axis(inp, ai)
inp = squeeze(inp, ai)
return inp
......@@ -470,7 +472,7 @@ def argmin(
op = builtin.Argmin(axis=axis)
(result,) = apply(op, inp)
if not keepdims:
result = remove_axis(result, axis)
result = squeeze(result, axis)
return result
......@@ -484,7 +486,7 @@ def argmax(
reduce over all of them.
:param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor.
......@@ -516,7 +518,7 @@ def argmax(
(inp,) = apply(op, inp)
if not keepdims:
inp = remove_axis(inp, ai)
inp = squeeze(inp, ai)
return inp
......@@ -528,45 +530,40 @@ def argmax(
op = builtin.Argmax(axis=axis)
(result,) = apply(op, inp)
if not keepdims:
result = remove_axis(result, axis)
result = squeeze(result, axis)
return result
def normalize(
inp: Tensor,
p: int = 2,
axis: Optional[Union[int, Sequence[int]]] = None,
eps: float = 1e-12,
inp: Tensor, ord: float = None, axis: int = None, eps: float = 1e-12,
) -> Tensor:
r"""Performs :math:`L_p` normalization of input tensor along
given axis. If axis is a list of dimensions,
reduce over all of them.
given axis.
For a tensor inp of shape :math:`(n_0, ..., n_{dim}, ..., n_k)`, each
For a tensor of shape :math:`(n_0, ..., n_{dim}, ..., n_k)`, each
:math:`n_{dim}` -element vector :math:`v` along dimension :attr:`axis` is transformed as:
.. math::
v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}.
:param inp: input tensor.
:param p: power of value applied to inp. Default: 2
:param axis: dimension to reduce. If None, all the dimensions will be reduced
to calculate the norm. Default: None
:param ord: order of the norm used for normalization. Default: 2
:param axis: dimension to reduce. If None, input must be a vector. Default: None
:param eps: a small value to avoid division by zero. Default: 1e-12
:return: normalized output tensor.
"""
if axis is None:
return inp / clamp(norm(inp, p, axis), lower=eps)
return inp / clip(norm(inp, ord, axis), lower=eps)
else:
return inp / clamp(norm(inp, p, axis, keepdims=True), lower=eps)
return inp / clip(norm(inp, ord, axis, keepdims=True), lower=eps)
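In other words, normalize divides the input by the norm computed above, with eps guarding against division by zero. A NumPy sketch (illustrative):

import numpy as np

def l2_normalize(x, axis=None, eps=1e-12):
    if axis is None:
        n = np.linalg.norm(x.ravel())              # whole input treated as one vector
    else:
        n = np.linalg.norm(x, axis=axis, keepdims=True)
    return x / np.maximum(n, eps)

x = np.array([[3.0, 4.0], [0.0, 0.0]])
print(l2_normalize(x, axis=1))  # first row becomes a unit vector, zero row stays zero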
def argsort(inp: Tensor, descending: bool = False) -> Tensor:
r"""Sorts the target 2d matrix by row, return both the sorted tensor and indices.
r"""Returns the indices that would sort the input tensor.
:param inp: input tensor, if 2d, each row will be sorted.
:param descending: Sort in descending order, where the largest comes first. Default: False
:return: Tuple of two tensors `(sorted_tensor, indices_of_int32)`.
:param inp: input tensor. If it is 2d, the result is an array of indices showing how to sort each row of the input tensor.
:param descending: sort in descending order, where the largest comes first. Default: False
:return: int32 indices indicating how to sort the input.
Examples:
......@@ -603,6 +600,31 @@ def argsort(inp: Tensor, descending: bool = False) -> Tensor:
def sort(inp: Tensor, descending: bool = False) -> Tuple[Tensor, Tensor]:
r"""Returns sorted tensor and the indices would sort the input tensor.
:param inp: input tensor. If it's 2d, the result would be sorted by row.
:param descending: sort in descending order, where the largest comes first. Default: False
:return: tuple of two tensors `(sorted_tensor, indices_of_int32)`.
Examples:
.. testcode::
import numpy as np
from megengine import tensor
import megengine.functional as F
x = tensor(np.array([1,2], dtype=np.float32))
out, indices = F.sort(x)
print(out.numpy())
Outputs:
.. testoutput::
[1. 2.]
"""
assert len(inp.shape) <= 2, "Input should be 1d or 2d"
if descending:
order = P.Argsort.Order.DESCENDING
......@@ -625,13 +647,13 @@ def topk(
kth_only: bool = False,
no_sort: bool = False,
) -> Tuple[Tensor, Tensor]:
r"""Selects the ``Top-K(by default)`` smallest elements of 2d matrix by row.
r"""Selects the ``Top-K``(by default) smallest elements of 2d matrix by row.
:param inp: input tensor, if 2d, each row will be sorted.
:param inp: input tensor. If input tensor is 2d, each row will be sorted.
:param k: number of elements needed.
:param descending: if true, return the largest elements instead. Default: False
:param kth_only: if true, only the k-th element will be returned. Default: False
:param no_sort: if true, the returned elements can be unordered. Default: False
:param descending: if True, return the largest elements instead. Default: False
:param kth_only: if True, only the k-th element will be returned. Default: False
:param no_sort: if True, the returned elements can be unordered. Default: False
:return: tuple of two tensors `(topk_tensor, indices_of_int32)`.
Examples:
......@@ -665,15 +687,18 @@ def topk(
mode = Mode.VALUE_IDX_SORTED
op = builtin.TopK(mode=mode)
if not isinstance(k, (TensorBase, TensorWrapperBase)):
(k,) = Const(k, dtype="int32", device=inp.device)(inp)
if len(inp.shape) == 1:
inp = inp.reshape(1, -1)
res = apply(op, inp, Tensor(k, dtype="int32"))
res = apply(op, inp, k)
if kth_only:
tns = res[0]
else:
tns, ind = res[0][0], res[1][0]
else:
res = apply(op, inp, Tensor(k, dtype="int32"))
res = apply(op, inp, k)
if kth_only:
tns = res
else:
......
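A NumPy sketch of the row-wise Top-K selection performed above (smallest by default, largest when descending=True); the helper name is illustrative:

import numpy as np

def topk_by_row(x, k, descending=False):
    x = np.atleast_2d(x)                                    # a 1d input is treated as one row
    order = np.argsort(-x if descending else x, axis=1)[:, :k]
    values = np.take_along_axis(x, order, axis=1)
    return values, order.astype(np.int32)

vals, idx = topk_by_row(np.array([3.0, 1.0, 2.0, 5.0]), k=2)
print(vals, idx)   # [[1. 2.]] [[1 2]]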
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import numpy as np
from ..tensor import Tensor
from .distributed import all_reduce_sum
from .tensor import param_pack_concat, param_pack_split
def get_offsets(shapes):
offsets = []
offset = 0
for shape in shapes:
offsets.append(offset)
offset += int(np.prod(shape))
offsets.append(offset)
return offsets
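The diff view strips indentation; assuming both append calls sit inside the loop (matching the paired begin/end offsets that param_pack_split expects), get_offsets produces (start, end) element offsets per tensor, for example:

# Flattened sizes for [(2, 3), (4,), (1, 1)] are 6, 4 and 1, so:
print(get_offsets([(2, 3), (4,), (1, 1)]))  # [0, 6, 6, 10, 10, 11]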
def pack_allreduce_split(pack_list, shapes, group, reduce_method):
offsets_val = get_offsets(shapes)
offsets = Tensor(offsets_val)
packed_grads = param_pack_concat(pack_list, offsets, offsets_val)
packed_grads = all_reduce_sum(packed_grads, group)
if reduce_method == "mean":
packed_grads /= group.size
grads = param_pack_split(packed_grads, offsets_val, shapes)
return grads
......@@ -34,26 +34,23 @@ def conv_bias_activation(
:param weight: convolution kernel.
:param bias: bias added to the result of convolution
:param stride: stride of the 2D convolution operation. Default: 1
:param padding: size of the paddings added to the input on both sides of its
spatial dimensions. Only zero-padding is supported. Default: 0
:param padding: size of the paddings added to the input on both sides of its spatial dimensions. Only zero-padding is supported. Default: 0
:param dilation: dilation of the 2D convolution operation. Default: 1
:param groups: number of groups to divide input and output channels into,
so as to perform a "grouped convolution". When groups is not 1,
in_channels and out_channels must be divisible by groups,
:param groups: number of groups into which the input and output channels are divided, so as to perform a "grouped convolution". When ``groups`` is not 1,
``in_channels`` and ``out_channels`` must be divisible by ``groups``,
and the shape of weight should be `(groups, out_channel // groups,
in_channels // groups, height, width)`.
:type conv_mode: string or :class:`P.Convolution.Mode`.
:param conv_mode: supports 'CROSS_CORRELATION' or 'CONVOLUTION'. Default:
'CROSS_CORRELATION'
:param dtype: support for np.dtype, Default: np.int8
:param dtype: support for ``np.dtype``, Default: np.int8
:param scale: scale used if quantization is applied. Default: 0.0
:param zero_point: zero point used if quint8 quantization is applied. Default: 0.0
:type compute_mode: string or
:class:`P.Convolution.ComputeMode`.
:param compute_mode: when set to 'DEFAULT', no special requirements will be
placed on the precision of intermediate results. When set to 'FLOAT32',
Float32 would be used for accumulator and intermediate result, but only
effective when input and output are of Float16 dtype.
:param compute_mode: when set to "DEFAULT", no special requirements will be
placed on the precision of intermediate results. When set to "FLOAT32",
"Float32" would be used for accumulator and intermediate result, but only effective when input and output are of Float16 dtype.
"""
ph, pw = _pair(padding)
......
......@@ -11,18 +11,24 @@ from typing import Iterable, Union
import numpy as np
from ..core.ops.builtin import Copy
from ..core._wrap import device as as_device
from ..core.ops.builtin import Copy, Identity
from ..core.tensor import Tensor
from ..core.tensor.core import apply
from .math import topk as _topk
from .tensor import transpose as _transpose
from .tensor import broadcast_to, transpose
__all__ = [
"topk_accuracy",
"copy",
]
def accuracy(
def topk_accuracy(
logits: Tensor, target: Tensor, topk: Union[int, Iterable[int]] = 1
) -> Union[Tensor, Iterable[Tensor]]:
r"""
Calculate the classification accuracy given predicted logits and ground-truth labels.
Calculates the classification accuracy given predicted logits and ground-truth labels.
:param logits: model predictions of shape `[batch_size, num_classes]`,
representing the probability (likelihood) of each class.
......@@ -40,7 +46,7 @@ def accuracy(
logits = tensor(np.arange(80, dtype=np.int32).reshape(8,10))
target = tensor(np.arange(8, dtype=np.int32))
top1, top5 = F.accuracy(logits, target, (1, 5))
top1, top5 = F.topk_accuracy(logits, target, (1, 5))
print(top1.numpy(), top5.numpy())
Outputs:
......@@ -54,8 +60,8 @@ def accuracy(
_, pred = _topk(logits, k=max(topk), descending=True)
accs = []
for k in topk:
correct = pred[:, :k].detach() == _transpose(target, (0, "x")).broadcast(
target.shape[0], k
correct = pred[:, :k].detach() == broadcast_to(
transpose(target, (0, "x")), (target.shape[0], k)
)
accs.append(correct.astype(np.float32).sum() / target.shape[0])
if len(topk) == 1: # type: ignore[arg-type]
......@@ -63,25 +69,12 @@ def accuracy(
return accs
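The broadcast-and-compare step above has a direct NumPy analogue; a sketch of top-k accuracy under the same convention (helper name and values are illustrative):

import numpy as np

def topk_accuracy_np(logits, target, topk=(1, 5)):
    k = max(topk)
    pred = np.argsort(-logits, axis=1)[:, :k]      # indices of the k largest logits per row
    accs = []
    for kk in topk:
        correct = (pred[:, :kk] == target[:, None]).any(axis=1)
        accs.append(float(correct.mean()))
    return accs

logits = np.arange(80, dtype=np.float32).reshape(8, 10)
target = np.arange(8)
print(topk_accuracy_np(logits, target))  # [0.0, 0.375]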
def zero_grad(inp: Tensor) -> Tensor:
r"""
Returns a tensor which is treated as constant during backward gradient calcuation,
i.e. its gradient is zero.
:param inp: Input tensor.
See implementation of :func:`~.softmax` for example.
"""
print("zero_grad is obsoleted, please use detach instead")
raise NotImplementedError
def copy(inp, cn):
def copy(inp, device=None):
r"""
Copy tensor to another device.
Copies tensor to another device.
:param inp: input tensor.
:param cn: device that you copy to.
:param device: destination device.
Examples:
......@@ -101,4 +94,6 @@ def copy(inp, cn):
[1 2 3]
"""
return apply(Copy(comp_node=cn), inp)[0]
if device is None:
return apply(Identity(), inp)[0]
return apply(Copy(comp_node=as_device(device).to_c()), inp)[0]
......@@ -19,12 +19,12 @@ class InvalidGitHost(FetcherError):
class GitPullError(FetcherError):
"""A git pull error occurred"""
"""A git pull error occurred."""
class GitCheckoutError(FetcherError):
"""A git checkout error occurred"""
"""A git checkout error occurred."""
class InvalidProtocol(FetcherError):
"""The protocol provided was somehow invalid"""
"""The protocol provided was somehow invalid."""
......@@ -106,20 +106,20 @@ class GitSSHFetcher(RepoFetcherBase):
:param git_host:
host address of git repo.
example: github.com
Example: github.com
:param repo_info:
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional
tag/branch. The default branch is ``master`` if not specified.
example: ``"brain_sdk/MegBrain[:hub]"``
Example: ``"brain_sdk/MegBrain[:hub]"``
:param use_cache:
whether to use locally fetched code or completely re-fetch
whether to use locally fetched code or completely re-fetch.
:param commit:
commit id on github or gitlab
commit id on github or gitlab.
:param silent:
whether to accept the stdout and stderr of the subprocess with PIPE, instead of
displaying on the screen
displaying on the screen.
:return:
directory where the repo code is stored
directory where the repo code is stored.
"""
if not cls._check_git_host(git_host):
raise InvalidGitHost("git_host: '{}' is malformed.".format(git_host))
......@@ -215,24 +215,24 @@ class GitHTTPSFetcher(RepoFetcherBase):
silent: bool = True,
) -> str:
"""
Fetches git repo by HTTPS protocol
Fetches git repo by HTTPS protocol.
:param git_host:
host address of git repo
example: github.com
host address of git repo.
Example: github.com
:param repo_info:
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional
tag/branch. The default branch is ``master`` if not specified.
example: ``"brain_sdk/MegBrain[:hub]"``
Example: ``"brain_sdk/MegBrain[:hub]"``
:param use_cache:
whether to use locally cached code or completely re-fetch
whether to use locally cached code or completely re-fetch.
:param commit:
commit id on github or gitlab
commit id on github or gitlab.
:param silent:
whether to accept the stdout and stderr of the subprocess with PIPE, instead of
displaying on the screen
displaying on the screen.
:return:
directory where the repo code is stored
directory where the repo code is stored.
"""
if not cls._check_git_host(git_host):
raise InvalidGitHost("git_host: '{}' is malformed.".format(git_host))
......
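For context, the repo_info format documented above (``"repo_owner/repo_name[:tag_name/:branch_name]"``) is what the public hub entry points pass down to these fetchers. A hedged usage sketch (the repo name and entry are illustrative, and a network connection plus a published hubconf.py are required):

import megengine.hub as hub

# List the entry points a hub repo exposes, then load one of them.
# "megengine/models" and "resnet18" are example values only.
print(hub.list("megengine/models", git_host="github.com"))
model = hub.load("megengine/models", "resnet18", use_cache=True)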
......@@ -94,24 +94,24 @@ def _init_hub(
commit: str = None,
protocol: str = DEFAULT_PROTOCOL,
):
"""Imports hubmodule like python import
"""Imports hubmodule like python import.
:param repo_info:
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional
tag/branch. The default branch is ``master`` if not specified.
Example: ``"brain_sdk/MegBrain[:hub]"``
:param git_host:
host address of git repo
host address of git repo.
Example: github.com
:param use_cache:
whether to use locally cached code or completely re-fetch
whether to use locally cached code or completely re-fetch.
:param commit:
commit id on github or gitlab
commit id on github or gitlab.
:param protocol:
which protocol to use to get the repo, and HTTPS protocol only supports public repo on github.
The value should be one of HTTPS, SSH.
:return:
hubconf.py as a python module
a python module.
"""
cache_dir = os.path.expanduser(os.path.join(_get_megengine_home(), "hub"))
os.makedirs(cache_dir, exist_ok=True)
......@@ -137,24 +137,24 @@ def list(
commit: str = None,
protocol: str = DEFAULT_PROTOCOL,
) -> List[str]:
"""Lists all entrypoints available in repo hubconf
"""Lists all entrypoints available in repo hubconf.
:param repo_info:
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional
tag/branch. The default branch is ``master`` if not specified.
Example: ``"brain_sdk/MegBrain[:hub]"``
:param git_host:
host address of git repo
host address of git repo.
Example: github.com
:param use_cache:
whether to use locally cached code or completely re-fetch
whether to use locally cached code or completely re-fetch.
:param commit:
commit id on github or gitlab
commit id on github or gitlab.
:param protocol:
which protocol to use to get the repo, and HTTPS protocol only supports public repo on github.
The value should be one of HTTPS, SSH.
:return:
all entrypoint names of the model
all entrypoint names of the model.
"""
hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol)
......@@ -182,14 +182,14 @@ def load(
tag/branch. The default branch is ``master`` if not specified.
Example: ``"brain_sdk/MegBrain[:hub]"``
:param entry:
an entrypoint defined in hubconf
an entrypoint defined in hubconf.
:param git_host:
host address of git repo
host address of git repo.
Example: github.com
:param use_cache:
whether to use locally cached code or completely re-fetch
whether to use locally cached code or completely re-fetch.
:param commit:
commit id on github or gitlab
commit id on github or gitlab.
:param protocol:
which protocol to use to get the repo, and HTTPS protocol only supports public repo on github.
The value should be one of HTTPS, SSH.
......@@ -217,9 +217,9 @@ def help(
) -> str:
"""This function returns docstring of entrypoint ``entry`` by following steps:
1. Pull the repo code specified by git and repo_info
1. Pull the repo code specified by git and repo_info.
2. Load the entry defined in repo's hubconf.py
3. Return docstring of function entry
3. Return docstring of function entry.
:param repo_info:
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional
......@@ -228,17 +228,17 @@ def help(
:param entry:
an entrypoint defined in hubconf.py
:param git_host:
host address of git repo
host address of git repo.
Example: github.com
:param use_cache:
whether to use locally cached code or completely re-fetch
whether to use locally cached code or completely re-fetch.
:param commit:
commit id on github or gitlab
commit id on github or gitlab.
:param protocol:
which protocol to use to get the repo, and HTTPS protocol only supports public repo on github.
The value should be one of HTTPS, SSH.
:return:
docstring of entrypoint ``entry``
docstring of entrypoint ``entry``.
"""
hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol)
......@@ -255,10 +255,10 @@ def load_serialized_obj_from_url(url: str, model_dir=None) -> Any:
If the object is already present in ``model_dir``, it's deserialized and
returned. If no ``model_dir`` is specified, it will be ``MGE_HOME/serialized``.
:param url: url to serialized object
:param model_dir: dir to cache target serialized file
:param url: url to serialized object.
:param model_dir: dir to cache target serialized file.
:return: loaded object
:return: loaded object.
"""
if model_dir is None:
model_dir = os.path.join(_get_megengine_home(), "serialized")
......
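A hedged usage sketch for the helper above (the URL is a placeholder, not a real artifact):

from megengine.hub import load_serialized_obj_from_url

# Downloads the file (or reuses the cached copy under MGE_HOME/serialized)
# and returns the deserialized object.
obj = load_serialized_obj_from_url("https://example.com/models/checkpoint.pkl")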
......@@ -15,10 +15,10 @@ from typing import Iterator
def load_module(name: str, path: str) -> types.ModuleType:
"""
Loads module specified by name and path
Loads module specified by name and path.
:param name: module name
:param path: module path
:param name: module name.
:param path: module path.
"""
spec = importlib.util.spec_from_file_location(name, path)
module = importlib.util.module_from_spec(spec)
......@@ -27,18 +27,18 @@ def load_module(name: str, path: str) -> types.ModuleType:
def check_module_exists(module: str) -> bool:
"""Checks whether python module exists or not
"""Checks whether python module exists or not.
:param module: name of module
:param module: name of module.
"""
return importlib.util.find_spec(module) is not None
@contextmanager
def cd(target: str) -> Iterator[None]:
"""Changes current directory to target
"""Changes current directory to target.
:param target: target directory
:param target: target directory.
"""
prev = os.getcwd()
os.chdir(os.path.expanduser(target))
......
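A short usage sketch for these helpers, assuming they live in megengine.hub.tools as in this tree (the directory is illustrative):

from megengine.hub.tools import cd, check_module_exists

with cd("~/some/workdir"):                 # hypothetical directory; restored on exit
    pass                                   # run code relative to that directory

print(check_module_exists("numpy"))        # True when numpy is importable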
......@@ -12,7 +12,7 @@ import os
import sys
_all_loggers = []
_default_level_name = os.getenv("MEGENGINE_LOGGING_LEVEL", "ERROR")
_default_level_name = os.getenv("MEGENGINE_LOGGING_LEVEL", "INFO")
_default_level = logging.getLevelName(_default_level_name.upper())
......
......@@ -8,6 +8,7 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from .activation import LeakyReLU, PReLU, ReLU, Sigmoid, Softmax
from .adaptive_pooling import AdaptiveAvgPool2d, AdaptiveMaxPool2d
from .batchnorm import BatchNorm1d, BatchNorm2d, SyncBatchNorm
from .concat import Concat
from .conv import Conv2d, ConvRelu2d, ConvTranspose2d, LocalConv2d
......
......@@ -20,10 +20,10 @@ class Softmax(Module):
.. math::
\text{Softmax}(x_{i}) = \frac{exp(x_i)}{\sum_j exp(x_j)}
It is applied to an n-dimensional input Tensor and rescaling them so that the elements of the
n-dimensional output Tensor lie in the range of `[0, 1]` and sum to 1.
It is applied to all elements along axis, and rescales elements so that
they stay in the range `[0, 1]` and sum to 1.
:param axis: An axis along which softmax will be applied. By default,
:param axis: the axis along which softmax will be applied. By default,
softmax will apply along the highest ranked axis.
Examples:
......@@ -55,6 +55,9 @@ class Softmax(Module):
def forward(self, inputs):
return softmax(inputs, self.axis)
def _module_info_string(self) -> str:
return "axis={axis}".format(axis=self.axis)
class Sigmoid(Module):
r"""
......@@ -138,8 +141,7 @@ class PReLU(Module):
\end{cases}
Here :math:`a` is a learnable parameter. When called without arguments, `PReLU()` uses
a single parameter :math:`a` across all input channels. If called with `PReLU(num_of_channels)`,
a separate :math:`a` is used for each input channel.
a single parameter :math:`a` across all input channels. If called with `PReLU(num_of_channels)`, each input channel will have its own :math:`a`.
:param num_parameters: number of :math:`a` to learn; only two
values are legitimate: 1 or the number of channels of the input. Default: 1
......
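A NumPy sketch of the PReLU transform with a learnable slope a (illustrative; here a is a plain array rather than a trained Parameter):

import numpy as np

def prelu(x, a):
    # a broadcasts against x: a scalar gives one slope for all channels,
    # an array shaped (C, 1, 1) gives one slope per channel of an (N, C, H, W) input
    return np.maximum(x, 0) + a * np.minimum(x, 0)

x = np.array([[-1.0, 0.5], [2.0, -3.0]])
print(prelu(x, 0.25))   # negative entries are scaled by 0.25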
This diff is collapsed.
......@@ -11,7 +11,7 @@ from typing import Optional
import numpy as np
from ..distributed.group import WORLD, Group
from ..functional import batch_norm2d, sync_batch_norm
from ..functional.nn import batch_norm, sync_batch_norm
from ..tensor import Parameter, Tensor
from . import init
from .module import Module
......@@ -96,7 +96,7 @@ class _BatchNorm(Module):
else:
exponential_average_factor = 0.0 # useless
output = batch_norm2d(
output = batch_norm(
inp,
self.running_mean if self.track_running_stats else None,
self.running_var if self.track_running_stats else None,
......@@ -113,6 +113,13 @@ class _BatchNorm(Module):
return output
def _module_info_string(self) -> str:
s = (
"{num_features}, eps={eps}, momentum={momentum}, affine={affine}, "
"track_running_stats={track_running_stats}"
)
return s.format(**self.__dict__)
class SyncBatchNorm(_BatchNorm):
r"""
......@@ -213,8 +220,8 @@ class BatchNorm2d(_BatchNorm):
of 0.9.
If :attr:`track_running_stats` is set to ``False``, this layer will not
keep running estimates, and batch statistics are instead used during
evaluation time.
keep running estimates; batch statistics are used during
evaluation instead.
.. note::
This :attr:`momentum` argument is different from one used in optimizer
......@@ -229,15 +236,14 @@ class BatchNorm2d(_BatchNorm):
Spatial Batch Normalization.
:type num_features: int
:param num_features: usually the :math:`C` from an input of size
:math:`(N, C, H, W)` or the highest ranked dimension of an input with
:param num_features: usually :math:`C` from an input of shape
:math:`(N, C, H, W)` or the highest ranked dimension of an input
less than 4D.
:type eps: float
:param eps: a value added to the denominator for numerical stability.
Default: 1e-5
:type momentum: float
:param momentum: the value used for the `running_mean` and `running_var`
computation.
:param momentum: the value used for the ``running_mean`` and ``running_var`` computation.
Default: 0.9
:type affine: bool
:param affine: a boolean value that when set to True, this module has
......
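A NumPy sketch of how the running statistics are blended with batch statistics under the default momentum of 0.9 (sketch only; the exact update convention is an assumption and should be checked against the source):

import numpy as np

def update_running_stats(running_mean, running_var, batch, momentum=0.9, eps=1e-5):
    batch_mean = batch.mean(axis=0)
    batch_var = batch.var(axis=0)
    # assumed convention: keep `momentum` of the old estimate, blend in the rest
    running_mean = momentum * running_mean + (1 - momentum) * batch_mean
    running_var = momentum * running_var + (1 - momentum) * batch_var
    normalized = (batch - batch_mean) / np.sqrt(batch_var + eps)  # training-time normalization
    return running_mean, running_var, normalized

batch = np.random.RandomState(0).randn(4, 3).astype(np.float32)
rm, rv, _ = update_running_stats(np.zeros(3), np.ones(3), batch)
print(rm, rv)   # the estimates drift slowly toward the batch statistics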
......@@ -11,7 +11,7 @@ from .module import Module
class Dropout(Module):
r"""Randomly set input elements to zeros with the probability :math:`drop\_prob` during training.
r"""Randomly sets input elements to zeros with the probability :math:`drop\_prob` during training.
Commonly used in large networks to prevent overfitting.
Note that we perform dropout only during training; we also rescale (multiply) the output tensor
by :math:`\frac{1}{1 - drop\_prob}`. During inference :class:`~.Dropout` is equal to :class:`~.Identity`.
......@@ -28,3 +28,6 @@ class Dropout(Module):
return dropout(inputs, self.drop_prob, training=True)
else:
return inputs
def _module_info_string(self) -> str:
return "drop_prob={drop_prob}".format(drop_prob=self.drop_prob)
The remaining file diffs are collapsed.