Commit 08589650 authored by: G guo-ran

Merge branch 'master' of https://github.com/Oneflow-Inc/oneflow into dev_partial_fc_mirror


Former-commit-id: f2fcb711d12da1b42cfee85c18c8acafd3583c17
......@@ -18,6 +18,7 @@ runs:
set -x
src_dir=${PWD}
tmp_dir="${{ inputs.tmp_dir }}"
mkdir -p ${tmp_dir}
cd ${tmp_dir}
docker run --rm -v $PWD:/p -w /p busybox rm -rf /p/wheelhouse
python3 ${src_dir}/docker/package/manylinux/build_wheel.py \
......
......@@ -187,41 +187,48 @@ inline bool MaybeIsOk(Maybe<void>&& maybe) {
#if defined(__GNUC__) || defined(__CUDACC__) || defined(__clang__)
// fix CUDA 11.1 compiler crashes
#if defined(__CUDACC__)
#define MAYBE_CONST_AUTO_REF const auto
#else
#define MAYBE_CONST_AUTO_REF const auto&
#endif // defined(__CUDACC__)
#define TRY(...) __MaybeErrorStackCheckWrapper__(__VA_ARGS__)
#define JUST(...) \
({ \
const auto& maybe = __MaybeErrorStackCheckWrapper__(__VA_ARGS__); \
if (!maybe.IsOk()) { \
auto* stack_frame = maybe.error()->add_stack_frame(); \
stack_frame->set_location(MAYBE_FAILED_LOC); \
stack_frame->set_function(__FUNCTION__); \
return maybe.error(); \
} \
maybe; \
}) \
#define JUST(...) \
({ \
MAYBE_CONST_AUTO_REF maybe = __MaybeErrorStackCheckWrapper__(__VA_ARGS__); \
if (!maybe.IsOk()) { \
auto* stack_frame = maybe.error()->add_stack_frame(); \
stack_frame->set_location(MAYBE_FAILED_LOC); \
stack_frame->set_function(__FUNCTION__); \
return maybe.error(); \
} \
maybe; \
}) \
.Data_YouAreNotAllowedToCallThisFuncOutsideThisFile()
#define CHECK_JUST(...) \
({ \
const auto& maybe = __MaybeErrorStackCheckWrapper__(__VA_ARGS__); \
if (!maybe.IsOk()) { \
auto* stack_frame = maybe.error()->add_stack_frame(); \
stack_frame->set_location(MAYBE_FAILED_LOC); \
stack_frame->set_function(__FUNCTION__); \
LOG(FATAL) << maybe.GetSerializedError(); \
} \
maybe; \
}) \
#define CHECK_JUST(...) \
({ \
MAYBE_CONST_AUTO_REF maybe = __MaybeErrorStackCheckWrapper__(__VA_ARGS__); \
if (!maybe.IsOk()) { \
auto* stack_frame = maybe.error()->add_stack_frame(); \
stack_frame->set_location(MAYBE_FAILED_LOC); \
stack_frame->set_function(__FUNCTION__); \
LOG(FATAL) << maybe.GetSerializedError(); \
} \
maybe; \
}) \
.Data_YouAreNotAllowedToCallThisFuncOutsideThisFile()
#define CHECK_OK(...) CHECK(MaybeIsOk(std::move(__VA_ARGS__)))
#define OF_RETURN_IF_ERROR(...) \
const auto& maybe_##__LINE__ = __MaybeErrorStackCheckWrapper__(__VA_ARGS__); \
if (!maybe_##__LINE__.IsOk()) { \
auto* stack_frame = maybe_##__LINE__.error()->add_stack_frame(); \
stack_frame->set_location(MAYBE_FAILED_LOC); \
stack_frame->set_function(__FUNCTION__); \
return maybe_##__LINE__.error(); \
#define OF_RETURN_IF_ERROR(...) \
MAYBE_CONST_AUTO_REF maybe_##__LINE__ = __MaybeErrorStackCheckWrapper__(__VA_ARGS__); \
if (!maybe_##__LINE__.IsOk()) { \
auto* stack_frame = maybe_##__LINE__.error()->add_stack_frame(); \
stack_frame->set_location(MAYBE_FAILED_LOC); \
stack_frame->set_function(__FUNCTION__); \
return maybe_##__LINE__.error(); \
}
#else
......
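For reference, a minimal usage sketch of these macros (not part of this diff; `Step` and `Pipeline` are hypothetical): `JUST` propagates a failed `Maybe` to the caller, appending a stack frame at each hop, while `CHECK_JUST` aborts with the serialized error. Under nvcc the statement-expression temporary is now held by value (`const auto`) rather than by reference, which is the workaround for the CUDA 11.1 compiler crash.

Maybe<void> Step();  // hypothetical callee that may fail

Maybe<void> Pipeline() {
  JUST(Step());  // on failure: record a stack frame and return the error upward
  JUST(Step());
  return Maybe<void>::Ok();
}

void Run() {
  CHECK_JUST(Pipeline());  // on failure: LOG(FATAL) with the serialized error stack
}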
......@@ -143,7 +143,8 @@ inline uint32_t NewRandomSeed() {
#define DIM_SEQ \
OF_PP_MAKE_TUPLE_SEQ(1) \
OF_PP_MAKE_TUPLE_SEQ(2) OF_PP_MAKE_TUPLE_SEQ(3) OF_PP_MAKE_TUPLE_SEQ(4) OF_PP_MAKE_TUPLE_SEQ(5)
OF_PP_MAKE_TUPLE_SEQ(2) \
OF_PP_MAKE_TUPLE_SEQ(3) OF_PP_MAKE_TUPLE_SEQ(4) OF_PP_MAKE_TUPLE_SEQ(5) OF_PP_MAKE_TUPLE_SEQ(6)
#define BOOL_SEQ (true)(false)
......
......@@ -56,7 +56,8 @@ class ArgWhereKernel : public KernelIf<DeviceType::kCPU> {
REGISTER_ARG_WHERE_KERNEL(device_type_v, dtype, itype, 2) \
REGISTER_ARG_WHERE_KERNEL(device_type_v, dtype, itype, 3) \
REGISTER_ARG_WHERE_KERNEL(device_type_v, dtype, itype, 4) \
REGISTER_ARG_WHERE_KERNEL(device_type_v, dtype, itype, 5)
REGISTER_ARG_WHERE_KERNEL(device_type_v, dtype, itype, 5) \
REGISTER_ARG_WHERE_KERNEL(device_type_v, dtype, itype, 6)
#define REGISTER_ARG_WHERE_KERNELS(device_type_v, dtype_pair, itype_pair) \
REGISTER_ARG_WHERE_KERNELS_AT_NDIMS(device_type_v, OF_PP_PAIR_FIRST(dtype_pair), \
......
......@@ -41,7 +41,9 @@ struct ArgWhereKernelUtil {
INSTANTIATE_ARG_WHERE_KERNEL_UTIL_INTERNAL(device_type_v, OF_PP_PAIR_FIRST(dtype_pair), \
OF_PP_PAIR_FIRST(itype_pair), 4) \
INSTANTIATE_ARG_WHERE_KERNEL_UTIL_INTERNAL(device_type_v, OF_PP_PAIR_FIRST(dtype_pair), \
OF_PP_PAIR_FIRST(itype_pair), 5)
OF_PP_PAIR_FIRST(itype_pair), 5) \
INSTANTIATE_ARG_WHERE_KERNEL_UTIL_INTERNAL(device_type_v, OF_PP_PAIR_FIRST(dtype_pair), \
OF_PP_PAIR_FIRST(itype_pair), 6)
} // namespace oneflow
......
......@@ -270,7 +270,7 @@ struct BinaryFuncFloorMod<half> final {
#if __CUDA_ARCH__ >= 530
const half trunc_mod = __float2half(fmodf(__half2float(x), __half2float(y)));
return __hne(trunc_mod, GetZeroVal<half>())
&& __hne(__hlt(y, GetZeroVal<half>()), __hlt(trunc_mod, half(0)))
&& __hlt(y, GetZeroVal<half>()) != __hlt(trunc_mod, half(0))
? trunc_mod + y
: trunc_mod;
#else
......
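The fixed condition compares the two sign tests as booleans with `!=` instead of passing them to `__hne`, which expects half operands. As an illustration (not code from this diff), the same trunc-mod-to-floor-mod correction in plain float:

#include <cmath>

// Floor-mod from trunc-mod: fmod keeps the sign of x, while floor-mod must take
// the sign of y, so add y once when the signs differ and the remainder is non-zero.
inline float FloorModSketch(float x, float y) {
  const float trunc_mod = std::fmod(x, y);
  return (trunc_mod != 0.f) && ((y < 0.f) != (trunc_mod < 0.f)) ? trunc_mod + y : trunc_mod;
}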
......@@ -60,6 +60,7 @@ SPECIALIZE_XPU_BROADCAST_NDARRAY_UTIL(1);
SPECIALIZE_XPU_BROADCAST_NDARRAY_UTIL(2);
SPECIALIZE_XPU_BROADCAST_NDARRAY_UTIL(3);
SPECIALIZE_XPU_BROADCAST_NDARRAY_UTIL(4);
SPECIALIZE_XPU_BROADCAST_NDARRAY_UTIL(5);
#undef SPECIALIZE_XPU_BROADCAST_NDARRAY_UTIL
#undef IMPLACE_SET_SRC_COORD
......
......@@ -110,6 +110,7 @@ SPECIALIZE_XPU_SHAPE_UTIL(0);
SPECIALIZE_XPU_SHAPE_UTIL(1);
SPECIALIZE_XPU_SHAPE_UTIL(2);
SPECIALIZE_XPU_SHAPE_UTIL(3);
SPECIALIZE_XPU_SHAPE_UTIL(4);
#undef SPECIALIZE_XPU_SHAPE_UTIL
#undef EXTRACT_COORD
#undef COORD_MUL_STRIDE
......
......@@ -156,6 +156,59 @@ def gather(
)
@oneflow_export("flatten")
def flatten(
input: remote_blob_util.BlobDef,
start_dim: int = 0,
end_dim: int = -1,
name: Optional[str] = None,
) -> remote_blob_util.BlobDef:
r"""Flattens a contiguous range of dims in a Blob.
Args:
input: A `Blob`.
start_dim: The first dim to flatten.
end_dim: The last dim to flatten.
name: A name for the operation (optional).
Returns:
A `Blob` with the same type as `input`.
For example:
.. code-block:: python
import oneflow as flow
import numpy as np
import oneflow.typing as tp
@flow.global_function()
def flatten_Job(input: tp.Numpy.Placeholder(shape=(4, 4, 3, 2), dtype=flow.float32)
) -> tp.Numpy:
flatten_blob = flow.flatten(input, start_dim=1, end_dim=-1)
return flatten_blob
input = np.zeros((4, 4, 3, 2)).astype(np.float32)
out = flatten_Job(input)
# out.shape (4, 24)
"""
if name is None:
name = id_util.UniqueStr("Flatten_")
return (
flow.user_op_builder(name)
.Op("flatten")
.Input("in", [input])
.Output("out")
.Attr("start_dim", start_dim)
.Attr("end_dim", end_dim)
.Build()
.InferAndTryRun()
.RemoteBlobList()[0]
)
def infer_shape(x, shape):
dim_index_need_infer = shape.index(-1) if shape.count(-1) == 1 else None
in_elem_cnt = reduce(operator.mul, x.shape, 1)
......
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import unittest
from collections import OrderedDict
import numpy as np
import oneflow as flow
from test_util import GenArgList
import test_global_storage
def compare_with_numpy(test_case, device_type, input_shape, start_end_dim):
assert device_type in ["gpu", "cpu"]
flow.clear_default_session()
func_config = flow.FunctionConfig()
func_config.default_data_type(flow.float)
start_dim = start_end_dim[0]
end_dim = start_end_dim[1]
@flow.global_function(type="train", function_config=func_config)
def FlattenJob() -> flow.typing.Numpy:
with flow.scope.placement(device_type, "0:0"):
x = flow.get_variable(
"in",
shape=input_shape,
dtype=flow.float,
initializer=flow.random_uniform_initializer(minval=2, maxval=5),
trainable=True,
)
loss = flow.flatten(x, start_dim=start_dim, end_dim=end_dim)
flow.optimizer.SGD(
flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
).minimize(loss)
flow.watch(x, test_global_storage.Setter("x"))
flow.watch_diff(x, test_global_storage.Setter("x_diff"))
return loss
# OneFlow
check_point = flow.train.CheckPoint()
check_point.init()
of_out = FlattenJob()
# Numpy
of_x = test_global_storage.Get("x")
of_x_shape = of_x.shape
of_x_diff = test_global_storage.Get("x_diff")
true_end_dim = end_dim + len(of_x_shape) if end_dim < 0 else end_dim
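# Reference shape: keep dims before start_dim, collapse [start_dim, true_end_dim]
# into a single dim, and keep the remaining dims unchanged.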
new_shape = []
for i in range(0, start_dim):
new_shape.append(of_x_shape[i])
flatten_dim = 1
for i in range(start_dim, true_end_dim + 1):
flatten_dim *= of_x_shape[i]
new_shape.append(flatten_dim)
for i in range(true_end_dim + 1, len(of_x_shape)):
new_shape.append(of_x_shape[i])
np_out = np.reshape(of_x, tuple(new_shape))
test_case.assertTrue(of_out.shape == np_out.shape)
test_case.assertTrue(np.allclose(of_out, np_out, rtol=1e-5, atol=1e-5))
test_case.assertTrue(
np.allclose(of_x_diff, np.ones(of_x_diff.shape), rtol=1e-5, atol=1e-5)
)
@flow.unittest.skip_unless_1n1d()
class TestFlatten(flow.unittest.TestCase):
def test_flatten(test_case):
arg_dict = OrderedDict()
arg_dict["test_case"] = [test_case]
arg_dict["device_type"] = ["gpu", "cpu"]
arg_dict["input_shape"] = [(2, 3, 4, 5)]
arg_dict["start_end_dim"] = [(0, -1), (1, 3), (2, -2)]
for arg in GenArgList(arg_dict):
compare_with_numpy(*arg)
if __name__ == "__main__":
unittest.main()
......@@ -113,6 +113,14 @@ class TestTranspose(flow.unittest.TestCase):
x = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]]).astype(np.float32)
transpose_batchaxis_non_change_job(x)
def test_transpose_dim6(test_case):
arg_dict = OrderedDict()
arg_dict["device_type"] = ["gpu", "cpu"]
arg_dict["input_shape"] = [(2, 3, 4, 5, 6, 7)]
arg_dict["perm"] = [(2, 0, 1, 3, 5, 4)]
for arg in GenArgList(arg_dict):
compare_with_tensorflow(*arg)
if __name__ == "__main__":
unittest.main()
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/framework.h"
#include "oneflow/user/kernels/copy_data_content_kernel.h"
namespace oneflow {
#define REGISTER_FLATTEN_KERNEL(device) \
REGISTER_USER_KERNEL("flatten") \
.SetCreateFn<CopyDataContentKernel<device>>() \
.SetIsMatchedHob(user_op::HobDeviceTag() == device) \
.SetInplaceProposalFn([](const user_op::InferContext&, \
user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \
OF_RETURN_IF_ERROR(AddInplaceArgPairFn("out", 0, "in", 0, false)); \
return Maybe<void>::Ok(); \
});
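// The flatten kernel only copies data contents, so the proposal above lets "out"
// be computed inplace on "in".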
REGISTER_FLATTEN_KERNEL(DeviceType::kCPU)
#ifdef WITH_CUDA
REGISTER_FLATTEN_KERNEL(DeviceType::kGPU)
#endif
} // namespace oneflow
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/framework.h"
namespace oneflow {
namespace {
Maybe<void> GetSbpFn(user_op::SbpContext* ctx) {
const auto& in_shape = ctx->LogicalTensorDesc4InputArgNameAndIndex("in", 0).shape();
const int32_t start_dim = ctx->Attr<int32_t>("start_dim");
const int32_t end_dim = ctx->Attr<int32_t>("end_dim");
CHECK_GE_OR_RETURN(start_dim, 0);
CHECK_LT_OR_RETURN(start_dim, in_shape.NumAxes());
const int32_t true_end_dim = end_dim < 0 ? end_dim + in_shape.NumAxes() : end_dim;
CHECK_GE_OR_RETURN(true_end_dim, 0);
CHECK_LT_OR_RETURN(true_end_dim, in_shape.NumAxes());
CHECK_LE_OR_RETURN(start_dim, true_end_dim);
for (int i = 0; i <= start_dim; ++i) {
ctx->NewBuilder().Split(user_op::OpArg("in", 0), i).Split(user_op::OpArg("out", 0), i).Build();
}
const int32_t diff = true_end_dim - start_dim;
for (int i = true_end_dim + 1; i < in_shape.NumAxes(); ++i) {
ctx->NewBuilder()
.Split(user_op::OpArg("in", 0), i)
.Split(user_op::OpArg("out", 0), i - diff)
.Build();
}
ctx->NewBuilder().PartialSum(ctx->inputs()).PartialSum(ctx->outputs()).Build();
return Maybe<void>::Ok();
}
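// Illustrative example: for in_shape (2, 3, 4, 5) with start_dim=1 and end_dim=2,
// the output shape is (2, 12, 5) and diff is 1. The builders above allow
// Split(in, 0) <-> Split(out, 0), Split(in, 1) <-> Split(out, 1) (the leading axis
// of the flattened group), Split(in, 3) <-> Split(out, 2), and PartialSum <-> PartialSum.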
Maybe<void> TensorDescInferFn(user_op::InferContext* ctx) {
const int32_t start_dim = ctx->Attr<int32_t>("start_dim");
const int32_t end_dim = ctx->Attr<int32_t>("end_dim");
const user_op::TensorDesc* in_tensor_desc = ctx->TensorDesc4ArgNameAndIndex("in", 0);
user_op::TensorDesc* out_tensor_desc = ctx->TensorDesc4ArgNameAndIndex("out", 0);
const Shape& in_shape = in_tensor_desc->shape();
CHECK_GE_OR_RETURN(start_dim, 0);
CHECK_LT_OR_RETURN(start_dim, in_shape.NumAxes());
const int32_t true_end_dim = end_dim < 0 ? end_dim + in_shape.NumAxes() : end_dim;
CHECK_GE_OR_RETURN(true_end_dim, 0);
CHECK_LT_OR_RETURN(true_end_dim, in_shape.NumAxes());
CHECK_LE_OR_RETURN(start_dim, true_end_dim);
*out_tensor_desc = *in_tensor_desc;
Shape* out_shape = out_tensor_desc->mut_shape();
DimVector dim_vec;
for (int i = 0; i < start_dim; ++i) { dim_vec.push_back(in_shape.At(i)); }
int64_t flatten_dim = 1;
for (int i = start_dim; i <= true_end_dim; ++i) { flatten_dim *= in_shape.At(i); }
dim_vec.push_back(flatten_dim);
for (int i = true_end_dim + 1; i < in_shape.NumAxes(); ++i) { dim_vec.push_back(in_shape.At(i)); }
*out_shape = Shape(dim_vec);
CHECK_EQ_OR_RETURN(out_shape->elem_cnt(), in_shape.elem_cnt());
return Maybe<void>::Ok();
}
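// Example: for in_shape (4, 4, 3, 2) with start_dim=1 and end_dim=-1, true_end_dim
// is 3, flatten_dim is 4 * 3 * 2 = 24, and the output shape is (4, 24).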
Maybe<void> GetBatchAxisInferFn(user_op::BatchAxisContext* ctx) {
const int32_t start_dim = ctx->Attr<int32_t>("start_dim");
const int32_t end_dim = ctx->Attr<int32_t>("end_dim");
const auto& in_shape = ctx->LogicalTensorDesc4InputArgNameAndIndex("in", 0).shape();
CHECK_GE_OR_RETURN(start_dim, 0);
CHECK_LT_OR_RETURN(start_dim, in_shape.NumAxes());
const int32_t true_end_dim = end_dim < 0 ? end_dim + in_shape.NumAxes() : end_dim;
CHECK_GE_OR_RETURN(true_end_dim, 0);
CHECK_LT_OR_RETURN(true_end_dim, in_shape.NumAxes());
CHECK_LE_OR_RETURN(start_dim, true_end_dim);
const int64_t input_batch_axis = (*ctx->BatchAxis4ArgNameAndIndex("in", 0)).value();
OptInt64 output_batch_axis;
if (input_batch_axis < start_dim) {
output_batch_axis.set_value(input_batch_axis);
} else if (input_batch_axis >= start_dim && input_batch_axis <= true_end_dim) {
output_batch_axis.set_value(start_dim);
} else if (input_batch_axis > true_end_dim) {
output_batch_axis.set_value(input_batch_axis - (true_end_dim - start_dim));
}
*ctx->BatchAxis4ArgNameAndIndex("out", 0) = output_batch_axis;
return Maybe<void>::Ok();
}
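// Example: with start_dim=1 and end_dim=-1, a batch axis of 0 stays 0; a batch axis
// inside [start_dim, true_end_dim] collapses to start_dim; a batch axis past the
// flattened range shifts left by (true_end_dim - start_dim).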
REGISTER_USER_OP("flatten")
.Input("in")
.Output("out")
.Attr<int32_t>("start_dim", 0)
.Attr<int32_t>("end_dim", -1)
.SetTensorDescInferFn(TensorDescInferFn)
.SetGetSbpFn(GetSbpFn)
.SetBatchAxisInferFn(GetBatchAxisInferFn);
REGISTER_USER_OP_GRAD("flatten").SetGenBackwardOpConfFn([](const user_op::UserOpWrapper& op,
user_op::AddOpFn AddOp) {
if (op.NeedGenGradTensor4OpInput("in", 0)) {
user_op::UserOpConfWrapperBuilder builder(op.op_name() + "_grad");
user_op::UserOpConfWrapper reshape_grad_op =
builder.Op("reshape_like")
.Input("in", op.GetGradTensorWithOpOutput("out", 0))
.Input("like", op.input("in", 0))
.Output("out")
.Build();
op.BindGradTensorWithOpInput(reshape_grad_op.output("out", 0), "in", 0);
AddOp(reshape_grad_op);
}
});
} // namespace
} // namespace oneflow