Unverified · Commit 4d5a3ad6 authored by 张春乔, committed by GitHub

remove mlu (#53007)

Parent 240e13a2
@@ -62,7 +62,6 @@ function(op_library TARGET)
set(hip_cc_srcs)
set(xpu_cc_srcs)
set(xpu_kp_cc_srcs)
- set(mlu_cc_srcs)
set(cudnn_cu_cc_srcs)
set(miopen_cu_cc_srcs)
set(cudnn_cu_srcs)
@@ -307,9 +306,8 @@ function(op_library TARGET)
# Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
if(WITH_UNITY_BUILD AND op_library_UNITY)
# Combine the cc source files.
-    compose_unity_target_sources(
-      ${UNITY_TARGET} cc ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs}
-      ${mlu_cc_srcs})
+    compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs}
+                                 ${mkldnn_cc_srcs} ${xpu_cc_srcs})
if(TARGET ${UNITY_TARGET})
# If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`.
target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources})
@@ -325,7 +323,7 @@ function(op_library TARGET)
else()
cc_library(
${TARGET}
-   SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${mlu_cc_srcs}
+   SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs}
DEPS ${op_library_DEPS} ${op_common_deps})
endif()
endif()
@@ -337,7 +335,6 @@ function(op_library TARGET)
list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
list(LENGTH xpu_cc_srcs xpu_cc_srcs_len)
list(LENGTH miopen_cu_cc_srcs miopen_cu_cc_srcs_len)
- list(LENGTH mlu_cc_srcs mlu_cc_srcs_len)
# Define operators that don't need pybind here.
foreach(
@@ -562,7 +559,6 @@ function(register_operators)
"*_op.cc")
string(REPLACE "_mkldnn" "" OPS "${OPS}")
string(REPLACE "_xpu" "" OPS "${OPS}")
string(REPLACE "_mlu" "" OPS "${OPS}")
string(REPLACE ".cc" "" OPS "${OPS}")
list(REMOVE_DUPLICATES OPS)
list(LENGTH register_operators_DEPS register_operators_DEPS_len)
@@ -27,7 +27,6 @@ static inline bool NeedCast(const paddle::Tensor& tensor,
if (paddle::platform::is_gpu_place(place) ||
paddle::platform::is_cuda_pinned_place(place) ||
paddle::platform::is_xpu_place(place) ||
- paddle::platform::is_mlu_place(place) ||
paddle::platform::is_npu_place(place) ||
paddle::platform::is_npu_pinned_place(place) ||
paddle::platform::is_custom_place(place)) {
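The NeedCast hunks in this commit all prune the same device gate: a tensor is an AMP cast candidate only if it lives on one of the whitelisted accelerator places. A minimal standalone sketch of that gate (hypothetical `Place`/`AllocationType` stand-ins, not Paddle's real API):

```cpp
#include <initializer_list>

// Hypothetical stand-ins for paddle::platform::Place and its predicates.
enum class AllocationType { CPU, GPU, GPUPINNED, XPU, NPU, NPUPINNED, CUSTOM };
struct Place { AllocationType type; };

// Mirrors the gate above: true only on places whose kernels participate in
// AMP casting; removing a backend means removing its entry here.
bool NeedCastForPlace(const Place& place) {
  for (AllocationType t :
       {AllocationType::GPU, AllocationType::GPUPINNED, AllocationType::XPU,
        AllocationType::NPU, AllocationType::NPUPINNED,
        AllocationType::CUSTOM}) {
    if (place.type == t) return true;
  }
  return false;
}
```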
@@ -96,7 +96,6 @@ inline phi::DataType GetDtypeWithPlace(
is_right_place = (paddle::platform::is_gpu_place(place) ||
paddle::platform::is_cuda_pinned_place(place) ||
paddle::platform::is_xpu_place(place) ||
- paddle::platform::is_mlu_place(place) ||
paddle::platform::is_npu_place(place) ||
paddle::platform::is_npu_pinned_place(place) ||
paddle::platform::is_custom_place(place));
@@ -27,7 +27,6 @@ static inline bool NeedCast(const paddle::Tensor& tensor,
if (paddle::platform::is_gpu_place(place) ||
paddle::platform::is_cuda_pinned_place(place) ||
paddle::platform::is_xpu_place(place) ||
- paddle::platform::is_mlu_place(place) ||
paddle::platform::is_npu_place(place) ||
paddle::platform::is_npu_pinned_place(place) ||
paddle::platform::is_custom_place(place) ||
@@ -72,8 +72,6 @@ inline LibraryType StringToLibraryType(const char* ctype) {
return LibraryType::kPlain;
} else if (s == std::string("CUDA")) {
return LibraryType::kPlain;
} else if (s == std::string("MLU")) {
return LibraryType::kPlain;
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unknown LibraryType string (%s), only support library type string "
@@ -276,7 +276,6 @@ void TensorCopyImpl(const TENSOR& src,
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
const platform::DeviceContext* dev_ctx;
if (platform::is_gpu_place(dst_place) || platform::is_npu_place(dst_place) ||
- platform::is_mlu_place(dst_place) ||
platform::is_custom_place(dst_place)) {
dev_ctx = pool.Get(dst_place);
} else {
@@ -615,7 +614,6 @@ void TensorFromStream(std::istream& is,
size_t size = tensor->numel() * framework::SizeOfType(desc.data_type());
if (platform::is_gpu_place(dev_ctx.GetPlace()) ||
platform::is_xpu_place(dev_ctx.GetPlace()) ||
- platform::is_mlu_place(dev_ctx.GetPlace()) ||
platform::is_npu_place(dev_ctx.GetPlace()) ||
platform::is_custom_place(dev_ctx.GetPlace())) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \
@@ -691,7 +689,6 @@ void TensorFromStream(std::istream& is,
size_t size = tensor->numel() * framework::SizeOfType(desc.data_type());
if (platform::is_gpu_place(dev_ctx.GetPlace()) ||
platform::is_xpu_place(dev_ctx.GetPlace()) ||
- platform::is_mlu_place(dev_ctx.GetPlace()) ||
platform::is_npu_place(dev_ctx.GetPlace()) ||
platform::is_custom_place(dev_ctx.GetPlace())) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \
@@ -166,5 +166,4 @@ cc_library(
var_type_traits
layer
math_function
-  phi_tensor
-  ${MLU_DEPS})
+  phi_tensor)
@@ -52,7 +52,6 @@ OpSupportedInfos(const std::string& place,
{"CPU", &platform::is_cpu_place},
{"XPU", &platform::is_xpu_place},
{"NPU", &platform::is_npu_place},
{"MLU", &platform::is_mlu_place},
};
PADDLE_ENFORCE_NE(is_target_place.count(query_place),
0,
@@ -245,7 +244,6 @@ inline bool NeedCast(const std::shared_ptr<VarType>& var) {
if (paddle::platform::is_gpu_place(place) ||
paddle::platform::is_cuda_pinned_place(place) ||
paddle::platform::is_xpu_place(place) ||
- paddle::platform::is_mlu_place(place) ||
paddle::platform::is_custom_place(place) ||
paddle::platform::is_npu_place(place) ||
paddle::platform::is_npu_pinned_place(place)) {
@@ -66,9 +66,6 @@ void Group::DivNRanks(const platform::DeviceContext &context, int64_t nranks) {
#ifdef PADDLE_WITH_XPU_BKCL
// TODO(liuyuhui) support xpu about div nranks in the future
#endif
} else if (platform::is_mlu_place(tensor->place())) {
// TODO(zhangna)
VLOG(4) << "divnrank for mlu not support yet";
}
}
@@ -61,8 +61,7 @@ class CCommInitOp : public framework::OperatorBase {
#endif
PADDLE_ENFORCE_EQ(
-       platform::is_gpu_place(place) || platform::is_xpu_place(place) ||
-           platform::is_mlu_place(place),
+       platform::is_gpu_place(place) || platform::is_xpu_place(place),
true,
platform::errors::PreconditionNotMet(
"CCommInitOp can run on gpu or xpu or mlu place only."));
@@ -76,7 +76,6 @@ phi::KernelKey GetReduceExpectedKernelType(
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx.GetPlace()) ||
platform::is_npu_place(ctx.GetPlace()) ||
- platform::is_mlu_place(ctx.GetPlace()) ||
platform::is_xpu_place(ctx.GetPlace()) ||
platform::is_custom_place(ctx.GetPlace()),
true,
@@ -41,8 +41,7 @@ inline std::vector<int> get_new_shape(
"The shape of dimension tensor should be [1],"
"but received d%.",
tensor->dims()));
if (platform::is_gpu_place(tensor->place()) ||
platform::is_mlu_place(tensor->place())) {
if (platform::is_gpu_place(tensor->place())) {
phi::DenseTensor temp;
paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
@@ -60,8 +59,7 @@ inline std::vector<T> get_new_data_from_tensor(
std::vector<T> vec_new_data;
auto* new_data = new_data_tensor->data<T>();
phi::DenseTensor cpu_starts_tensor;
if (platform::is_gpu_place(new_data_tensor->place()) ||
platform::is_mlu_place(new_data_tensor->place())) {
if (platform::is_gpu_place(new_data_tensor->place())) {
paddle::framework::TensorCopySync(
*new_data_tensor, platform::CPUPlace(), &cpu_starts_tensor);
new_data = cpu_starts_tensor.data<T>();
@@ -99,7 +99,6 @@ BufferedReader::BufferedReader(
cpu_buffer_.resize(buffer_size);
cuda_buffer_.resize(buffer_size);
npu_buffer_.resize(buffer_size);
- mlu_buffer_.resize(buffer_size);
xpu_buffer_.resize(buffer_size);
custom_device_buffer_.resize(buffer_size);
ReadTillBufferFullAsync();
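`BufferedReader` keeps one ring of `buffer_size` staging slots per device family and, in `ReadNextImpl` (next hunk), moves a slot out of whichever ring matches `place_`; dropping MLU deletes one ring plus its branch. A skeletal sketch (hypothetical types, not Paddle's real class):

```cpp
#include <cstddef>
#include <vector>

struct TensorVec {};  // stand-in for one prefetched batch of tensors

// Hypothetical skeleton of the ctor above: every device family gets its own
// ring of staging slots so batches can be prefetched asynchronously.
struct BufferedReaderSketch {
  explicit BufferedReaderSketch(std::size_t buffer_size)
      : cpu_buffer_(buffer_size),
        cuda_buffer_(buffer_size),
        npu_buffer_(buffer_size),
        xpu_buffer_(buffer_size) {}

  std::vector<TensorVec> cpu_buffer_, cuda_buffer_, npu_buffer_, xpu_buffer_;
};
```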
@@ -387,8 +386,6 @@ void BufferedReader::ReadNextImpl(paddle::framework::LoDTensorArray *out) {
*out = std::move(cuda_buffer_[i]);
} else if (platform::is_npu_place(place_)) {
*out = std::move(npu_buffer_[i]);
-   } else if (platform::is_mlu_place(place_)) {
-     *out = std::move(mlu_buffer_[i]);
} else if (platform::is_xpu_place(place_)) {
*out = std::move(xpu_buffer_[i]);
} else if (platform::is_custom_place(place_)) {
@@ -78,7 +78,6 @@ class BufferedReader : public framework::DecoratedReader {
std::vector<TensorVec> cpu_buffer_;
std::vector<TensorVec> cuda_buffer_;
std::vector<TensorVec> npu_buffer_;
std::vector<TensorVec> mlu_buffer_;
std::vector<TensorVec> xpu_buffer_;
std::vector<TensorVec> custom_device_buffer_;
size_t prev_pos_{-1UL};
@@ -625,7 +625,6 @@ class ReduceBaseOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx.GetPlace()) ||
platform::is_npu_place(ctx.GetPlace()) ||
- platform::is_mlu_place(ctx.GetPlace()) ||
platform::is_xpu_place(ctx.GetPlace()) ||
platform::is_custom_place(ctx.GetPlace()),
true,
@@ -45,7 +45,6 @@ class SoftmaxOp : public framework::OperatorWithKernel {
platform::is_gpu_place(ctx.GetPlace()) ||
platform::is_npu_place(ctx.GetPlace()) ||
platform::is_xpu_place(ctx.GetPlace()) ||
- platform::is_mlu_place(ctx.GetPlace()) ||
platform::is_custom_place(ctx.GetPlace()),
true,
platform::errors::InvalidArgument(
@@ -131,7 +130,6 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
if (!(platform::is_gpu_place(ctx.GetPlace()) ||
platform::is_npu_place(ctx.GetPlace()) ||
platform::is_xpu_place(ctx.GetPlace()) ||
- platform::is_mlu_place(ctx.GetPlace()) ||
platform::is_custom_place(ctx.GetPlace())))
PADDLE_THROW(platform::errors::InvalidArgument(
"float16 can only be used on GPU/NPU/XPU/MLU and custom place"));
@@ -30,7 +30,6 @@ inline std::vector<int> get_repeat_times(
phi::DenseTensor cpu_repeat_tensor;
if (platform::is_gpu_place(repeat_tensor->place()) ||
platform::is_xpu_place(repeat_tensor->place()) ||
platform::is_mlu_place(repeat_tensor->place()) ||
platform::is_npu_place(repeat_tensor->place())) {
paddle::framework::TensorCopySync(
*repeat_tensor, platform::CPUPlace(), &cpu_repeat_tensor);
@@ -50,7 +49,6 @@ inline std::vector<int> get_repeat_times(
auto tensor = list_repeat_times_tensor[i];
if (platform::is_gpu_place(tensor->place()) ||
platform::is_xpu_place(tensor->place()) ||
platform::is_mlu_place(tensor->place()) ||
platform::is_npu_place(tensor->place())) {
phi::DenseTensor temp;
paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
@@ -129,7 +129,6 @@ cc_library(
dlpack
cudnn_workspace_helper
${XPU_CTX_DEPS}
- ${MLU_CTX_DEPS}
phi_backends
phi_device_context
generator
@@ -47,8 +47,6 @@ DeviceType Place2DeviceType(const platform::Place& place) {
return platform::DeviceType::IPU;
} else if (platform::is_npu_place(place)) {
return platform::DeviceType::NPU;
- } else if (platform::is_mlu_place(place)) {
-   return platform::DeviceType::MLU;
} else if (platform::is_custom_place(place)) {
return platform::DeviceType::CUSTOM_DEVICE;
} else {
@@ -109,7 +109,6 @@ constexpr DeviceType kCUDA = DeviceType::CUDA;
constexpr DeviceType kXPU = DeviceType::XPU;
constexpr DeviceType kNPU = DeviceType::NPU;
constexpr DeviceType kIPU = DeviceType::IPU;
- constexpr DeviceType kMLU = DeviceType::MLU;
constexpr DeviceType kCUSTOM_DEVICE = DeviceType::CUSTOM_DEVICE;
using DeviceContext = phi::DeviceContext;
@@ -46,21 +46,3 @@ DEFINE_INT_STATUS(STAT_npu4_mem_size)
DEFINE_INT_STATUS(STAT_npu5_mem_size)
DEFINE_INT_STATUS(STAT_npu6_mem_size)
DEFINE_INT_STATUS(STAT_npu7_mem_size)
- // For MLU
- DEFINE_INT_STATUS(STAT_mlu0_mem_size)
- DEFINE_INT_STATUS(STAT_mlu1_mem_size)
- DEFINE_INT_STATUS(STAT_mlu2_mem_size)
- DEFINE_INT_STATUS(STAT_mlu3_mem_size)
- DEFINE_INT_STATUS(STAT_mlu4_mem_size)
- DEFINE_INT_STATUS(STAT_mlu5_mem_size)
- DEFINE_INT_STATUS(STAT_mlu6_mem_size)
- DEFINE_INT_STATUS(STAT_mlu7_mem_size)
- DEFINE_INT_STATUS(STAT_mlu8_mem_size)
- DEFINE_INT_STATUS(STAT_mlu9_mem_size)
- DEFINE_INT_STATUS(STAT_mlu10_mem_size)
- DEFINE_INT_STATUS(STAT_mlu11_mem_size)
- DEFINE_INT_STATUS(STAT_mlu12_mem_size)
- DEFINE_INT_STATUS(STAT_mlu13_mem_size)
- DEFINE_INT_STATUS(STAT_mlu14_mem_size)
- DEFINE_INT_STATUS(STAT_mlu15_mem_size)
@@ -200,21 +200,3 @@ class StatRegistry {
USE_INT_STAT(STAT_npu5_mem_size); \
USE_INT_STAT(STAT_npu6_mem_size); \
USE_INT_STAT(STAT_npu7_mem_size)
- #define USE_MLU_MEM_STAT \
-   USE_INT_STAT(STAT_mlu0_mem_size); \
-   USE_INT_STAT(STAT_mlu1_mem_size); \
-   USE_INT_STAT(STAT_mlu2_mem_size); \
-   USE_INT_STAT(STAT_mlu3_mem_size); \
-   USE_INT_STAT(STAT_mlu4_mem_size); \
-   USE_INT_STAT(STAT_mlu5_mem_size); \
-   USE_INT_STAT(STAT_mlu6_mem_size); \
-   USE_INT_STAT(STAT_mlu7_mem_size); \
-   USE_INT_STAT(STAT_mlu8_mem_size); \
-   USE_INT_STAT(STAT_mlu9_mem_size); \
-   USE_INT_STAT(STAT_mlu10_mem_size); \
-   USE_INT_STAT(STAT_mlu11_mem_size); \
-   USE_INT_STAT(STAT_mlu12_mem_size); \
-   USE_INT_STAT(STAT_mlu13_mem_size); \
-   USE_INT_STAT(STAT_mlu14_mem_size); \
-   USE_INT_STAT(STAT_mlu15_mem_size)
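These two hunks delete the two halves of one macro family: `DEFINE_INT_STATUS` instantiates a named counter per device slot, and the `USE_*_MEM_STAT` macro re-declares the whole family at each consumer, so retiring a backend has to remove both in lockstep. A reduced sketch of the pattern (hypothetical slot names, not Paddle's real macros):

```cpp
#include <cstdint>

// Hypothetical reduction of the pattern: one macro defines the counter, a
// companion macro re-declares it where it is consumed.
#define DEFINE_INT_STATUS(name) std::int64_t name = 0;
#define USE_INT_STAT(name) extern std::int64_t name

DEFINE_INT_STATUS(STAT_dev0_mem_size)  // hypothetical slot names
DEFINE_INT_STATUS(STAT_dev1_mem_size)

#define USE_DEV_MEM_STAT            \
  USE_INT_STAT(STAT_dev0_mem_size); \
  USE_INT_STAT(STAT_dev1_mem_size)
```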
@@ -33,10 +33,6 @@ bool is_xpu_place(const Place &p) {
return p.GetType() == phi::AllocationType::XPU;
}
bool is_mlu_place(const Place &p) {
return p.GetType() == phi::AllocationType::MLU;
}
bool is_npu_place(const Place &p) {
return p.GetType() == phi::AllocationType::NPU;
}
@@ -77,8 +73,6 @@ bool is_same_place(const Place &p1, const Place &p2) {
return true;
} else if (is_xpu_place(p1)) {
return p1 == p2;
- } else if (is_mlu_place(p1)) {
-   return p1 == p2;
} else if (is_npu_place(p1)) {
return p1 == p2;
} else if (is_ipu_place(p1)) {
@@ -48,7 +48,6 @@ class PlaceHelper {
bool is_gpu_place(const Place &);
bool is_xpu_place(const Place &);
bool is_npu_place(const Place &);
bool is_mlu_place(const Place &);
bool is_ipu_place(const Place &);
bool is_cpu_place(const Place &);
bool is_cuda_pinned_place(const Place &);
@@ -257,7 +257,6 @@ void ChromeTracingLogger::LogHostTraceEventNode(
case TracerEventType::UserDefined:
case TracerEventType::OperatorInner:
case TracerEventType::Communication:
- case TracerEventType::MluRuntime:
case TracerEventType::NumTypes:
default:
output_file_stream_ << string_format(
@@ -34,7 +34,6 @@ namespace platform {
static constexpr uint32_t kProfileCPUOptionBit = 0;
static constexpr uint32_t kProfileGPUOptionBit = 1;
- static constexpr uint32_t kProfileMLUOptionBit = 2;
static constexpr uint32_t kProfileCustomDeviceOptionBit = 3;
void SynchronizeDevice();
@@ -165,8 +165,7 @@ const char* StringTracerEventType(TracerEventType type) {
"Optimization",
"Communication",
"PythonOp",
"PythonUserDefined",
"MluRuntime"};
"PythonUserDefined"};
return categary_name_[static_cast<int>(type)];
}
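`StringTracerEventType` indexes a name table by the enum value, so removing `MluRuntime` from the table must track the enum definition exactly. A sketch of a compile-time guard for that invariant (hypothetical, cut-down enum, not Paddle's full `TracerEventType`):

```cpp
#include <cstddef>
#include <iterator>

enum class TracerEventType : int {
  Communication,
  PythonOp,
  PythonUserDefined,
  NumTypes  // sentinel: count of real entries
};

// One name per enum entry; the static_assert catches drift when an entry
// such as MluRuntime is removed from one side but not the other.
constexpr const char* kTypeNames[] = {"Communication", "PythonOp",
                                      "PythonUserDefined"};

static_assert(std::size(kTypeNames) ==
                  static_cast<std::size_t>(TracerEventType::NumTypes),
              "name table out of sync with TracerEventType");

const char* StringTracerEventTypeSketch(TracerEventType type) {
  return kTypeNames[static_cast<int>(type)];
}
```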
@@ -656,8 +656,6 @@ void BindPlace(pybind11::module &m) { // NOLINT
[](platform::Place &self) {
return platform::is_cuda_pinned_place(self);
})
.def("is_mlu_place",
[](platform::Place &self) { return platform::is_mlu_place(self); })
.def(
"is_custom_place",
[](platform::Place &self) { return platform::is_custom_place(self); })
@@ -665,7 +663,6 @@
.def("xpu_device_id", [](platform::Place &self) { return self.device; })
.def("npu_device_id", [](platform::Place &self) { return self.device; })
.def("ipu_device_id", [](platform::Place &self) { return self.device; })
.def("mlu_device_id", [](platform::Place &self) { return self.device; })
.def("custom_device_id",
[](platform::Place &self) { return self.device; })
.def("set_place",
@@ -401,7 +401,7 @@ void BindTensor(pybind11::module &m) { // NOLINT
Args:
lod (numpy.ndarray): The data to set.
-          place (CPUPlace|CUDAPlace|XPUPlace|IPUPlace|CUDAPinnedPlace|NPUPlace|MLUPlace): The place where the
+          place (CPUPlace|CUDAPlace|XPUPlace|IPUPlace|CUDAPinnedPlace|NPUPlace): The place where the
Tensor is to be set.
zero_copy (bool, optional): Whether to share memory with the input numpy array.
This parameter only works with CPUPlace. Default: False.
@@ -961,7 +961,6 @@ inline py::array TensorToPyArray(const phi::DenseTensor &tensor,
bool is_gpu_tensor = platform::is_gpu_place(tensor.place());
bool is_xpu_tensor = platform::is_xpu_place(tensor.place());
bool is_npu_tensor = platform::is_npu_place(tensor.place());
- bool is_mlu_tensor = platform::is_mlu_place(tensor.place());
bool is_custom_device_tensor = platform::is_custom_place(tensor.place());
const auto &tensor_dims = tensor.dims();
auto tensor_dtype = framework::TransToProtoVarType(tensor.dtype());
@@ -982,7 +981,7 @@ inline py::array TensorToPyArray(const phi::DenseTensor &tensor,
std::string py_dtype_str = details::TensorDTypeToPyDTypeStr(
framework::TransToProtoVarType(tensor.dtype()));
if (!is_gpu_tensor && !is_xpu_tensor && !is_npu_tensor && !is_mlu_tensor &&
if (!is_gpu_tensor && !is_xpu_tensor && !is_npu_tensor &&
!is_custom_device_tensor) {
if (!need_deep_copy) {
auto base = py::cast(std::move(tensor));
@@ -95,9 +95,6 @@ inline std::ostream& operator<<(std::ostream& os, Backend backend) {
case Backend::NPU:
os << "NPU";
break;
-   case Backend::MLU:
-     os << "MLU";
-     break;
case Backend::ONEDNN:
os << "ONEDNN";
break;
@@ -143,8 +140,6 @@ inline Backend StringToBackend(const char* backend_cstr) {
return Backend::XPU;
} else if (s == std::string("NPU")) {
return Backend::NPU;
} else if (s == std::string("MLU")) {
return Backend::MLU;
} else if (s == std::string("OneDNN")) {
return Backend::ONEDNN;
} else if (s == std::string("GPUDNN")) {
@@ -181,8 +176,6 @@ inline std::string BackendToString(const Backend& backend) {
return "XPU";
case Backend::NPU:
return "NPU";
-   case Backend::MLU:
-     return "MLU";
case Backend::ONEDNN:
return "ONEDNN";
case Backend::GPUDNN:
@@ -41,8 +41,6 @@ const char *AllocationTypeStr(AllocationType type) {
return "npu_pinned";
case AllocationType::IPU:
return "ipu";
-   case AllocationType::MLU:
-     return "mlu";
default:
PD_THROW("Invalid phi device type.");
return {};
@@ -41,8 +41,6 @@ Backend TransToPhiBackend(const phi::Place& place) {
return Backend::NPU;
case AllocationType::IPU:
return Backend::IPU;
-   case AllocationType::MLU:
-     return Backend::MLU;
case AllocationType::CUSTOM:
return static_cast<Backend>(
static_cast<size_t>(Backend::NUM_BACKENDS) +
@@ -108,8 +108,6 @@ def _get_default_nprocs():
return core.get_cuda_device_count()
elif 'xpu' in device:
return core.get_xpu_device_count()
-    elif 'mlu' in device:
-        return core.get_custom_device_count('mlu')
elif 'cpu' in device:
return multiprocessing.cpu_count()
else:
@@ -2734,9 +2734,7 @@ def cross_entropy(
valid_label = (
paddle.cast(label != ignore_index, dtype=label.dtype) * label
)
-        if core.is_compiled_with_custom_device(
-            "npu"
-        ) or core.is_compiled_with_custom_device("mlu"):
+        if core.is_compiled_with_custom_device("npu"):
if not soft_label:
_, out = _legacy_C_ops.softmax_with_cross_entropy(
input,
@@ -499,8 +499,6 @@ class Profiler:
profileoption.trace_switch |= 1
if ProfilerTarget.GPU in self.targets:
profileoption.trace_switch |= 1 << 1
-        if ProfilerTarget.MLU in self.targets:
-            profileoption.trace_switch |= 1 << 2
if ProfilerTarget.CUSTOM_DEVICE in self.targets:
profileoption.trace_switch |= 1 << 3
if not custom_device_types:
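`trace_switch` here is a bitmask whose layout matches the `kProfile*OptionBit` constants in the earlier hunk: CPU is bit 0, GPU bit 1, MLU was bit 2 (now retired), custom devices bit 3. A C++ sketch of the same convention (hypothetical helper, mirroring the Python above):

```cpp
#include <cstdint>

constexpr std::uint32_t kProfileCPUOptionBit = 0;
constexpr std::uint32_t kProfileGPUOptionBit = 1;
constexpr std::uint32_t kProfileCustomDeviceOptionBit = 3;

// Hypothetical helper mirroring Profiler.__init__: each enabled target sets
// its bit; bit 2 is simply left unused after the MLU removal.
std::uint32_t MakeTraceSwitch(bool cpu, bool gpu, bool custom_device) {
  std::uint32_t trace_switch = 0;
  if (cpu) trace_switch |= 1u << kProfileCPUOptionBit;
  if (gpu) trace_switch |= 1u << kProfileGPUOptionBit;
  if (custom_device) trace_switch |= 1u << kProfileCustomDeviceOptionBit;
  return trace_switch;
}
```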
@@ -343,7 +343,6 @@ class PRChecker:
elif (
('/xpu/' in filename.lower())
or ('/npu/' in filename.lower())
- or ('/mlu/' in filename.lower())
or ('/ipu/' in filename.lower())
):
filterFiles.append(filename)