提交 edc5c06e 编写于 作者: H hong19860320 提交者: GitHub

[LITE][XPU] Supporting llvm and xpu device target (#2711)

上级 f81222da
......@@ -36,8 +36,11 @@ std::unique_ptr<xtcl::network::xRuntimeInstance> Device::Build(
}
xtcl::xNetwork network =
builder->FinalizeNetwork(xtcl::relay::TupleNode::make(all_outs));
auto target = xtcl::Target::Create(device_name_);
auto compiler = xtcl::network::xTensorCompiler(network, target);
auto target = xtcl::NullValue<xtcl::Target>();
if (!target_.empty()) {
target = xtcl::Target::Create(target_);
}
xtcl::network::xTensorCompiler compiler(network, target);
compiler.SetParams(*params); // Set the data of constant tensors
compiler.Build();
VLOG(3) << "[XPU] Build done";
......
......@@ -15,6 +15,7 @@
#pragma once
#include <xtcl/xtcl.h>
#include <cstdlib>
#include <memory>
#include <string>
#include <utility>
......@@ -30,7 +31,18 @@ class Device {
static Device x;
return x;
}
Device() {}
// Initializes the device description from the process environment. Both
// variables are optional: when one is not set, the corresponding member is
// left untouched.
Device() {
// XPU_DEVICE_NAME, when set, is copied into name_.
char* name = std::getenv("XPU_DEVICE_NAME");
if (name) {
name_ = std::string(name);
}
// XPU_DEVICE_TARGET is used for XPU model building; the supported values are
// 'llvm' and 'xpu -libs=xdnn'. When set, it is copied into target_.
char* target = std::getenv("XPU_DEVICE_TARGET");
if (target) {
target_ = std::string(target);
}
}
// Build the XPU graph to the XPU runtime, return the XPU runtime which can be
// used to run inference.
......@@ -39,10 +51,12 @@ class Device {
xtcl::network::xTensorCompiler::ParamNDArrayMap* params,
std::vector<xtcl::xExpr*>* outputs);
// Returns a copy of name_ (the value read from XPU_DEVICE_NAME by the
// constructor, if that variable was set).
const std::string name() const { return name_; }
// Returns a copy of target_ (the value read from XPU_DEVICE_TARGET by the
// constructor, if that variable was set).
const std::string target() const { return target_; }
private:
// Keep reserved fields
int device_id_{0};
std::string device_name_{"llvm"};
std::string name_{""};
std::string target_{""};
};
} // namespace xpu
......
......@@ -103,7 +103,7 @@ DLDeviceType CvtDLDeviceType(TargetType in_type) {
out_type = kDLGPU;
break;
case TARGET(kXPU):
out_type = kDLCPU;
out_type = static_cast<DLDeviceType>(kDLXPU);
break;
default:
LOG(FATAL) << "[XPU] Can not convert target type(" << TargetToStr(in_type)
......
......@@ -175,7 +175,7 @@ int SubgraphEngine::LaunchDeviceProgram() {
// Update the data pointer of DLTensor to track the origin input tensors
device_itensors_[i].data =
const_cast<void*>(origin_itensors_[i]->raw_data());
device_program_->SetInputZeroCopy(device_inames_[i], &device_itensors_[i]);
device_program_->SetInput(device_inames_[i], &device_itensors_[i]);
}
// Run the XPU model
auto GetCurrentUS = []() -> double {
......
......@@ -104,6 +104,11 @@ function main {
build_xpu
shift
;;
full_publish)
TARGET_NAME=publish_inference
build_xpu
shift
;;
*)
# unknown option
print_usage
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册