提交 59ea536d 编写于 作者: X xiaojun.lin

MS-565 1. del wrapper/knowhere


Former-commit-id: 3905da5c0716c6fe8317588c27a61b4a5c36316e
上级 ed105628
......@@ -35,7 +35,7 @@ aux_source_directory(db/engine db_engine_files)
aux_source_directory(db/insert db_insert_files)
aux_source_directory(db/meta db_meta_files)
aux_source_directory(metrics metrics_files)
aux_source_directory(wrapper/knowhere knowhere_files)
aux_source_directory(wrapper wrapper_files)
aux_source_directory(scheduler/action scheduler_action_files)
aux_source_directory(scheduler/event scheduler_event_files)
......@@ -88,7 +88,7 @@ set(db_files
${db_meta_files}
${db_scheduler_files}
${metrics_files}
${knowhere_files}
${wrapper_files}
${utils_files}
)
......
......@@ -18,7 +18,7 @@
#pragma once
#include "wrapper/knowhere/vec_index.h"
#include "src/wrapper/vec_index.h"
#include <memory>
......
......@@ -23,8 +23,8 @@
#include "utils/CommonUtil.h"
#include "utils/Exception.h"
#include "wrapper/knowhere/vec_index.h"
#include "wrapper/knowhere/vec_impl.h"
#include "src/wrapper/vec_index.h"
#include "src/wrapper/vec_impl.h"
#include "knowhere/common/exception.h"
#include <stdexcept>
......
......@@ -18,7 +18,7 @@
#pragma once
#include "ExecutionEngine.h"
#include "wrapper/knowhere/vec_index.h"
#include "src/wrapper/vec_index.h"
#include <memory>
#include <string>
......
......@@ -32,7 +32,7 @@
#include <unistd.h>
#include <string.h>
#include <src/scheduler/SchedInst.h>
#include "wrapper/knowhere/KnowhereResource.h"
#include "src/wrapper/KnowhereResource.h"
#include "metrics/Metrics.h"
#include "DBWrapper.h"
......
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <cstddef>
#include <limits>
#include <cstddef>
namespace zilliz {
namespace milvus {
namespace engine {
using Bool = int8_t;
using Byte = uint8_t;
using Word = unsigned long;
using EnumType = uint64_t;
using Float32 = float;
using Float64 = double;
constexpr bool kBoolMax = std::numeric_limits<bool>::max();
constexpr bool kBoolMin = std::numeric_limits<bool>::lowest();
constexpr int8_t kInt8Max = std::numeric_limits<int8_t>::max();
constexpr int8_t kInt8Min = std::numeric_limits<int8_t>::lowest();
constexpr int16_t kInt16Max = std::numeric_limits<int16_t>::max();
constexpr int16_t kInt16Min = std::numeric_limits<int16_t>::lowest();
constexpr int32_t kInt32Max = std::numeric_limits<int32_t>::max();
constexpr int32_t kInt32Min = std::numeric_limits<int32_t>::lowest();
constexpr int64_t kInt64Max = std::numeric_limits<int64_t>::max();
constexpr int64_t kInt64Min = std::numeric_limits<int64_t>::lowest();
constexpr float kFloatMax = std::numeric_limits<float>::max();
constexpr float kFloatMin = std::numeric_limits<float>::lowest();
constexpr double kDoubleMax = std::numeric_limits<double>::max();
constexpr double kDoubleMin = std::numeric_limits<double>::lowest();
constexpr uint32_t kFloat32DecimalPrecision = std::numeric_limits<Float32>::digits10;
constexpr uint32_t kFloat64DecimalPrecision = std::numeric_limits<Float64>::digits10;
constexpr uint8_t kByteWidth = 8;
constexpr uint8_t kCharWidth = kByteWidth;
constexpr uint8_t kWordWidth = sizeof(Word) * kByteWidth;
constexpr uint8_t kEnumTypeWidth = sizeof(EnumType) * kByteWidth;
template<typename T>
inline size_t
WidthOf() { return sizeof(T) << 3; }
template<typename T>
inline size_t
WidthOf(const T &) { return sizeof(T) << 3; }
}
} // namespace lib
} // namespace zilliz
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "faiss/FaissAssert.h"
#include "faiss/gpu/utils/Limits.cuh"
#include "Arithmetic.h"
namespace faiss {
namespace gpu {
constexpr bool kBoolMax = zilliz::milvus::engine::kBoolMax;
constexpr bool kBoolMin = zilliz::milvus::engine::kBoolMin;
template<>
struct Limits<bool> {
static __device__ __host__
inline bool getMin() {
return kBoolMin;
}
static __device__ __host__
inline bool getMax() {
return kBoolMax;
}
};
constexpr int8_t kInt8Max = zilliz::milvus::engine::kInt8Max;
constexpr int8_t kInt8Min = zilliz::milvus::engine::kInt8Min;
template<>
struct Limits<int8_t> {
static __device__ __host__
inline int8_t getMin() {
return kInt8Min;
}
static __device__ __host__
inline int8_t getMax() {
return kInt8Max;
}
};
constexpr int16_t kInt16Max = zilliz::milvus::engine::kInt16Max;
constexpr int16_t kInt16Min = zilliz::milvus::engine::kInt16Min;
template<>
struct Limits<int16_t> {
static __device__ __host__
inline int16_t getMin() {
return kInt16Min;
}
static __device__ __host__
inline int16_t getMax() {
return kInt16Max;
}
};
constexpr int64_t kInt64Max = zilliz::milvus::engine::kInt64Max;
constexpr int64_t kInt64Min = zilliz::milvus::engine::kInt64Min;
template<>
struct Limits<int64_t> {
static __device__ __host__
inline int64_t getMin() {
return kInt64Min;
}
static __device__ __host__
inline int64_t getMax() {
return kInt64Max;
}
};
constexpr double kDoubleMax = zilliz::milvus::engine::kDoubleMax;
constexpr double kDoubleMin = zilliz::milvus::engine::kDoubleMin;
template<>
struct Limits<double> {
static __device__ __host__
inline double getMin() {
return kDoubleMin;
}
static __device__ __host__
inline double getMax() {
return kDoubleMax;
}
};
}
}
#include "faiss/gpu/utils/DeviceUtils.h"
#include "faiss/gpu/utils/MathOperators.cuh"
#include "faiss/gpu/utils/Pair.cuh"
#include "faiss/gpu/utils/Reductions.cuh"
#include "faiss/gpu/utils/Select.cuh"
#include "faiss/gpu/utils/Tensor.cuh"
#include "faiss/gpu/utils/StaticUtils.h"
#include "Topk.h"
namespace zilliz {
namespace milvus {
namespace engine {
namespace gpu {
constexpr int kWarpSize = 32;
template<typename T, int Dim, bool InnerContig>
using Tensor = faiss::gpu::Tensor<T, Dim, InnerContig>;
template<typename T, typename U>
using Pair = faiss::gpu::Pair<T, U>;
// select kernel for k == 1
template<typename T, int kRowsPerBlock, int kBlockSize>
__global__ void topkSelectMin1(Tensor<T, 2, true> productDistances,
Tensor<T, 2, true> outDistances,
Tensor<int64_t, 2, true> outIndices) {
// Each block handles kRowsPerBlock rows of the distances (results)
Pair<T, int64_t> threadMin[kRowsPerBlock];
__shared__
Pair<T, int64_t> blockMin[kRowsPerBlock * (kBlockSize / kWarpSize)];
T distance[kRowsPerBlock];
#pragma unroll
for (int i = 0; i < kRowsPerBlock; ++i) {
threadMin[i].k = faiss::gpu::Limits<T>::getMax();
threadMin[i].v = -1;
}
// blockIdx.x: which chunk of rows we are responsible for updating
int rowStart = blockIdx.x * kRowsPerBlock;
// FIXME: if we have exact multiples, don't need this
bool endRow = (blockIdx.x == gridDim.x - 1);
if (endRow) {
if (productDistances.getSize(0) % kRowsPerBlock == 0) {
endRow = false;
}
}
if (endRow) {
for (int row = rowStart; row < productDistances.getSize(0); ++row) {
for (int col = threadIdx.x; col < productDistances.getSize(1);
col += blockDim.x) {
distance[0] = productDistances[row][col];
if (faiss::gpu::Math<T>::lt(distance[0], threadMin[0].k)) {
threadMin[0].k = distance[0];
threadMin[0].v = col;
}
}
// Reduce within the block
threadMin[0] =
faiss::gpu::blockReduceAll<Pair<T, int64_t>, faiss::gpu::Min<Pair<T, int64_t> >, false, false>(
threadMin[0], faiss::gpu::Min<Pair<T, int64_t> >(), blockMin);
if (threadIdx.x == 0) {
outDistances[row][0] = threadMin[0].k;
outIndices[row][0] = threadMin[0].v;
}
// so we can use the shared memory again
__syncthreads();
threadMin[0].k = faiss::gpu::Limits<T>::getMax();
threadMin[0].v = -1;
}
} else {
for (int col = threadIdx.x; col < productDistances.getSize(1);
col += blockDim.x) {
#pragma unroll
for (int row = 0; row < kRowsPerBlock; ++row) {
distance[row] = productDistances[rowStart + row][col];
}
#pragma unroll
for (int row = 0; row < kRowsPerBlock; ++row) {
if (faiss::gpu::Math<T>::lt(distance[row], threadMin[row].k)) {
threadMin[row].k = distance[row];
threadMin[row].v = col;
}
}
}
// Reduce within the block
faiss::gpu::blockReduceAll<kRowsPerBlock, Pair<T, int64_t>, faiss::gpu::Min<Pair<T, int64_t> >, false, false>(
threadMin, faiss::gpu::Min<Pair<T, int64_t> >(), blockMin);
if (threadIdx.x == 0) {
#pragma unroll
for (int row = 0; row < kRowsPerBlock; ++row) {
outDistances[rowStart + row][0] = threadMin[row].k;
outIndices[rowStart + row][0] = threadMin[row].v;
}
}
}
}
// L2 + select kernel for k > 1, no re-use of ||c||^2
template<typename T, int NumWarpQ, int NumThreadQ, int ThreadsPerBlock>
__global__ void topkSelectMinK(Tensor<T, 2, true> productDistances,
Tensor<T, 2, true> outDistances,
Tensor<int64_t, 2, true> outIndices,
int k, T initK) {
// Each block handles a single row of the distances (results)
constexpr int kNumWarps = ThreadsPerBlock / kWarpSize;
__shared__
T smemK[kNumWarps * NumWarpQ];
__shared__
int64_t smemV[kNumWarps * NumWarpQ];
faiss::gpu::BlockSelect<T, int64_t, false, faiss::gpu::Comparator<T>,
NumWarpQ, NumThreadQ, ThreadsPerBlock>
heap(initK, -1, smemK, smemV, k);
int row = blockIdx.x;
// Whole warps must participate in the selection
int limit = faiss::gpu::utils::roundDown(productDistances.getSize(1), kWarpSize);
int i = threadIdx.x;
for (; i < limit; i += blockDim.x) {
T v = productDistances[row][i];
heap.add(v, i);
}
if (i < productDistances.getSize(1)) {
T v = productDistances[row][i];
heap.addThreadQ(v, i);
}
heap.reduce();
for (int i = threadIdx.x; i < k; i += blockDim.x) {
outDistances[row][i] = smemK[i];
outIndices[row][i] = smemV[i];
}
}
// FIXME: no TVec specialization
template<typename T>
void runTopKSelectMin(Tensor<T, 2, true> &productDistances,
Tensor<T, 2, true> &outDistances,
Tensor<int64_t, 2, true> &outIndices,
int k,
cudaStream_t stream) {
FAISS_ASSERT(productDistances.getSize(0) == outDistances.getSize(0));
FAISS_ASSERT(productDistances.getSize(0) == outIndices.getSize(0));
FAISS_ASSERT(outDistances.getSize(1) == k);
FAISS_ASSERT(outIndices.getSize(1) == k);
FAISS_ASSERT(k <= 1024);
if (k == 1) {
constexpr int kThreadsPerBlock = 256;
constexpr int kRowsPerBlock = 8;
auto block = dim3(kThreadsPerBlock);
auto grid = dim3(faiss::gpu::utils::divUp(outDistances.getSize(0), kRowsPerBlock));
topkSelectMin1<T, kRowsPerBlock, kThreadsPerBlock>
<< < grid, block, 0, stream >> > (productDistances, outDistances, outIndices);
} else {
constexpr int kThreadsPerBlock = 128;
auto block = dim3(kThreadsPerBlock);
auto grid = dim3(outDistances.getSize(0));
#define RUN_TOPK_SELECT_MIN(NUM_WARP_Q, NUM_THREAD_Q) \
do { \
topkSelectMinK<T, NUM_WARP_Q, NUM_THREAD_Q, kThreadsPerBlock> \
<<<grid, block, 0, stream>>>(productDistances, \
outDistances, outIndices, \
k, faiss::gpu::Limits<T>::getMax()); \
} while (0)
if (k <= 32) {
RUN_TOPK_SELECT_MIN(32, 2);
} else if (k <= 64) {
RUN_TOPK_SELECT_MIN(64, 3);
} else if (k <= 128) {
RUN_TOPK_SELECT_MIN(128, 3);
} else if (k <= 256) {
RUN_TOPK_SELECT_MIN(256, 4);
} else if (k <= 512) {
RUN_TOPK_SELECT_MIN(512, 8);
} else if (k <= 1024) {
RUN_TOPK_SELECT_MIN(1024, 8);
} else {
FAISS_ASSERT(false);
}
}
CUDA_TEST_ERROR();
}
////////////////////////////////////////////////////////////
// select kernel for k == 1
template<typename T, int kRowsPerBlock, int kBlockSize>
__global__ void topkSelectMax1(Tensor<T, 2, true> productDistances,
Tensor<T, 2, true> outDistances,
Tensor<int64_t, 2, true> outIndices) {
// Each block handles kRowsPerBlock rows of the distances (results)
Pair<T, int64_t> threadMax[kRowsPerBlock];
__shared__
Pair<T, int64_t> blockMax[kRowsPerBlock * (kBlockSize / kWarpSize)];
T distance[kRowsPerBlock];
#pragma unroll
for (int i = 0; i < kRowsPerBlock; ++i) {
threadMax[i].k = faiss::gpu::Limits<T>::getMin();
threadMax[i].v = -1;
}
// blockIdx.x: which chunk of rows we are responsible for updating
int rowStart = blockIdx.x * kRowsPerBlock;
// FIXME: if we have exact multiples, don't need this
bool endRow = (blockIdx.x == gridDim.x - 1);
if (endRow) {
if (productDistances.getSize(0) % kRowsPerBlock == 0) {
endRow = false;
}
}
if (endRow) {
for (int row = rowStart; row < productDistances.getSize(0); ++row) {
for (int col = threadIdx.x; col < productDistances.getSize(1);
col += blockDim.x) {
distance[0] = productDistances[row][col];
if (faiss::gpu::Math<T>::gt(distance[0], threadMax[0].k)) {
threadMax[0].k = distance[0];
threadMax[0].v = col;
}
}
// Reduce within the block
threadMax[0] =
faiss::gpu::blockReduceAll<Pair<T, int64_t>, faiss::gpu::Max<Pair<T, int64_t> >, false, false>(
threadMax[0], faiss::gpu::Max<Pair<T, int64_t> >(), blockMax);
if (threadIdx.x == 0) {
outDistances[row][0] = threadMax[0].k;
outIndices[row][0] = threadMax[0].v;
}
// so we can use the shared memory again
__syncthreads();
threadMax[0].k = faiss::gpu::Limits<T>::getMin();
threadMax[0].v = -1;
}
} else {
for (int col = threadIdx.x; col < productDistances.getSize(1);
col += blockDim.x) {
#pragma unroll
for (int row = 0; row < kRowsPerBlock; ++row) {
distance[row] = productDistances[rowStart + row][col];
}
#pragma unroll
for (int row = 0; row < kRowsPerBlock; ++row) {
if (faiss::gpu::Math<T>::gt(distance[row], threadMax[row].k)) {
threadMax[row].k = distance[row];
threadMax[row].v = col;
}
}
}
// Reduce within the block
faiss::gpu::blockReduceAll<kRowsPerBlock, Pair<T, int64_t>, faiss::gpu::Max<Pair<T, int64_t> >, false, false>(
threadMax, faiss::gpu::Max<Pair<T, int64_t> >(), blockMax);
if (threadIdx.x == 0) {
#pragma unroll
for (int row = 0; row < kRowsPerBlock; ++row) {
outDistances[rowStart + row][0] = threadMax[row].k;
outIndices[rowStart + row][0] = threadMax[row].v;
}
}
}
}
// L2 + select kernel for k > 1, no re-use of ||c||^2
template<typename T, int NumWarpQ, int NumThreadQ, int ThreadsPerBlock>
__global__ void topkSelectMaxK(Tensor<T, 2, true> productDistances,
Tensor<T, 2, true> outDistances,
Tensor<int64_t, 2, true> outIndices,
int k, T initK) {
// Each block handles a single row of the distances (results)
constexpr int kNumWarps = ThreadsPerBlock / kWarpSize;
__shared__
T smemK[kNumWarps * NumWarpQ];
__shared__
int64_t smemV[kNumWarps * NumWarpQ];
faiss::gpu::BlockSelect<T, int64_t, true, faiss::gpu::Comparator<T>,
NumWarpQ, NumThreadQ, ThreadsPerBlock>
heap(initK, -1, smemK, smemV, k);
int row = blockIdx.x;
// Whole warps must participate in the selection
int limit = faiss::gpu::utils::roundDown(productDistances.getSize(1), kWarpSize);
int i = threadIdx.x;
for (; i < limit; i += blockDim.x) {
T v = productDistances[row][i];
heap.add(v, i);
}
if (i < productDistances.getSize(1)) {
T v = productDistances[row][i];
heap.addThreadQ(v, i);
}
heap.reduce();
for (int i = threadIdx.x; i < k; i += blockDim.x) {
outDistances[row][i] = smemK[i];
outIndices[row][i] = smemV[i];
}
}
// FIXME: no TVec specialization
template<typename T>
void runTopKSelectMax(Tensor<T, 2, true> &productDistances,
Tensor<T, 2, true> &outDistances,
Tensor<int64_t, 2, true> &outIndices,
int k,
cudaStream_t stream) {
FAISS_ASSERT(productDistances.getSize(0) == outDistances.getSize(0));
FAISS_ASSERT(productDistances.getSize(0) == outIndices.getSize(0));
FAISS_ASSERT(outDistances.getSize(1) == k);
FAISS_ASSERT(outIndices.getSize(1) == k);
FAISS_ASSERT(k <= 1024);
if (k == 1) {
constexpr int kThreadsPerBlock = 256;
constexpr int kRowsPerBlock = 8;
auto block = dim3(kThreadsPerBlock);
auto grid = dim3(faiss::gpu::utils::divUp(outDistances.getSize(0), kRowsPerBlock));
topkSelectMax1<T, kRowsPerBlock, kThreadsPerBlock>
<< < grid, block, 0, stream >> > (productDistances, outDistances, outIndices);
} else {
constexpr int kThreadsPerBlock = 128;
auto block = dim3(kThreadsPerBlock);
auto grid = dim3(outDistances.getSize(0));
#define RUN_TOPK_SELECT_MAX(NUM_WARP_Q, NUM_THREAD_Q) \
do { \
topkSelectMaxK<T, NUM_WARP_Q, NUM_THREAD_Q, kThreadsPerBlock> \
<<<grid, block, 0, stream>>>(productDistances, \
outDistances, outIndices, \
k, faiss::gpu::Limits<T>::getMin()); \
} while (0)
if (k <= 32) {
RUN_TOPK_SELECT_MAX(32, 2);
} else if (k <= 64) {
RUN_TOPK_SELECT_MAX(64, 3);
} else if (k <= 128) {
RUN_TOPK_SELECT_MAX(128, 3);
} else if (k <= 256) {
RUN_TOPK_SELECT_MAX(256, 4);
} else if (k <= 512) {
RUN_TOPK_SELECT_MAX(512, 8);
} else if (k <= 1024) {
RUN_TOPK_SELECT_MAX(1024, 8);
} else {
FAISS_ASSERT(false);
}
}
CUDA_TEST_ERROR();
}
//////////////////////////////////////////////////////////////
template<typename T>
void runTopKSelect(Tensor<T, 2, true> &productDistances,
Tensor<T, 2, true> &outDistances,
Tensor<int64_t, 2, true> &outIndices,
bool dir,
int k,
cudaStream_t stream) {
if (dir) {
runTopKSelectMax<T>(productDistances,
outDistances,
outIndices,
k,
stream);
} else {
runTopKSelectMin<T>(productDistances,
outDistances,
outIndices,
k,
stream);
}
}
template<typename T>
void TopK(T *input,
int length,
int k,
T *output,
int64_t *idx,
// Ordering order_flag,
cudaStream_t stream) {
// bool dir = (order_flag == Ordering::kAscending ? false : true);
bool dir = 0;
Tensor<T, 2, true> t_input(input, {1, length});
Tensor<T, 2, true> t_output(output, {1, k});
Tensor<int64_t, 2, true> t_idx(idx, {1, k});
runTopKSelect<T>(t_input, t_output, t_idx, dir, k, stream);
}
//INSTANTIATION_TOPK_2(bool);
//INSTANTIATION_TOPK_2(int8_t);
//INSTANTIATION_TOPK_2(int16_t);
INSTANTIATION_TOPK_2(int32_t);
//INSTANTIATION_TOPK_2(int64_t);
INSTANTIATION_TOPK_2(float);
//INSTANTIATION_TOPK_2(double);
//INSTANTIATION_TOPK(TimeInterval);
//INSTANTIATION_TOPK(Float128);
//INSTANTIATION_TOPK(char);
}
void TopK(float *host_input,
int length,
int k,
float *output,
int64_t *indices) {
float *device_input, *device_output;
int64_t *ids;
cudaMalloc((void **) &device_input, sizeof(float) * length);
cudaMalloc((void **) &device_output, sizeof(float) * k);
cudaMalloc((void **) &ids, sizeof(int64_t) * k);
cudaMemcpy(device_input, host_input, sizeof(float) * length, cudaMemcpyHostToDevice);
gpu::TopK<float>(device_input, length, k, device_output, ids, nullptr);
cudaMemcpy(output, device_output, sizeof(float) * k, cudaMemcpyDeviceToHost);
cudaMemcpy(indices, ids, sizeof(int64_t) * k, cudaMemcpyDeviceToHost);
cudaFree(device_input);
cudaFree(device_output);
cudaFree(ids);
}
}
}
}
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cuda.h>
#include <cuda_runtime.h>
namespace zilliz {
namespace milvus {
namespace engine {
namespace gpu {
template<typename T>
void
TopK(T *input,
int length,
int k,
T *output,
int64_t *indices,
// Ordering order_flag,
cudaStream_t stream = nullptr);
#define INSTANTIATION_TOPK_2(T) \
template void \
TopK<T>(T *input, \
int length, \
int k, \
T *output, \
int64_t *indices, \
cudaStream_t stream)
// Ordering order_flag, \
// cudaStream_t stream)
//extern INSTANTIATION_TOPK_2(int8_t);
//extern INSTANTIATION_TOPK_2(int16_t);
extern INSTANTIATION_TOPK_2(int32_t);
//extern INSTANTIATION_TOPK_2(int64_t);
extern INSTANTIATION_TOPK_2(float);
//extern INSTANTIATION_TOPK_2(double);
//extern INSTANTIATION_TOPK(TimeInterval);
//extern INSTANTIATION_TOPK(Float128);
}
// User Interface.
void TopK(float *input,
int length,
int k,
float *output,
int64_t *indices);
}
}
}
......@@ -21,9 +21,9 @@ include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include")
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
set(knowhere_src
${MILVUS_ENGINE_SRC}/wrapper/knowhere/data_transfer.cpp
${MILVUS_ENGINE_SRC}/wrapper/knowhere/vec_impl.cpp
${MILVUS_ENGINE_SRC}/wrapper/knowhere/vec_index.cpp)
${MILVUS_ENGINE_SRC}/wrapper/data_transfer.cpp
${MILVUS_ENGINE_SRC}/wrapper/vec_impl.cpp
${MILVUS_ENGINE_SRC}/wrapper/vec_index.cpp)
set(helper
utils.cpp
......
......@@ -19,7 +19,7 @@
#include <gtest/gtest.h>
#include "utils/easylogging++.h"
#include <wrapper/knowhere/vec_index.h>
#include "src/wrapper/vec_index.h"
#include "knowhere/index/vector_index/gpu_ivf.h"
#include "utils.h"
......
......@@ -25,7 +25,7 @@
#include "scheduler/ResourceFactory.h"
#include "scheduler/resource/Resource.h"
#include "utils/Error.h"
#include "wrapper/knowhere/vec_index.h"
#include "src/wrapper/vec_index.h"
#include "scheduler/tasklabel/SpecResLabel.h"
......
......@@ -21,7 +21,7 @@
#include "server/ServerConfig.h"
#include "utils/Error.h"
#include "wrapper/knowhere/vec_index.h"
#include "src/wrapper/vec_index.h"
using namespace zilliz::milvus;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册