diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index c8cba4d9aa40ee7e165bfc4e4d6924c48c25a65a..60def5c4cae4db36872558aeee6fcd761c628ea8 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -35,7 +35,7 @@ aux_source_directory(db/engine db_engine_files) aux_source_directory(db/insert db_insert_files) aux_source_directory(db/meta db_meta_files) aux_source_directory(metrics metrics_files) -aux_source_directory(wrapper/knowhere knowhere_files) +aux_source_directory(wrapper wrapper_files) aux_source_directory(scheduler/action scheduler_action_files) aux_source_directory(scheduler/event scheduler_event_files) @@ -88,7 +88,7 @@ set(db_files ${db_meta_files} ${db_scheduler_files} ${metrics_files} - ${knowhere_files} + ${wrapper_files} ${utils_files} ) diff --git a/cpp/src/cache/DataObj.h b/cpp/src/cache/DataObj.h index 8ba59104cdfdec218c91eb63bb992959e4aa0218..12406301cb656b90e89fe048200d6165cdb8b59a 100644 --- a/cpp/src/cache/DataObj.h +++ b/cpp/src/cache/DataObj.h @@ -18,7 +18,7 @@ #pragma once -#include "wrapper/knowhere/vec_index.h" +#include "src/wrapper/vec_index.h" #include diff --git a/cpp/src/db/engine/ExecutionEngineImpl.cpp b/cpp/src/db/engine/ExecutionEngineImpl.cpp index 7ad9d3ea12eac0b4ad6e6f5fdd6cdcf8e7ad50ba..2fb868f548cc94ec1d51a9f4917f2061d6272244 100644 --- a/cpp/src/db/engine/ExecutionEngineImpl.cpp +++ b/cpp/src/db/engine/ExecutionEngineImpl.cpp @@ -23,8 +23,8 @@ #include "utils/CommonUtil.h" #include "utils/Exception.h" -#include "wrapper/knowhere/vec_index.h" -#include "wrapper/knowhere/vec_impl.h" +#include "src/wrapper/vec_index.h" +#include "src/wrapper/vec_impl.h" #include "knowhere/common/exception.h" #include diff --git a/cpp/src/db/engine/ExecutionEngineImpl.h b/cpp/src/db/engine/ExecutionEngineImpl.h index 611a247a082977f97c27ef9b8329f9b666fa6be9..cb08c50ad4e6fedc6ab41bd1ac1392f458d037db 100644 --- a/cpp/src/db/engine/ExecutionEngineImpl.h +++ b/cpp/src/db/engine/ExecutionEngineImpl.h @@ -18,7 +18,7 @@ #pragma once #include "ExecutionEngine.h" -#include "wrapper/knowhere/vec_index.h" +#include "src/wrapper/vec_index.h" #include #include diff --git a/cpp/src/server/Server.cpp b/cpp/src/server/Server.cpp index 252f92a5f04fbeb3bf75638a6ae28493d4dbeb6e..9920b302468b23861af1f3ce6529f2a33f1e478e 100644 --- a/cpp/src/server/Server.cpp +++ b/cpp/src/server/Server.cpp @@ -32,7 +32,7 @@ #include #include #include -#include "wrapper/knowhere/KnowhereResource.h" +#include "src/wrapper/KnowhereResource.h" #include "metrics/Metrics.h" #include "DBWrapper.h" diff --git a/cpp/src/wrapper/knowhere/KnowhereResource.cpp b/cpp/src/wrapper/KnowhereResource.cpp similarity index 100% rename from cpp/src/wrapper/knowhere/KnowhereResource.cpp rename to cpp/src/wrapper/KnowhereResource.cpp diff --git a/cpp/src/wrapper/knowhere/KnowhereResource.h b/cpp/src/wrapper/KnowhereResource.h similarity index 100% rename from cpp/src/wrapper/knowhere/KnowhereResource.h rename to cpp/src/wrapper/KnowhereResource.h diff --git a/cpp/src/wrapper/knowhere/data_transfer.cpp b/cpp/src/wrapper/data_transfer.cpp similarity index 100% rename from cpp/src/wrapper/knowhere/data_transfer.cpp rename to cpp/src/wrapper/data_transfer.cpp diff --git a/cpp/src/wrapper/knowhere/data_transfer.h b/cpp/src/wrapper/data_transfer.h similarity index 100% rename from cpp/src/wrapper/knowhere/data_transfer.h rename to cpp/src/wrapper/data_transfer.h diff --git a/cpp/src/wrapper/gpu/Arithmetic.h b/cpp/src/wrapper/gpu/Arithmetic.h deleted file mode 100644 index c6ab7a2bc8fcdb817021f3cb606f81151d4ea4b6..0000000000000000000000000000000000000000 --- a/cpp/src/wrapper/gpu/Arithmetic.h +++ /dev/null @@ -1,79 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include - - -namespace zilliz { -namespace milvus { -namespace engine { - -using Bool = int8_t; -using Byte = uint8_t; -using Word = unsigned long; -using EnumType = uint64_t; - -using Float32 = float; -using Float64 = double; - -constexpr bool kBoolMax = std::numeric_limits::max(); -constexpr bool kBoolMin = std::numeric_limits::lowest(); - -constexpr int8_t kInt8Max = std::numeric_limits::max(); -constexpr int8_t kInt8Min = std::numeric_limits::lowest(); - -constexpr int16_t kInt16Max = std::numeric_limits::max(); -constexpr int16_t kInt16Min = std::numeric_limits::lowest(); - -constexpr int32_t kInt32Max = std::numeric_limits::max(); -constexpr int32_t kInt32Min = std::numeric_limits::lowest(); - -constexpr int64_t kInt64Max = std::numeric_limits::max(); -constexpr int64_t kInt64Min = std::numeric_limits::lowest(); - -constexpr float kFloatMax = std::numeric_limits::max(); -constexpr float kFloatMin = std::numeric_limits::lowest(); - -constexpr double kDoubleMax = std::numeric_limits::max(); -constexpr double kDoubleMin = std::numeric_limits::lowest(); - -constexpr uint32_t kFloat32DecimalPrecision = std::numeric_limits::digits10; -constexpr uint32_t kFloat64DecimalPrecision = std::numeric_limits::digits10; - - -constexpr uint8_t kByteWidth = 8; -constexpr uint8_t kCharWidth = kByteWidth; -constexpr uint8_t kWordWidth = sizeof(Word) * kByteWidth; -constexpr uint8_t kEnumTypeWidth = sizeof(EnumType) * kByteWidth; - -template -inline size_t -WidthOf() { return sizeof(T) << 3; } - -template -inline size_t -WidthOf(const T &) { return sizeof(T) << 3; } - - -} -} // namespace lib -} // namespace zilliz diff --git a/cpp/src/wrapper/gpu/Topk.cu b/cpp/src/wrapper/gpu/Topk.cu deleted file mode 100644 index 423c8806c457cf5f0ea8de8a662e03e4ee4dd050..0000000000000000000000000000000000000000 --- a/cpp/src/wrapper/gpu/Topk.cu +++ /dev/null @@ -1,586 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - - -#include "faiss/FaissAssert.h" -#include "faiss/gpu/utils/Limits.cuh" -#include "Arithmetic.h" - - -namespace faiss { -namespace gpu { - -constexpr bool kBoolMax = zilliz::milvus::engine::kBoolMax; -constexpr bool kBoolMin = zilliz::milvus::engine::kBoolMin; - -template<> -struct Limits { - static __device__ __host__ - inline bool getMin() { - return kBoolMin; - } - static __device__ __host__ - inline bool getMax() { - return kBoolMax; - } -}; - -constexpr int8_t kInt8Max = zilliz::milvus::engine::kInt8Max; -constexpr int8_t kInt8Min = zilliz::milvus::engine::kInt8Min; - -template<> -struct Limits { - static __device__ __host__ - inline int8_t getMin() { - return kInt8Min; - } - static __device__ __host__ - inline int8_t getMax() { - return kInt8Max; - } -}; - -constexpr int16_t kInt16Max = zilliz::milvus::engine::kInt16Max; -constexpr int16_t kInt16Min = zilliz::milvus::engine::kInt16Min; - -template<> -struct Limits { - static __device__ __host__ - inline int16_t getMin() { - return kInt16Min; - } - static __device__ __host__ - inline int16_t getMax() { - return kInt16Max; - } -}; - -constexpr int64_t kInt64Max = zilliz::milvus::engine::kInt64Max; -constexpr int64_t kInt64Min = zilliz::milvus::engine::kInt64Min; - -template<> -struct Limits { - static __device__ __host__ - inline int64_t getMin() { - return kInt64Min; - } - static __device__ __host__ - inline int64_t getMax() { - return kInt64Max; - } -}; - -constexpr double kDoubleMax = zilliz::milvus::engine::kDoubleMax; -constexpr double kDoubleMin = zilliz::milvus::engine::kDoubleMin; - -template<> -struct Limits { - static __device__ __host__ - inline double getMin() { - return kDoubleMin; - } - static __device__ __host__ - inline double getMax() { - return kDoubleMax; - } -}; - -} -} - -#include "faiss/gpu/utils/DeviceUtils.h" -#include "faiss/gpu/utils/MathOperators.cuh" -#include "faiss/gpu/utils/Pair.cuh" -#include "faiss/gpu/utils/Reductions.cuh" -#include "faiss/gpu/utils/Select.cuh" -#include "faiss/gpu/utils/Tensor.cuh" -#include "faiss/gpu/utils/StaticUtils.h" - -#include "Topk.h" - - -namespace zilliz { -namespace milvus { -namespace engine { -namespace gpu { - -constexpr int kWarpSize = 32; - -template -using Tensor = faiss::gpu::Tensor; - -template -using Pair = faiss::gpu::Pair; - - -// select kernel for k == 1 -template -__global__ void topkSelectMin1(Tensor productDistances, - Tensor outDistances, - Tensor outIndices) { - // Each block handles kRowsPerBlock rows of the distances (results) - Pair threadMin[kRowsPerBlock]; - __shared__ - Pair blockMin[kRowsPerBlock * (kBlockSize / kWarpSize)]; - - T distance[kRowsPerBlock]; - -#pragma unroll - for (int i = 0; i < kRowsPerBlock; ++i) { - threadMin[i].k = faiss::gpu::Limits::getMax(); - threadMin[i].v = -1; - } - - // blockIdx.x: which chunk of rows we are responsible for updating - int rowStart = blockIdx.x * kRowsPerBlock; - - // FIXME: if we have exact multiples, don't need this - bool endRow = (blockIdx.x == gridDim.x - 1); - - if (endRow) { - if (productDistances.getSize(0) % kRowsPerBlock == 0) { - endRow = false; - } - } - - if (endRow) { - for (int row = rowStart; row < productDistances.getSize(0); ++row) { - for (int col = threadIdx.x; col < productDistances.getSize(1); - col += blockDim.x) { - distance[0] = productDistances[row][col]; - - if (faiss::gpu::Math::lt(distance[0], threadMin[0].k)) { - threadMin[0].k = distance[0]; - threadMin[0].v = col; - } - } - - // Reduce within the block - threadMin[0] = - faiss::gpu::blockReduceAll, faiss::gpu::Min >, false, false>( - threadMin[0], faiss::gpu::Min >(), blockMin); - - if (threadIdx.x == 0) { - outDistances[row][0] = threadMin[0].k; - outIndices[row][0] = threadMin[0].v; - } - - // so we can use the shared memory again - __syncthreads(); - - threadMin[0].k = faiss::gpu::Limits::getMax(); - threadMin[0].v = -1; - } - } else { - for (int col = threadIdx.x; col < productDistances.getSize(1); - col += blockDim.x) { - -#pragma unroll - for (int row = 0; row < kRowsPerBlock; ++row) { - distance[row] = productDistances[rowStart + row][col]; - } - -#pragma unroll - for (int row = 0; row < kRowsPerBlock; ++row) { - if (faiss::gpu::Math::lt(distance[row], threadMin[row].k)) { - threadMin[row].k = distance[row]; - threadMin[row].v = col; - } - } - } - - // Reduce within the block - faiss::gpu::blockReduceAll, faiss::gpu::Min >, false, false>( - threadMin, faiss::gpu::Min >(), blockMin); - - if (threadIdx.x == 0) { -#pragma unroll - for (int row = 0; row < kRowsPerBlock; ++row) { - outDistances[rowStart + row][0] = threadMin[row].k; - outIndices[rowStart + row][0] = threadMin[row].v; - } - } - } -} - -// L2 + select kernel for k > 1, no re-use of ||c||^2 -template -__global__ void topkSelectMinK(Tensor productDistances, - Tensor outDistances, - Tensor outIndices, - int k, T initK) { - // Each block handles a single row of the distances (results) - constexpr int kNumWarps = ThreadsPerBlock / kWarpSize; - - __shared__ - T smemK[kNumWarps * NumWarpQ]; - __shared__ - int64_t smemV[kNumWarps * NumWarpQ]; - - faiss::gpu::BlockSelect, - NumWarpQ, NumThreadQ, ThreadsPerBlock> - heap(initK, -1, smemK, smemV, k); - - int row = blockIdx.x; - - // Whole warps must participate in the selection - int limit = faiss::gpu::utils::roundDown(productDistances.getSize(1), kWarpSize); - int i = threadIdx.x; - - for (; i < limit; i += blockDim.x) { - T v = productDistances[row][i]; - heap.add(v, i); - } - - if (i < productDistances.getSize(1)) { - T v = productDistances[row][i]; - heap.addThreadQ(v, i); - } - - heap.reduce(); - for (int i = threadIdx.x; i < k; i += blockDim.x) { - outDistances[row][i] = smemK[i]; - outIndices[row][i] = smemV[i]; - } -} - -// FIXME: no TVec specialization -template -void runTopKSelectMin(Tensor &productDistances, - Tensor &outDistances, - Tensor &outIndices, - int k, - cudaStream_t stream) { - FAISS_ASSERT(productDistances.getSize(0) == outDistances.getSize(0)); - FAISS_ASSERT(productDistances.getSize(0) == outIndices.getSize(0)); - FAISS_ASSERT(outDistances.getSize(1) == k); - FAISS_ASSERT(outIndices.getSize(1) == k); - FAISS_ASSERT(k <= 1024); - - if (k == 1) { - constexpr int kThreadsPerBlock = 256; - constexpr int kRowsPerBlock = 8; - - auto block = dim3(kThreadsPerBlock); - auto grid = dim3(faiss::gpu::utils::divUp(outDistances.getSize(0), kRowsPerBlock)); - - topkSelectMin1 - << < grid, block, 0, stream >> > (productDistances, outDistances, outIndices); - } else { - constexpr int kThreadsPerBlock = 128; - - auto block = dim3(kThreadsPerBlock); - auto grid = dim3(outDistances.getSize(0)); - -#define RUN_TOPK_SELECT_MIN(NUM_WARP_Q, NUM_THREAD_Q) \ - do { \ - topkSelectMinK \ - <<>>(productDistances, \ - outDistances, outIndices, \ - k, faiss::gpu::Limits::getMax()); \ - } while (0) - - if (k <= 32) { - RUN_TOPK_SELECT_MIN(32, 2); - } else if (k <= 64) { - RUN_TOPK_SELECT_MIN(64, 3); - } else if (k <= 128) { - RUN_TOPK_SELECT_MIN(128, 3); - } else if (k <= 256) { - RUN_TOPK_SELECT_MIN(256, 4); - } else if (k <= 512) { - RUN_TOPK_SELECT_MIN(512, 8); - } else if (k <= 1024) { - RUN_TOPK_SELECT_MIN(1024, 8); - } else { - FAISS_ASSERT(false); - } - } - - CUDA_TEST_ERROR(); -} - -//////////////////////////////////////////////////////////// -// select kernel for k == 1 -template -__global__ void topkSelectMax1(Tensor productDistances, - Tensor outDistances, - Tensor outIndices) { - // Each block handles kRowsPerBlock rows of the distances (results) - Pair threadMax[kRowsPerBlock]; - __shared__ - Pair blockMax[kRowsPerBlock * (kBlockSize / kWarpSize)]; - - T distance[kRowsPerBlock]; - -#pragma unroll - for (int i = 0; i < kRowsPerBlock; ++i) { - threadMax[i].k = faiss::gpu::Limits::getMin(); - threadMax[i].v = -1; - } - - // blockIdx.x: which chunk of rows we are responsible for updating - int rowStart = blockIdx.x * kRowsPerBlock; - - // FIXME: if we have exact multiples, don't need this - bool endRow = (blockIdx.x == gridDim.x - 1); - - if (endRow) { - if (productDistances.getSize(0) % kRowsPerBlock == 0) { - endRow = false; - } - } - - if (endRow) { - for (int row = rowStart; row < productDistances.getSize(0); ++row) { - for (int col = threadIdx.x; col < productDistances.getSize(1); - col += blockDim.x) { - distance[0] = productDistances[row][col]; - - if (faiss::gpu::Math::gt(distance[0], threadMax[0].k)) { - threadMax[0].k = distance[0]; - threadMax[0].v = col; - } - } - - // Reduce within the block - threadMax[0] = - faiss::gpu::blockReduceAll, faiss::gpu::Max >, false, false>( - threadMax[0], faiss::gpu::Max >(), blockMax); - - if (threadIdx.x == 0) { - outDistances[row][0] = threadMax[0].k; - outIndices[row][0] = threadMax[0].v; - } - - // so we can use the shared memory again - __syncthreads(); - - threadMax[0].k = faiss::gpu::Limits::getMin(); - threadMax[0].v = -1; - } - } else { - for (int col = threadIdx.x; col < productDistances.getSize(1); - col += blockDim.x) { - -#pragma unroll - for (int row = 0; row < kRowsPerBlock; ++row) { - distance[row] = productDistances[rowStart + row][col]; - } - -#pragma unroll - for (int row = 0; row < kRowsPerBlock; ++row) { - if (faiss::gpu::Math::gt(distance[row], threadMax[row].k)) { - threadMax[row].k = distance[row]; - threadMax[row].v = col; - } - } - } - - // Reduce within the block - faiss::gpu::blockReduceAll, faiss::gpu::Max >, false, false>( - threadMax, faiss::gpu::Max >(), blockMax); - - if (threadIdx.x == 0) { -#pragma unroll - for (int row = 0; row < kRowsPerBlock; ++row) { - outDistances[rowStart + row][0] = threadMax[row].k; - outIndices[rowStart + row][0] = threadMax[row].v; - } - } - } -} - -// L2 + select kernel for k > 1, no re-use of ||c||^2 -template -__global__ void topkSelectMaxK(Tensor productDistances, - Tensor outDistances, - Tensor outIndices, - int k, T initK) { - // Each block handles a single row of the distances (results) - constexpr int kNumWarps = ThreadsPerBlock / kWarpSize; - - __shared__ - T smemK[kNumWarps * NumWarpQ]; - __shared__ - int64_t smemV[kNumWarps * NumWarpQ]; - - faiss::gpu::BlockSelect, - NumWarpQ, NumThreadQ, ThreadsPerBlock> - heap(initK, -1, smemK, smemV, k); - - int row = blockIdx.x; - - // Whole warps must participate in the selection - int limit = faiss::gpu::utils::roundDown(productDistances.getSize(1), kWarpSize); - int i = threadIdx.x; - - for (; i < limit; i += blockDim.x) { - T v = productDistances[row][i]; - heap.add(v, i); - } - - if (i < productDistances.getSize(1)) { - T v = productDistances[row][i]; - heap.addThreadQ(v, i); - } - - heap.reduce(); - for (int i = threadIdx.x; i < k; i += blockDim.x) { - outDistances[row][i] = smemK[i]; - outIndices[row][i] = smemV[i]; - } -} - -// FIXME: no TVec specialization -template -void runTopKSelectMax(Tensor &productDistances, - Tensor &outDistances, - Tensor &outIndices, - int k, - cudaStream_t stream) { - FAISS_ASSERT(productDistances.getSize(0) == outDistances.getSize(0)); - FAISS_ASSERT(productDistances.getSize(0) == outIndices.getSize(0)); - FAISS_ASSERT(outDistances.getSize(1) == k); - FAISS_ASSERT(outIndices.getSize(1) == k); - FAISS_ASSERT(k <= 1024); - - if (k == 1) { - constexpr int kThreadsPerBlock = 256; - constexpr int kRowsPerBlock = 8; - - auto block = dim3(kThreadsPerBlock); - auto grid = dim3(faiss::gpu::utils::divUp(outDistances.getSize(0), kRowsPerBlock)); - - topkSelectMax1 - << < grid, block, 0, stream >> > (productDistances, outDistances, outIndices); - } else { - constexpr int kThreadsPerBlock = 128; - - auto block = dim3(kThreadsPerBlock); - auto grid = dim3(outDistances.getSize(0)); - -#define RUN_TOPK_SELECT_MAX(NUM_WARP_Q, NUM_THREAD_Q) \ - do { \ - topkSelectMaxK \ - <<>>(productDistances, \ - outDistances, outIndices, \ - k, faiss::gpu::Limits::getMin()); \ - } while (0) - - if (k <= 32) { - RUN_TOPK_SELECT_MAX(32, 2); - } else if (k <= 64) { - RUN_TOPK_SELECT_MAX(64, 3); - } else if (k <= 128) { - RUN_TOPK_SELECT_MAX(128, 3); - } else if (k <= 256) { - RUN_TOPK_SELECT_MAX(256, 4); - } else if (k <= 512) { - RUN_TOPK_SELECT_MAX(512, 8); - } else if (k <= 1024) { - RUN_TOPK_SELECT_MAX(1024, 8); - } else { - FAISS_ASSERT(false); - } - } - - CUDA_TEST_ERROR(); -} -////////////////////////////////////////////////////////////// - -template -void runTopKSelect(Tensor &productDistances, - Tensor &outDistances, - Tensor &outIndices, - bool dir, - int k, - cudaStream_t stream) { - if (dir) { - runTopKSelectMax(productDistances, - outDistances, - outIndices, - k, - stream); - } else { - runTopKSelectMin(productDistances, - outDistances, - outIndices, - k, - stream); - } -} - -template -void TopK(T *input, - int length, - int k, - T *output, - int64_t *idx, -// Ordering order_flag, - cudaStream_t stream) { - -// bool dir = (order_flag == Ordering::kAscending ? false : true); - bool dir = 0; - - Tensor t_input(input, {1, length}); - Tensor t_output(output, {1, k}); - Tensor t_idx(idx, {1, k}); - - runTopKSelect(t_input, t_output, t_idx, dir, k, stream); -} - -//INSTANTIATION_TOPK_2(bool); -//INSTANTIATION_TOPK_2(int8_t); -//INSTANTIATION_TOPK_2(int16_t); -INSTANTIATION_TOPK_2(int32_t); -//INSTANTIATION_TOPK_2(int64_t); -INSTANTIATION_TOPK_2(float); -//INSTANTIATION_TOPK_2(double); -//INSTANTIATION_TOPK(TimeInterval); -//INSTANTIATION_TOPK(Float128); -//INSTANTIATION_TOPK(char); - -} - -void TopK(float *host_input, - int length, - int k, - float *output, - int64_t *indices) { - float *device_input, *device_output; - int64_t *ids; - - cudaMalloc((void **) &device_input, sizeof(float) * length); - cudaMalloc((void **) &device_output, sizeof(float) * k); - cudaMalloc((void **) &ids, sizeof(int64_t) * k); - - cudaMemcpy(device_input, host_input, sizeof(float) * length, cudaMemcpyHostToDevice); - - gpu::TopK(device_input, length, k, device_output, ids, nullptr); - - cudaMemcpy(output, device_output, sizeof(float) * k, cudaMemcpyDeviceToHost); - cudaMemcpy(indices, ids, sizeof(int64_t) * k, cudaMemcpyDeviceToHost); - - cudaFree(device_input); - cudaFree(device_output); - cudaFree(ids); -} - -} -} -} diff --git a/cpp/src/wrapper/gpu/Topk.h b/cpp/src/wrapper/gpu/Topk.h deleted file mode 100644 index d8dc69fd092928cc453002b432781446781c0e37..0000000000000000000000000000000000000000 --- a/cpp/src/wrapper/gpu/Topk.h +++ /dev/null @@ -1,73 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - - -#pragma once - -#include -#include - - -namespace zilliz { -namespace milvus { -namespace engine { -namespace gpu { - -template -void -TopK(T *input, - int length, - int k, - T *output, - int64_t *indices, -// Ordering order_flag, - cudaStream_t stream = nullptr); - - -#define INSTANTIATION_TOPK_2(T) \ - template void \ - TopK(T *input, \ - int length, \ - int k, \ - T *output, \ - int64_t *indices, \ - cudaStream_t stream) -// Ordering order_flag, \ -// cudaStream_t stream) - -//extern INSTANTIATION_TOPK_2(int8_t); -//extern INSTANTIATION_TOPK_2(int16_t); -extern INSTANTIATION_TOPK_2(int32_t); -//extern INSTANTIATION_TOPK_2(int64_t); -extern INSTANTIATION_TOPK_2(float); -//extern INSTANTIATION_TOPK_2(double); -//extern INSTANTIATION_TOPK(TimeInterval); -//extern INSTANTIATION_TOPK(Float128); - -} - -// User Interface. -void TopK(float *input, - int length, - int k, - float *output, - int64_t *indices); - - -} -} -} diff --git a/cpp/src/wrapper/knowhere/vec_impl.cpp b/cpp/src/wrapper/vec_impl.cpp similarity index 100% rename from cpp/src/wrapper/knowhere/vec_impl.cpp rename to cpp/src/wrapper/vec_impl.cpp diff --git a/cpp/src/wrapper/knowhere/vec_impl.h b/cpp/src/wrapper/vec_impl.h similarity index 100% rename from cpp/src/wrapper/knowhere/vec_impl.h rename to cpp/src/wrapper/vec_impl.h diff --git a/cpp/src/wrapper/knowhere/vec_index.cpp b/cpp/src/wrapper/vec_index.cpp similarity index 100% rename from cpp/src/wrapper/knowhere/vec_index.cpp rename to cpp/src/wrapper/vec_index.cpp diff --git a/cpp/src/wrapper/knowhere/vec_index.h b/cpp/src/wrapper/vec_index.h similarity index 100% rename from cpp/src/wrapper/knowhere/vec_index.h rename to cpp/src/wrapper/vec_index.h diff --git a/cpp/unittest/knowhere/CMakeLists.txt b/cpp/unittest/knowhere/CMakeLists.txt index 27e2612ae7714082938906af6aebe2d06f3c99e4..21faf3568cbea45d041d4b2e499952108132ef3f 100644 --- a/cpp/unittest/knowhere/CMakeLists.txt +++ b/cpp/unittest/knowhere/CMakeLists.txt @@ -21,9 +21,9 @@ include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include") link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") set(knowhere_src - ${MILVUS_ENGINE_SRC}/wrapper/knowhere/data_transfer.cpp - ${MILVUS_ENGINE_SRC}/wrapper/knowhere/vec_impl.cpp - ${MILVUS_ENGINE_SRC}/wrapper/knowhere/vec_index.cpp) + ${MILVUS_ENGINE_SRC}/wrapper/data_transfer.cpp + ${MILVUS_ENGINE_SRC}/wrapper/vec_impl.cpp + ${MILVUS_ENGINE_SRC}/wrapper/vec_index.cpp) set(helper utils.cpp diff --git a/cpp/unittest/knowhere/knowhere_test.cpp b/cpp/unittest/knowhere/knowhere_test.cpp index 6d8f57f322c342886992a85e5401e9e1f7585553..ac185efbb3f8137462a9b15917a1313a149874ed 100644 --- a/cpp/unittest/knowhere/knowhere_test.cpp +++ b/cpp/unittest/knowhere/knowhere_test.cpp @@ -19,7 +19,7 @@ #include #include "utils/easylogging++.h" -#include +#include "src/wrapper/vec_index.h" #include "knowhere/index/vector_index/gpu_ivf.h" #include "utils.h" diff --git a/cpp/unittest/scheduler/scheduler_test.cpp b/cpp/unittest/scheduler/scheduler_test.cpp index 6f680f4a60e8ab9aa7cbdf174d76ac6523f284bb..13bdf53fb056d8441d9514c8d2c22a4c07f625f2 100644 --- a/cpp/unittest/scheduler/scheduler_test.cpp +++ b/cpp/unittest/scheduler/scheduler_test.cpp @@ -25,7 +25,7 @@ #include "scheduler/ResourceFactory.h" #include "scheduler/resource/Resource.h" #include "utils/Error.h" -#include "wrapper/knowhere/vec_index.h" +#include "src/wrapper/vec_index.h" #include "scheduler/tasklabel/SpecResLabel.h" diff --git a/cpp/unittest/server/cache_test.cpp b/cpp/unittest/server/cache_test.cpp index 5bb1fb861200d97d3d99abd6b337dfe2ed6853c8..bf9b6edf7d4b2fb126db8f11b06289c304b4a34f 100644 --- a/cpp/unittest/server/cache_test.cpp +++ b/cpp/unittest/server/cache_test.cpp @@ -21,7 +21,7 @@ #include "server/ServerConfig.h" #include "utils/Error.h" -#include "wrapper/knowhere/vec_index.h" +#include "src/wrapper/vec_index.h" using namespace zilliz::milvus;