/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */


#pragma once

#include <faiss/gpu/utils/Tensor.cuh>

namespace faiss { namespace gpu {

// Broadcast-add a 1-D vector across every row of a 2-D tensor:
// output[x][i] += input[i] for all x.
// NOTE(review): presumably input's length must match output's second
// (innermost) dimension — confirm against the .cu implementation.
// Runs asynchronously on `stream`.
void runSumAlongColumns(Tensor<float, 1, true>& input,
                        Tensor<float, 2, true>& output,
                        cudaStream_t stream);

#ifdef FAISS_USE_FLOAT16
// half-precision overload of runSumAlongColumns; only compiled when
// float16 support is enabled.
void runSumAlongColumns(Tensor<half, 1, true>& input,
                        Tensor<half, 2, true>& output,
                        cudaStream_t stream);
#endif

// Broadcast-assign a 1-D vector into every row of a 2-D tensor:
// output[x][i] = input[i] for all x (prior contents of output are
// overwritten). Runs asynchronously on `stream`.
void runAssignAlongColumns(Tensor<float, 1, true>& input,
                           Tensor<float, 2, true>& output,
                           cudaStream_t stream);

#ifdef FAISS_USE_FLOAT16
// half-precision overload of runAssignAlongColumns; only compiled when
// float16 support is enabled.
void runAssignAlongColumns(Tensor<half, 1, true>& input,
                           Tensor<half, 2, true>& output,
                           cudaStream_t stream);
#endif

// Broadcast-add a 1-D vector down the rows of a 2-D tensor:
// output[i][x] += input[i] for all x.
// If zeroClamp, output[i][x] = max(output[i][x] + input[i], 0) for all x
// (i.e. negative results are clamped to zero).
// Runs asynchronously on `stream`.
void runSumAlongRows(Tensor<float, 1, true>& input,
                     Tensor<float, 2, true>& output,
                     bool zeroClamp,
                     cudaStream_t stream);

#ifdef FAISS_USE_FLOAT16
// half-precision overload of runSumAlongRows; only compiled when
// float16 support is enabled.
void runSumAlongRows(Tensor<half, 1, true>& input,
                     Tensor<half, 2, true>& output,
                     bool zeroClamp,
                     cudaStream_t stream);
#endif

} } // namespace