提交 ccb3e4cc 编写于 作者: M Mihai Maruseac

Prevent integer truncation from 64 to 32 bits.

The `tensorflow::Shard` function's last argument must be a two-argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where code passes in a function whose arguments are `int` or `int32` (32 bits). In these cases, it is possible that the integer truncation would later cause a segfault or other unexpected behavior.

PiperOrigin-RevId: 332560414
Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767
上级 c5aec0b3
......@@ -121,7 +121,7 @@ class BoostedTreesTrainingPredictOp : public OpKernel {
auto do_work = [&resource, &bucketized_features, &cached_tree_ids,
&cached_node_ids, &output_partial_logits,
&output_node_ids, latest_tree,
this](int32 start, int32 end) {
this](int64 start, int64 end) {
for (int32 i = start; i < end; ++i) {
int32 tree_id = cached_tree_ids(i);
int32 node_id = cached_node_ids(i);
......@@ -237,7 +237,7 @@ class BoostedTreesPredictOp : public OpKernel {
const int32 last_tree = resource->num_trees() - 1;
auto do_work = [&resource, &bucketized_features, &output_logits, last_tree,
this](int32 start, int32 end) {
this](int64 start, int64 end) {
for (int32 i = start; i < end; ++i) {
std::vector<float> tree_logits(logits_dimension_, 0.0);
int32 tree_id = 0;
......@@ -340,7 +340,7 @@ class BoostedTreesExampleDebugOutputsOp : public OpKernel {
// path. Note: feature_ids has one less value than logits_path because the
// first value of each logit path will be the bias.
auto do_work = [&resource, &bucketized_features, &output_debug_info,
last_tree](int32 start, int32 end) {
last_tree](int64 start, int64 end) {
for (int32 i = start; i < end; ++i) {
// Proto to store debug outputs, per example.
boosted_trees::DebugOutput example_debug_info;
......
......@@ -95,7 +95,8 @@ struct NthElementFunctor<CPUDevice, T> {
const int last_dim = input_tensor.dim_size(input_tensor.dims() - 1);
// Allocate each row to different shard.
auto SubNthElement = [&, input, output, last_dim, n](int start, int limit) {
auto SubNthElement = [&, input, output, last_dim, n](int64 start,
int64 limit) {
// std::nth_element would rearrange the array, so we need a new buffer.
std::vector<T> buf(last_dim);
......
......@@ -69,8 +69,8 @@ struct TruncatedNormalFunctor<CPUDevice, T> {
auto DoWork = [samples_per_batch, num_elements, &ctx, &means, &stddevs,
&minvals, &maxvals, &gen, &output,
kStdDevsInsideBoundsToUseRandnSampler](int start_batch,
int limit_batch) {
kStdDevsInsideBoundsToUseRandnSampler](int64 start_batch,
int64 limit_batch) {
// Capturing "gen" by-value would only make a copy for the _shared_
// lambda. Since we want to let each worker have its own copy, we pass
// "gen" by reference and explicitly do a copy assignment here.
......
......@@ -182,7 +182,7 @@ struct RandomBinomialFunctor<CPUDevice, T, U> {
// the sample shape and [H1, ... Hm] for the batch shape of the samples.
// We have B1 * ... * Bk samples per batch member we need.
auto DoWork = [num_batches, samples_per_batch, &bcast, &counts, &probs,
&gen, &output](int start_output, int limit_output) {
&gen, &output](int64 start_output, int64 limit_output) {
// Vectorized intermediate calculations for uniform rejection sampling.
// We always generate at most 4 samples.
Eigen::array<T, 4> z;
......
......@@ -97,7 +97,7 @@ struct PoissonFunctor<CPUDevice, T, U> {
typedef random::UniformDistribution<random::PhiloxRandom, CT> Uniform;
auto DoWork = [num_samples, num_rate, &rng, samples_flat, rate_flat](
int start_output, int limit_output) {
int64 start_output, int64 limit_output) {
// Capturing "rng" by value would only make a copy for the _shared_
// lambda. Since we want to let each worker have its own copy, we pass
// "rng" by reference and explicitly do a copy assignment.
......
......@@ -252,7 +252,7 @@ class StatelessRandomGammaOp : public StatelessRandomOpBase {
// avoid a couple flops which can be done on a per-alpha basis.
auto DoWork = [samples_per_alpha, num_alphas, &random, samples_flat,
alpha_flat](int start_output, int limit_output) {
alpha_flat](int64 start_output, int64 limit_output) {
// Capturing "random" by-value would only make a copy for the _shared_
// lambda. Since we want to let each worker have its own copy, we pass
// "random" by reference and explicitly do a copy assignment.
......
......@@ -136,7 +136,7 @@ struct TopKFunctor<CPUDevice, T> {
return Status::OK();
}
auto SortIndices = [&](int start_batch, int limit_batch) {
auto SortIndices = [&](int64 start_batch, int64 limit_batch) {
for (int32 b = start_batch; b < limit_batch; ++b) {
const T* input_data = &input(b, 0);
const auto stable_comp = [input_data](const int32 a, const int32 b) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册