提交 38715e6f 编写于 作者: P peizhilin

minor fix

上级 511cc902
...@@ -20,7 +20,7 @@ limitations under the License. */ ...@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph.h"
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h" #include "paddle/fluid/platform/nccl_helper.h"
#endif #endif
...@@ -54,7 +54,7 @@ class ParallelExecutorPrivate { ...@@ -54,7 +54,7 @@ class ParallelExecutorPrivate {
Scope *global_scope_; // not owned Scope *global_scope_; // not owned
std::unique_ptr<details::SSAGraphExecutor> executor_; std::unique_ptr<details::SSAGraphExecutor> executor_;
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_; std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
#endif #endif
bool own_local_scope_; bool own_local_scope_;
...@@ -104,7 +104,7 @@ ParallelExecutor::ParallelExecutor( ...@@ -104,7 +104,7 @@ ParallelExecutor::ParallelExecutor(
if (member_->use_cuda_) { if (member_->use_cuda_) {
// Bcast Parameters to all GPUs // Bcast Parameters to all GPUs
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
auto *nccl_id_var = scope->FindVar(NCCL_ID_VARNAME); auto *nccl_id_var = scope->FindVar(NCCL_ID_VARNAME);
ncclUniqueId *nccl_id = nullptr; ncclUniqueId *nccl_id = nullptr;
if (nccl_id_var != nullptr) { if (nccl_id_var != nullptr) {
...@@ -124,7 +124,7 @@ ParallelExecutor::ParallelExecutor( ...@@ -124,7 +124,7 @@ ParallelExecutor::ParallelExecutor(
// Step 2. Convert main_program to SSA form and dependency graph. Also, insert // Step 2. Convert main_program to SSA form and dependency graph. Also, insert
// ncclOp // ncclOp
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std::unique_ptr<ir::Graph> graph = build_strategy.Apply( std::unique_ptr<ir::Graph> graph = build_strategy.Apply(
main_program, member_->places_, loss_var_name, params, main_program, member_->places_, loss_var_name, params,
member_->local_scopes_, member_->use_cuda_, member_->nccl_ctxs_.get()); member_->local_scopes_, member_->use_cuda_, member_->nccl_ctxs_.get());
...@@ -213,7 +213,7 @@ void ParallelExecutor::BCastParamsToDevices( ...@@ -213,7 +213,7 @@ void ParallelExecutor::BCastParamsToDevices(
} }
auto &dims = main_tensor.dims(); auto &dims = main_tensor.dims();
if (paddle::platform::is_gpu_place(main_tensor.place())) { if (paddle::platform::is_gpu_place(main_tensor.place())) {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std::vector<void *> buffers; std::vector<void *> buffers;
size_t numel = main_tensor.numel(); size_t numel = main_tensor.numel();
ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type()); ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type());
......
...@@ -76,7 +76,8 @@ void TestWord2vecPrediction(const std::string& model_path) { ...@@ -76,7 +76,8 @@ void TestWord2vecPrediction(const std::string& model_path) {
0.000932706}; 0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float); const size_t num_elements = outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory. // The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min((size_t)5UL, num_elements); i++) { for (size_t i = 0; i < std::min(static_cast<size_t>(5UL), num_elements);
i++) {
LOG(INFO) << "data: " LOG(INFO) << "data: "
<< static_cast<float*>(outputs.front().data.data())[i]; << static_cast<float*>(outputs.front().data.data())[i];
PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i], PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],
......
...@@ -86,7 +86,11 @@ void CPUAllocator::Free(void* p, size_t size, size_t index) { ...@@ -86,7 +86,11 @@ void CPUAllocator::Free(void* p, size_t size, size_t index) {
munlock(p, size); munlock(p, size);
#endif #endif
} }
#ifdef _WIN32
_aligned_free(p);
#else
free(p); free(p);
#endif
} }
bool CPUAllocator::UseGpu() const { return false; } bool CPUAllocator::UseGpu() const { return false; }
......
...@@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef _WIN32
#include <unistd.h> #include <unistd.h>
#endif
#include <string> #include <string>
#include <thread> // NOLINT #include <thread> // NOLINT
......
...@@ -16,13 +16,12 @@ limitations under the License. */ ...@@ -16,13 +16,12 @@ limitations under the License. */
#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/sequence_pooling.h" #include "paddle/fluid/operators/math/sequence_pooling.h"
#include "paddle/fluid/platform/cuda_primitives.h" #include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/macros.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
namespace math { namespace math {
#define FLT_MAX __FLT_MAX__
template <typename T> template <typename T>
struct MaxPoolFunctor { struct MaxPoolFunctor {
HOSTDEVICE void operator()(const T* input, const size_t start, HOSTDEVICE void operator()(const T* input, const size_t start,
......
...@@ -20,12 +20,12 @@ limitations under the License. */ ...@@ -20,12 +20,12 @@ limitations under the License. */
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#ifndef _WIN32 #ifndef _WIN32
const float fraction_of_gpu_memory_to_use = 0.92f; constexpr static float fraction_of_gpu_memory_to_use = 0.92f;
#else #else
// fraction_of_gpu_memory_to_use cannot be too high on windows, // fraction_of_gpu_memory_to_use cannot be too high on windows,
// since the win32 graphic sub-system can occupy some GPU memory // since the win32 graphic sub-system can occupy some GPU memory
// which may lead to insufficient memory left for paddle // which may lead to insufficient memory left for paddle
const float fraction_of_gpu_memory_to_use = 0.5f; constexpr static float fraction_of_gpu_memory_to_use = 0.5f;
#endif #endif
DEFINE_double(fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use, DEFINE_double(fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use,
......
...@@ -63,7 +63,7 @@ function(py_test_modules TARGET_NAME) ...@@ -63,7 +63,7 @@ function(py_test_modules TARGET_NAME)
set(multiValueArgs MODULES DEPS ENVS) set(multiValueArgs MODULES DEPS ENVS)
cmake_parse_arguments(py_test_modules "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(py_test_modules "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND env PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_modules_ENVS} COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_modules_ENVS}
${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES} ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
if (py_test_modules_SERIAL) if (py_test_modules_SERIAL)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册