diff --git a/benchmark/fluid/machine_translation.py b/benchmark/fluid/machine_translation.py index d7a421c10979c3b9d6865a8c0b99a6410e0f46a8..adde5f21acd4e77d58a453d6868abeccfca4bb5a 100644 --- a/benchmark/fluid/machine_translation.py +++ b/benchmark/fluid/machine_translation.py @@ -21,7 +21,7 @@ import argparse import time import distutils.util -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.framework as framework diff --git a/benchmark/fluid/mnist.py b/benchmark/fluid/mnist.py index dc10ac2ec195acc9a5693718141ddb32417dfb71..1e2185dfac1072d1f1046f4616a9d53a8fc76061 100644 --- a/benchmark/fluid/mnist.py +++ b/benchmark/fluid/mnist.py @@ -20,7 +20,7 @@ import numpy as np import argparse import time -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import paddle.fluid.profiler as profiler diff --git a/benchmark/fluid/resnet.py b/benchmark/fluid/resnet.py index 1af5eaf6b46be47cb6b778cedcf53830c201ef39..831fa2c019fc2868cd85b1ca7b2c8c76a2f1628c 100644 --- a/benchmark/fluid/resnet.py +++ b/benchmark/fluid/resnet.py @@ -23,7 +23,7 @@ import time import cProfile, pstats, StringIO -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.profiler as profiler diff --git a/benchmark/fluid/stacked_dynamic_lstm.py b/benchmark/fluid/stacked_dynamic_lstm.py index 5fcbdd64af9dc196c9d5b2b82ce4213478ea1418..73bcc47b4d404af2c01d61ca3dfb11971bbcfe9c 100644 --- a/benchmark/fluid/stacked_dynamic_lstm.py +++ b/benchmark/fluid/stacked_dynamic_lstm.py @@ -23,10 +23,10 @@ import random import time import numpy -import paddle.v2 as paddle -import paddle.v2.dataset.imdb as imdb +import paddle +import paddle.dataset.imdb as imdb import paddle.fluid as fluid -from paddle.v2 import batch +import paddle.batch as batch import paddle.fluid.profiler as profiler diff --git a/benchmark/fluid/vgg.py b/benchmark/fluid/vgg.py index 9d990eff62ec368dc7033f55cc0862fa974a64e0..53e34e0cbd15914791c305db6797f826ebfae34e 100644 --- a/benchmark/fluid/vgg.py +++ b/benchmark/fluid/vgg.py @@ -17,7 +17,7 @@ from __future__ import print_function import sys import time import numpy as np -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import paddle.fluid.core as core import argparse diff --git a/doc/v2/api/data/dataset.rst b/doc/v2/api/data/dataset.rst index 02e41564b1e48c07da6ac071fc4b60089169e05a..e7c8be4452bf55e0967d750c2e624e8e316e9330 100644 --- a/doc/v2/api/data/dataset.rst +++ b/doc/v2/api/data/dataset.rst @@ -1,82 +1,82 @@ Dataset ======= -.. automodule:: paddle.v2.dataset +.. automodule:: paddle.dataset :members: :noindex: mnist +++++ -.. automodule:: paddle.v2.dataset.mnist +.. automodule:: paddle.dataset.mnist :members: :noindex: cifar +++++ -.. automodule:: paddle.v2.dataset.cifar +.. automodule:: paddle.dataset.cifar :members: :noindex: conll05 +++++++ -.. automodule:: paddle.v2.dataset.conll05 +.. automodule:: paddle.dataset.conll05 :members: get_dict,get_embedding,test :noindex: imdb ++++ -.. automodule:: paddle.v2.dataset.imdb +.. automodule:: paddle.dataset.imdb :members: :noindex: imikolov ++++++++ -.. automodule:: paddle.v2.dataset.imikolov +.. automodule:: paddle.dataset.imikolov :members: :noindex: movielens +++++++++ -.. automodule:: paddle.v2.dataset.movielens +.. automodule:: paddle.dataset.movielens :members: :noindex: -.. autoclass:: paddle.v2.dataset.movielens.MovieInfo +.. autoclass:: paddle.dataset.movielens.MovieInfo :noindex: - -.. autoclass:: paddle.v2.dataset.movielens.UserInfo + +.. autoclass:: paddle.dataset.movielens.UserInfo :noindex: sentiment +++++++++ -.. automodule:: paddle.v2.dataset.sentiment +.. automodule:: paddle.dataset.sentiment :members: :noindex: uci_housing +++++++++++ -.. automodule:: paddle.v2.dataset.uci_housing +.. automodule:: paddle.dataset.uci_housing :members: :noindex: wmt14 +++++ -.. automodule:: paddle.v2.dataset.wmt14 +.. automodule:: paddle.dataset.wmt14 :members: :noindex: wmt16 +++++ -.. automodule:: paddle.v2.dataset.wmt16 +.. automodule:: paddle.dataset.wmt16 :members: :noindex: diff --git a/paddle/fluid/operators/reader/CMakeLists.txt b/paddle/fluid/operators/reader/CMakeLists.txt index 845528860f91d0b479bb3c4dbbe05e32c68dc16f..3106978eb0149b14849dfd1aaad8bbe76791f2f6 100644 --- a/paddle/fluid/operators/reader/CMakeLists.txt +++ b/paddle/fluid/operators/reader/CMakeLists.txt @@ -23,5 +23,7 @@ reader_library(create_recordio_file_reader_op SRCS create_recordio_file_reader_o reader_library(create_double_buffer_reader_op SRCS create_double_buffer_reader_op.cc) reader_library(create_multi_pass_reader_op SRCS create_multi_pass_reader_op.cc) reader_library(create_threaded_reader_op SRCS create_threaded_reader_op.cc) + +cc_test(reader_blocking_queue_test SRCS reader_blocking_queue_test.cc) # Export local libraries to parent set(READER_LIBRARY ${LOCAL_READER_LIBS} PARENT_SCOPE) diff --git a/paddle/fluid/operators/reader/blocking_queue.h b/paddle/fluid/operators/reader/blocking_queue.h new file mode 100644 index 0000000000000000000000000000000000000000..71684b14176edc8f71efbefa9a7decffc8f3011e --- /dev/null +++ b/paddle/fluid/operators/reader/blocking_queue.h @@ -0,0 +1,112 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include // NOLINT +#include + +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace operators { +namespace reader { + +template +class BlockingQueue { + // BlockingQueue is for buffered reading and is supposed to use only the + // reader package. It is true that we could and we should have been using + // framework::Channel, but which has currently a deadlock bug. BlockingQueue + // is a workaround and a simplified version of framework::Channel as it + // doesn't support GPU and it implements on buffered blocking queue. + public: + explicit BlockingQueue(size_t capacity) + : capacity_(capacity), closed_(false) { + PADDLE_ENFORCE_GT( + capacity_, 0, + "The capacity of a reader::BlockingQueue must be greater than 0."); + } + + bool Send(const T& elem) { + std::unique_lock lock(mutex_); + send_cv_.wait(lock, [&] { return queue_.size() < capacity_ || closed_; }); + if (closed_) { + VLOG(5) + << "WARNING: Sending an element to a closed reader::BlokcingQueue."; + return false; + } + PADDLE_ENFORCE_LT(queue_.size(), capacity_); + queue_.push_back(elem); + receive_cv_.notify_one(); + return true; + } + + bool Send(T&& elem) { + std::unique_lock lock(mutex_); + send_cv_.wait(lock, [&] { return queue_.size() < capacity_ || closed_; }); + if (closed_) { + VLOG(5) + << "WARNING: Sending an element to a closed reader::BlokcingQueue."; + return false; + } + PADDLE_ENFORCE_LT(queue_.size(), capacity_); + queue_.emplace_back(std::move(elem)); + receive_cv_.notify_one(); + return true; + } + + bool Receive(T* elem) { + std::unique_lock lock(mutex_); + receive_cv_.wait(lock, [&] { return !queue_.empty() || closed_; }); + if (!queue_.empty()) { + PADDLE_ENFORCE_NOT_NULL(elem); + *elem = queue_.front(); + queue_.pop_front(); + send_cv_.notify_one(); + return true; + } else { + PADDLE_ENFORCE(closed_); + return false; + } + } + + void Close() { + std::lock_guard lock(mutex_); + closed_ = true; + send_cv_.notify_all(); + receive_cv_.notify_all(); + } + + bool IsClosed() { + std::lock_guard lock(mutex_); + return closed_; + } + + size_t Cap() { + std::lock_guard lock(mutex_); + return capacity_; + } + + private: + size_t capacity_; + bool closed_; + std::deque queue_; + + std::mutex mutex_; + std::condition_variable receive_cv_; + std::condition_variable send_cv_; +}; +} // namespace reader +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc index 4372f23fc1dbd85e43b04a9d644977392316c2e9..3fdc31dfa5242b6487c308d395d70d7ff348bc73 100644 --- a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc +++ b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc @@ -14,7 +14,7 @@ #include // NOLINT -#include "paddle/fluid/framework/channel.h" +#include "paddle/fluid/operators/reader/blocking_queue.h" #include "paddle/fluid/operators/reader/reader_op_registry.h" namespace paddle { @@ -23,13 +23,13 @@ namespace reader { // 'Double buffer' means we shall maintain two batches of input data at the same // time. So the kCacheSize shoul be at least 2. -static constexpr size_t kCacheSize = 2; +static constexpr size_t kCacheSize = 3; // There will be two bacthes out of the channel during training: // 1. the one waiting to be sent to the channel // 2. the one just be received from the channel, which is also being used by // subsequent operators. // So the channel size should be kChacheSize - 2 -static constexpr size_t kChannelSize = 0; // kCacheSize - 2 +static constexpr size_t kChannelSize = 1; // kCacheSize - 2 class DoubleBufferReader : public framework::DecoratedReader { public: @@ -55,10 +55,8 @@ class DoubleBufferReader : public framework::DecoratedReader { ~DoubleBufferReader() { EndPrefetcher(); } private: - bool HasNext() const; - void StartPrefetcher() { - channel_ = framework::MakeChannel(kChannelSize); + channel_ = new reader::BlockingQueue(kChannelSize); prefetcher_ = std::thread([this] { PrefetchThreadFunc(); }); } @@ -74,7 +72,7 @@ class DoubleBufferReader : public framework::DecoratedReader { void PrefetchThreadFunc(); std::thread prefetcher_; - framework::Channel* channel_; + reader::BlockingQueue* channel_; platform::Place place_; std::vector> cpu_tensor_cache_; std::vector> gpu_tensor_cache_; @@ -139,17 +137,16 @@ class CreateDoubleBufferReaderOpMaker : public DecoratedReaderMakerBase { }; void DoubleBufferReader::ReadNext(std::vector* out) { - out->clear(); - if (HasNext()) { - size_t cached_tensor_id; - channel_->Receive(&cached_tensor_id); + size_t cached_tensor_id; + if (channel_->Receive(&cached_tensor_id)) { if (platform::is_gpu_place(place_)) { *out = gpu_tensor_cache_[cached_tensor_id]; - ctxs_[cached_tensor_id]->Wait(); } else { // CPU place *out = cpu_tensor_cache_[cached_tensor_id]; } + } else { + out->clear(); } } @@ -159,12 +156,6 @@ void DoubleBufferReader::ReInit() { StartPrefetcher(); } -bool DoubleBufferReader::HasNext() const { - while (!channel_->IsClosed() && !channel_->CanReceive()) { - } - return channel_->CanReceive(); -} - void DoubleBufferReader::PrefetchThreadFunc() { VLOG(5) << "A new prefetch thread starts."; size_t cached_tensor_id = 0; @@ -185,10 +176,7 @@ void DoubleBufferReader::PrefetchThreadFunc() { gpu_batch[i].set_lod(cpu_batch[i].lod()); } } - try { - size_t tmp = cached_tensor_id; - channel_->Send(&tmp); - } catch (paddle::platform::EnforceNotMet e) { + if (!channel_->Send(cached_tensor_id)) { VLOG(5) << "WARNING: The double buffer channel has been closed. The " "prefetch thread will terminate."; break; diff --git a/paddle/fluid/operators/reader/open_files_op.cc b/paddle/fluid/operators/reader/open_files_op.cc index 779dc8a6a0deb7792e0540071e3a2588102fa708..91ad7d56583446ee4686e74187de166f387125df 100644 --- a/paddle/fluid/operators/reader/open_files_op.cc +++ b/paddle/fluid/operators/reader/open_files_op.cc @@ -14,7 +14,7 @@ #include // NOLINT -#include "paddle/fluid/framework/channel.h" +#include "paddle/fluid/operators/reader/blocking_queue.h" #include "paddle/fluid/operators/reader/reader_op_registry.h" namespace paddle { @@ -37,7 +37,6 @@ class MultiFileReader : public framework::ReaderBase { ~MultiFileReader() { EndScheduler(); } private: - bool HasNext(); void StartNewScheduler(); void EndScheduler(); void ScheduleThreadFunc(); @@ -48,15 +47,14 @@ class MultiFileReader : public framework::ReaderBase { std::thread scheduler_; std::vector prefetchers_; size_t buffer_size_; - framework::Channel* waiting_file_idx_; - framework::Channel* available_thread_idx_; - framework::Channel>* buffer_; + reader::BlockingQueue* waiting_file_idx_; + reader::BlockingQueue* available_thread_idx_; + reader::BlockingQueue>* buffer_; }; void MultiFileReader::ReadNext(std::vector* out) { - out->clear(); - if (HasNext()) { - buffer_->Receive(out); + if (!buffer_->Receive(out)) { + out->clear(); } } @@ -65,25 +63,19 @@ void MultiFileReader::ReInit() { StartNewScheduler(); } -bool MultiFileReader::HasNext() { - while (!buffer_->IsClosed() && !buffer_->CanReceive()) { - } - return buffer_->CanReceive(); -} - void MultiFileReader::StartNewScheduler() { size_t thread_num = prefetchers_.size(); - waiting_file_idx_ = framework::MakeChannel(file_names_.size()); - available_thread_idx_ = framework::MakeChannel(thread_num); - buffer_ = - framework::MakeChannel>(buffer_size_); + waiting_file_idx_ = new reader::BlockingQueue(file_names_.size()); + available_thread_idx_ = new reader::BlockingQueue(thread_num); + buffer_ = new reader::BlockingQueue>( + buffer_size_); for (size_t i = 0; i < file_names_.size(); ++i) { - waiting_file_idx_->Send(&i); + waiting_file_idx_->Send(i); } waiting_file_idx_->Close(); for (size_t i = 0; i < thread_num; ++i) { - available_thread_idx_->Send(&i); + available_thread_idx_->Send(i); } scheduler_ = std::thread([this] { ScheduleThreadFunc(); }); @@ -149,7 +141,7 @@ void MultiFileReader::PrefetchThreadFunc(std::string file_name, break; } try { - buffer_->Send(&ins); + buffer_->Send(std::move(ins)); } catch (paddle::platform::EnforceNotMet e) { VLOG(5) << "WARNING: The buffer channel has been closed. The prefetch " "thread of file '" @@ -158,9 +150,7 @@ void MultiFileReader::PrefetchThreadFunc(std::string file_name, } } - try { - available_thread_idx_->Send(&thread_idx); - } catch (paddle::platform::EnforceNotMet e) { + if (!available_thread_idx_->Send(thread_idx)) { VLOG(5) << "WARNING: The available_thread_idx_ channel has been closed. " "Fail to send thread_idx."; } diff --git a/paddle/fluid/operators/reader/reader_blocking_queue_test.cc b/paddle/fluid/operators/reader/reader_blocking_queue_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..7d1b381d56c8cdc1e79e594b18c1a1ed59ab5284 --- /dev/null +++ b/paddle/fluid/operators/reader/reader_blocking_queue_test.cc @@ -0,0 +1,219 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include // NOLINT +#include +#include // NOLINT +#include +#include "gtest/gtest.h" + +#include "paddle/fluid/operators/reader/blocking_queue.h" + +using paddle::operators::reader::BlockingQueue; + +TEST(BlockingQueue, CapacityTest) { + size_t cap = 10; + BlockingQueue q(cap); + EXPECT_EQ(q.Cap(), cap); +} + +void FirstInFirstOut(size_t queue_cap, size_t elem_num, size_t send_time_gap, + size_t receive_time_gap) { + BlockingQueue q(queue_cap); + std::thread sender([&]() { + for (size_t i = 0; i < elem_num; ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(send_time_gap)); + EXPECT_TRUE(q.Send(i)); + } + q.Close(); + }); + size_t count = 0; + while (true) { + std::this_thread::sleep_for(std::chrono::milliseconds(receive_time_gap)); + size_t elem; + if (!q.Receive(&elem)) { + break; + } + EXPECT_EQ(elem, count++); + } + sender.join(); + EXPECT_EQ(count, elem_num); + EXPECT_TRUE(q.IsClosed()); +} + +TEST(BlockingQueue, FirstInFirstOutTest) { + FirstInFirstOut(2, 5, 2, 50); + FirstInFirstOut(2, 5, 50, 2); + FirstInFirstOut(10, 3, 50, 2); + FirstInFirstOut(10, 3, 2, 50); +} + +TEST(BlockingQueue, SenderBlockingTest) { + const size_t queue_cap = 2; + BlockingQueue q(queue_cap); + size_t send_count = 0; + std::thread sender([&]() { + for (size_t i = 0; i < 5; ++i) { + if (!q.Send(i)) { + break; + } + ++send_count; + } + }); + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + q.Close(); + sender.join(); + EXPECT_EQ(send_count, queue_cap); + std::vector res; + while (true) { + size_t elem; + if (!q.Receive(&elem)) { + break; + } + res.push_back(elem); + } + EXPECT_EQ(res.size(), queue_cap); + for (size_t i = 0; i < res.size(); ++i) { + EXPECT_EQ(res[i], i); + } +} + +TEST(BlockingQueue, ReceiverBlockingTest) { + const size_t queue_cap = 5; + BlockingQueue q(queue_cap); + std::vector receive_res; + std::thread receiver([&]() { + size_t elem; + while (true) { + if (!q.Receive(&elem)) { + break; + } + receive_res.push_back(elem); + } + }); + std::vector to_send{2, 1, 7}; + for (auto e : to_send) { + q.Send(e); + } + q.Close(); + receiver.join(); + EXPECT_EQ(receive_res.size(), to_send.size()); + for (size_t i = 0; i < to_send.size(); ++i) { + EXPECT_EQ(receive_res[i], to_send[i]); + } +} + +void CheckIsUnorderedSame(const std::vector>& v1, + const std::vector>& v2) { + std::set s1; + std::set s2; + for (auto vec : v1) { + for (size_t elem : vec) { + s1.insert(elem); + } + } + for (auto vec : v2) { + for (size_t elem : vec) { + s2.insert(elem); + } + } + EXPECT_EQ(s1.size(), s2.size()); + auto it1 = s1.begin(); + auto it2 = s2.begin(); + while (it1 != s1.end()) { + EXPECT_EQ(*it1, *it2); + ++it1; + ++it2; + } +} + +void MultiSenderMultiReceiver(const size_t queue_cap, + const std::vector>& to_send, + size_t receiver_num, size_t send_time_gap, + size_t receive_time_gap) { + BlockingQueue q(queue_cap); + size_t sender_num = to_send.size(); + std::vector senders; + for (size_t s_idx = 0; s_idx < sender_num; ++s_idx) { + senders.emplace_back(std::thread([&, s_idx] { + for (size_t elem : to_send[s_idx]) { + std::this_thread::sleep_for(std::chrono::milliseconds(send_time_gap)); + EXPECT_TRUE(q.Send(elem)); + } + })); + } + std::vector receivers; + std::mutex mu; + std::vector> res; + for (size_t r_idx = 0; r_idx < receiver_num; ++r_idx) { + receivers.emplace_back(std::thread([&] { + std::vector receiver_res; + while (true) { + std::this_thread::sleep_for( + std::chrono::milliseconds(receive_time_gap)); + size_t elem; + if (!q.Receive(&elem)) { + break; + } + receiver_res.push_back(elem); + } + std::lock_guard lock(mu); + res.push_back(receiver_res); + })); + } + for (auto& t : senders) { + t.join(); + } + q.Close(); + for (auto& t : receivers) { + t.join(); + } + CheckIsUnorderedSame(to_send, res); +} + +TEST(BlockingQueue, MultiSenderMultiReaderTest) { + std::vector> to_send_1{{2, 3, 4}, {9}, {0, 7, 15, 6}}; + MultiSenderMultiReceiver(2, to_send_1, 2, 0, 0); + MultiSenderMultiReceiver(10, to_send_1, 2, 0, 0); + MultiSenderMultiReceiver(2, to_send_1, 20, 0, 0); + MultiSenderMultiReceiver(2, to_send_1, 2, 50, 0); + MultiSenderMultiReceiver(2, to_send_1, 2, 0, 50); + + std::vector> to_send_2{ + {2, 3, 4}, {}, {0, 7, 15, 6, 9, 32}}; + MultiSenderMultiReceiver(2, to_send_2, 3, 0, 0); + MultiSenderMultiReceiver(20, to_send_2, 3, 0, 0); + MultiSenderMultiReceiver(2, to_send_2, 30, 0, 0); + MultiSenderMultiReceiver(2, to_send_2, 3, 50, 0); + MultiSenderMultiReceiver(2, to_send_2, 3, 0, 50); +} + +struct MyClass { + MyClass() : val_(0) {} + explicit MyClass(int val) : val_(val) {} + MyClass(const MyClass& b) { val_ = b.val_; } + MyClass(MyClass&& b) { val_ = b.val_; } + void operator=(const MyClass& b) { val_ = b.val_; } + + int val_; +}; + +TEST(BlockingQueue, MyClassTest) { + BlockingQueue q(2); + MyClass a(200); + q.Send(std::move(a)); + MyClass b; + q.Receive(&b); + EXPECT_EQ(a.val_, b.val_); +} diff --git a/paddle/fluid/platform/dynload/cublas.h b/paddle/fluid/platform/dynload/cublas.h index 1ab55d6b9bf8fdbd14c9c2bd978e3e99dba3e73e..81acaff87d3c2025cf0d6185a1590b018bfbd83c 100644 --- a/paddle/fluid/platform/dynload/cublas.h +++ b/paddle/fluid/platform/dynload/cublas.h @@ -14,10 +14,12 @@ #pragma once +#include #include #include #include #include // NOLINT +#include #include "paddle/fluid/platform/dynload/dynamic_loader.h" namespace paddle { @@ -37,14 +39,14 @@ extern void *cublas_dso_handle; #ifdef PADDLE_USE_DSO #define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ struct DynLoad__##__name { \ + using FUNC_TYPE = decltype(&::__name); \ template \ inline cublasStatus_t operator()(Args... args) { \ - typedef cublasStatus_t (*cublasFunc)(Args...); \ std::call_once(cublas_dso_flag, []() { \ cublas_dso_handle = paddle::platform::dynload::GetCublasDsoHandle(); \ }); \ void *p_##__name = dlsym(cublas_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ + return reinterpret_cast(p_##__name)(args...); \ } \ }; \ extern DynLoad__##__name __name @@ -71,8 +73,8 @@ extern void *cublas_dso_handle; __macro(cublasDgemm_v2); \ __macro(cublasHgemm); \ __macro(cublasSgemmEx); \ - __macro(cublasSgeam_v2); \ - __macro(cublasDgeam_v2); \ + __macro(cublasSgeam); \ + __macro(cublasDgeam); \ __macro(cublasCreate_v2); \ __macro(cublasDestroy_v2); \ __macro(cublasSetStream_v2); \ diff --git a/paddle/fluid/platform/dynload/cudnn.h b/paddle/fluid/platform/dynload/cudnn.h index 24475b62ca2825c45ff7edb39328dece3b822b25..34d83e395694f55eafca74d63ebf363169ab30e8 100644 --- a/paddle/fluid/platform/dynload/cudnn.h +++ b/paddle/fluid/platform/dynload/cudnn.h @@ -34,7 +34,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name); struct DynLoad__##__name { \ template \ auto operator()(Args... args) -> decltype(__name(args...)) { \ - using cudnn_func = decltype(__name(args...)) (*)(Args...); \ + using cudnn_func = decltype(&::__name); \ std::call_once(cudnn_dso_flag, []() { \ cudnn_dso_handle = paddle::platform::dynload::GetCUDNNDsoHandle(); \ }); \ diff --git a/paddle/fluid/platform/dynload/cupti.h b/paddle/fluid/platform/dynload/cupti.h index d0d676b9d8ac462900b48246bec43166d04ef97b..e64de7c20fc9d145e51cfc4528e321b3c4ec86c8 100644 --- a/paddle/fluid/platform/dynload/cupti.h +++ b/paddle/fluid/platform/dynload/cupti.h @@ -41,7 +41,7 @@ extern void *cupti_dso_handle; struct DynLoad__##__name { \ template \ inline CUptiResult CUPTIAPI operator()(Args... args) { \ - typedef CUptiResult CUPTIAPI (*cuptiFunc)(Args...); \ + using cuptiFunc = decltype(&::__name); \ std::call_once(cupti_dso_flag, []() { \ cupti_dso_handle = paddle::platform::dynload::GetCUPTIDsoHandle(); \ }); \ diff --git a/paddle/fluid/platform/dynload/curand.h b/paddle/fluid/platform/dynload/curand.h index 4697fb6cd96770127206bdabeea77e43eb09d1f5..46ad4379d5f9572d415ef1d747077217ae29391e 100644 --- a/paddle/fluid/platform/dynload/curand.h +++ b/paddle/fluid/platform/dynload/curand.h @@ -30,7 +30,7 @@ extern void *curand_dso_handle; struct DynLoad__##__name { \ template \ curandStatus_t operator()(Args... args) { \ - typedef curandStatus_t (*curandFunc)(Args...); \ + using curandFunc = decltype(&::__name); \ std::call_once(curand_dso_flag, []() { \ curand_dso_handle = paddle::platform::dynload::GetCurandDsoHandle(); \ }); \ diff --git a/paddle/fluid/platform/dynload/nccl.h b/paddle/fluid/platform/dynload/nccl.h index c5a10a78a4f432b431680c089f255fea777277cb..37902ae20c5d9d64486232bbd468375c4a50a615 100644 --- a/paddle/fluid/platform/dynload/nccl.h +++ b/paddle/fluid/platform/dynload/nccl.h @@ -33,7 +33,7 @@ extern void* nccl_dso_handle; struct DynLoad__##__name { \ template \ auto operator()(Args... args) -> decltype(__name(args...)) { \ - using nccl_func = decltype(__name(args...)) (*)(Args...); \ + using nccl_func = decltype(&::__name); \ std::call_once(nccl_dso_flag, []() { \ nccl_dso_handle = paddle::platform::dynload::GetNCCLDsoHandle(); \ }); \ diff --git a/paddle/fluid/platform/dynload/warpctc.h b/paddle/fluid/platform/dynload/warpctc.h index 7fa468370463a51c486b80317f401612930bc72e..7c70649d21c547beb824576d4a8ecf6219a9bddf 100644 --- a/paddle/fluid/platform/dynload/warpctc.h +++ b/paddle/fluid/platform/dynload/warpctc.h @@ -36,7 +36,7 @@ extern void* warpctc_dso_handle; struct DynLoad__##__name { \ template \ auto operator()(Args... args) -> decltype(__name(args...)) { \ - using warpctcFunc = decltype(__name(args...)) (*)(Args...); \ + using warpctcFunc = decltype(&::__name); \ std::call_once(warpctc_dso_flag, []() { \ warpctc_dso_handle = paddle::platform::dynload::GetWarpCTCDsoHandle(); \ }); \ diff --git a/paddle/scripts/paddle_docker_build.sh b/paddle/scripts/paddle_docker_build.sh index 53df94980fdd3c9fdaa4cf077880a8f7737bbd8a..252227ef88abbe238686dd5d7672e57ad68dab7e 100755 --- a/paddle/scripts/paddle_docker_build.sh +++ b/paddle/scripts/paddle_docker_build.sh @@ -75,6 +75,7 @@ function main() { build_android) start_build_docker docker exec ${CONTAINER_ID} bash -c "./paddle/scripts/paddle_build.sh $@" + ;; *) if container_running "${CONTAINER_ID}"; then docker exec ${CONTAINER_ID} bash -c "./paddle/scripts/paddle_build.sh $@"