Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
b7df7f9e
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b7df7f9e
编写于
11月 03, 2017
作者:
L
Luo Tao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove unused ProtoDataProvider-related code
上级
8b30e2ab
变更
8
展开全部
隐藏空白更改
内联
并排
Showing
8 changed file
with
1 addition
and
1862 deletion
+1
-1862
paddle/gserver/CMakeLists.txt
paddle/gserver/CMakeLists.txt
+0
-1
paddle/gserver/dataproviders/DataProvider.cpp
paddle/gserver/dataproviders/DataProvider.cpp
+1
-3
paddle/gserver/dataproviders/ProtoDataProvider.cpp
paddle/gserver/dataproviders/ProtoDataProvider.cpp
+0
-932
paddle/gserver/dataproviders/ProtoDataProvider.h
paddle/gserver/dataproviders/ProtoDataProvider.h
+0
-179
paddle/gserver/tests/CMakeLists.txt
paddle/gserver/tests/CMakeLists.txt
+0
-11
paddle/gserver/tests/proto_files.txt
paddle/gserver/tests/proto_files.txt
+0
-2
paddle/gserver/tests/proto_files_compressed.txt
paddle/gserver/tests/proto_files_compressed.txt
+0
-2
paddle/gserver/tests/test_ProtoDataProvider.cpp
paddle/gserver/tests/test_ProtoDataProvider.cpp
+0
-732
未找到文件。
paddle/gserver/CMakeLists.txt
浏览文件 @
b7df7f9e
...
@@ -73,7 +73,6 @@ if(MOBILE_INFERENCE)
...
@@ -73,7 +73,6 @@ if(MOBILE_INFERENCE)
list
(
REMOVE_ITEM GSERVER_SOURCES
list
(
REMOVE_ITEM GSERVER_SOURCES
dataproviders/DataProvider.cpp
dataproviders/DataProvider.cpp
dataproviders/MultiDataProvider.cpp
dataproviders/MultiDataProvider.cpp
dataproviders/ProtoDataProvider.cpp
dataproviders/PyDataProvider2.cpp
dataproviders/PyDataProvider2.cpp
dataproviders/PyDataProvider.cpp
)
dataproviders/PyDataProvider.cpp
)
...
...
paddle/gserver/dataproviders/DataProvider.cpp
浏览文件 @
b7df7f9e
...
@@ -16,8 +16,8 @@ limitations under the License. */
...
@@ -16,8 +16,8 @@ limitations under the License. */
#include <unistd.h>
#include <unistd.h>
#include <algorithm>
#include <algorithm>
#include "ProtoDataProvider.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "paddle/utils/StringUtil.h"
#include "paddle/utils/StringUtil.h"
#include "paddle/utils/Util.h"
#include "paddle/utils/Util.h"
...
@@ -164,8 +164,6 @@ DataProvider* DataProvider::create(const DataConfig& config,
...
@@ -164,8 +164,6 @@ DataProvider* DataProvider::create(const DataConfig& config,
REGISTER_DATA_PROVIDER
(
simple
,
SimpleDataProvider
);
REGISTER_DATA_PROVIDER
(
simple
,
SimpleDataProvider
);
REGISTER_DATA_PROVIDER
(
dummy
,
DummyDataProvider
);
REGISTER_DATA_PROVIDER
(
dummy
,
DummyDataProvider
);
REGISTER_DATA_PROVIDER
(
proto
,
ProtoDataProvider
);
REGISTER_DATA_PROVIDER
(
proto_sequence
,
ProtoSequenceDataProvider
);
int64_t
DataProvider
::
getNextBatch
(
int64_t
size
,
DataBatch
*
batch
)
{
int64_t
DataProvider
::
getNextBatch
(
int64_t
size
,
DataBatch
*
batch
)
{
int64_t
batchSize
=
doubleBuffer_
?
getNextBatchFromBuffer
(
size
,
batch
)
int64_t
batchSize
=
doubleBuffer_
?
getNextBatchFromBuffer
(
size
,
batch
)
...
...
paddle/gserver/dataproviders/ProtoDataProvider.cpp
已删除
100644 → 0
浏览文件 @
8b30e2ab
此差异已折叠。
点击以展开。
paddle/gserver/dataproviders/ProtoDataProvider.h
已删除
100644 → 0
浏览文件 @
8b30e2ab
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "DataFormat.pb.h"
#include "paddle/utils/Stat.h"
#include "DataProvider.h"
#include "ProtoReader.h"
namespace
paddle
{
/**
* @brief Provides data from a protobuf data file in which each sample is
* specified by a proto message.
*
* DataSample defined in DataFormat.proto.
*
* The file format is
*
* header
*
* sample1
*
* sample2
*
* ...
*
* sampleN
*
* @note: In the data file, each message is prefixed with its length.
* Reading and writing of the protobuf messages are implemented in ProtoReader.h
*/
class ProtoDataProvider : public DataProvider {
public:
  /**
   * @brief Construct a provider from a data configuration.
   * @param[in] config       data configuration
   * @param[in] useGpu       GPU flag (forwarded semantics are defined by the
   *                         out-of-line implementation)
   * @param[in] loadDataAll  defaults to true; presumably controls whether all
   *                         data is loaded at construction -- confirm against
   *                         the implementation
   */
  ProtoDataProvider(const DataConfig& config,
                    bool useGpu,
                    bool loadDataAll = true);

  /// Defined out of line.
  virtual void reset();

  /**
   * @brief Number of samples this provider reports.
   *
   * Starts from sampleNums_ and, when usageRatio_ is below 1.0f, scales it by
   * usageRatio_ (truncated to int64_t).
   *
   * @note this size includes the sequences which are skipped because they
   * are longer than the batch size.
   */
  virtual int64_t getSize() {
    const int64_t total = sampleNums_;
    if (usageRatio_ < 1.0f) {
      return static_cast<int64_t>(total * usageRatio_);
    }
    return total;
  }

  /// Defined out of line.
  virtual void shuffle();

  /**
   * @brief load protobuf data from the given files
   * @param[in] fileList  names of the data files
   */
  void loadData(const std::vector<std::string>& fileList);

  /// Defined out of line.
  virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch);

protected:
  /**
   * @brief load protobuf data from a list of files
   * @param[in] fileName  name of a file which itself contains a list of
   *                      data file names
   */
  void loadData(const std::string& fileName);

  /**
   * @brief load protobuf data from a single file
   * @param[in] fileName  data file name
   */
  void loadDataFile(const std::string& fileName);

  /**
   * @brief check the data header of each data sample
   * @param[in] header  data header read from protobuf data
   */
  void checkDataHeader(const DataHeader& header);

  /**
   * @brief fill protobuf data into slots_, a vector of ProtoSlot held in
   *        memory
   * @param[in] sample  data sample read from protobuf data
   */
  void fillSlots(const DataSample& sample);

  /**
   * @brief true if each sample is one sequence, i.e., independent of other
   *        samples. This is the case exactly when sequenceStartPositions_
   *        holds no entries.
   */
  inline bool iidData() const {
    const bool noSequenceBoundaries = sequenceStartPositions_.empty();
    return noSequenceBoundaries;
  }

  /**
   * @brief check that sample is consistent with header_
   */
  void checkSample(const DataSample& sample);

  // Loop helpers parameterized on a callable type; defined out of line.
  template <class Op>
  int64_t sequenceLoop(Op op, int64_t size);

  template <class Op>
  int64_t sampleLoop(Op op, int64_t size);

  template <class Op>
  int64_t subSampleLoop(Op op, int64_t size, int slot);

  /// Defined out of line.
  void showDataStats();

protected:
  /// Values together with their dimensions.
  struct ProtoVarSlot {
    std::vector<real> data;
    std::vector<int> dims;
  };

  /// In-memory storage for one slot; one container per supported data kind.
  struct ProtoSlot {
    SlotDef::SlotType type;
    int dim;
    std::vector<int> indexData;
    std::vector<real> denseData;
    std::vector<sparse_non_value_t> sparseNonValueData;
    std::vector<sparse_float_value_t> sparseFloatValueData;
    std::vector<int64_t> indices;
    std::vector<int64_t> subIndices;

    std::vector<ProtoVarSlot> varDenseData;
    std::vector<std::vector<int>> varIndices;
    std::vector<std::string> strData;
  };

  DataHeader header_;
  int numVecSlots_;

  std::vector<ProtoSlot> slots_;
  size_t sampleNums_;

  /**
   * The starting position of each sequence in samples.
   * The last element should be the number of samples.
   * If empty, each sample is one sequence.
   */
  std::vector<size_t> sequenceStartPositions_;

  int64_t currentSequenceIndex_;

  // The size should be the number of sequences.
  std::vector<size_t> shuffledSequenceIds_;

  ThreadLocalD<DataBatch> cpuBatch_;
  ThreadLocalD<DataBatch> gpuBatch_;

  RWLock lock_;

  // stats for the number of non-zero entries
  std::vector<StatPtr> nnzStats_;
};
/**
* @brief Special use for Proto data: instances should contain sparse-non-value
* slots and a label.
*
* @note ProtoSequenceDataProvider treats each SPARSE SLOT as a SEQUENCE
*/
class ProtoSequenceDataProvider : public ProtoDataProvider {
public:
  /**
   * @brief Construct a sequence provider from a data configuration.
   * @param[in] config       data configuration
   * @param[in] useGpu       GPU flag
   * @param[in] loadDataAll  defaults to true
   */
  ProtoSequenceDataProvider(const DataConfig& config,
                            bool useGpu,
                            bool loadDataAll = true);

  /// Empty destructor body; nothing is released here.
  ~ProtoSequenceDataProvider() {}

  /// Redeclares the batch-fetching entry point of ProtoDataProvider; defined
  /// out of line.
  virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch);
};
}
// namespace paddle
paddle/gserver/tests/CMakeLists.txt
浏览文件 @
b7df7f9e
...
@@ -58,17 +58,6 @@ if(NOT WITH_DOUBLE)
...
@@ -58,17 +58,6 @@ if(NOT WITH_DOUBLE)
endif
()
endif
()
if
(
NOT MOBILE_INFERENCE
)
if
(
NOT MOBILE_INFERENCE
)
################### test_ProtoDataProvider ############
add_unittest_without_exec
(
test_ProtoDataProvider
test_ProtoDataProvider.cpp
)
# test_ProtoDataProvider will mkdir as same name,
# so if WORKING_DIRECTORY is default directory, then
# mkdir will get error.
add_test
(
NAME test_ProtoDataProvider
COMMAND
${
CMAKE_CURRENT_BINARY_DIR
}
/test_ProtoDataProvider
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/paddle
)
################## test_Evaluator #######################
################## test_Evaluator #######################
add_unittest
(
test_Evaluator
add_unittest
(
test_Evaluator
test_Evaluator.cpp
)
test_Evaluator.cpp
)
...
...
paddle/gserver/tests/proto_files.txt
已删除
100644 → 0
浏览文件 @
8b30e2ab
./test_ProtoDataProvider/data1.bin
./test_ProtoDataProvider/data2.bin
paddle/gserver/tests/proto_files_compressed.txt
已删除
100644 → 0
浏览文件 @
8b30e2ab
./test_ProtoDataProvider/data1.bin.gz
./test_ProtoDataProvider/data2.bin.gz
paddle/gserver/tests/test_ProtoDataProvider.cpp
已删除
100644 → 0
浏览文件 @
8b30e2ab
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录