提交 baef96e1 编写于 作者: D dzhwinter 提交者: GitHub

Merge branch 'develop' into optimizer_lib2

...@@ -50,6 +50,7 @@ before_install: ...@@ -50,6 +50,7 @@ before_install:
# protobuf version. # protobuf version.
- pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
- pip install rarfile - pip install rarfile
- eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
- | - |
function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
script: script:
......
...@@ -126,7 +126,9 @@ endif(WITH_GPU) ...@@ -126,7 +126,9 @@ endif(WITH_GPU)
add_subdirectory(proto) add_subdirectory(proto)
add_subdirectory(paddle) add_subdirectory(paddle)
add_subdirectory(go/master/c)
add_subdirectory(python) add_subdirectory(python)
add_subdirectory(go/pserver/cclient)
if(WITH_DOC) if(WITH_DOC)
add_subdirectory(doc) add_subdirectory(doc)
......
...@@ -30,7 +30,8 @@ RUN apt-get update && \ ...@@ -30,7 +30,8 @@ RUN apt-get update && \
python-numpy python-matplotlib gcc g++ \ python-numpy python-matplotlib gcc g++ \
automake locales clang-format-3.8 swig doxygen cmake \ automake locales clang-format-3.8 swig doxygen cmake \
liblapack-dev liblapacke-dev libboost-dev \ liblapack-dev liblapacke-dev libboost-dev \
clang-3.8 llvm-3.8 libclang-3.8-dev && \ clang-3.8 llvm-3.8 libclang-3.8-dev \
net-tools && \
apt-get clean -y apt-get clean -y
# Install Go # Install Go
......
...@@ -59,7 +59,7 @@ macro(add_style_check_target TARGET_NAME) ...@@ -59,7 +59,7 @@ macro(add_style_check_target TARGET_NAME)
"--filter=${STYLE_FILTER}" "--filter=${STYLE_FILTER}"
"--write-success=${CUR_GEN}" ${filename} "--write-success=${CUR_GEN}" ${filename}
DEPENDS ${filename} DEPENDS ${filename}
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endif() endif()
endforeach() endforeach()
endif() endif()
......
...@@ -11,23 +11,16 @@ find_path(CUDNN_INCLUDE_DIR cudnn.h ...@@ -11,23 +11,16 @@ find_path(CUDNN_INCLUDE_DIR cudnn.h
get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH) get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH)
if(NOT ${CMAKE_HOST_SYSTEM_PROCESSOR}) set(TARGET_ARCH "x86_64")
execute_process( if(NOT ${CMAKE_SYSTEM_PROCESSOR})
COMMAND uname -m COMMAND tr -d '\n' set(TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR})
OUTPUT_VARIABLE HOST_ARCH endif()
RESULT_VARIABLE UNAME_RESULT)
if(${UNAME_RESULT})
set(HOST_ARCH "x86_64")
endif(${UNAME_RESULT})
else(NOT ${CMAKE_HOST_SYSTEM_PROCESSOR})
set(HOST_ARCH ${CMAKE_HOST_SYSTEM_PROCESSOR})
endif(NOT ${CMAKE_HOST_SYSTEM_PROCESSOR})
list(APPEND CUDNN_CHECK_LIBRARY_DIRS list(APPEND CUDNN_CHECK_LIBRARY_DIRS
${CUDNN_ROOT} ${CUDNN_ROOT}
${CUDNN_ROOT}/lib64 ${CUDNN_ROOT}/lib64
${CUDNN_ROOT}/lib ${CUDNN_ROOT}/lib
${CUDNN_ROOT}/lib/${HOST_ARCH}-linux-gnu ${CUDNN_ROOT}/lib/${TARGET_ARCH}-linux-gnu
$ENV{CUDNN_ROOT} $ENV{CUDNN_ROOT}
$ENV{CUDNN_ROOT}/lib64 $ENV{CUDNN_ROOT}/lib64
$ENV{CUDNN_ROOT}/lib $ENV{CUDNN_ROOT}/lib
......
...@@ -24,20 +24,25 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -24,20 +24,25 @@ IF(NOT ${CBLAS_FOUND})
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/${LIBRARY_PREFIX}openblas${STATIC_LIBRARY_SUFFIX}" SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/${LIBRARY_PREFIX}openblas${STATIC_LIBRARY_SUFFIX}"
CACHE FILEPATH "openblas library." FORCE) CACHE FILEPATH "openblas library." FORCE)
SET(COMMON_ARGS CC=${CMAKE_C_COMPILER} NO_SHARED=1 NO_LAPACK=1) SET(COMMON_ARGS CC=${CMAKE_C_COMPILER} NO_SHARED=1 NO_LAPACK=1 libs)
IF(CMAKE_CROSSCOMPILING)
IF(ANDROID) IF(ANDROID)
# arm_soft_fp_abi branch of OpenBLAS to support softfp # arm_soft_fp_abi branch of OpenBLAS to support softfp
# https://github.com/xianyi/OpenBLAS/tree/arm_soft_fp_abi # https://github.com/xianyi/OpenBLAS/tree/arm_soft_fp_abi
SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5")
SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0 libs) SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0)
ELSEIF(RPI) ELSEIF(RPI)
# use hardfp # use hardfp
SET(OPENBLAS_COMMIT "v0.2.19") SET(OPENBLAS_COMMIT "v0.2.19")
SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 USE_THREAD=0 libs) SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 USE_THREAD=0)
ENDIF()
ELSE() ELSE()
SET(OPENBLAS_COMMIT "v0.2.19") SET(OPENBLAS_COMMIT "v0.2.19")
SET(OPTIONAL_ARGS DYNAMIC_ARCH=1 libs NUM_THREADS=64) SET(OPTIONAL_ARGS "")
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86(_64)?$")
SET(OPTIONAL_ARGS DYNAMIC_ARCH=1 NUM_THREADS=64)
ENDIF()
ENDIF() ENDIF()
ExternalProject_Add( ExternalProject_Add(
......
...@@ -182,7 +182,7 @@ function(go_library TARGET_NAME) ...@@ -182,7 +182,7 @@ function(go_library TARGET_NAME)
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE}
-o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}"
${go_library_SRCS} ${go_library_SRCS}
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
add_custom_target(${TARGET_NAME}_lib ALL DEPENDS ${TARGET_NAME}_timestamp ${go_library_DEPS}) add_custom_target(${TARGET_NAME}_lib ALL DEPENDS ${TARGET_NAME}_timestamp ${go_library_DEPS})
add_library(${TARGET_NAME} STATIC IMPORTED) add_library(${TARGET_NAME} STATIC IMPORTED)
set_property(TARGET ${TARGET_NAME} PROPERTY set_property(TARGET ${TARGET_NAME} PROPERTY
...@@ -199,7 +199,7 @@ function(go_binary TARGET_NAME) ...@@ -199,7 +199,7 @@ function(go_binary TARGET_NAME)
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build
-o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}" -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}"
${go_library_SRCS} ${go_library_SRCS}
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_binary_DEPS}) add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_binary_DEPS})
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin) install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin)
endfunction(go_binary) endfunction(go_binary)
...@@ -213,7 +213,7 @@ function(go_test TARGET_NAME) ...@@ -213,7 +213,7 @@ function(go_test TARGET_NAME)
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} test COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} test
-c -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}" -c -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}"
${go_test_SRCS} ${go_test_SRCS}
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_test_DEPS}) add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_test_DEPS})
add_test(${TARGET_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}) add_test(${TARGET_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME})
endfunction(go_test) endfunction(go_test)
......
data/cifar-10-batches-py
data/cifar-out
cifar_vgg_model/*
plot.png
train.log
image_provider_copy_1.py
*pyc
train.list
test.list
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
__all__ = ['resnet_cifar10']
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
active_type=paddle.activation.Relu(),
ch_in=None):
tmp = paddle.layer.img_conv(
input=input,
filter_size=filter_size,
num_channels=ch_in,
num_filters=ch_out,
stride=stride,
padding=padding,
act=paddle.activation.Linear(),
bias_attr=False)
return paddle.layer.batch_norm(input=tmp, act=active_type)
def shortcut(ipt, n_in, n_out, stride):
if n_in != n_out:
return conv_bn_layer(ipt, n_out, 1, stride, 0,
paddle.activation.Linear())
else:
return ipt
def basicblock(ipt, ch_out, stride):
ch_in = ch_out * 2
tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1)
tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear())
short = shortcut(ipt, ch_in, ch_out, stride)
return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu())
def layer_warp(block_func, ipt, features, count, stride):
tmp = block_func(ipt, features, stride)
for i in range(1, count):
tmp = block_func(tmp, features, 1)
return tmp
def resnet_cifar10(ipt, depth=32):
# depth should be one of 20, 32, 44, 56, 110, 1202
assert (depth - 2) % 6 == 0
n = (depth - 2) / 6
nStages = {16, 64, 128}
conv1 = conv_bn_layer(
ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
res1 = layer_warp(basicblock, conv1, 16, n, 1)
res2 = layer_warp(basicblock, res1, 32, n, 2)
res3 = layer_warp(basicblock, res2, 64, n, 2)
pool = paddle.layer.img_pool(
input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
return pool
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License
import sys
import paddle.v2 as paddle
from api_v2_vgg import vgg_bn_drop
def main():
datadim = 3 * 32 * 32
classdim = 10
# PaddlePaddle init
paddle.init(use_gpu=False, trainer_count=1)
image = paddle.layer.data(
name="image", type=paddle.data_type.dense_vector(datadim))
# Add neural network config
# option 1. resnet
# net = resnet_cifar10(image, depth=32)
# option 2. vgg
net = vgg_bn_drop(image)
out = paddle.layer.fc(input=net,
size=classdim,
act=paddle.activation.Softmax())
lbl = paddle.layer.data(
name="label", type=paddle.data_type.integer_value(classdim))
cost = paddle.layer.classification_cost(input=out, label=lbl)
# Create parameters
parameters = paddle.parameters.create(cost)
# Create optimizer
momentum_optimizer = paddle.optimizer.Momentum(
momentum=0.9,
regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
learning_rate=0.1 / 128.0,
learning_rate_decay_a=0.1,
learning_rate_decay_b=50000 * 100,
learning_rate_schedule='discexp',
batch_size=128)
# End batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(
reader=paddle.batch(
paddle.dataset.cifar.test10(), batch_size=128),
feeding={'image': 0,
'label': 1})
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# Create trainer
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=momentum_optimizer)
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(), buf_size=50000),
batch_size=128),
num_passes=5,
event_handler=event_handler,
feeding={'image': 0,
'label': 1})
if __name__ == '__main__':
main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
__all__ = ['vgg_bn_drop']
def vgg_bn_drop(input):
def conv_block(ipt, num_filter, groups, dropouts, num_channels=None):
return paddle.networks.img_conv_group(
input=ipt,
num_channels=num_channels,
pool_size=2,
pool_stride=2,
conv_num_filter=[num_filter] * groups,
conv_filter_size=3,
conv_act=paddle.activation.Relu(),
conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts,
pool_type=paddle.pooling.Max())
conv1 = conv_block(input, 64, 2, [0.3, 0], 3)
conv2 = conv_block(conv1, 128, 2, [0.4, 0])
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5)
fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear())
bn = paddle.layer.batch_norm(
input=fc1,
act=paddle.activation.Relu(),
layer_attr=paddle.attr.Extra(drop_rate=0.5))
fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear())
return fc2
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
tar zxf cifar-10-python.tar.gz
rm cifar-10-python.tar.gz
rm -rf cifar-out/*
echo Converting CIFAR data to images.....
python process_cifar.py ./cifar-10-batches-py ./cifar-out
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import sys
import os
import PIL.Image as Image
"""
Usage: python process_cifar input_dir output_dir
"""
def mkdir_not_exist(path):
"""
Make dir if the path does not exist.
path: the path to be created.
"""
if not os.path.exists(path):
os.mkdir(path)
def create_dir_structure(output_dir):
"""
Create the directory structure for the directory.
output_dir: the direcotry structure path.
"""
mkdir_not_exist(os.path.join(output_dir))
mkdir_not_exist(os.path.join(output_dir, "train"))
mkdir_not_exist(os.path.join(output_dir, "test"))
def convert_batch(batch_path, label_set, label_map, output_dir, data_split):
"""
Convert CIFAR batch to the structure of Paddle format.
batch_path: the batch to be converted.
label_set: the set of labels.
output_dir: the output path.
data_split: whether it is training or testing data.
"""
data = np.load(batch_path)
for data, label, filename in zip(data['data'], data['labels'],
data['filenames']):
data = data.reshape((3, 32, 32))
data = np.transpose(data, (1, 2, 0))
label = label_map[label]
output_dir_this = os.path.join(output_dir, data_split, str(label))
output_filename = os.path.join(output_dir_this, filename)
if not label in label_set:
label_set[label] = True
mkdir_not_exist(output_dir_this)
Image.fromarray(data).save(output_filename)
if __name__ == '__main__':
input_dir = sys.argv[1]
output_dir = sys.argv[2]
num_batch = 5
create_dir_structure(output_dir)
label_map = {
0: "airplane",
1: "automobile",
2: "bird",
3: "cat",
4: "deer",
5: "dog",
6: "frog",
7: "horse",
8: "ship",
9: "truck"
}
labels = {}
for i in range(1, num_batch + 1):
convert_batch(
os.path.join(input_dir, "data_batch_%d" % i), labels, label_map,
output_dir, "train")
convert_batch(
os.path.join(input_dir, "test_batch"), {}, label_map, output_dir,
"test")
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import random
import paddle.utils.image_util as image_util
from paddle.trainer.PyDataProvider2 import *
#
# {'img_size': 32,
# 'settings': a global object,
# 'color': True,
# 'mean_img_size': 32,
# 'meta': './data/cifar-out/batches/batches.meta',
# 'num_classes': 10,
# 'file_list': ('./data/cifar-out/batches/train_batch_000',),
# 'use_jpeg': True}
def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg,
is_train, **kwargs):
settings.mean_img_size = mean_img_size
settings.img_size = img_size
settings.num_classes = num_classes
settings.color = color
settings.is_train = is_train
if settings.color:
settings.img_raw_size = settings.img_size * settings.img_size * 3
else:
settings.img_raw_size = settings.img_size * settings.img_size
settings.meta_path = meta
settings.use_jpeg = use_jpeg
settings.img_mean = image_util.load_meta(settings.meta_path,
settings.mean_img_size,
settings.img_size, settings.color)
settings.logger.info('Image size: %s', settings.img_size)
settings.logger.info('Meta path: %s', settings.meta_path)
settings.input_types = {
'image': dense_vector(settings.img_raw_size),
'label': integer_value(settings.num_classes)
}
settings.logger.info('DataProvider Initialization finished')
@provider(init_hook=hook, min_pool_size=0)
def processData(settings, file_list):
"""
The main function for loading data.
Load the batch, iterate all the images and labels in this batch.
file_list: the batch file list.
"""
with open(file_list, 'r') as fdata:
lines = [line.strip() for line in fdata]
random.shuffle(lines)
for file_name in lines:
with io.open(file_name.strip(), 'rb') as file:
data = cPickle.load(file)
indexes = list(range(len(data['images'])))
if settings.is_train:
random.shuffle(indexes)
for i in indexes:
if settings.use_jpeg == 1:
img = image_util.decode_jpeg(data['images'][i])
else:
img = data['images'][i]
img_feat = image_util.preprocess_img(
img, settings.img_mean, settings.img_size,
settings.is_train, settings.color)
label = data['labels'][i]
yield {
'image': img_feat.astype('float32'),
'label': int(label)
}
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from PIL import Image
from cStringIO import StringIO
def resize_image(img, target_size):
"""
Resize an image so that the shorter edge has length target_size.
img: the input image to be resized.
target_size: the target resized image size.
"""
percent = (target_size / float(min(img.size[0], img.size[1])))
resized_size = int(round(img.size[0] * percent)), int(
round(img.size[1] * percent))
img = img.resize(resized_size, Image.ANTIALIAS)
return img
def flip(im):
"""
Return the flipped image.
Flip an image along the horizontal direction.
im: input image, (H x W x K) ndarrays
"""
if len(im.shape) == 3:
return im[:, :, ::-1]
else:
return im[:, ::-1]
def crop_img(im, inner_size, color=True, test=True):
"""
Return cropped image.
The size of the cropped image is inner_size * inner_size.
im: (K x H x W) ndarrays
inner_size: the cropped image size.
color: whether it is color image.
test: whether in test mode.
If False, does random cropping and flipping.
If True, crop the center of images.
"""
if color:
height, width = max(inner_size, im.shape[1]), max(inner_size,
im.shape[2])
padded_im = np.zeros((3, height, width))
startY = (height - im.shape[1]) / 2
startX = (width - im.shape[2]) / 2
endY, endX = startY + im.shape[1], startX + im.shape[2]
padded_im[:, startY:endY, startX:endX] = im
else:
im = im.astype('float32')
height, width = max(inner_size, im.shape[0]), max(inner_size,
im.shape[1])
padded_im = np.zeros((height, width))
startY = (height - im.shape[0]) / 2
startX = (width - im.shape[1]) / 2
endY, endX = startY + im.shape[0], startX + im.shape[1]
padded_im[startY:endY, startX:endX] = im
if test:
startY = (height - inner_size) / 2
startX = (width - inner_size) / 2
else:
startY = np.random.randint(0, height - inner_size + 1)
startX = np.random.randint(0, width - inner_size + 1)
endY, endX = startY + inner_size, startX + inner_size
if color:
pic = padded_im[:, startY:endY, startX:endX]
else:
pic = padded_im[startY:endY, startX:endX]
if (not test) and (np.random.randint(2) == 0):
pic = flip(pic)
return pic
def decode_jpeg(jpeg_string):
np_array = np.array(Image.open(StringIO(jpeg_string)))
if len(np_array.shape) == 3:
np_array = np.transpose(np_array, (2, 0, 1))
return np_array
def preprocess_img(im, img_mean, crop_size, is_train, color=True):
"""
Does data augmentation for images.
If is_train is false, cropping the center region from the image.
If is_train is true, randomly crop a region from the image,
and randomy does flipping.
im: (K x H x W) ndarrays
"""
im = im.astype('float32')
test = not is_train
pic = crop_img(im, crop_size, color, test)
pic -= img_mean
return pic.flatten()
def load_meta(meta_path, mean_img_size, crop_size, color=True):
"""
Return the loaded meta file.
Load the meta image, which is the mean of the images in the dataset.
The mean image is subtracted from every input image so that the expected mean
of each input image is zero.
"""
mean = np.load(meta_path)['data_mean']
border = (mean_img_size - crop_size) / 2
if color:
assert (mean_img_size * mean_img_size * 3 == mean.shape[0])
mean = mean.reshape(3, mean_img_size, mean_img_size)
mean = mean[:, border:border + crop_size, border:border +
crop_size].astype('float32')
else:
assert (mean_img_size * mean_img_size == mean.shape[0])
mean = mean.reshape(mean_img_size, mean_img_size)
mean = mean[border:border + crop_size, border:border +
crop_size].astype('float32')
return mean
def load_image(img_path, is_color=True):
"""
Load image and return.
img_path: image path.
is_color: is color image or not.
"""
img = Image.open(img_path)
img.load()
return img
def oversample(img, crop_dims):
"""
image : iterable of (H x W x K) ndarrays
crop_dims: (height, width) tuple for the crops.
Returned data contains ten crops of input image, namely,
four corner patches and the center patch as well as their
horizontal reflections.
"""
# Dimensions and center.
im_shape = np.array(img[0].shape)
crop_dims = np.array(crop_dims)
im_center = im_shape[:2] / 2.0
# Make crop coordinates
h_indices = (0, im_shape[0] - crop_dims[0])
w_indices = (0, im_shape[1] - crop_dims[1])
crops_ix = np.empty((5, 4), dtype=int)
curr = 0
for i in h_indices:
for j in w_indices:
crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
curr += 1
crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate(
[-crop_dims / 2.0, crop_dims / 2.0])
crops_ix = np.tile(crops_ix, (2, 1))
# Extract crops
crops = np.empty(
(10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]),
dtype=np.float32)
ix = 0
for im in img:
for crop in crops_ix:
crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :]
ix += 1
crops[ix - 5:ix] = crops[ix - 5:ix, :, ::-1, :] # flip for mirrors
return crops
class ImageTransformer:
def __init__(self,
transpose=None,
channel_swap=None,
mean=None,
is_color=True):
self.transpose = transpose
self.channel_swap = None
self.mean = None
self.is_color = is_color
def set_transpose(self, order):
if self.is_color:
assert 3 == len(order)
self.transpose = order
def set_channel_swap(self, order):
if self.is_color:
assert 3 == len(order)
self.channel_swap = order
def set_mean(self, mean):
# mean value, may be one value per channel
if mean.ndim == 1:
mean = mean[:, np.newaxis, np.newaxis]
else:
# elementwise mean
if self.is_color:
assert len(mean.shape) == 3
self.mean = mean
def transformer(self, data):
if self.transpose is not None:
data = data.transpose(self.transpose)
if self.channel_swap is not None:
data = data[self.channel_swap, :, :]
if self.mean is not None:
data -= self.mean
return data
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
model=cifar_vgg_model/pass-00299/
image=data/cifar-out/test/airplane/seaplane_s_000978.png
use_gpu=1
python prediction.py $model $image $use_gpu
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os, sys
import numpy as np
import logging
from PIL import Image
from optparse import OptionParser
import paddle.utils.image_util as image_util
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config
logging.basicConfig(
format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO)
class ImageClassifier():
def __init__(self,
train_conf,
use_gpu=True,
model_dir=None,
resize_dim=None,
crop_dim=None,
mean_file=None,
oversample=False,
is_color=True):
"""
train_conf: network configure.
model_dir: string, directory of model.
resize_dim: int, resized image size.
crop_dim: int, crop size.
mean_file: string, image mean file.
oversample: bool, oversample means multiple crops, namely five
patches (the four corner patches and the center
patch) as well as their horizontal reflections,
ten crops in all.
"""
self.train_conf = train_conf
self.model_dir = model_dir
if model_dir is None:
self.model_dir = os.path.dirname(train_conf)
self.resize_dim = resize_dim
self.crop_dims = [crop_dim, crop_dim]
self.oversample = oversample
self.is_color = is_color
self.transformer = image_util.ImageTransformer(is_color=is_color)
self.transformer.set_transpose((2, 0, 1))
self.mean_file = mean_file
mean = np.load(self.mean_file)['data_mean']
mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
self.transformer.set_mean(mean) # mean pixel
gpu = 1 if use_gpu else 0
conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu)
conf = parse_config(train_conf, conf_args)
swig_paddle.initPaddle("--use_gpu=%d" % (gpu))
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
assert isinstance(self.network, swig_paddle.GradientMachine)
self.network.loadParameters(self.model_dir)
data_size = 3 * self.crop_dims[0] * self.crop_dims[1]
slots = [dense_vector(data_size)]
self.converter = DataProviderConverter(slots)
def get_data(self, img_path):
"""
1. load image from img_path.
2. resize or oversampling.
3. transformer data: transpose, sub mean.
return K x H x W ndarray.
img_path: image path.
"""
image = image_util.load_image(img_path, self.is_color)
if self.oversample:
# image_util.resize_image: short side is self.resize_dim
image = image_util.resize_image(image, self.resize_dim)
image = np.array(image)
input = np.zeros(
(1, image.shape[0], image.shape[1], 3), dtype=np.float32)
input[0] = image.astype(np.float32)
input = image_util.oversample(input, self.crop_dims)
else:
image = image.resize(self.crop_dims, Image.ANTIALIAS)
input = np.zeros(
(1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
input[0] = np.array(image).astype(np.float32)
data_in = []
for img in input:
img = self.transformer.transformer(img).flatten()
data_in.append([img.tolist()])
return data_in
def forward(self, input_data):
in_arg = self.converter(input_data)
return self.network.forwardTest(in_arg)
def forward(self, data, output_layer):
"""
input_data: py_paddle input data.
output_layer: specify the name of probability, namely the layer with
softmax activation.
return: the predicting probability of each label.
"""
input = self.converter(data)
self.network.forwardTest(input)
output = self.network.getLayerOutputs(output_layer)
# For oversampling, average predictions across crops.
# If not, the shape of output[name]: (1, class_number),
# the mean is also applicable.
return output[output_layer]['value'].mean(0)
def predict(self, image=None, output_layer=None):
assert isinstance(image, basestring)
assert isinstance(output_layer, basestring)
data = self.get_data(image)
prob = self.forward(data, output_layer)
lab = np.argsort(-prob)
logging.info("Label of %s is: %d", image, lab[0])
if __name__ == '__main__':
image_size = 32
crop_size = 32
multi_crop = True
config = "vgg_16_cifar.py"
output_layer = "__fc_layer_1__"
mean_path = "data/cifar-out/batches/batches.meta"
model_path = sys.argv[1]
image = sys.argv[2]
use_gpu = bool(int(sys.argv[3]))
obj = ImageClassifier(
train_conf=config,
model_dir=model_path,
resize_dim=image_size,
crop_dim=crop_size,
mean_file=mean_path,
use_gpu=use_gpu,
oversample=multi_crop)
obj.predict(image, output_layer)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.utils.preprocess_img import ImageClassificationDatasetCreater
from optparse import OptionParser
def option_parser():
parser = OptionParser(usage="usage: python preprcoess.py "\
"-i data_dir [options]")
parser.add_option(
"-i",
"--input",
action="store",
dest="input",
help="Input data directory.")
parser.add_option(
"-s",
"--size",
action="store",
dest="size",
help="Processed image size.")
parser.add_option(
"-c",
"--color",
action="store",
dest="color",
help="whether to use color images.")
return parser.parse_args()
if __name__ == '__main__':
options, args = option_parser()
data_dir = options.input
processed_image_size = int(options.size)
color = options.color == "1"
data_creator = ImageClassificationDatasetCreater(
data_dir, processed_image_size, color)
data_creator.train_list_name = "train.txt"
data_creator.test_list_name = "test.txt"
data_creator.num_per_batch = 1000
data_creator.overwrite = True
data_creator.create_batches()
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
data_dir=./data/cifar-out
python preprocess.py -i $data_dir -s 32 -c 1
echo "data/cifar-out/batches/train.txt" > train.list
echo "data/cifar-out/batches/test.txt" > test.list
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
config=vgg_16_cifar.py
output=./cifar_vgg_model
log=train.log
paddle train \
--config=$config \
--dot_period=10 \
--log_period=100 \
--test_all_data_in_one_period=1 \
--use_gpu=1 \
--trainer_count=1 \
--num_passes=300 \
--save_dir=$output \
2>&1 | tee $log
paddle usage -l $log -e $? -n "image_classification_train" >/dev/null 2>&1
python -m paddle.utils.plotcurve -i $log > plot.png
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
is_predict = get_config_arg("is_predict", bool, False)
####################Data Configuration ##################
if not is_predict:
data_dir = 'data/cifar-out/batches/'
meta_path = data_dir + 'batches.meta'
args = {
'meta': meta_path,
'mean_img_size': 32,
'img_size': 32,
'num_classes': 10,
'use_jpeg': 1,
'color': "color"
}
define_py_data_sources2(
train_list="train.list",
test_list="train.list",
module='image_provider',
obj='processData',
args=args)
######################Algorithm Configuration #############
settings(
batch_size=128,
learning_rate=0.1 / 128.0,
learning_method=MomentumOptimizer(0.9),
regularization=L2Regularization(0.0005 * 128))
#######################Network Configuration #############
data_size = 3 * 32 * 32
label_size = 10
img = data_layer(name='image', size=data_size)
# small_vgg is predefined in trainer_config_helpers.networks
predict = small_vgg(input_image=img, num_channels=3, num_classes=label_size)
if not is_predict:
lbl = data_layer(name="label", size=label_size)
outputs(classification_cost(input=predict, label=lbl))
else:
outputs(predict)
dataprovider.pyc
empty.list
train.log
output
train.list
This folder contains scripts used in PaddlePaddle introduction.
- use `bash train.sh` to train a simple linear regression model
- use `python evaluate_model.py` to read model parameters. You can see that `w` and `b` are very close to [2, 0.3].
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
import random
# define data types of input: 2 real numbers
@provider(
input_types={'x': dense_vector(1),
'y': dense_vector(1)}, use_seq=False)
def process(settings, input_file):
for i in xrange(2000):
x = random.random()
yield {'x': [x], 'y': [2 * x + 0.3]}
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
paddle train \
--config=trainer_config.py \
--save_dir=./output \
--num_passes=30 \
2>&1 |tee 'train.log'
paddle usage -l "train.log" -e $? -n "introduction" >/dev/null 2>&1
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
# 1. read data. Suppose you saved above python code as dataprovider.py
define_py_data_sources2(
train_list=['no_matter.txt'],
test_list=None,
module='dataprovider',
obj='process',
args={})
# 2. learning algorithm
settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
# 3. Network configuration
x = data_layer(name='x', size=1)
y = data_layer(name='y', size=1)
y_predict = fc_layer(
input=x,
param_attr=ParamAttr(name='w'),
size=1,
act=LinearActivation(),
bias_attr=ParamAttr(name='b'))
cost = mse_cost(input=y_predict, label=y)
outputs(cost)
log.txt
data/meta.bin
data/ml-1m
data/ratings.dat.train
data/ratings.dat.test
data/train.list
data/test.list
dataprovider_copy_1.py
*.pyc
output
import paddle.v2 as paddle
import cPickle
import copy
def main():
paddle.init(use_gpu=False)
movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
uid = paddle.layer.data(
name='user_id',
type=paddle.data_type.integer_value(
paddle.dataset.movielens.max_user_id() + 1))
usr_emb = paddle.layer.embedding(input=uid, size=32)
usr_gender_id = paddle.layer.data(
name='gender_id', type=paddle.data_type.integer_value(2))
usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16)
usr_age_id = paddle.layer.data(
name='age_id',
type=paddle.data_type.integer_value(
len(paddle.dataset.movielens.age_table)))
usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16)
usr_job_id = paddle.layer.data(
name='job_id',
type=paddle.data_type.integer_value(paddle.dataset.movielens.max_job_id(
) + 1))
usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16)
usr_combined_features = paddle.layer.fc(
input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb],
size=200,
act=paddle.activation.Tanh())
mov_id = paddle.layer.data(
name='movie_id',
type=paddle.data_type.integer_value(
paddle.dataset.movielens.max_movie_id() + 1))
mov_emb = paddle.layer.embedding(input=mov_id, size=32)
mov_categories = paddle.layer.data(
name='category_id',
type=paddle.data_type.sparse_binary_vector(
len(paddle.dataset.movielens.movie_categories())))
mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)
mov_title_id = paddle.layer.data(
name='movie_title',
type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32)
mov_title_conv = paddle.networks.sequence_conv_pool(
input=mov_title_emb, hidden_size=32, context_len=3)
mov_combined_features = paddle.layer.fc(
input=[mov_emb, mov_categories_hidden, mov_title_conv],
size=200,
act=paddle.activation.Tanh())
inference = paddle.layer.cos_sim(
a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
cost = paddle.layer.mse_cost(
input=inference,
label=paddle.layer.data(
name='score', type=paddle.data_type.dense_vector(1)))
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=paddle.optimizer.Adam(
learning_rate=1e-4))
feeding = {
'user_id': 0,
'gender_id': 1,
'age_id': 2,
'job_id': 3,
'movie_id': 4,
'category_id': 5,
'movie_title': 6,
'score': 7
}
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d Batch %d Cost %.2f" % (
event.pass_id, event.batch_id, event.cost)
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.movielens.train(), buf_size=8192),
batch_size=256),
event_handler=event_handler,
feeding=feeding,
num_passes=1)
user_id = 234
movie_id = 345
user = paddle.dataset.movielens.user_info()[user_id]
movie = paddle.dataset.movielens.movie_info()[movie_id]
feature = user.value() + movie.value()
def reader():
yield feature
infer_dict = copy.copy(feeding)
del infer_dict['score']
prediction = paddle.infer(
output=inference,
parameters=parameters,
reader=paddle.batch(
reader, batch_size=32),
feeding=infer_dict)
print(prediction + 5) / 2
if __name__ == '__main__':
main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
def meta_to_header(meta, name):
metas = meta[name]['__meta__']['raw_meta']
for each_meta in metas:
slot_name = each_meta.get('name', '%s_id' % name)
if each_meta['type'] == 'id':
yield slot_name, integer_value(each_meta['max'])
elif each_meta['type'] == 'embedding':
is_seq = each_meta['seq'] == 'sequence'
yield slot_name, integer_value(
len(each_meta['dict']),
seq_type=SequenceType.SEQUENCE
if is_seq else SequenceType.NO_SEQUENCE)
elif each_meta['type'] == 'one_hot_dense':
yield slot_name, dense_vector(len(each_meta['dict']))
{
"user": {
"file": {
"name": "users.dat",
"delimiter": "::"
},
"fields": ["id", "gender", "age", "occupation"]
},
"movie": {
"file": {
"name": "movies.dat",
"delimiter": "::"
},
"fields": ["id", "title", "genres"]
}
}
#!/bin/env python2
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
config_generator.py
Usage:
./config_generator.py <config_file> [--output_format=<output_format>]
./config_generator.py -h | --help
Options:
-h --help Show this screen.
--output_format=<output_format> Output Config format(json or yaml) [default: json].
"""
import json
import docopt
import copy
DEFAULT_FILE = {"type": "split", "delimiter": ","}
DEFAULT_FIELD = {
"id": {
"type": "id"
},
"gender": {
"name": "gender",
"type": "embedding",
"dict": {
"type": "char_based"
}
},
"age": {
"name": "age",
"type": "embedding",
"dict": {
"type": "whole_content",
"sort": True
}
},
"occupation": {
"name": "occupation",
"type": "embedding",
"dict": {
"type": "whole_content",
"sort": "true"
}
},
"title": {
"regex": {
"pattern": r"^(.*)\((\d+)\)$",
"group_id": 1,
"strip": True
},
"name": "title",
"type": {
"name": "embedding",
"seq_type": "sequence",
},
"dict": {
"type": "char_based"
}
},
"genres": {
"type": "one_hot_dense",
"dict": {
"type": "split",
"delimiter": "|"
},
"name": "genres"
}
}
def merge_dict(master_dict, slave_dict):
return dict(((k, master_dict.get(k) or slave_dict.get(k))
for k in set(slave_dict) | set(master_dict)))
def main(filename, fmt):
with open(filename, 'r') as f:
conf = json.load(f)
obj = dict()
for k in conf:
val = conf[k]
file_dict = val['file']
file_dict = merge_dict(file_dict, DEFAULT_FILE)
fields = []
for pos, field_key in enumerate(val['fields']):
assert isinstance(field_key, basestring)
field = copy.deepcopy(DEFAULT_FIELD[field_key])
field['pos'] = pos
fields.append(field)
obj[k] = {"file": file_dict, "fields": fields}
meta = {"meta": obj}
# print meta
if fmt == 'json':
def formatter(x):
import json
return json.dumps(x, indent=2)
elif fmt == 'yaml':
def formatter(x):
import yaml
return yaml.safe_dump(x, default_flow_style=False)
else:
raise NotImplementedError("Dump format %s is not implemented" % fmt)
print formatter(meta)
if __name__ == '__main__':
args = docopt.docopt(__doc__, version="0.1.0")
main(args["<config_file>"], args["--output_format"])
{
"meta": {
"movie": {
"fields": [
{
"type": "id",
"pos": 0
},
{
"regex": {
"pattern": "^(.*)\\((\\d+)\\)$",
"group_id": 1,
"strip": true
},
"type": {
"seq_type": "sequence",
"name": "embedding"
},
"dict": {
"type": "char_based"
},
"name": "title",
"pos": 1
},
{
"type": "one_hot_dense",
"dict": {
"delimiter": "|",
"type": "split"
},
"name": "genres",
"pos": 2
}
],
"file": {
"delimiter": "::",
"type": "split",
"name": "movies.dat"
}
},
"user": {
"fields": [
{
"type": "id",
"pos": 0
},
{
"type": "embedding",
"dict": {
"type": "char_based"
},
"name": "gender",
"pos": 1
},
{
"type": "embedding",
"dict": {
"sort": true,
"type": "whole_content"
},
"name": "age",
"pos": 2
},
{
"type": "embedding",
"dict": {
"sort": "true",
"type": "whole_content"
},
"name": "occupation",
"pos": 3
}
],
"file": {
"delimiter": "::",
"type": "split",
"name": "users.dat"
}
}
}
}
#!/bin/env python2
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Preprocess Movielens dataset, to get movie/user object.
Usage:
./preprocess.py <dataset_dir> <binary_filename> [--config=<config_file>]
./preprocess.py -h | --help
Options:
-h --help Show this screen.
--version Show version.
--config=<config_file> Get MetaData config file [default: config.json].
"""
import docopt
import os
import sys
import re
import collections
try:
import cPickle as pickle
except ImportError:
import pickle
class UniqueIDGenerator(object):
def __init__(self):
self.pool = collections.defaultdict(self.__next_id__)
self.next_id = 0
def __next_id__(self):
tmp = self.next_id
self.next_id += 1
return tmp
def __call__(self, k):
return self.pool[k]
def to_list(self):
ret_val = [None] * len(self.pool)
for k in self.pool.keys():
ret_val[self.pool[k]] = k
return ret_val
class SortedIDGenerator(object):
def __init__(self):
self.__key_set__ = set()
self.dict = None
def scan(self, key):
self.__key_set__.add(key)
def finish_scan(self, compare=None, key=None, reverse=False):
self.__key_set__ = sorted(
list(self.__key_set__), cmp=compare, key=key, reverse=reverse)
self.dict = dict()
for idx, each_key in enumerate(self.__key_set__):
self.dict[each_key] = idx
def __call__(self, key):
return self.dict[key]
def to_list(self):
return self.__key_set__
class SplitFileReader(object):
def __init__(self, work_dir, config):
assert isinstance(config, dict)
self.filename = config['name']
self.delimiter = config.get('delimiter', ',')
self.work_dir = work_dir
def read(self):
with open(os.path.join(self.work_dir, self.filename), 'r') as f:
for line in f:
line = line.strip()
if isinstance(self.delimiter, unicode):
self.delimiter = str(self.delimiter)
yield line.split(self.delimiter)
@staticmethod
def create(work_dir, config):
assert isinstance(config, dict)
if config['type'] == 'split':
return SplitFileReader(work_dir, config)
class IFileReader(object):
READERS = [SplitFileReader]
def read(self):
raise NotImplementedError()
@staticmethod
def create(work_dir, config):
for reader_cls in IFileReader.READERS:
val = reader_cls.create(work_dir, config)
if val is not None:
return val
class IDFieldParser(object):
TYPE = 'id'
def __init__(self, config):
self.__max_id__ = -sys.maxint - 1
self.__min_id__ = sys.maxint
self.__id_count__ = 0
def scan(self, line):
idx = int(line)
self.__max_id__ = max(self.__max_id__, idx)
self.__min_id__ = min(self.__min_id__, idx)
self.__id_count__ += 1
def parse(self, line):
return int(line)
def meta_field(self):
return {
"is_key": True,
'max': self.__max_id__,
'min': self.__min_id__,
'count': self.__id_count__,
'type': 'id'
}
class SplitEmbeddingDict(object):
def __init__(self, delimiter):
self.__id__ = UniqueIDGenerator()
self.delimiter = delimiter
def scan(self, multi):
for val in multi.split(self.delimiter):
self.__id__(val)
def parse(self, multi):
return map(self.__id__, multi.split(self.delimiter))
def meta_field(self):
return self.__id__.to_list()
class EmbeddingFieldParser(object):
TYPE = 'embedding'
NO_SEQUENCE = "no_sequence"
SEQUENCE = "sequence"
class CharBasedEmbeddingDict(object):
def __init__(self, is_seq=True):
self.__id__ = UniqueIDGenerator()
self.is_seq = is_seq
def scan(self, s):
for ch in s:
self.__id__(ch)
def parse(self, s):
return map(self.__id__, s) if self.is_seq else self.__id__(s[0])
def meta_field(self):
return self.__id__.to_list()
class WholeContentDict(object):
def __init__(self, need_sort=True):
assert need_sort
self.__id__ = SortedIDGenerator()
self.__has_finished__ = False
def scan(self, txt):
self.__id__.scan(txt)
def meta_field(self):
if not self.__has_finished__:
self.__id__.finish_scan()
self.__has_finished__ = True
return self.__id__.to_list()
def parse(self, txt):
return self.__id__(txt)
def __init__(self, config):
try:
self.seq_type = config['type']['seq_type']
except TypeError:
self.seq_type = EmbeddingFieldParser.NO_SEQUENCE
if config['dict']['type'] == 'char_based':
self.dict = EmbeddingFieldParser.CharBasedEmbeddingDict(
self.seq_type == EmbeddingFieldParser.SEQUENCE)
elif config['dict']['type'] == 'split':
self.dict = SplitEmbeddingDict(config['dict'].get('delimiter', ','))
elif config['dict']['type'] == 'whole_content':
self.dict = EmbeddingFieldParser.WholeContentDict(config['dict'][
'sort'])
else:
print config
assert False
self.name = config['name']
def scan(self, s):
self.dict.scan(s)
def meta_field(self):
return {
'name': self.name,
'dict': self.dict.meta_field(),
'type': 'embedding',
'seq': self.seq_type
}
def parse(self, s):
return self.dict.parse(s)
class OneHotDenseFieldParser(object):
TYPE = 'one_hot_dense'
def __init__(self, config):
if config['dict']['type'] == 'split':
self.dict = SplitEmbeddingDict(config['dict']['delimiter'])
self.name = config['name']
def scan(self, s):
self.dict.scan(s)
def meta_field(self):
# print self.dict.meta_field()
return {
'dict': self.dict.meta_field(),
'name': self.name,
'type': 'one_hot_dense'
}
def parse(self, s):
ids = self.dict.parse(s)
retv = [0.0] * len(self.dict.meta_field())
for idx in ids:
retv[idx] = 1.0
# print retv
return retv
class FieldParserFactory(object):
PARSERS = [IDFieldParser, EmbeddingFieldParser, OneHotDenseFieldParser]
@staticmethod
def create(config):
if isinstance(config['type'], basestring):
config_type = config['type']
elif isinstance(config['type'], dict):
config_type = config['type']['name']
assert config_type is not None
for each_parser_cls in FieldParserFactory.PARSERS:
if config_type == each_parser_cls.TYPE:
return each_parser_cls(config)
print config
class CompositeFieldParser(object):
def __init__(self, parser, extractor):
self.extractor = extractor
self.parser = parser
def scan(self, *args, **kwargs):
self.parser.scan(self.extractor.extract(*args, **kwargs))
def parse(self, *args, **kwargs):
return self.parser.parse(self.extractor.extract(*args, **kwargs))
def meta_field(self):
return self.parser.meta_field()
class PositionContentExtractor(object):
def __init__(self, pos):
self.pos = pos
def extract(self, line):
assert isinstance(line, list)
return line[self.pos]
class RegexPositionContentExtractor(PositionContentExtractor):
def __init__(self, pos, pattern, group_id, strip=True):
PositionContentExtractor.__init__(self, pos)
pattern = pattern.strip()
self.pattern = re.compile(pattern)
self.group_id = group_id
self.strip = strip
def extract(self, line):
line = PositionContentExtractor.extract(self, line)
match = self.pattern.match(line)
# print line, self.pattern.pattern, match
assert match is not None
txt = match.group(self.group_id)
if self.strip:
txt.strip()
return txt
class ContentExtractorFactory(object):
def extract(self, line):
pass
@staticmethod
def create(config):
if 'pos' in config:
if 'regex' not in config:
return PositionContentExtractor(config['pos'])
else:
extra_args = config['regex']
return RegexPositionContentExtractor(
pos=config['pos'], **extra_args)
class MetaFile(object):
def __init__(self, work_dir):
self.work_dir = work_dir
self.obj = dict()
def parse(self, config):
config = config['meta']
ret_obj = dict()
for key in config.keys():
val = config[key]
assert 'file' in val
reader = IFileReader.create(self.work_dir, val['file'])
assert reader is not None
assert 'fields' in val and isinstance(val['fields'], list)
fields_config = val['fields']
field_parsers = map(MetaFile.__field_config_mapper__, fields_config)
for each_parser in field_parsers:
assert each_parser is not None
for each_block in reader.read():
for each_parser in field_parsers:
each_parser.scan(each_block)
metas = map(lambda x: x.meta_field(), field_parsers)
# print metas
key_index = filter(
lambda x: x is not None,
map(lambda (idx, meta): idx if 'is_key' in meta and meta['is_key'] else None,
enumerate(metas)))[0]
key_map = []
for i in range(min(key_index, len(metas))):
key_map.append(i)
for i in range(key_index + 1, len(metas)):
key_map.append(i)
obj = {'__meta__': {'raw_meta': metas, 'feature_map': key_map}}
for each_block in reader.read():
idx = field_parsers[key_index].parse(each_block)
val = []
for i, each_parser in enumerate(field_parsers):
if i != key_index:
val.append(each_parser.parse(each_block))
obj[idx] = val
ret_obj[key] = obj
self.obj = ret_obj
return ret_obj
@staticmethod
def __field_config_mapper__(conf):
assert isinstance(conf, dict)
extrator = ContentExtractorFactory.create(conf)
field_parser = FieldParserFactory.create(conf)
assert extrator is not None
assert field_parser is not None
return CompositeFieldParser(field_parser, extrator)
def dump(self, fp):
pickle.dump(self.obj, fp, pickle.HIGHEST_PROTOCOL)
def preprocess(binary_filename, dataset_dir, config, **kwargs):
assert isinstance(config, str)
with open(config, 'r') as config_file:
file_loader = None
if config.lower().endswith('.yaml'):
import yaml
file_loader = yaml
elif config.lower().endswith('.json'):
import json
file_loader = json
config = file_loader.load(config_file)
meta = MetaFile(dataset_dir)
meta.parse(config)
with open(binary_filename, 'wb') as outf:
meta.dump(outf)
if __name__ == '__main__':
args = docopt.docopt(__doc__, version='0.1.0')
kwargs = dict()
for key in args.keys():
if key != '--help':
param_name = key
assert isinstance(param_name, str)
param_name = param_name.replace('<', '')
param_name = param_name.replace('>', '')
param_name = param_name.replace('--', '')
kwargs[param_name] = args[key]
preprocess(**kwargs)
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -ex
cd "$(dirname "$0")"
# download the dataset
wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
# unzip the dataset
unzip ml-1m.zip
# remove the unused zip file
rm ml-1m.zip
#!/bin/env python2
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Separate movielens 1m dataset to train/test file.
Usage:
./separate.py <input_file> [--test_ratio=<test_ratio>] [--delimiter=<delimiter>]
./separate.py -h | --help
Options:
-h --help Show this screen.
--version Show version.
--test_ratio=<test_ratio> Test ratio for separate [default: 0.1].
--delimiter=<delimiter> File delimiter [default: ,].
"""
import docopt
import collections
import random
def process(test_ratio, input_file, delimiter, **kwargs):
test_ratio = float(test_ratio)
rating_dict = collections.defaultdict(list)
with open(input_file, 'r') as f:
for line in f:
user_id = int(line.split(delimiter)[0])
rating_dict[user_id].append(line.strip())
with open(input_file + ".train", 'w') as train_file:
with open(input_file + ".test", 'w') as test_file:
for k in rating_dict.keys():
lines = rating_dict[k]
assert isinstance(lines, list)
random.shuffle(lines)
test_len = int(len(lines) * test_ratio)
for line in lines[:test_len]:
print >> test_file, line
for line in lines[test_len:]:
print >> train_file, line
if __name__ == '__main__':
args = docopt.docopt(__doc__, version='0.1.0')
kwargs = dict()
for key in args.keys():
if key != '--help':
param_name = key
assert isinstance(param_name, str)
param_name = param_name.replace('<', '')
param_name = param_name.replace('>', '')
param_name = param_name.replace('--', '')
kwargs[param_name] = args[key]
process(**kwargs)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
import common_utils # parse
def __list_to_map__(lst):
ret_val = dict()
for each in lst:
k, v = each
ret_val[k] = v
return ret_val
def hook(settings, meta, **kwargs):
"""
Init hook is invoked before process data. It will set obj.slots and store
data meta.
:param obj: global object. It will passed to process routine.
:type obj: object
:param meta: the meta file object, which passed from trainer_config. Meta
file record movie/user features.
:param kwargs: unused other arguments.
"""
del kwargs # unused kwargs
# Header define slots that used for paddle.
# first part is movie features.
# second part is user features.
# final part is rating score.
# header is a list of [USE_SEQ_OR_NOT?, SlotType]
movie_headers = list(common_utils.meta_to_header(meta, 'movie'))
settings.movie_names = [h[0] for h in movie_headers]
headers = movie_headers
user_headers = list(common_utils.meta_to_header(meta, 'user'))
settings.user_names = [h[0] for h in user_headers]
headers.extend(user_headers)
headers.append(("rating", dense_vector(1))) # Score
# slot types.
settings.input_types = __list_to_map__(headers)
settings.meta = meta
@provider(init_hook=hook, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename):
with open(filename, 'r') as f:
for line in f:
# Get a rating from file.
user_id, movie_id, score = map(int, line.split('::')[:-1])
# Scale score to [-5, +5]
score = float(score) * 2 - 5.0
# Get movie/user features by movie_id, user_id
movie_meta = settings.meta['movie'][movie_id]
user_meta = settings.meta['user'][user_id]
outputs = [('movie_id', movie_id - 1)]
# Then add movie features
for i, each_meta in enumerate(movie_meta):
outputs.append((settings.movie_names[i + 1], each_meta))
# Then add user id.
outputs.append(('user_id', user_id - 1))
# Then add user features.
for i, each_meta in enumerate(user_meta):
outputs.append((settings.user_names[i + 1], each_meta))
# Finally, add score
outputs.append(('rating', [score]))
# Return data to paddle
yield __list_to_map__(outputs)
#!/usr/bin/python
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import re
import math
def get_best_pass(log_filename):
with open(log_filename, 'r') as f:
text = f.read()
pattern = re.compile('Test.*? cost=([0-9]+\.[0-9]+).*?pass-([0-9]+)',
re.S)
results = re.findall(pattern, text)
sorted_results = sorted(results, key=lambda result: float(result[0]))
return sorted_results[0]
log_filename = sys.argv[1]
log = get_best_pass(log_filename)
predict_error = math.sqrt(float(log[0])) / 2
print 'Best pass is %s, error is %s, which means predict get error as %f' % (
log[1], log[0], predict_error)
evaluate_pass = "output/pass-%s" % log[1]
print "evaluating from pass %s" % evaluate_pass
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | sort | head -n 1
}
LOG=`get_best_pass log.txt`
LOG=(${LOG})
echo 'Best pass is '${LOG[1]}, ' error is '${LOG[0]}, 'which means predict get error as '`echo ${LOG[0]} | python -c 'import math; print math.sqrt(float(raw_input()))/2'`
evaluate_pass="output/pass-${LOG[1]}"
echo 'evaluating from pass '$evaluate_pass
#!/bin/env python2
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from py_paddle import swig_paddle, DataProviderConverter
from common_utils import *
from paddle.trainer.config_parser import parse_config
try:
import cPickle as pickle
except ImportError:
import pickle
import sys
if __name__ == '__main__':
model_path = sys.argv[1]
swig_paddle.initPaddle('--use_gpu=0')
conf = parse_config("trainer_config.py", "is_predict=1")
network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
assert isinstance(network, swig_paddle.GradientMachine)
network.loadParameters(model_path)
with open('./data/meta.bin', 'rb') as f:
meta = pickle.load(f)
headers = [h[1] for h in meta_to_header(meta, 'movie')]
headers.extend([h[1] for h in meta_to_header(meta, 'user')])
cvt = DataProviderConverter(headers)
while True:
movie_id = int(raw_input("Input movie_id: "))
user_id = int(raw_input("Input user_id: "))
movie_meta = meta['movie'][movie_id] # Query Data From Meta.
user_meta = meta['user'][user_id]
data = [movie_id - 1]
data.extend(movie_meta)
data.append(user_id - 1)
data.extend(user_meta)
print "Prediction Score is %.2f" % (
(network.forwardTest(cvt.convert([data]))[0]['value'][0][0] + 5)
/ 2)
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
UNAME_STR=`uname`
if [[ ${UNAME_STR} == 'Linux' ]]; then
SHUF_PROG='shuf'
else
SHUF_PROG='gshuf'
fi
cd "$(dirname "$0")"
delimiter='::'
dir=ml-1m
cd data
echo 'generate meta config file'
python config_generator.py config.json > meta_config.json
echo 'generate meta file'
python meta_generator.py $dir meta.bin --config=meta_config.json
echo 'split train/test file'
python split.py $dir/ratings.dat --delimiter=${delimiter} --test_ratio=0.1
echo 'shuffle train file'
${SHUF_PROG} $dir/ratings.dat.train > ratings.dat.train
cp $dir/ratings.dat.test .
echo "./data/ratings.dat.train" > train.list
echo "./data/ratings.dat.test" > test.list
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
paddle train \
--config=trainer_config.py \
--save_dir=./output \
--use_gpu=false \
--trainer_count=4\
--test_all_data_in_one_period=true \
--log_period=100 \
--dot_period=1 \
--num_passes=50 2>&1 | tee 'log.txt'
paddle usage -l log.txt -e $? -n "recommendation" >/dev/null 2>&1
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
try:
import cPickle as pickle
except ImportError:
import pickle
is_predict = get_config_arg('is_predict', bool, False)
META_FILE = 'data/meta.bin'
with open(META_FILE, 'rb') as f:
# load meta file
meta = pickle.load(f)
settings(
batch_size=1600, learning_rate=1e-3, learning_method=RMSPropOptimizer())
def construct_feature(name):
"""
Construct movie/user features.
This method read from meta data. Then convert feature to neural network due
to feature type. The map relation as follow.
* id: embedding => fc
* embedding:
is_sequence: embedding => context_projection => fc => pool
not sequence: embedding => fc
* one_hot_dense: fc => fc
Then gather all features vector, and use a fc layer to combined them as
return.
:param name: 'movie' or 'user'
:type name: basestring
:return: combined feature output
:rtype: LayerOutput
"""
__meta__ = meta[name]['__meta__']['raw_meta']
fusion = []
for each_meta in __meta__:
type_name = each_meta['type']
slot_name = each_meta.get('name', '%s_id' % name)
if type_name == 'id':
slot_dim = each_meta['max']
embedding = embedding_layer(
input=data_layer(
slot_name, size=slot_dim), size=256)
fusion.append(fc_layer(input=embedding, size=256))
elif type_name == 'embedding':
is_seq = each_meta['seq'] == 'sequence'
slot_dim = len(each_meta['dict'])
din = data_layer(slot_name, slot_dim)
embedding = embedding_layer(input=din, size=256)
if is_seq:
fusion.append(
text_conv_pool(
input=embedding, context_len=5, hidden_size=256))
else:
fusion.append(fc_layer(input=embedding, size=256))
elif type_name == 'one_hot_dense':
slot_dim = len(each_meta['dict'])
hidden = fc_layer(input=data_layer(slot_name, slot_dim), size=256)
fusion.append(fc_layer(input=hidden, size=256))
return fc_layer(name="%s_fusion" % name, input=fusion, size=256)
movie_feature = construct_feature("movie")
user_feature = construct_feature("user")
similarity = cos_sim(a=movie_feature, b=user_feature)
if not is_predict:
outputs(mse_cost(input=similarity, label=data_layer('rating', size=1)))
define_py_data_sources2(
'data/train.list',
'data/test.list',
module='dataprovider',
obj='process',
args={'meta': meta})
else:
outputs(similarity)
*.pyc
train.log
data/feature
data/conll05st-release/
data/src.dict
data/test.wsj.props
data/test.wsj.seq_pair
data/test.wsj.words
data/tgt.dict
output
data/emb
data/targetDict.txt
data/verbDict.txt
data/wordDict.txt
import math
import numpy as np
import gzip
import logging
import paddle.v2.dataset.conll05 as conll05
import paddle.v2.evaluator as evaluator
import paddle.v2 as paddle
logger = logging.getLogger('paddle')
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)
mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
default_std = 1 / math.sqrt(hidden_dim) / 3.0
mix_hidden_lr = 1e-3
def d_type(size):
return paddle.data_type.integer_value_sequence(size)
def db_lstm():
#8 features
word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))
emb_para = paddle.attr.Param(name='emb', initial_std=0., is_static=True)
std_0 = paddle.attr.Param(initial_std=0.)
std_default = paddle.attr.Param(initial_std=default_std)
predicate_embedding = paddle.layer.embedding(
size=word_dim,
input=predicate,
param_attr=paddle.attr.Param(
name='vemb', initial_std=default_std))
mark_embedding = paddle.layer.embedding(
size=mark_dim, input=mark, param_attr=std_0)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
paddle.layer.embedding(
size=word_dim, input=x, param_attr=emb_para) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0 = paddle.layer.mixed(
size=hidden_dim,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=emb, param_attr=std_default) for emb in emb_layers
])
lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = paddle.attr.Param(
initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = paddle.layer.lstmemory(
input=hidden_0,
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(),
bias_attr=std_0,
param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
mix_hidden = paddle.layer.mixed(
size=hidden_dim,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
])
lstm = paddle.layer.lstmemory(
input=mix_hidden,
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(),
reverse=((i % 2) == 1),
bias_attr=std_0,
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
feature_out = paddle.layer.mixed(
size=label_dict_len,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
], )
return feature_out
def load_parameter(file_name, h, w):
with open(file_name, 'rb') as f:
f.read(16) # skip header.
return np.fromfile(f, dtype=np.float32).reshape(h, w)
def train():
paddle.init(use_gpu=False, trainer_count=1)
# define network topology
feature_out = db_lstm()
target = paddle.layer.data(name='target', type=d_type(label_dict_len))
crf_cost = paddle.layer.crf(size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(
name='crfw',
initial_std=default_std,
learning_rate=mix_hidden_lr))
crf_dec = paddle.layer.crf_decoding(
size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(name='crfw'))
evaluator.sum(input=crf_dec)
# create parameters
parameters = paddle.parameters.create(crf_cost)
parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))
# create optimizer
optimizer = paddle.optimizer.Momentum(
momentum=0,
learning_rate=2e-2,
regularization=paddle.optimizer.L2Regularization(rate=8e-4),
model_average=paddle.optimizer.ModelAverage(
average_window=0.5, max_average_window=10000), )
trainer = paddle.trainer.SGD(cost=crf_cost,
parameters=parameters,
update_equation=optimizer,
extra_layers=crf_dec)
reader = paddle.batch(
paddle.reader.shuffle(
conll05.test(), buf_size=8192), batch_size=10)
feeding = {
'word_data': 0,
'ctx_n2_data': 1,
'ctx_n1_data': 2,
'ctx_0_data': 3,
'ctx_p1_data': 4,
'ctx_p2_data': 5,
'verb_data': 6,
'mark_data': 7,
'target': 8
}
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
logger.info("Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics))
if event.batch_id and event.batch_id % 1000 == 0:
result = trainer.test(reader=reader, feeding=feeding)
logger.info("\nTest with Pass %d, Batch %d, %s" %
(event.pass_id, event.batch_id, result.metrics))
if isinstance(event, paddle.event.EndPass):
# save parameters
with gzip.open('params_pass_%d.tar.gz' % event.pass_id, 'w') as f:
parameters.to_tar(f)
result = trainer.test(reader=reader, feeding=feeding)
logger.info("\nTest with Pass %d, %s" %
(event.pass_id, result.metrics))
trainer.train(
reader=reader,
event_handler=event_handler,
num_passes=10,
feeding=feeding)
def infer_a_batch(inferer, test_data, word_dict, pred_dict, label_dict):
probs = inferer.infer(input=test_data, field='id')
assert len(probs) == sum(len(x[0]) for x in test_data)
for idx, test_sample in enumerate(test_data):
start_id = 0
pred_str = "%s\t" % (pred_dict[test_sample[6][0]])
for w, tag in zip(test_sample[0],
probs[start_id:start_id + len(test_sample[0])]):
pred_str += "%s[%s] " % (word_dict[w], label_dict[tag])
print(pred_str.strip())
start_id += len(test_sample[0])
def infer():
label_dict_reverse = dict((value, key)
for key, value in label_dict.iteritems())
word_dict_reverse = dict((value, key)
for key, value in word_dict.iteritems())
pred_dict_reverse = dict((value, key)
for key, value in verb_dict.iteritems())
test_creator = paddle.dataset.conll05.test()
paddle.init(use_gpu=False, trainer_count=1)
# define network topology
feature_out = db_lstm()
predict = paddle.layer.crf_decoding(
size=label_dict_len,
input=feature_out,
param_attr=paddle.attr.Param(name='crfw'))
test_pass = 0
with gzip.open('params_pass_%d.tar.gz' % (test_pass)) as f:
parameters = paddle.parameters.Parameters.from_tar(f)
inferer = paddle.inference.Inference(
output_layer=predict, parameters=parameters)
# prepare test data
test_data = []
test_batch_size = 50
for idx, item in enumerate(test_creator()):
test_data.append(item[0:8])
if idx and (not idx % test_batch_size):
infer_a_batch(
inferer,
test_data,
word_dict_reverse,
pred_dict_reverse,
label_dict_reverse, )
test_data = []
infer_a_batch(
inferer,
test_data,
word_dict_reverse,
pred_dict_reverse,
label_dict_reverse, )
test_data = []
def main(is_inferring=False):
if is_inferring:
infer()
else:
train()
if __name__ == '__main__':
main(is_inferring=False)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
from optparse import OptionParser
def extract_dict_features(pair_file, feature_file):
with open(pair_file) as fin, open(feature_file, 'w') as feature_out:
for line in fin:
sentence, predicate, labels = line.strip().split('\t')
sentence_list = sentence.split()
labels_list = labels.split()
verb_index = labels_list.index('B-V')
mark = [0] * len(labels_list)
if verb_index > 0:
mark[verb_index - 1] = 1
ctx_n1 = sentence_list[verb_index - 1]
else:
ctx_n1 = 'bos'
if verb_index > 1:
mark[verb_index - 2] = 1
ctx_n2 = sentence_list[verb_index - 2]
else:
ctx_n2 = 'bos'
mark[verb_index] = 1
ctx_0 = sentence_list[verb_index]
if verb_index < len(labels_list) - 1:
mark[verb_index + 1] = 1
ctx_p1 = sentence_list[verb_index + 1]
else:
ctx_p1 = 'eos'
if verb_index < len(labels_list) - 2:
mark[verb_index + 2] = 1
ctx_p2 = sentence_list[verb_index + 2]
else:
ctx_p2 = 'eos'
feature_str = sentence + '\t' \
+ predicate + '\t' \
+ ctx_n2 + '\t' \
+ ctx_n1 + '\t' \
+ ctx_0 + '\t' \
+ ctx_p1 + '\t' \
+ ctx_p2 + '\t' \
+ ' '.join([str(i) for i in mark]) + '\t' \
+ labels
feature_out.write(feature_str + '\n')
if __name__ == '__main__':
usage = '-p pair_file -f feature_file'
parser = OptionParser(usage)
parser.add_option('-p', dest='pair_file', help='the pair file')
parser.add_option('-f', dest='feature_file', help='the feature file')
(options, args) = parser.parse_args()
extract_dict_features(options.pair_file, options.feature_file)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
from optparse import OptionParser
def read_labels(props_file):
'''
a sentence maybe has more than one verb, each verb has its label sequence
label[], is a 3-dimension list.
the first dim is to store all sentence's label seqs, len is the sentence number
the second dim is to store all label sequences for one sentences
the third dim is to store each label for one word
'''
labels = []
with open(props_file) as fin:
label_seqs_for_one_sentences = []
one_seg_in_file = []
for line in fin:
line = line.strip()
if line == '':
for i in xrange(len(one_seg_in_file[0])):
a_kind_lable = [x[i] for x in one_seg_in_file]
label_seqs_for_one_sentences.append(a_kind_lable)
labels.append(label_seqs_for_one_sentences)
one_seg_in_file = []
label_seqs_for_one_sentences = []
else:
part = line.split()
one_seg_in_file.append(part)
return labels
def read_sentences(words_file):
sentences = []
with open(words_file) as fin:
s = ''
for line in fin:
line = line.strip()
if line == '':
sentences.append(s)
s = ''
else:
s += line + ' '
return sentences
def transform_labels(sentences, labels):
sen_lab_pair = []
for i in xrange(len(sentences)):
if len(labels[i]) == 1:
continue
else:
verb_list = []
for x in labels[i][0]:
if x != '-':
verb_list.append(x)
for j in xrange(1, len(labels[i])):
label_list = labels[i][j]
current_tag = 'O'
is_in_bracket = False
label_seq = []
verb_word = ''
for ll in label_list:
if ll == '*' and is_in_bracket == False:
label_seq.append('O')
elif ll == '*' and is_in_bracket == True:
label_seq.append('I-' + current_tag)
elif ll == '*)':
label_seq.append('I-' + current_tag)
is_in_bracket = False
elif ll.find('(') != -1 and ll.find(')') != -1:
current_tag = ll[1:ll.find('*')]
label_seq.append('B-' + current_tag)
is_in_bracket = False
elif ll.find('(') != -1 and ll.find(')') == -1:
current_tag = ll[1:ll.find('*')]
label_seq.append('B-' + current_tag)
is_in_bracket = True
else:
print 'error:', ll
sen_lab_pair.append((sentences[i], verb_list[j - 1], label_seq))
return sen_lab_pair
def write_file(sen_lab_pair, output_file):
with open(output_file, 'w') as fout:
for x in sen_lab_pair:
sentence = x[0]
label_seq = ' '.join(x[2])
assert len(sentence.split()) == len(x[2])
fout.write(sentence + '\t' + x[1] + '\t' + label_seq + '\n')
if __name__ == '__main__':
usage = '-w words_file -p props_file -o output_file'
parser = OptionParser(usage)
parser.add_option('-w', dest='words_file', help='the words file')
parser.add_option('-p', dest='props_file', help='the props file')
parser.add_option('-o', dest='output_file', help='the output_file')
(options, args) = parser.parse_args()
sentences = read_sentences(options.words_file)
labels = read_labels(options.props_file)
sen_lab_pair = transform_labels(sentences, labels)
write_file(sen_lab_pair, options.output_file)
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
wget http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz
wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt
wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt
wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt
wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb
tar -xzvf conll05st-tests.tar.gz
rm conll05st-tests.tar.gz
cp ./conll05st-release/test.wsj/words/test.wsj.words.gz .
cp ./conll05st-release/test.wsj/props/test.wsj.props.gz .
gunzip test.wsj.words.gz
gunzip test.wsj.props.gz
python extract_pairs.py -w test.wsj.words -p test.wsj.props -o test.wsj.seq_pair
python extract_dict_feature.py -p test.wsj.seq_pair -f feature
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
UNK_IDX = 0
def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
settings.word_dict = word_dict
settings.label_dict = label_dict
settings.predicate_dict = predicate_dict
#all inputs are integral and sequential type
settings.slots = [
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(predicate_dict)), integer_value_sequence(2),
integer_value_sequence(len(label_dict))
]
def get_batch_size(yeild_data):
return len(yeild_data[0])
@provider(
init_hook=hook,
should_shuffle=True,
calc_batch_size=get_batch_size,
can_over_batch_size=True,
cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line in fdata:
sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
line.strip().split('\t')
words = sentence.split()
sen_len = len(words)
word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len
ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
label_list = label.split()
label_slot = [settings.label_dict.get(w) for w in label_list]
yield word_slot, ctx_n2_slot, ctx_n1_slot, \
ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot, label_slot
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import os
import sys
from paddle.trainer_config_helpers import *
#file paths
word_dict_file = './data/wordDict.txt'
label_dict_file = './data/targetDict.txt'
predicate_file = './data/verbDict.txt'
train_list_file = './data/train.list'
test_list_file = './data/test.list'
is_test = get_config_arg('is_test', bool, False)
is_predict = get_config_arg('is_predict', bool, False)
if not is_predict:
#load dictionaries
word_dict = dict()
label_dict = dict()
predicate_dict = dict()
with open(word_dict_file, 'r') as f_word, \
open(label_dict_file, 'r') as f_label, \
open(predicate_file, 'r') as f_pre:
for i, line in enumerate(f_word):
w = line.strip()
word_dict[w] = i
for i, line in enumerate(f_label):
w = line.strip()
label_dict[w] = i
for i, line in enumerate(f_pre):
w = line.strip()
predicate_dict[w] = i
if is_test:
train_list_file = None
#define data provider
define_py_data_sources2(
train_list=train_list_file,
test_list=test_list_file,
module='dataprovider',
obj='process',
args={
'word_dict': word_dict,
'label_dict': label_dict,
'predicate_dict': predicate_dict
})
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(predicate_dict)
else:
word_dict_len = get_config_arg('dict_len', int)
label_dict_len = get_config_arg('label_len', int)
pred_len = get_config_arg('pred_len', int)
############################## Hyper-parameters ##################################
mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
########################### Optimizer #######################################
settings(
batch_size=150,
learning_method=MomentumOptimizer(momentum=0),
learning_rate=2e-2,
regularization=L2Regularization(8e-4),
is_async=False,
model_average=ModelAverage(
average_window=0.5, max_average_window=10000), )
####################################### network ##############################
#8 features and 1 target
word = data_layer(name='word_data', size=word_dict_len)
predicate = data_layer(name='verb_data', size=pred_len)
ctx_n2 = data_layer(name='ctx_n2_data', size=word_dict_len)
ctx_n1 = data_layer(name='ctx_n1_data', size=word_dict_len)
ctx_0 = data_layer(name='ctx_0_data', size=word_dict_len)
ctx_p1 = data_layer(name='ctx_p1_data', size=word_dict_len)
ctx_p2 = data_layer(name='ctx_p2_data', size=word_dict_len)
mark = data_layer(name='mark_data', size=mark_dict_len)
if not is_predict:
target = data_layer(name='target', size=label_dict_len)
default_std = 1 / math.sqrt(hidden_dim) / 3.0
emb_para = ParameterAttribute(name='emb', initial_std=0., learning_rate=0.)
std_0 = ParameterAttribute(initial_std=0.)
std_default = ParameterAttribute(initial_std=default_std)
predicate_embedding = embedding_layer(
size=word_dim,
input=predicate,
param_attr=ParameterAttribute(
name='vemb', initial_std=default_std))
mark_embedding = embedding_layer(
name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
embedding_layer(
size=word_dim, input=x, param_attr=emb_para) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0 = mixed_layer(
name='hidden0',
size=hidden_dim,
bias_attr=std_default,
input=[
full_matrix_projection(
input=emb, param_attr=std_default) for emb in emb_layers
])
mix_hidden_lr = 1e-3
lstm_para_attr = ParameterAttribute(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = ParameterAttribute(
initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = lstmemory(
name='lstm0',
input=hidden_0,
act=ReluActivation(),
gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
bias_attr=std_0,
param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
mix_hidden = mixed_layer(
name='hidden' + str(i),
size=hidden_dim,
bias_attr=std_default,
input=[
full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
])
lstm = lstmemory(
name='lstm' + str(i),
input=mix_hidden,
act=ReluActivation(),
gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
reverse=((i % 2) == 1),
bias_attr=std_0,
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
feature_out = mixed_layer(
name='output',
size=label_dict_len,
bias_attr=std_default,
input=[
full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
], )
if not is_predict:
crf_l = crf_layer(
name='crf',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=ParameterAttribute(
name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr))
crf_dec_l = crf_decoding_layer(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=ParameterAttribute(name='crfw'))
eval = sum_evaluator(input=crf_dec_l)
outputs(crf_l)
else:
crf_dec_l = crf_decoding_layer(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
param_attr=ParameterAttribute(name='crfw'))
outputs(crf_dec_l)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from optparse import OptionParser
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import integer_value_sequence
from paddle.trainer.config_parser import parse_config
"""
Usage: run following command to show help message.
python predict.py -h
"""
UNK_IDX = 0
class Prediction():
def __init__(self, train_conf, dict_file, model_dir, label_file,
predicate_dict_file):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
model_dir: directory of model.
"""
self.dict = {}
self.labels = {}
self.predicate_dict = {}
self.labels_reverse = {}
self.load_dict_label(dict_file, label_file, predicate_dict_file)
len_dict = len(self.dict)
len_label = len(self.labels)
len_pred = len(self.predicate_dict)
conf = parse_config(
train_conf, 'dict_len=' + str(len_dict) + ',label_len=' +
str(len_label) + ',pred_len=' + str(len_pred) + ',is_predict=True')
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
self.network.loadParameters(model_dir)
slots = [
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_pred), integer_value_sequence(2)
]
self.converter = DataProviderConverter(slots)
def load_dict_label(self, dict_file, label_file, predicate_dict_file):
"""
Load dictionary from self.dict_file.
"""
for line_count, line in enumerate(open(dict_file, 'r')):
self.dict[line.strip()] = line_count
for line_count, line in enumerate(open(label_file, 'r')):
self.labels[line.strip()] = line_count
self.labels_reverse[line_count] = line.strip()
for line_count, line in enumerate(open(predicate_dict_file, 'r')):
self.predicate_dict[line.strip()] = line_count
def get_data(self, data_file):
"""
Get input data of paddle format.
"""
with open(data_file, 'r') as fdata:
for line in fdata:
sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = line.strip(
).split('\t')
words = sentence.split()
sen_len = len(words)
word_slot = [self.dict.get(w, UNK_IDX) for w in words]
predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)
] * sen_len
ctx_n2_slot = [self.dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_slot = [self.dict.get(ctx_p1, UNK_IDX)] * sen_len
ctx_p2_slot = [self.dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
yield word_slot, ctx_n2_slot, ctx_n1_slot, \
ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot
def predict(self, data_file, output_file):
"""
data_file: file name of input data.
"""
input = self.converter(self.get_data(data_file))
output = self.network.forwardTest(input)
lab = output[0]["id"].tolist()
with open(data_file, 'r') as fin, open(output_file, 'w') as fout:
index = 0
for line in fin:
sen = line.split('\t')[0]
len_sen = len(sen.split())
line_labels = lab[index:index + len_sen]
index += len_sen
fout.write(sen + '\t' + ' '.join(
[self.labels_reverse[i] for i in line_labels]) + '\n')
def option_parser():
usage = (
"python predict.py -c config -w model_dir "
"-d word dictionary -l label_file -i input_file -p pred_dict_file")
parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option(
"-c",
"--tconf",
action="store",
dest="train_conf",
help="network config")
parser.add_option(
"-d",
"--dict",
action="store",
dest="dict_file",
help="dictionary file")
parser.add_option(
"-l",
"--label",
action="store",
dest="label_file",
default=None,
help="label file")
parser.add_option(
"-p",
"--predict_dict_file",
action="store",
dest="predict_dict_file",
default=None,
help="predict_dict_file")
parser.add_option(
"-i",
"--data",
action="store",
dest="data_file",
help="data file to predict")
parser.add_option(
"-w",
"--model",
action="store",
dest="model_path",
default=None,
help="model path")
parser.add_option(
"-o",
"--output_file",
action="store",
dest="output_file",
default=None,
help="output file")
return parser.parse_args()
def main():
options, args = option_parser()
train_conf = options.train_conf
data_file = options.data_file
dict_file = options.dict_file
model_path = options.model_path
label_file = options.label_file
predict_dict_file = options.predict_dict_file
output_file = options.output_file
swig_paddle.initPaddle("--use_gpu=0")
predict = Prediction(train_conf, dict_file, model_path, label_file,
predict_dict_file)
predict.predict(data_file, output_file)
if __name__ == '__main__':
main()
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | \
sort -n | head -n 1
}
log=train.log
LOG=`get_best_pass $log`
LOG=(${LOG})
best_model_path="output/pass-${LOG[1]}"
config_file=db_lstm.py
dict_file=./data/wordDict.txt
label_file=./data/targetDict.txt
predicate_dict_file=./data/verbDict.txt
input_file=./data/feature
output_file=predict.res
python predict.py \
-c $config_file \
-w $best_model_path \
-l $label_file \
-p $predicate_dict_file \
-d $dict_file \
-i $input_file \
-o $output_file
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\
sort -n | head -n 1
}
log=train.log
LOG=`get_best_pass $log`
LOG=(${LOG})
evaluate_pass="output/pass-${LOG[1]}"
echo 'evaluating from pass '$evaluate_pass
model_list=./model.list
touch $model_list | echo $evaluate_pass > $model_list
paddle train \
--config=./db_lstm.py \
--model_list=$model_list \
--job=test \
--use_gpu=false \
--config_args=is_test=1 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'test.log'
paddle usage -l test.log -e $? -n "semantic_role_labeling_test" >/dev/null 2>&1
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
paddle train \
--config=./db_lstm.py \
--use_gpu=0 \
--log_period=5000 \
--trainer_count=1 \
--show_parameter_stats_period=5000 \
--save_dir=./output \
--num_passes=10000 \
--average_test_period=10000000 \
--init_model_path=./data \
--load_missing_parameter_strategy=rand \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
paddle usage -l train.log -e $? -n "semantic_role_labeling_train" >/dev/null 2>&1
data/aclImdb
data/imdb
data/pre-imdb
data/mosesdecoder-master
logs/
model_output
dataprovider_copy_1.py
model.list
test.log
train.log
*.pyc
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
def hook(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [
integer_value_sequence(len(settings.word_dict)), integer_value(2)
]
settings.logger.info('dict len : %d' % (len(settings.word_dict)))
@provider(init_hook=hook)
def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line_count, line in enumerate(fdata):
label, comment = line.strip().split('\t\t')
label = int(label)
words = comment.split()
word_slot = [
settings.word_dict[w] for w in words if w in settings.word_dict
]
if not word_slot:
continue
yield word_slot, label
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os, sys
import numpy as np
from optparse import OptionParser
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import integer_value_sequence
from paddle.trainer.config_parser import parse_config
"""
Usage: run following command to show help message.
python predict.py -h
"""
class SentimentPrediction():
def __init__(self, train_conf, dict_file, model_dir=None, label_file=None):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
model_dir: directory of model.
"""
self.train_conf = train_conf
self.dict_file = dict_file
self.word_dict = {}
self.dict_dim = self.load_dict()
self.model_dir = model_dir
if model_dir is None:
self.model_dir = os.path.dirname(train_conf)
self.label = None
if label_file is not None:
self.load_label(label_file)
conf = parse_config(train_conf, "is_predict=1")
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
self.network.loadParameters(self.model_dir)
input_types = [integer_value_sequence(self.dict_dim)]
self.converter = DataProviderConverter(input_types)
def load_dict(self):
"""
Load dictionary from self.dict_file.
"""
for line_count, line in enumerate(open(self.dict_file, 'r')):
self.word_dict[line.strip().split('\t')[0]] = line_count
return len(self.word_dict)
def load_label(self, label_file):
"""
Load label.
"""
self.label = {}
for v in open(label_file, 'r'):
self.label[int(v.split('\t')[1])] = v.split('\t')[0]
def get_index(self, data):
"""
transform word into integer index according to the dictionary.
"""
words = data.strip().split()
word_slot = [self.word_dict[w] for w in words if w in self.word_dict]
return word_slot
def batch_predict(self, data_batch):
input = self.converter(data_batch)
output = self.network.forwardTest(input)
prob = output[0]["value"]
labs = np.argsort(-prob)
for idx, lab in enumerate(labs):
if self.label is None:
print("predicting label is %d" % (lab[0]))
else:
print("predicting label is %s" % (self.label[lab[0]]))
def option_parser():
usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option(
"-n",
"--tconf",
action="store",
dest="train_conf",
help="network config")
parser.add_option(
"-d",
"--dict",
action="store",
dest="dict_file",
help="dictionary file")
parser.add_option(
"-b",
"--label",
action="store",
dest="label",
default=None,
help="dictionary file")
parser.add_option(
"-c",
"--batch_size",
type="int",
action="store",
dest="batch_size",
default=1,
help="the batch size for prediction")
parser.add_option(
"-w",
"--model",
action="store",
dest="model_path",
default=None,
help="model path")
return parser.parse_args()
def main():
options, args = option_parser()
train_conf = options.train_conf
batch_size = options.batch_size
dict_file = options.dict_file
model_path = options.model_path
label = options.label
swig_paddle.initPaddle("--use_gpu=0")
predict = SentimentPrediction(train_conf, dict_file, model_path, label)
batch = []
for line in sys.stdin:
words = predict.get_index(line)
if words:
batch.append([words])
else:
print('All the words in [%s] are not in the dictionary.' % line)
if len(batch) == batch_size:
predict.batch_predict(batch)
batch = []
if len(batch) > 0:
predict.batch_predict(batch)
if __name__ == '__main__':
main()
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
#Note the default model is pass-00002, you shold make sure the model path
#exists or change the mode path.
model=model_output/pass-00002/
config=trainer_config.py
label=data/pre-imdb/labels.list
cat ./data/aclImdb/test/pos/10007_10.txt | python predict.py \
--tconf=$config\
--model=$model \
--label=$label \
--dict=./data/pre-imdb/dict.txt \
--batch_size=1
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import random
import operator
import numpy as np
from subprocess import Popen, PIPE
from os.path import join as join_path
from optparse import OptionParser
from paddle.utils.preprocess_util import *
"""
Usage: run following command to show help message.
python preprocess.py -h
"""
def save_dict(dict, filename, is_reverse=True):
"""
Save dictionary into file.
dict: input dictionary.
filename: output file name, string.
is_reverse: True, descending order by value.
False, ascending order by value.
"""
f = open(filename, 'w')
for k, v in sorted(dict.items(), key=operator.itemgetter(1),\
reverse=is_reverse):
f.write('%s\t%s\n' % (k, v))
f.close()
def tokenize(sentences):
"""
Use tokenizer.perl to tokenize input sentences.
tokenizer.perl is tool of Moses.
sentences : a list of input sentences.
return: a list of processed text.
"""
dir = './data/mosesdecoder-master/scripts/tokenizer/tokenizer.perl'
tokenizer_cmd = [dir, '-l', 'en', '-q', '-']
assert isinstance(sentences, list)
text = "\n".join(sentences)
tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE)
tok_text, _ = tokenizer.communicate(text)
toks = tok_text.split('\n')[:-1]
return toks
def read_lines(path):
"""
path: String, file path.
return a list of sequence.
"""
seqs = []
with open(path, 'r') as f:
for line in f.readlines():
line = line.strip()
if len(line):
seqs.append(line)
return seqs
class SentimentDataSetCreate():
"""
A class to process data for sentiment analysis task.
"""
def __init__(self,
data_path,
output_path,
use_okenizer=True,
multi_lines=False):
"""
data_path: string, traing and testing dataset path
output_path: string, output path, store processed dataset
multi_lines: whether a file has multi lines.
In order to shuffle fully, it needs to read all files into
memory, then shuffle them if one file has multi lines.
"""
self.output_path = output_path
self.data_path = data_path
self.train_dir = 'train'
self.test_dir = 'test'
self.train_list = "train.list"
self.test_list = "test.list"
self.label_list = "labels.list"
self.classes_num = 0
self.batch_size = 50000
self.batch_dir = 'batches'
self.dict_file = "dict.txt"
self.dict_with_test = False
self.dict_size = 0
self.word_count = {}
self.tokenizer = use_okenizer
self.overwrite = False
self.multi_lines = multi_lines
self.train_dir = join_path(data_path, self.train_dir)
self.test_dir = join_path(data_path, self.test_dir)
self.train_list = join_path(output_path, self.train_list)
self.test_list = join_path(output_path, self.test_list)
self.label_list = join_path(output_path, self.label_list)
self.dict_file = join_path(output_path, self.dict_file)
def data_list(self, path):
"""
create dataset from path
path: data path
return: data list
"""
label_set = get_label_set_from_dir(path)
data = []
for lab_name in label_set.keys():
file_paths = list_files(join_path(path, lab_name))
for p in file_paths:
data.append({"label" : label_set[lab_name],\
"seq_path": p})
return data, label_set
def create_dict(self, data):
"""
create dict for input data.
data: list, [sequence, sequnce, ...]
"""
for seq in data:
for w in seq.strip().lower().split():
if w not in self.word_count:
self.word_count[w] = 1
else:
self.word_count[w] += 1
def create_dataset(self):
"""
create file batches and dictionary of train data set.
If the self.overwrite is false and train.list already exists in
self.output_path, this function will not create and save file
batches from the data set path.
return: dictionary size, class number.
"""
out_path = self.output_path
if out_path and not os.path.exists(out_path):
os.makedirs(out_path)
# If self.overwrite is false or self.train_list has existed,
# it will not process dataset.
if not (self.overwrite or not os.path.exists(self.train_list)):
print "%s already exists." % self.train_list
return
# Preprocess train data.
train_data, train_lab_set = self.data_list(self.train_dir)
print "processing train set..."
file_lists = self.save_data(train_data, "train", self.batch_size, True,
True)
save_list(file_lists, self.train_list)
# If have test data path, preprocess test data.
if os.path.exists(self.test_dir):
test_data, test_lab_set = self.data_list(self.test_dir)
assert (train_lab_set == test_lab_set)
print "processing test set..."
file_lists = self.save_data(test_data, "test", self.batch_size,
False, self.dict_with_test)
save_list(file_lists, self.test_list)
# save labels set.
save_dict(train_lab_set, self.label_list, False)
self.classes_num = len(train_lab_set.keys())
# save dictionary.
save_dict(self.word_count, self.dict_file, True)
self.dict_size = len(self.word_count)
def save_data(self,
data,
prefix="",
batch_size=50000,
is_shuffle=False,
build_dict=False):
"""
Create batches for a Dataset object.
data: the Dataset object to process.
prefix: the prefix of each batch.
batch_size: number of data in each batch.
build_dict: whether to build dictionary for data
return: list of batch names
"""
if is_shuffle and self.multi_lines:
return self.save_data_multi_lines(data, prefix, batch_size,
build_dict)
if is_shuffle:
random.shuffle(data)
num_batches = int(math.ceil(len(data) / float(batch_size)))
batch_names = []
for i in range(num_batches):
batch_name = join_path(self.output_path,
"%s_part_%03d" % (prefix, i))
begin = i * batch_size
end = min((i + 1) * batch_size, len(data))
# read a batch of data
label_list, data_list = self.get_data_list(begin, end, data)
if build_dict:
self.create_dict(data_list)
self.save_file(label_list, data_list, batch_name)
batch_names.append(batch_name)
return batch_names
def get_data_list(self, begin, end, data):
"""
begin: int, begining index of data.
end: int, ending index of data.
data: a list of {"seq_path": seqquence path, "label": label index}
return a list of label and a list of sequence.
"""
label_list = []
data_list = []
for j in range(begin, end):
seqs = read_lines(data[j]["seq_path"])
lab = int(data[j]["label"])
#File may have multiple lines.
for seq in seqs:
data_list.append(seq)
label_list.append(lab)
if self.tokenizer:
data_list = tokenize(data_list)
return label_list, data_list
def save_data_multi_lines(self,
data,
prefix="",
batch_size=50000,
build_dict=False):
"""
In order to shuffle fully, there is no need to load all data if
each file only contains one sample, it only needs to shuffle list
of file name. But one file contains multi lines, each line is one
sample. It needs to read all data into memory to shuffle fully.
This interface is mainly for data containning multi lines in each
file, which consumes more memory if there is a great mount of data.
data: the Dataset object to process.
prefix: the prefix of each batch.
batch_size: number of data in each batch.
build_dict: whether to build dictionary for data
return: list of batch names
"""
assert self.multi_lines
label_list = []
data_list = []
# read all data
label_list, data_list = self.get_data_list(0, len(data), data)
if build_dict:
self.create_dict(data_list)
length = len(label_list)
perm_list = np.array([i for i in xrange(length)])
random.shuffle(perm_list)
num_batches = int(math.ceil(length / float(batch_size)))
batch_names = []
for i in range(num_batches):
batch_name = join_path(self.output_path,
"%s_part_%03d" % (prefix, i))
begin = i * batch_size
end = min((i + 1) * batch_size, length)
sub_label = [label_list[perm_list[i]] for i in range(begin, end)]
sub_data = [data_list[perm_list[i]] for i in range(begin, end)]
self.save_file(sub_label, sub_data, batch_name)
batch_names.append(batch_name)
return batch_names
def save_file(self, label_list, data_list, filename):
"""
Save data into file.
label_list: a list of int value.
data_list: a list of sequnece.
filename: output file name.
"""
f = open(filename, 'w')
print "saving file: %s" % filename
for lab, seq in zip(label_list, data_list):
f.write('%s\t\t%s\n' % (lab, seq))
f.close()
def option_parser():
parser = OptionParser(usage="usage: python preprcoess.py "\
"-i data_dir [options]")
parser.add_option(
"-i",
"--data",
action="store",
dest="input",
help="Input data directory.")
parser.add_option(
"-o",
"--output",
action="store",
dest="output",
default=None,
help="Output directory.")
parser.add_option(
"-t",
"--tokenizer",
action="store",
dest="use_tokenizer",
default=True,
help="Whether to use tokenizer.")
parser.add_option("-m", "--multi_lines", action="store",
dest="multi_lines", default=False,
help="If input text files have multi lines and they "\
"need to be shuffled, you should set -m True,")
return parser.parse_args()
def main():
options, args = option_parser()
data_dir = options.input
output_dir = options.output
use_tokenizer = options.use_tokenizer
multi_lines = options.multi_lines
if output_dir is None:
outname = os.path.basename(options.input)
output_dir = join_path(os.path.dirname(data_dir), 'pre-' + outname)
data_creator = SentimentDataSetCreate(data_dir, output_dir, use_tokenizer,
multi_lines)
data_creator.create_dataset()
if __name__ == '__main__':
main()
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
echo "Start to preprcess..."
data_dir="./data/imdb"
python preprocess.py -i $data_dir
echo "Done."
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from os.path import join as join_path
from paddle.trainer_config_helpers import *
def sentiment_data(data_dir=None,
is_test=False,
is_predict=False,
train_list="train.list",
test_list="test.list",
dict_file="dict.txt"):
"""
Predefined data provider for sentiment analysis.
is_test: whether this config is used for test.
is_predict: whether this config is used for prediction.
train_list: text file name, containing a list of training set.
test_list: text file name, containing a list of testing set.
dict_file: text file name, containing dictionary.
"""
dict_dim = len(open(join_path(data_dir, "dict.txt")).readlines())
class_dim = len(open(join_path(data_dir, 'labels.list')).readlines())
if is_predict:
return dict_dim, class_dim
if data_dir is not None:
train_list = join_path(data_dir, train_list)
test_list = join_path(data_dir, test_list)
dict_file = join_path(data_dir, dict_file)
train_list = train_list if not is_test else None
word_dict = dict()
with open(dict_file, 'r') as f:
for i, line in enumerate(open(dict_file, 'r')):
word_dict[line.split('\t')[0]] = i
define_py_data_sources2(
train_list,
test_list,
module="dataprovider",
obj="process",
args={'dictionary': word_dict})
return dict_dim, class_dim
def bidirectional_lstm_net(input_dim,
class_dim=2,
emb_dim=128,
lstm_dim=128,
is_predict=False):
data = data_layer("word", input_dim)
emb = embedding_layer(input=data, size=emb_dim)
bi_lstm = bidirectional_lstm(input=emb, size=lstm_dim)
dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
output = fc_layer(input=dropout, size=class_dim, act=SoftmaxActivation())
if not is_predict:
lbl = data_layer("label", 1)
outputs(classification_cost(input=output, label=lbl))
else:
outputs(output)
def stacked_lstm_net(input_dim,
class_dim=2,
emb_dim=128,
hid_dim=512,
stacked_num=3,
is_predict=False):
"""
A Wrapper for sentiment classification task.
This network uses bi-directional recurrent network,
consisting three LSTM layers. This configure is referred to
the paper as following url, but use fewer layrs.
http://www.aclweb.org/anthology/P15-1109
input_dim: here is word dictionary dimension.
class_dim: number of categories.
emb_dim: dimension of word embedding.
hid_dim: dimension of hidden layer.
stacked_num: number of stacked lstm-hidden layer.
is_predict: is predicting or not.
Some layers is not needed in network when predicting.
"""
hid_lr = 1e-3
assert stacked_num % 2 == 1
layer_attr = ExtraLayerAttribute(drop_rate=0.5)
fc_para_attr = ParameterAttribute(learning_rate=hid_lr)
lstm_para_attr = ParameterAttribute(initial_std=0., learning_rate=1.)
para_attr = [fc_para_attr, lstm_para_attr]
bias_attr = ParameterAttribute(initial_std=0., l2_rate=0.)
relu = ReluActivation()
linear = LinearActivation()
data = data_layer("word", input_dim)
emb = embedding_layer(input=data, size=emb_dim)
fc1 = fc_layer(input=emb, size=hid_dim, act=linear, bias_attr=bias_attr)
lstm1 = lstmemory(
input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1):
fc = fc_layer(
input=inputs,
size=hid_dim,
act=linear,
param_attr=para_attr,
bias_attr=bias_attr)
lstm = lstmemory(
input=fc,
reverse=(i % 2) == 0,
act=relu,
bias_attr=bias_attr,
layer_attr=layer_attr)
inputs = [fc, lstm]
fc_last = pooling_layer(input=inputs[0], pooling_type=MaxPooling())
lstm_last = pooling_layer(input=inputs[1], pooling_type=MaxPooling())
output = fc_layer(
input=[fc_last, lstm_last],
size=class_dim,
act=SoftmaxActivation(),
bias_attr=bias_attr,
param_attr=para_attr)
if is_predict:
outputs(output)
else:
outputs(classification_cost(input=output, label=data_layer('label', 1)))
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* classification_error_evaluator=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\
sort -n | head -n 1
}
log=train.log
LOG=`get_best_pass $log`
LOG=(${LOG})
evaluate_pass="model_output/pass-${LOG[1]}"
echo 'evaluating from pass '$evaluate_pass
model_list=./model.list
touch $model_list | echo $evaluate_pass > $model_list
net_conf=trainer_config.py
paddle train --config=$net_conf \
--model_list=$model_list \
--job=test \
--use_gpu=false \
--trainer_count=4 \
--config_args=is_test=1 \
2>&1 | tee 'test.log'
paddle usage -l test.log -e $? -n "sentiment_test" >/dev/null 2>&1
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
config=trainer_config.py
output=./model_output
paddle train --config=$config \
--save_dir=$output \
--job=train \
--use_gpu=false \
--trainer_count=4 \
--num_passes=10 \
--log_period=10 \
--dot_period=20 \
--show_parameter_stats_period=100 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
paddle usage -l train.log -e $? -n "sentiment_train" >/dev/null 2>&1
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle.v2 as paddle
def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128):
data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim)
conv_3 = paddle.networks.sequence_conv_pool(
input=emb, context_len=3, hidden_size=hid_dim)
conv_4 = paddle.networks.sequence_conv_pool(
input=emb, context_len=4, hidden_size=hid_dim)
output = paddle.layer.fc(input=[conv_3, conv_4],
size=class_dim,
act=paddle.activation.Softmax())
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost
def stacked_lstm_net(input_dim,
class_dim=2,
emb_dim=128,
hid_dim=512,
stacked_num=3):
"""
A Wrapper for sentiment classification task.
This network uses bi-directional recurrent network,
consisting three LSTM layers. This configure is referred to
the paper as following url, but use fewer layrs.
http://www.aclweb.org/anthology/P15-1109
input_dim: here is word dictionary dimension.
class_dim: number of categories.
emb_dim: dimension of word embedding.
hid_dim: dimension of hidden layer.
stacked_num: number of stacked lstm-hidden layer.
"""
assert stacked_num % 2 == 1
layer_attr = paddle.attr.Extra(drop_rate=0.5)
fc_para_attr = paddle.attr.Param(learning_rate=1e-3)
lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.)
para_attr = [fc_para_attr, lstm_para_attr]
bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.)
relu = paddle.activation.Relu()
linear = paddle.activation.Linear()
data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim)
fc1 = paddle.layer.fc(input=emb,
size=hid_dim,
act=linear,
bias_attr=bias_attr)
lstm1 = paddle.layer.lstmemory(
input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1):
fc = paddle.layer.fc(input=inputs,
size=hid_dim,
act=linear,
param_attr=para_attr,
bias_attr=bias_attr)
lstm = paddle.layer.lstmemory(
input=fc,
reverse=(i % 2) == 0,
act=relu,
bias_attr=bias_attr,
layer_attr=layer_attr)
inputs = [fc, lstm]
fc_last = paddle.layer.pooling(
input=inputs[0], pooling_type=paddle.pooling.Max())
lstm_last = paddle.layer.pooling(
input=inputs[1], pooling_type=paddle.pooling.Max())
output = paddle.layer.fc(input=[fc_last, lstm_last],
size=class_dim,
act=paddle.activation.Softmax(),
bias_attr=bias_attr,
param_attr=para_attr)
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost
if __name__ == '__main__':
# init
paddle.init(use_gpu=False)
#data
print 'load dictionary...'
word_dict = paddle.dataset.imdb.word_dict()
dict_dim = len(word_dict)
class_dim = 2
train_reader = paddle.batch(
paddle.reader.shuffle(
lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
batch_size=100)
test_reader = paddle.batch(
lambda: paddle.dataset.imdb.test(word_dict), batch_size=100)
feeding = {'word': 0, 'label': 1}
# network config
# Please choose the way to build the network
# by uncommenting the corresponding line.
cost = convolution_net(dict_dim, class_dim=class_dim)
# cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)
# create parameters
parameters = paddle.parameters.create(cost)
# create optimizer
adam_optimizer = paddle.optimizer.Adam(
learning_rate=2e-3,
regularization=paddle.optimizer.L2Regularization(rate=8e-4),
model_average=paddle.optimizer.ModelAverage(average_window=0.5))
# End batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=test_reader, feeding=feeding)
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# create trainer
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=adam_optimizer)
trainer.train(
reader=train_reader,
event_handler=event_handler,
feeding=feeding,
num_passes=2)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sentiment_net import *
from paddle.trainer_config_helpers import *
# whether this config is used for test
is_test = get_config_arg('is_test', bool, False)
# whether this config is used for prediction
is_predict = get_config_arg('is_predict', bool, False)
data_dir = "./data/pre-imdb"
dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict)
################## Algorithm Config #####################
settings(
batch_size=128,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
model_average=ModelAverage(0.5),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25)
#################### Network Config ######################
stacked_lstm_net(
dict_dim, class_dim=class_dim, stacked_num=3, is_predict=is_predict)
# bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict)
data/wmt14
data/pre-wmt14
data/wmt14_model
data/paraphrase
data/pre-paraphrase
data/paraphrase_model
translation/gen.log
translation/gen_result
translation/train.log
paraphrase/train.log
dataprovider_copy_1.py
translation/thirdparty.tgz
translation/thirdparty/train.conf
translation/thirdparty/dataprovider.py
translation/thirdparty/seqToseq_net.py
translation/thirdparty/*.dict
*.pyc
import sys
import paddle.v2 as paddle
def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False):
### Network Architecture
word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network
beam_size = 3
max_length = 250
#### Encoder
src_word_id = paddle.layer.data(
name='source_language_word',
type=paddle.data_type.integer_value_sequence(source_dict_dim))
src_embedding = paddle.layer.embedding(
input=src_word_id,
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
src_forward = paddle.networks.simple_gru(
name='src_forward_gru', input=src_embedding, size=encoder_size)
src_backward = paddle.networks.simple_gru(
name='src_backward_gru',
input=src_embedding,
size=encoder_size,
reverse=True)
encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
#### Decoder
with paddle.layer.mixed(size=decoder_size) as encoded_proj:
encoded_proj += paddle.layer.full_matrix_projection(
input=encoded_vector)
backward_first = paddle.layer.first_seq(input=src_backward)
with paddle.layer.mixed(
name="decoder_boot_mixed",
size=decoder_size,
act=paddle.activation.Tanh()) as decoder_boot:
decoder_boot += paddle.layer.full_matrix_projection(
input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = paddle.layer.memory(
name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
context = paddle.networks.simple_attention(
name="simple_attention",
encoded_sequence=enc_vec,
encoded_proj=enc_proj,
decoder_state=decoder_mem)
with paddle.layer.mixed(
name="input_recurrent",
size=decoder_size * 3,
# enable error clipping
layer_attr=paddle.attr.ExtraAttr(
error_clipping_threshold=100.0)) as decoder_inputs:
decoder_inputs += paddle.layer.full_matrix_projection(input=context)
decoder_inputs += paddle.layer.full_matrix_projection(
input=current_word)
gru_step = paddle.layer.gru_step(
name='gru_decoder',
input=decoder_inputs,
output_mem=decoder_mem,
# uncomment to enable local threshold for gradient clipping
# param_attr=paddle.attr.ParamAttr(gradient_clipping_threshold=9.9),
size=decoder_size)
with paddle.layer.mixed(
name="gru_step_output",
size=target_dict_dim,
bias_attr=True,
act=paddle.activation.Softmax()) as out:
out += paddle.layer.full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
if not is_generating:
trg_embedding = paddle.layer.embedding(
input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embeding (the groudtruth) is the data input,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = paddle.layer.data(
name='target_language_next_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim))
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost
else:
# In generation, the decoder predicts a next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (encoder's output) must be specified by
# StaticInput, which is a read-only memory.
# Embedding of the last generated word is automatically gotten by
# GeneratedInputs, which is initialized by a start mark, such as <s>,
# and must be included in generation.
trg_embedding = paddle.layer.GeneratedInputV2(
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
group_inputs.append(trg_embedding)
beam_gen = paddle.layer.beam_search(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs,
bos_id=0,
eos_id=1,
beam_size=beam_size,
max_length=max_length)
return beam_gen
def main():
paddle.init(
use_gpu=False,
trainer_count=1,
# log gradient clipping info
log_clipping=True,
# log error clipping info
log_error_clipping=True)
is_generating = False
# source and target dict dim.
dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
# train the network
if not is_generating:
cost = seqToseq_net(source_dict_dim, target_dict_dim)
parameters = paddle.parameters.create(cost)
# define optimize method and trainer
optimizer = paddle.optimizer.Adam(
learning_rate=5e-5,
# uncomment to enable global threshold for gradient clipping
# gradient_clipping_threshold=10.0,
regularization=paddle.optimizer.L2Regularization(rate=8e-4))
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
# define data reader
wmt14_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.wmt14.train(dict_size), buf_size=8192),
batch_size=5)
# define event_handler callback
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 10 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost,
event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
# start to train
trainer.train(
reader=wmt14_reader, event_handler=event_handler, num_passes=2)
# generate a english sequence to french
else:
# use the first 3 samples for generation
gen_creator = paddle.dataset.wmt14.gen(dict_size)
gen_data = []
gen_num = 3
for item in gen_creator():
gen_data.append((item[0], ))
if len(gen_data) == gen_num:
break
beam_gen = seqToseq_net(source_dict_dim, target_dict_dim, is_generating)
# get the pretrained model, whose bleu = 26.92
parameters = paddle.dataset.wmt14.model()
# prob is the prediction probabilities, and id is the prediction word.
beam_result = paddle.infer(
output_layer=beam_gen,
parameters=parameters,
input=gen_data,
field=['prob', 'id'])
# get the dictionary
src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)
# the delimited element of generated sequences is -1,
# the first element of each generated sequence is the sequence length
seq_list = []
seq = []
for w in beam_result[1]:
if w != -1:
seq.append(w)
else:
seq_list.append(' '.join([trg_dict.get(w) for w in seq[1:]]))
seq = []
prob = beam_result[0]
beam_size = 3
for i in xrange(gen_num):
print "\n*******************************************************\n"
print "src:", ' '.join(
[src_dict.get(w) for w in gen_data[i][0]]), "\n"
for j in xrange(beam_size):
print "prob = %f:" % (prob[i][j]), seq_list[i * beam_size + j]
if __name__ == '__main__':
main()
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
set -x
# download the in-house paraphrase dataset
wget http://paddlepaddle.bj.bcebos.com/model_zoo/embedding/paraphrase.tar.gz
# untar the dataset
tar -zxvf paraphrase.tar.gz
rm paraphrase.tar.gz
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
set -x
dim=32
pretrained_dir='../../model_zoo/embedding/'
preModel=$pretrained_dir'model_'$dim'.emb'
preDict=$pretrained_dir'baidu.dict'
usrDict_dir='pre-paraphrase/'
srcDict=$usrDict_dir'src.dict'
trgDict=$usrDict_dir'trg.dict'
usrModel_dir='paraphrase_model/'
mkdir $usrModel_dir
srcModel=$usrModel_dir'_source_language_embedding'
trgModel=$usrModel_dir'_target_language_embedding'
echo 'extract desired parameters based on user dictionary'
script=$pretrained_dir'extract_para.py'
python $script --preModel $preModel --preDict $preDict \
--usrModel $srcModel --usrDict $srcDict -d $dim
python $script --preModel $preModel --preDict $preDict \
--usrModel $trgModel --usrDict $trgDict -d $dim
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
set -x
mkdir wmt14
cd wmt14
# download the dataset
wget http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/bitexts.tgz
wget http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz
# untar the dataset
tar -zxvf bitexts.tgz
tar -zxvf dev+test.tgz
gunzip bitexts.selected/*
mv bitexts.selected train
rm bitexts.tgz
rm dev+test.tgz
# separate the dev and test dataset
mkdir test gen
mv dev/ntst1213.* test
mv dev/ntst14.* gen
rm -rf dev
set +x
# rename the suffix, .fr->.src, .en->.trg
for dir in train test gen
do
filelist=`ls $dir`
cd $dir
for file in $filelist
do
if [ ${file##*.} = "fr" ]; then
mv $file ${file/%fr/src}
elif [ ${file##*.} = 'en' ]; then
mv $file ${file/%en/trg}
fi
done
cd ..
done
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
set -x
# download the pretrained model
wget http://paddlepaddle.bj.bcebos.com/model_zoo/wmt14_model.tar.gz
# untar the model
tar -zxvf wmt14_model.tar.gz
rm wmt14_model.tar.gz
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
UNK_IDX = 2
START = "<s>"
END = "<e>"
def hook(settings, src_dict_path, trg_dict_path, is_generating, file_list,
**kwargs):
# job_mode = 1: training mode
# job_mode = 0: generating mode
settings.job_mode = not is_generating
def fun(dict_path):
out_dict = dict()
with open(dict_path, "r") as fin:
out_dict = {
line.strip(): line_count
for line_count, line in enumerate(fin)
}
return out_dict
settings.src_dict = fun(src_dict_path)
settings.trg_dict = fun(trg_dict_path)
settings.logger.info("src dict len : %d" % (len(settings.src_dict)))
if settings.job_mode:
settings.slots = {
'source_language_word':
integer_value_sequence(len(settings.src_dict)),
'target_language_word':
integer_value_sequence(len(settings.trg_dict)),
'target_language_next_word':
integer_value_sequence(len(settings.trg_dict))
}
settings.logger.info("trg dict len : %d" % (len(settings.trg_dict)))
else:
settings.slots = {
'source_language_word':
integer_value_sequence(len(settings.src_dict)),
'sent_id':
integer_value_sequence(len(open(file_list[0], "r").readlines()))
}
def _get_ids(s, dictionary):
words = s.strip().split()
return [dictionary[START]] + \
[dictionary.get(w, UNK_IDX) for w in words] + \
[dictionary[END]]
@provider(init_hook=hook, pool_size=50000)
def process(settings, file_name):
with open(file_name, 'r') as f:
for line_count, line in enumerate(f):
line_split = line.strip().split('\t')
if settings.job_mode and len(line_split) != 2:
continue
src_seq = line_split[0] # one source sequence
src_ids = _get_ids(src_seq, settings.src_dict)
if settings.job_mode:
trg_seq = line_split[1] # one target sequence
trg_words = trg_seq.split()
trg_ids = [settings.trg_dict.get(w, UNK_IDX) for w in trg_words]
# remove sequence whose length > 80 in training mode
if len(src_ids) > 80 or len(trg_ids) > 80:
continue
trg_ids_next = trg_ids + [settings.trg_dict[END]]
trg_ids = [settings.trg_dict[START]] + trg_ids
yield {
'source_language_word': src_ids,
'target_language_word': trg_ids,
'target_language_next_word': trg_ids_next
}
else:
yield {'source_language_word': src_ids, 'sent_id': [line_count]}
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.append("..")
from seqToseq_net import *
is_generating = False
### Data Definiation
train_conf = seq_to_seq_data(data_dir = "./data/pre-paraphrase",
is_generating = is_generating)
### Algorithm Configuration
settings(
learning_method = AdamOptimizer(),
batch_size = 50,
learning_rate = 5e-4)
### Network Architecture
gru_encoder_decoder(train_conf, is_generating, word_vector_dim = 32)
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
cd ..
paddle train \
--config='paraphrase/train.conf' \
--save_dir='paraphrase/model' \
--init_model_path='data/paraphrase_model' \
--load_missing_parameter_strategy=rand \
--use_gpu=false \
--num_passes=16 \
--show_parameter_stats_period=100 \
--trainer_count=4 \
--log_period=10 \
--dot_period=5 \
2>&1 | tee 'paraphrase/train.log'
paddle usage -l 'paraphrase/train.log' -e $? -n "seqToseq_paraphrase_train" >/dev/null 2>&1
#!/bin/env python
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Example:
python preprocess.py -i INPUT [-d DICTSIZE] [-m]
Options:
-h, --help show this help message and exit
-i INPUT input original dataset path
-d DICTSIZE specified word count of dictionary
-m --mergeDict merge source and target dictionary
"""
import os
import sys
import string
from optparse import OptionParser
from paddle.utils.preprocess_util import save_list, DatasetCreater
class SeqToSeqDatasetCreater(DatasetCreater):
"""
A class to process data for sequence to sequence application.
"""
def __init__(self, data_path, output_path):
"""
data_path: the path to store the train data, test data and gen data
output_path: the path to store the processed dataset
"""
DatasetCreater.__init__(self, data_path)
self.gen_dir_name = 'gen'
self.gen_list_name = 'gen.list'
self.output_path = output_path
def concat_file(self, file_path, file1, file2, output_path, output):
"""
Concat file1 and file2 to be one output file
The i-th line of output = i-th line of file1 + '\t' + i-th line of file2
file_path: the path to store file1 and file2
output_path: the path to store output file
"""
file1 = os.path.join(file_path, file1)
file2 = os.path.join(file_path, file2)
output = os.path.join(output_path, output)
if not os.path.exists(output):
os.system('paste ' + file1 + ' ' + file2 + ' > ' + output)
def cat_file(self, dir_path, suffix, output_path, output):
"""
Cat all the files in dir_path with suffix to be one output file
dir_path: the base directory to store input file
suffix: suffix of file name
output_path: the path to store output file
"""
cmd = 'cat '
file_list = os.listdir(dir_path)
file_list.sort()
for file in file_list:
if file.endswith(suffix):
cmd += os.path.join(dir_path, file) + ' '
output = os.path.join(output_path, output)
if not os.path.exists(output):
os.system(cmd + '> ' + output)
def build_dict(self, file_path, dict_path, dict_size=-1):
"""
Create the dictionary for the file, Note that
1. Valid characters include all printable characters
2. There is distinction between uppercase and lowercase letters
3. There is 3 special token:
<s>: the start of a sequence
<e>: the end of a sequence
<unk>: a word not included in dictionary
file_path: the path to store file
dict_path: the path to store dictionary
dict_size: word count of dictionary
if is -1, dictionary will contains all the words in file
"""
if not os.path.exists(dict_path):
dictory = dict()
with open(file_path, "r") as fdata:
for line in fdata:
line = line.split('\t')
for line_split in line:
words = line_split.strip().split()
for word in words:
if word not in dictory:
dictory[word] = 1
else:
dictory[word] += 1
output = open(dict_path, "w+")
output.write('<s>\n<e>\n<unk>\n')
count = 3
for key, value in sorted(
dictory.items(), key=lambda d: d[1], reverse=True):
output.write(key + "\n")
count += 1
if count == dict_size:
break
self.dict_size = count
def create_dataset(self,
dict_size=-1,
mergeDict=False,
suffixes=['.src', '.trg']):
"""
Create seqToseq dataset
"""
# dataset_list and dir_list has one-to-one relationship
train_dataset = os.path.join(self.data_path, self.train_dir_name)
test_dataset = os.path.join(self.data_path, self.test_dir_name)
gen_dataset = os.path.join(self.data_path, self.gen_dir_name)
dataset_list = [train_dataset, test_dataset, gen_dataset]
train_dir = os.path.join(self.output_path, self.train_dir_name)
test_dir = os.path.join(self.output_path, self.test_dir_name)
gen_dir = os.path.join(self.output_path, self.gen_dir_name)
dir_list = [train_dir, test_dir, gen_dir]
# create directory
for dir in dir_list:
if not os.path.exists(dir):
os.mkdir(dir)
# checkout dataset should be parallel corpora
suffix_len = len(suffixes[0])
for dataset in dataset_list:
file_list = os.listdir(dataset)
if len(file_list) % 2 == 1:
raise RuntimeError("dataset should be parallel corpora")
file_list.sort()
for i in range(0, len(file_list), 2):
if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]:
raise RuntimeError(
"source and target file name should be equal")
# cat all the files with the same suffix in dataset
for suffix in suffixes:
for dataset in dataset_list:
outname = os.path.basename(dataset) + suffix
self.cat_file(dataset, suffix, dataset, outname)
# concat parallel corpora and create file.list
print 'concat parallel corpora for dataset'
id = 0
list = ['train.list', 'test.list', 'gen.list']
for dataset in dataset_list:
outname = os.path.basename(dataset)
self.concat_file(dataset, outname + suffixes[0],
outname + suffixes[1], dir_list[id], outname)
save_list([os.path.join(dir_list[id], outname)],
os.path.join(self.output_path, list[id]))
id += 1
# build dictionary for train data
dict = ['src.dict', 'trg.dict']
dict_path = [
os.path.join(self.output_path, dict[0]),
os.path.join(self.output_path, dict[1])
]
if mergeDict:
outname = os.path.join(train_dir, train_dataset.split('/')[-1])
print 'build src dictionary for train data'
self.build_dict(outname, dict_path[0], dict_size)
print 'build trg dictionary for train data'
os.system('cp ' + dict_path[0] + ' ' + dict_path[1])
else:
outname = os.path.join(train_dataset, self.train_dir_name)
for id in range(0, 2):
suffix = suffixes[id]
print 'build ' + suffix[1:] + ' dictionary for train data'
self.build_dict(outname + suffix, dict_path[id], dict_size)
print 'dictionary size is', self.dict_size
def main():
usage = "usage: \n" \
"python %prog -i INPUT [-d DICTSIZE] [-m]"
parser = OptionParser(usage)
parser.add_option(
"-i", action="store", dest="input", help="input original dataset path")
parser.add_option(
"-d",
action="store",
dest="dictsize",
help="specified word count of dictionary")
parser.add_option(
"-m",
"--mergeDict",
action="store_true",
dest="mergeDict",
help="merge source and target dictionary")
(options, args) = parser.parse_args()
if options.input[-1] == os.path.sep:
options.input = options.input[:-1]
outname = os.path.basename(options.input)
output_path = os.path.join(os.path.dirname(options.input), 'pre-' + outname)
dictsize = int(options.dictsize) if options.dictsize else -1
if not os.path.exists(output_path):
os.mkdir(output_path)
data_creator = SeqToSeqDatasetCreater(options.input, output_path)
data_creator.create_dataset(dictsize, options.mergeDict)
if __name__ == "__main__":
main()
# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
from paddle.trainer_config_helpers import *
def seq_to_seq_data(data_dir,
is_generating,
dict_size=30000,
train_list='train.list',
test_list='test.list',
gen_list='gen.list',
gen_result='gen_result'):
"""
Predefined seqToseq train data provider for application
is_generating: whether this config is used for generating
dict_size: word count of dictionary
train_list: a text file containing a list of training data
test_list: a text file containing a list of testing data
gen_list: a text file containing a list of generating data
gen_result: a text file containing generating result
"""
src_lang_dict = os.path.join(data_dir, 'src.dict')
trg_lang_dict = os.path.join(data_dir, 'trg.dict')
if is_generating:
train_list = None
test_list = os.path.join(data_dir, gen_list)
else:
train_list = os.path.join(data_dir, train_list)
test_list = os.path.join(data_dir, test_list)
define_py_data_sources2(
train_list,
test_list,
module="dataprovider",
obj="process",
args={
"src_dict_path": src_lang_dict,
"trg_dict_path": trg_lang_dict,
"is_generating": is_generating
})
return {
"src_dict_path": src_lang_dict,
"trg_dict_path": trg_lang_dict,
"gen_result": gen_result
}
def gru_encoder_decoder(data_conf,
is_generating,
word_vector_dim=512,
encoder_size=512,
decoder_size=512,
beam_size=3,
max_length=250,
error_clipping=50):
"""
A wrapper for an attention version of GRU Encoder-Decoder network
is_generating: whether this config is used for generating
encoder_size: dimension of hidden unit in GRU Encoder network
decoder_size: dimension of hidden unit in GRU Decoder network
word_vector_dim: dimension of word vector
beam_size: expand width in beam search
max_length: a stop condition of sequence generation
"""
for k, v in data_conf.iteritems():
globals()[k] = v
source_dict_dim = len(open(src_dict_path, "r").readlines())
target_dict_dim = len(open(trg_dict_path, "r").readlines())
gen_trans_file = gen_result
src_word_id = data_layer(name='source_language_word', size=source_dict_dim)
src_embedding = embedding_layer(
input=src_word_id,
size=word_vector_dim,
param_attr=ParamAttr(name='_source_language_embedding'))
src_forward = simple_gru(
input=src_embedding,
size=encoder_size,
naive=True,
gru_layer_attr=ExtraLayerAttribute(
error_clipping_threshold=error_clipping))
src_backward = simple_gru(
input=src_embedding,
size=encoder_size,
reverse=True,
naive=True,
gru_layer_attr=ExtraLayerAttribute(
error_clipping_threshold=error_clipping))
encoded_vector = concat_layer(input=[src_forward, src_backward])
with mixed_layer(size=decoder_size) as encoded_proj:
encoded_proj += full_matrix_projection(input=encoded_vector)
backward_first = first_seq(input=src_backward)
with mixed_layer(
size=decoder_size,
act=TanhActivation(), ) as decoder_boot:
decoder_boot += full_matrix_projection(input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = memory(
name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
context = simple_attention(
encoded_sequence=enc_vec,
encoded_proj=enc_proj,
decoder_state=decoder_mem, )
with mixed_layer(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += full_matrix_projection(input=context)
decoder_inputs += full_matrix_projection(input=current_word)
gru_step = gru_step_naive_layer(
name='gru_decoder',
input=decoder_inputs,
output_mem=decoder_mem,
size=decoder_size,
layer_attr=ExtraLayerAttribute(
error_clipping_threshold=error_clipping))
with mixed_layer(
size=target_dict_dim, bias_attr=True,
act=SoftmaxActivation()) as out:
out += full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
group_inputs = [
StaticInput(
input=encoded_vector, is_seq=True), StaticInput(
input=encoded_proj, is_seq=True)
]
if not is_generating:
trg_embedding = embedding_layer(
input=data_layer(
name='target_language_word', size=target_dict_dim),
size=word_vector_dim,
param_attr=ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embeding (the groudtruth) is the data input,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = data_layer(name='target_language_next_word', size=target_dict_dim)
cost = classification_cost(input=decoder, label=lbl)
outputs(cost)
else:
# In generation, the decoder predicts a next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (encoder's output) must be specified by
# StaticInput, which is a read-only memory.
# Embedding of the last generated word is automatically gotten by
# GeneratedInputs, which is initialized by a start mark, such as <s>,
# and must be included in generation.
trg_embedding = GeneratedInput(
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
group_inputs.append(trg_embedding)
beam_gen = beam_search(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs,
bos_id=0,
eos_id=1,
beam_size=beam_size,
max_length=max_length)
seqtext_printer_evaluator(
input=beam_gen,
id_input=data_layer(
name="sent_id", size=1),
dict_file=trg_dict_path,
result_file=gen_trans_file)
outputs(beam_gen)
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
gen_file=$1
beam_size=$2
# find top1 generating result
top1=$(printf '%s_top1.txt' `basename $gen_file .txt`)
if [ $beam_size -eq 1 ]; then
awk -F "\t" '{sub(" <e>","",$2);sub(" ","",$2);print $2}' $gen_file >$top1
else
awk 'BEGIN{
FS="\t";
OFS="\t";
read_pos = 2} {
if (NR == read_pos){
sub(" <e>","",$3);
sub(" ","",$3);
print $3;
read_pos += (2 + res_num);
}}' res_num=$beam_size $gen_file >$top1
fi
# evalute bleu value
bleu_script=multi-bleu.perl
standard_res=../data/wmt14/gen/ntst14.trg
bleu_res=`perl $bleu_script $standard_res <$top1`
echo $bleu_res
rm $top1
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.append("..")
from seqToseq_net import *
# whether this config is used for generating
is_generating = True
### Data Definiation
gen_conf = seq_to_seq_data(data_dir = "./data/pre-wmt14",
is_generating = is_generating,
gen_result = "./translation/gen_result")
### Algorithm Configuration
settings(
learning_method = AdamOptimizer(),
batch_size = 1,
learning_rate = 0)
### Network Architecture
gru_encoder_decoder(gen_conf, is_generating)
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
cd ..
paddle train \
--job=test \
--config='translation/gen.conf' \
--save_dir='data/wmt14_model' \
--use_gpu=false \
--num_passes=13 \
--test_pass=12 \
--trainer_count=1 \
2>&1 | tee 'translation/gen.log'
paddle usage -l 'translation/gen.log' -e $? -n "seqToseq_translation_gen" >/dev/null 2>&1
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
set -x
echo "Downloading multi-bleu.perl"
wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/generic/multi-bleu.perl --no-check-certificate
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.append("..")
from seqToseq_net import *
# whether this config is used for generating
is_generating = False
### Data Definiation
data_dir = "./data/pre-wmt14"
train_conf = seq_to_seq_data(data_dir = data_dir,
is_generating = is_generating)
### Algorithm Configuration
settings(
learning_method = AdamOptimizer(),
batch_size = 50,
learning_rate = 5e-4)
### Network Architecture
gru_encoder_decoder(train_conf, is_generating)
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
cd ..
paddle train \
--config='translation/train.conf' \
--save_dir='translation/model' \
--use_gpu=false \
--num_passes=16 \
--show_parameter_stats_period=100 \
--trainer_count=4 \
--log_period=10 \
--dot_period=5 \
2>&1 | tee 'translation/train.log'
paddle usage -l 'translation/train.log' -e $? -n "seqToseq_translation_train" >/dev/null 2>&1
import gzip
import math
import paddle.v2 as paddle
embsize = 32
hiddensize = 256
N = 5
def wordemb(inlayer):
wordemb = paddle.layer.embedding(
input=inlayer,
size=embsize,
param_attr=paddle.attr.Param(
name="_proj",
initial_std=0.001,
learning_rate=1,
l2_rate=0,
sparse_update=True))
return wordemb
def main():
# for local training
cluster_train = False
if not cluster_train:
paddle.init(use_gpu=False, trainer_count=1)
else:
paddle.init(
use_gpu=False,
trainer_count=2,
port=7164,
ports_num=1,
ports_num_for_sparse=1,
num_gradient_servers=1)
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
firstword = paddle.layer.data(
name="firstw", type=paddle.data_type.integer_value(dict_size))
secondword = paddle.layer.data(
name="secondw", type=paddle.data_type.integer_value(dict_size))
thirdword = paddle.layer.data(
name="thirdw", type=paddle.data_type.integer_value(dict_size))
fourthword = paddle.layer.data(
name="fourthw", type=paddle.data_type.integer_value(dict_size))
nextword = paddle.layer.data(
name="fifthw", type=paddle.data_type.integer_value(dict_size))
Efirst = wordemb(firstword)
Esecond = wordemb(secondword)
Ethird = wordemb(thirdword)
Efourth = wordemb(fourthword)
contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
hidden1 = paddle.layer.fc(input=contextemb,
size=hiddensize,
act=paddle.activation.Sigmoid(),
layer_attr=paddle.attr.Extra(drop_rate=0.5),
bias_attr=paddle.attr.Param(learning_rate=2),
param_attr=paddle.attr.Param(
initial_std=1. / math.sqrt(embsize * 8),
learning_rate=1))
predictword = paddle.layer.fc(input=hidden1,
size=dict_size,
bias_attr=paddle.attr.Param(learning_rate=2),
act=paddle.activation.Softmax())
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
with gzip.open("batch-" + str(event.batch_id) + ".tar.gz",
'w') as f:
trainer.save_parameter_to_tar(f)
result = trainer.test(
paddle.batch(
paddle.dataset.imikolov.test(word_dict, N), 32))
print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics,
result.metrics)
cost = paddle.layer.classification_cost(input=predictword, label=nextword)
parameters = paddle.parameters.create(cost)
adagrad = paddle.optimizer.AdaGrad(
learning_rate=3e-3,
regularization=paddle.optimizer.L2Regularization(8e-4))
trainer = paddle.trainer.SGD(cost,
parameters,
adagrad,
is_local=not cluster_train)
trainer.train(
paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
num_passes=30,
event_handler=event_handler)
if __name__ == '__main__':
main()
...@@ -59,6 +59,11 @@ context_projection ...@@ -59,6 +59,11 @@ context_projection
.. autoclass:: paddle.v2.layer.context_projection .. autoclass:: paddle.v2.layer.context_projection
:noindex: :noindex:
row_conv
--------
.. autoclass:: paddle.v2.layer.row_conv
:noindex:
Image Pooling Layer Image Pooling Layer
=================== ===================
...@@ -346,6 +351,12 @@ sampling_id ...@@ -346,6 +351,12 @@ sampling_id
.. autoclass:: paddle.v2.layer.sampling_id .. autoclass:: paddle.v2.layer.sampling_id
:noindex: :noindex:
multiplex
---------
.. autoclass:: paddle.v2.layer.multiplex
:noindex:
Slicing and Joining Layers Slicing and Joining Layers
========================== ==========================
...@@ -441,3 +452,19 @@ eos ...@@ -441,3 +452,19 @@ eos
--- ---
.. autoclass:: paddle.v2.layer.eos .. autoclass:: paddle.v2.layer.eos
:noindex: :noindex:
Miscs
=====
dropout
--------------
.. autoclass:: paddle.v2.layer.dropout
:noindex:
Activation with learnable parameter
===================================
prelu
--------
.. autoclass:: paddle.v2.layer.prelu
:noindex:
...@@ -125,11 +125,3 @@ simple_attention ...@@ -125,11 +125,3 @@ simple_attention
:members: simple_attention :members: simple_attention
:noindex: :noindex:
Miscs
=====
dropout_layer
--------------
.. automodule:: paddle.v2.networks
:members: dropout_layer
:noindex:
...@@ -74,14 +74,25 @@ typedef enum { ...@@ -74,14 +74,25 @@ typedef enum {
typedef struct { typedef struct {
char* name; char* name;
paddle_element_type element_type; paddle_element_type element_type;
void* content; unsigned char* content;
int content_len; int content_len;
} paddle_parameter, paddle_gradient; } paddle_parameter, paddle_gradient;
typedef struct paddle_pserver_client paddle_pserver_client; typedef int paddle_pserver_client;
paddle_pserver_client* paddle_new_pserver_client(); /**
void paddle_pserver_client_release(paddle_pserver_client* client); * @brief creates a pserver client that talks to etcd for coordination.
*/
paddle_pserver_client paddle_new_etcd_pserver_client(char* etcd_addr);
/**
* @brief creates a pserver client given pserver addresses.
*
* @param pserver_addrs comma-separated pserver addresses.
* @param selected if current pserver client is selected to initialize all parameter servers.
*/
paddle_pserver_client paddle_new_pserver_client(char* pserver_addrs, int selected);
void paddle_pserver_client_release(paddle_pserver_client c);
/** /**
* @brief paddle_begin_init_params begins to initialize parameters on * @brief paddle_begin_init_params begins to initialize parameters on
...@@ -95,7 +106,7 @@ void paddle_pserver_client_release(paddle_pserver_client* client); ...@@ -95,7 +106,7 @@ void paddle_pserver_client_release(paddle_pserver_client* client);
* @return 1 if the trainer is selected to initialize parameter * @return 1 if the trainer is selected to initialize parameter
* servers, otherwise 0. * servers, otherwise 0.
*/ */
int paddle_begin_init_params(paddle_pserver_client* client); int paddle_begin_init_params(paddle_pserver_client client);
/** /**
* @brief paddle_init_param initializes the parameter on parameter * @brief paddle_init_param initializes the parameter on parameter
...@@ -109,7 +120,7 @@ int paddle_begin_init_params(paddle_pserver_client* client); ...@@ -109,7 +120,7 @@ int paddle_begin_init_params(paddle_pserver_client* client);
* @paddle_begin_init_param). Or simply exit the program and wait for * @paddle_begin_init_param). Or simply exit the program and wait for
* the cluster management system to restart the trainer. * the cluster management system to restart the trainer.
*/ */
int paddle_init_param(paddle_pserver_client* client, paddle_parameter param, const unsigned char* param_config_proto, int config_len); int paddle_init_param(paddle_pserver_client client, paddle_parameter param, const unsigned char* param_config_proto, int config_len);
/** /**
* @brief paddle_finish_init_params tells parameter servers client has * @brief paddle_finish_init_params tells parameter servers client has
...@@ -120,7 +131,7 @@ int paddle_init_param(paddle_pserver_client* client, paddle_parameter param, con ...@@ -120,7 +131,7 @@ int paddle_init_param(paddle_pserver_client* client, paddle_parameter param, con
* @paddle_begin_init_param). Or simply exit the program and wait for * @paddle_begin_init_param). Or simply exit the program and wait for
* the cluster management system to restart the trainer. * the cluster management system to restart the trainer.
*/ */
int paddle_finish_init_params(paddle_pserver_client* client); int paddle_finish_init_params(paddle_pserver_client client);
/** /**
* @brief paddle_send_grads sends gradients to parameter servers for * @brief paddle_send_grads sends gradients to parameter servers for
...@@ -131,7 +142,7 @@ int paddle_finish_init_params(paddle_pserver_client* client); ...@@ -131,7 +142,7 @@ int paddle_finish_init_params(paddle_pserver_client* client);
* @param learning_rate the learning rate for the gradients. * @param learning_rate the learning rate for the gradients.
* @return 0 if successful, otherwise -1. * @return 0 if successful, otherwise -1.
*/ */
int paddle_send_grads(paddle_pserver_client* client, const paddle_gradient* grads, int len); int paddle_send_grads(paddle_pserver_client client, const paddle_gradient* grads, int len);
/** /**
* @brief paddle_get_params gets parameters from parameter servers. * @brief paddle_get_params gets parameters from parameter servers.
...@@ -139,13 +150,15 @@ int paddle_send_grads(paddle_pserver_client* client, const paddle_gradient* grad ...@@ -139,13 +150,15 @@ int paddle_send_grads(paddle_pserver_client* client, const paddle_gradient* grad
* paddle_get_params will block until parameters are initialized on * paddle_get_params will block until parameters are initialized on
* the parameter servers. * the parameter servers.
* *
* @param names the array of names of the parameters to get. * @param dst the destination array of parameter pointers to save to.
* @param dst the destination array of parameters to save to. * The parameter pointer must be pre-popullated with required parameter name,
* and the content of parameter must be pre-allocated of the size of required
* parameter on pserver.
* @param len the length of the names array and the paddle_parameter * @param len the length of the names array and the paddle_parameter
* array. * array.
* @return 0 if successful, otherwise -1. * @return 0 if successful, otherwise -1.
*/ */
int paddle_get_params(paddle_pserver_client* client, const char** names, paddle_parameter* dst, int len); int paddle_get_params(paddle_pserver_client client, paddle_parameter** dst, int len);
/** /**
* @brief paddle_save_model indicates parameters to save the parameter * @brief paddle_save_model indicates parameters to save the parameter
...@@ -154,5 +167,5 @@ int paddle_get_params(paddle_pserver_client* client, const char** names, paddle_ ...@@ -154,5 +167,5 @@ int paddle_get_params(paddle_pserver_client* client, const char** names, paddle_
* @param path the path to save parameters. * @param path the path to save parameters.
* @return 0 if successful, otherwise -1. * @return 0 if successful, otherwise -1.
*/ */
int paddle_save_model(paddle_pserver_client* client, const char* path); int paddle_save_model(paddle_pserver_client client, const char* path);
``` ```
# Design Doc: Remote Parameter Updater for Cluster Train
For an overview of distribute training, please refer to [distributed training design doc](README.md). In this design doc, we will discuss the parameter updater that will use parameter server cclient [The Client Library of Parameter Server Design Doc](pserver_client.md) to manage and update parameters.
## Parameter Updater
Parameter Updater is used by trainer to manage and update parameter, there are mainly two kind of parameter updater: local and remote, since this design is for cluster train, we will only discuss remote parameter updater here.
### Remote Parameter Updater
Remote Parameter Updater manage parameters through remote parameter server with the client that communicate with pserver([The Client Library of Parameter Server Design Doc](pserver_client.md))
In PaddlePaddle Python V2 API, trainer is implemented in python, and the trainer will hold a instance of parameter updater and call it's functions directly. In this design, we will also expose the api of RemoteParameterUpdater to python with swig.
#### Sparse Remote Parameter Updater
Since we will only implement dense parameter management new, the mechanism for sparse parameter will be discussed in next stage.
### Interface Design
TBD
...@@ -22,6 +22,7 @@ To compile the source code, your computer must be equipped with the following de ...@@ -22,6 +22,7 @@ To compile the source code, your computer must be equipped with the following de
- **CMake**: CMake >= 3.0 (at least CMake 3.4 on Mac OS X) - **CMake**: CMake >= 3.0 (at least CMake 3.4 on Mac OS X)
- **BLAS**: MKL, OpenBlas or ATLAS - **BLAS**: MKL, OpenBlas or ATLAS
- **Python**: only support Python 2.7 - **Python**: only support Python 2.7
- **Go**
**Note:** For CUDA 7.0 and CUDA 7.5, GCC 5.0 and up are not supported! **Note:** For CUDA 7.0 and CUDA 7.5, GCC 5.0 and up are not supported!
For CUDA 8.0, GCC versions later than 5.3 are not supported! For CUDA 8.0, GCC versions later than 5.3 are not supported!
...@@ -107,6 +108,18 @@ As a simple example, consider the following: ...@@ -107,6 +108,18 @@ As a simple example, consider the following:
sudo apt-get install -y python python-pip python-numpy libpython-dev bison sudo apt-get install -y python python-pip python-numpy libpython-dev bison
sudo pip install 'protobuf==3.1.0.post1' sudo pip install 'protobuf==3.1.0.post1'
# Install Go
# You can follow https://golang.org/doc/install for a detailed explanation.
wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \
tar -C $HOME -xzf go.tgz && \
mkdir $HOME/gopath && \
rm go.tgz
# Setup environment variables
export GOROOT=$HOME/go
export GOPATH=$HOME/gopath
export PATH=$PATH:$GOROOT/bin
# install cmake 3.4 # install cmake 3.4
curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \ curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
cd cmake-3.4.1 && ./bootstrap && make -j4 && sudo make install && \ cd cmake-3.4.1 && ./bootstrap && make -j4 && sudo make install && \
......
...@@ -7,4 +7,4 @@ ...@@ -7,4 +7,4 @@
build_and_install/index_cn.rst build_and_install/index_cn.rst
concepts/use_concepts_cn.rst concepts/use_concepts_cn.rst
- `深度学习入门课程 <http://book.paddlepaddle.org/>`_ - `深度学习入门课程 <http://book.paddlepaddle.org/index.cn.html>`_
...@@ -6,4 +6,4 @@ GET STARTED ...@@ -6,4 +6,4 @@ GET STARTED
build_and_install/index_en.rst build_and_install/index_en.rst
- `Deep Learning 101 <http://book.paddlepaddle.org/index.en.html>`_ - `Deep Learning 101 <http://book.paddlepaddle.org/index.html>`_
...@@ -4,6 +4,7 @@ RNN相关模型 ...@@ -4,6 +4,7 @@ RNN相关模型
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
rnn_config_cn.rst
recurrent_group_cn.md recurrent_group_cn.md
hierarchical_layer_cn.rst hierarchical_layer_cn.rst
hrnn_rnn_api_compare_cn.rst hrnn_rnn_api_compare_cn.rst
RNN Models RNN Models
========== ==========
.. toctree::
:maxdepth: 1
rnn_config_en.rst
...@@ -5,36 +5,13 @@ RNN配置 ...@@ -5,36 +5,13 @@ RNN配置
中配置循环神经网络(RNN)。PaddlePaddle 中配置循环神经网络(RNN)。PaddlePaddle
高度支持灵活和高效的循环神经网络配置。 在本教程中,您将了解如何: 高度支持灵活和高效的循环神经网络配置。 在本教程中,您将了解如何:
- 准备用来学习循环神经网络的序列数据。
- 配置循环神经网络架构。 - 配置循环神经网络架构。
- 使用学习完成的循环神经网络模型生成序列。 - 使用学习完成的循环神经网络模型生成序列。
我们将使用 vanilla 循环神经网络和 sequence to sequence 我们将使用 vanilla 循环神经网络和 sequence to sequence
模型来指导你完成这些步骤。sequence to sequence 模型来指导你完成这些步骤。sequence to sequence
模型的代码可以在\ ``demo / seqToseq``\ 找到。 模型的代码可以在 `book/08.machine_translation <https://github.com/PaddlePaddle/book/tree/develop/08.machine_translation>`_ 找到。
wmt14数据的提供文件在 `python/paddle/v2/dataset/wmt14.py <https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/dataset/wmt14.py>`_ 。
准备序列数据
------------
PaddlePaddle
不需要对序列数据进行任何预处理,例如填充。唯一需要做的是将相应类型设置为输入。例如,以下代码段定义了三个输入。
它们都是序列,它们的大小是\ ``src_dict``\ ,\ ``trg_dict``\ 和\ ``trg_dict``\ :
.. code:: python
settings.input_types = [
integer_value_sequence(len(settings.src_dict)),
integer_value_sequence(len(settings.trg_dict)),
integer_value_sequence(len(settings.trg_dict))]
在\ ``process``\ 函数中,每个\ ``yield``\ 函数将返回三个整数列表。每个整数列表被视为一个整数序列:
.. code:: python
yield src_ids, trg_ids, trg_ids_next
有关如何编写数据提供程序的更多细节描述,请参考 :ref:`api_pydataprovider2` 。完整的数据提供文件在
``demo/seqToseq/dataprovider.py``\ 。
配置循环神经网络架构 配置循环神经网络架构
-------------------- --------------------
...@@ -85,16 +62,16 @@ vanilla ...@@ -85,16 +62,16 @@ vanilla
act=None, act=None,
rnn_layer_attr=None): rnn_layer_attr=None):
def __rnn_step__(ipt): def __rnn_step__(ipt):
out_mem = memory(name=name, size=size) out_mem = paddle.layer.memory(name=name, size=size)
rnn_out = mixed_layer(input = [full_matrix_projection(ipt), rnn_out = paddle.layer.mixed(input = [paddle.layer.full_matrix_projection(input=ipt),
full_matrix_projection(out_mem)], paddle.layer.full_matrix_projection(input=out_mem)],
name = name, name = name,
bias_attr = rnn_bias_attr, bias_attr = rnn_bias_attr,
act = act, act = act,
layer_attr = rnn_layer_attr, layer_attr = rnn_layer_attr,
size = size) size = size)
return rnn_out return rnn_out
return recurrent_group(name='%s_recurrent_group' % name, return paddle.layer.recurrent_group(name='%s_recurrent_group' % name,
step=__rnn_step__, step=__rnn_step__,
reverse=reverse, reverse=reverse,
input=input) input=input)
...@@ -140,43 +117,52 @@ Sequence to Sequence Model with Attention ...@@ -140,43 +117,52 @@ Sequence to Sequence Model with Attention
.. code:: python .. code:: python
# 定义源语句的数据层 # 定义源语句的数据层
src_word_id = data_layer(name='source_language_word', size=source_dict_dim) src_word_id = paddle.layer.data(
name='source_language_word',
type=paddle.data_type.integer_value_sequence(source_dict_dim))
# 计算每个词的词向量 # 计算每个词的词向量
src_embedding = embedding_layer( src_embedding = paddle.layer.embedding(
input=src_word_id, input=src_word_id,
size=word_vector_dim, size=word_vector_dim,
param_attr=ParamAttr(name='_source_language_embedding')) param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
# 应用前向循环神经网络 # 应用前向循环神经网络
src_forward = grumemory(input=src_embedding, size=encoder_size) src_forward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size)
# 应用反向递归神经网络(reverse=True表示反向循环神经网络) # 应用反向递归神经网络(reverse=True表示反向循环神经网络)
src_backward = grumemory(input=src_embedding, src_backward = paddle.networks.simple_gru(
size=encoder_size, input=src_embedding, size=encoder_size, reverse=True)
reverse=True)
# 将循环神经网络的前向和反向部分混合在一起 # 将循环神经网络的前向和反向部分混合在一起
encoded_vector = concat_layer(input=[src_forward, src_backward]) encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
# 投射编码向量到 decoder_size # 投射编码向量到 decoder_size
encoder_proj = mixed_layer(input = [full_matrix_projection(encoded_vector)], encoded_proj = paddle.layer.mixed(
size = decoder_size) size=decoder_size,
input=paddle.layer.full_matrix_projection(encoded_vector))
# 计算反向RNN的第一个实例 # 计算反向RNN的第一个实例
backward_first = first_seq(input=src_backward) backward_first = paddle.layer.first_seq(input=src_backward)
# 投射反向RNN的第一个实例到 decoder size # 投射反向RNN的第一个实例到 decoder size
decoder_boot = mixed_layer(input=[full_matrix_projection(backward_first)], size=decoder_size, act=TanhActivation()) decoder_boot = paddle.layer.mixed(
size=decoder_size,
act=paddle.activation.Tanh(),
input=paddle.layer.full_matrix_projection(backward_first))
解码器使用 ``recurrent_group`` 来定义循环神经网络。单步函数和输出函数在 解码器使用 ``recurrent_group`` 来定义循环神经网络。单步函数和输出函数在
``gru_decoder_with_attention`` 中定义: ``gru_decoder_with_attention`` 中定义:
.. code:: python .. code:: python
group_inputs=[StaticInput(input=encoded_vector,is_seq=True), group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True)
StaticInput(input=encoded_proj,is_seq=True)] group_input2 = paddle.layer.StaticInput(input=encoded_proj, is_seq=True)
trg_embedding = embedding_layer( group_inputs = [group_input1, group_input2]
input=data_layer(name='target_language_word', trg_embedding = paddle.layer.embedding(
size=target_dict_dim), input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim, size=word_vector_dim,
param_attr=ParamAttr(name='_target_language_embedding')) param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
group_inputs.append(trg_embedding) group_inputs.append(trg_embedding)
# 对于配备有注意力机制的解码器,在训练中, # 对于配备有注意力机制的解码器,在训练中,
...@@ -185,7 +171,8 @@ Sequence to Sequence Model with Attention ...@@ -185,7 +171,8 @@ Sequence to Sequence Model with Attention
# StaticInput 意味着不同时间步的输入都是相同的值, # StaticInput 意味着不同时间步的输入都是相同的值,
# 否则它以一个序列输入,不同时间步的输入是不同的。 # 否则它以一个序列输入,不同时间步的输入是不同的。
# 所有输入序列应该有相同的长度。 # 所有输入序列应该有相同的长度。
decoder = recurrent_group(name=decoder_group_name, decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention, step=gru_decoder_with_attention,
input=group_inputs) input=group_inputs)
...@@ -198,27 +185,32 @@ attention,门控循环单元单步函数和输出函数: ...@@ -198,27 +185,32 @@ attention,门控循环单元单步函数和输出函数:
# 定义解码器的Memory # 定义解码器的Memory
# Memory的输出定义在 gru_step 内 # Memory的输出定义在 gru_step 内
# 注意 gru_step 应该与它的Memory名字相同 # 注意 gru_step 应该与它的Memory名字相同
decoder_mem = memory(name='gru_decoder', decoder_mem = paddle.layer.memory(
size=decoder_size, name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
boot_layer=decoder_boot)
# 计算 attention 加权编码向量 # 计算 attention 加权编码向量
context = simple_attention(encoded_sequence=enc_vec, context = paddle.networks.simple_attention(
encoded_sequence=enc_vec,
encoded_proj=enc_proj, encoded_proj=enc_proj,
decoder_state=decoder_mem) decoder_state=decoder_mem)
# 混合当前词向量和attention加权编码向量 # 混合当前词向量和attention加权编码向量
decoder_inputs = mixed_layer(inputs = [full_matrix_projection(context), decoder_inputs = paddle.layer.mixed(
full_matrix_projection(current_word)], size=decoder_size * 3,
size = decoder_size * 3) input=[
paddle.layer.full_matrix_projection(input=context),
paddle.layer.full_matrix_projection(input=current_word)
])
# 定义门控循环单元循环神经网络单步函数 # 定义门控循环单元循环神经网络单步函数
gru_step = gru_step_layer(name='gru_decoder', gru_step = paddle.layer.gru_step(
name='gru_decoder',
input=decoder_inputs, input=decoder_inputs,
output_mem=decoder_mem, output_mem=decoder_mem,
size=decoder_size) size=decoder_size)
# 定义输出函数 # 定义输出函数
out = mixed_layer(input=[full_matrix_projection(input=gru_step)], out = paddle.layer.mixed(
size=target_dict_dim, size=target_dict_dim,
bias_attr=True, bias_attr=True,
act=SoftmaxActivation()) act=paddle.activation.Softmax(),
input=paddle.layer.full_matrix_projection(input=gru_step))
return out return out
生成序列 生成序列
...@@ -238,28 +230,23 @@ attention,门控循环单元单步函数和输出函数: ...@@ -238,28 +230,23 @@ attention,门控循环单元单步函数和输出函数:
- ``beam_size``: beam search 算法中的beam大小。 - ``beam_size``: beam search 算法中的beam大小。
- ``max_length``: 生成序列的最大长度。 - ``max_length``: 生成序列的最大长度。
- 使用 ``seqtext_printer_evaluator``
根据索引矩阵和字典打印文本。这个函数需要设置:
- ``id_input``: 数据的整数ID,用于标识生成的文件中的相应输出。
- ``dict_file``: 用于将词ID转换为词的字典文件。
- ``result_file``: 生成结果文件的路径。
代码如下: 代码如下:
.. code:: python .. code:: python
group_inputs=[StaticInput(input=encoded_vector,is_seq=True), group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True)
StaticInput(input=encoded_proj,is_seq=True)] group_input2 = paddle.layer.StaticInput(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
# 在生成时,解码器基于编码源序列和最后生成的目标词预测下一目标词。 # 在生成时,解码器基于编码源序列和最后生成的目标词预测下一目标词。
# 编码源序列(编码器输出)必须由只读Memory的 StaticInput 指定。 # 编码源序列(编码器输出)必须由只读Memory的 StaticInput 指定。
# 这里, GeneratedInputs 自动获取上一个生成的词,并在最开始初始化为起始词,如 <s>。 # 这里, GeneratedInputs 自动获取上一个生成的词,并在最开始初始化为起始词,如 <s>。
trg_embedding = GeneratedInput( trg_embedding = paddle.layer.GeneratedInput(
size=target_dict_dim, size=target_dict_dim,
embedding_name='_target_language_embedding', embedding_name='_target_language_embedding',
embedding_size=word_vector_dim) embedding_size=word_vector_dim)
group_inputs.append(trg_embedding) group_inputs.append(trg_embedding)
beam_gen = beam_search(name=decoder_group_name, beam_gen = paddle.layer.beam_search(
name=decoder_group_name,
step=gru_decoder_with_attention, step=gru_decoder_with_attention,
input=group_inputs, input=group_inputs,
bos_id=0, # Beginnning token. bos_id=0, # Beginnning token.
...@@ -267,12 +254,8 @@ attention,门控循环单元单步函数和输出函数: ...@@ -267,12 +254,8 @@ attention,门控循环单元单步函数和输出函数:
beam_size=beam_size, beam_size=beam_size,
max_length=max_length) max_length=max_length)
seqtext_printer_evaluator(input=beam_gen, return beam_gen
id_input=data_layer(name="sent_id", size=1),
dict_file=trg_dict_path,
result_file=gen_trans_file)
outputs(beam_gen)
注意,这种生成技术只用于类似解码器的生成过程。如果你正在处理序列标记任务,请参阅 :ref:`semantic_role_labeling` 了解更多详细信息。 注意,这种生成技术只用于类似解码器的生成过程。如果你正在处理序列标记任务,请参阅 `book/06.understand_sentiment <https://github.com/PaddlePaddle/book/tree/develop/06.understand_sentiment>`_ 了解更多详细信息。
完整的配置文件在\ ``demo/seqToseq/seqToseq_net.py``\ 完整的配置文件在 `book/08.machine_translation/train.py <https://github.com/PaddlePaddle/book/blob/develop/08.machine_translation/train.py>`_
...@@ -3,34 +3,11 @@ RNN Configuration ...@@ -3,34 +3,11 @@ RNN Configuration
This tutorial will guide you how to configure recurrent neural network in PaddlePaddle. PaddlePaddle supports highly flexible and efficient recurrent neural network configuration. In this tutorial, you will learn how to: This tutorial will guide you how to configure recurrent neural network in PaddlePaddle. PaddlePaddle supports highly flexible and efficient recurrent neural network configuration. In this tutorial, you will learn how to:
- prepare sequence data for learning recurrent neural networks.
- configure recurrent neural network architecture. - configure recurrent neural network architecture.
- generate sequence with learned recurrent neural network models. - generate sequence with learned recurrent neural network models.
We will use vanilla recurrent neural network, and sequence to sequence model to guide you through these steps. The code of sequence to sequence model can be found at :code:`demo/seqToseq`. We will use vanilla recurrent neural network, and sequence to sequence model to guide you through these steps. The code of sequence to sequence model can be found at `book/08.machine_translation <https://github.com/PaddlePaddle/book/tree/develop/08.machine_translation>`_ .
And the data preparation of this model can be found at `python/paddle/v2/dataset/wmt14.py <https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/dataset/wmt14.py>`_
=====================
Prepare Sequence Data
=====================
PaddlePaddle does not need any preprocessing to sequence data, such as padding. The only thing that needs to be done is to set the type of the corresponding type to input. For example, the following code snippets defines three input. All of them are sequences, and the size of them are :code:`src_dict`, :code:`trg_dict`, and :code:`trg_dict`:
.. code-block:: python
settings.input_types = [
integer_value_sequence(len(settings.src_dict)),
integer_value_sequence(len(settings.trg_dict)),
integer_value_sequence(len(settings.trg_dict))]
Then at the :code:`process` function, each :code:`yield` function will return three integer lists. Each integer list is treated as a sequence of integers:
.. code-block:: python
yield src_ids, trg_ids, trg_ids_next
For more details description of how to write a data provider, please refer to :ref:`api_pydataprovider2` . The full data provider file is located at :code:`demo/seqToseq/dataprovider.py`.
=============================================== ===============================================
Configure Recurrent Neural Network Architecture Configure Recurrent Neural Network Architecture
...@@ -75,16 +52,16 @@ Its **output function** simply takes :math:`x_t` as the output. ...@@ -75,16 +52,16 @@ Its **output function** simply takes :math:`x_t` as the output.
act=None, act=None,
rnn_layer_attr=None): rnn_layer_attr=None):
def __rnn_step__(ipt): def __rnn_step__(ipt):
out_mem = memory(name=name, size=size) out_mem = paddle.layer.memory(name=name, size=size)
rnn_out = mixed_layer(input = [full_matrix_projection(ipt), rnn_out = paddle.layer.mixed(input = [paddle.layer.full_matrix_projection(input=ipt),
full_matrix_projection(out_mem)], paddle.layer.full_matrix_projection(input=out_mem)],
name = name, name = name,
bias_attr = rnn_bias_attr, bias_attr = rnn_bias_attr,
act = act, act = act,
layer_attr = rnn_layer_attr, layer_attr = rnn_layer_attr,
size = size) size = size)
return rnn_out return rnn_out
return recurrent_group(name='%s_recurrent_group' % name, return paddle.layer.recurrent_group(name='%s_recurrent_group' % name,
step=__rnn_step__, step=__rnn_step__,
reverse=reverse, reverse=reverse,
input=input) input=input)
...@@ -113,43 +90,52 @@ We also project the encoder vector to :code:`decoder_size` dimensional space, ge ...@@ -113,43 +90,52 @@ We also project the encoder vector to :code:`decoder_size` dimensional space, ge
.. code-block:: python .. code-block:: python
# Define the data layer of the source sentence. # Define the data layer of the source sentence.
src_word_id = data_layer(name='source_language_word', size=source_dict_dim) src_word_id = paddle.layer.data(
name='source_language_word',
type=paddle.data_type.integer_value_sequence(source_dict_dim))
# Calculate the word embedding of each word. # Calculate the word embedding of each word.
src_embedding = embedding_layer( src_embedding = paddle.layer.embedding(
input=src_word_id, input=src_word_id,
size=word_vector_dim, size=word_vector_dim,
param_attr=ParamAttr(name='_source_language_embedding')) param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
# Apply forward recurrent neural network. # Apply forward recurrent neural network.
src_forward = grumemory(input=src_embedding, size=encoder_size) src_forward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size)
# Apply backward recurrent neural network. reverse=True means backward recurrent neural network. # Apply backward recurrent neural network. reverse=True means backward recurrent neural network.
src_backward = grumemory(input=src_embedding, src_backward = paddle.networks.simple_gru(
size=encoder_size, input=src_embedding, size=encoder_size, reverse=True)
reverse=True)
# Mix the forward and backward parts of the recurrent neural network together. # Mix the forward and backward parts of the recurrent neural network together.
encoded_vector = concat_layer(input=[src_forward, src_backward]) encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
# Project encoding vector to decoder_size. # Project encoding vector to decoder_size.
encoder_proj = mixed_layer(input = [full_matrix_projection(encoded_vector)], encoded_proj = paddle.layer.mixed(
size = decoder_size) size=decoder_size,
input=paddle.layer.full_matrix_projection(encoded_vector))
# Compute the first instance of the backward RNN. # Compute the first instance of the backward RNN.
backward_first = first_seq(input=src_backward) backward_first = paddle.layer.first_seq(input=src_backward)
# Project the first instance of backward RNN to decoder size. # Project the first instance of backward RNN to decoder size.
decoder_boot = mixed_layer(input=[full_matrix_projection(backward_first)], size=decoder_size, act=TanhActivation()) decoder_boot = paddle.layer.mixed(
size=decoder_size,
act=paddle.activation.Tanh(),
input=paddle.layer.full_matrix_projection(backward_first))
The decoder uses :code:`recurrent_group` to define the recurrent neural network. The step and output functions are defined in :code:`gru_decoder_with_attention`: The decoder uses :code:`recurrent_group` to define the recurrent neural network. The step and output functions are defined in :code:`gru_decoder_with_attention`:
.. code-block:: python .. code-block:: python
group_inputs=[StaticInput(input=encoded_vector,is_seq=True), group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True)
StaticInput(input=encoded_proj,is_seq=True)] group_input2 = paddle.layer.StaticInput(input=encoded_proj, is_seq=True)
trg_embedding = embedding_layer( group_inputs = [group_input1, group_input2]
input=data_layer(name='target_language_word', trg_embedding = paddle.layer.embedding(
size=target_dict_dim), input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim, size=word_vector_dim,
param_attr=ParamAttr(name='_target_language_embedding')) param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
group_inputs.append(trg_embedding) group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training, # For decoder equipped with attention mechanism, in training,
...@@ -158,7 +144,8 @@ The decoder uses :code:`recurrent_group` to define the recurrent neural network. ...@@ -158,7 +144,8 @@ The decoder uses :code:`recurrent_group` to define the recurrent neural network.
# StaticInput means the same value is utilized at different time steps. # StaticInput means the same value is utilized at different time steps.
# Otherwise, it is a sequence input. Inputs at different time steps are different. # Otherwise, it is a sequence input. Inputs at different time steps are different.
# All sequence inputs should have the same length. # All sequence inputs should have the same length.
decoder = recurrent_group(name=decoder_group_name, decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention, step=gru_decoder_with_attention,
input=group_inputs) input=group_inputs)
...@@ -171,27 +158,32 @@ The implementation of the step function is listed as below. First, it defines th ...@@ -171,27 +158,32 @@ The implementation of the step function is listed as below. First, it defines th
# Defines the memory of the decoder. # Defines the memory of the decoder.
# The output of this memory is defined in gru_step. # The output of this memory is defined in gru_step.
# Notice that the name of gru_step should be the same as the name of this memory. # Notice that the name of gru_step should be the same as the name of this memory.
decoder_mem = memory(name='gru_decoder', decoder_mem = paddle.layer.memory(
size=decoder_size, name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
boot_layer=decoder_boot)
# Compute attention weighted encoder vector. # Compute attention weighted encoder vector.
context = simple_attention(encoded_sequence=enc_vec, context = paddle.networks.simple_attention(
encoded_sequence=enc_vec,
encoded_proj=enc_proj, encoded_proj=enc_proj,
decoder_state=decoder_mem) decoder_state=decoder_mem)
# Mix the current word embedding and the attention weighted encoder vector. # Mix the current word embedding and the attention weighted encoder vector.
decoder_inputs = mixed_layer(inputs = [full_matrix_projection(context), decoder_inputs = paddle.layer.mixed(
full_matrix_projection(current_word)], size=decoder_size * 3,
size = decoder_size * 3) input=[
paddle.layer.full_matrix_projection(input=context),
paddle.layer.full_matrix_projection(input=current_word)
])
# Define Gated recurrent unit recurrent neural network step function. # Define Gated recurrent unit recurrent neural network step function.
gru_step = gru_step_layer(name='gru_decoder', gru_step = paddle.layer.gru_step(
name='gru_decoder',
input=decoder_inputs, input=decoder_inputs,
output_mem=decoder_mem, output_mem=decoder_mem,
size=decoder_size) size=decoder_size)
# Defines the output function. # Defines the output function.
out = mixed_layer(input=[full_matrix_projection(input=gru_step)], out = paddle.layer.mixed(
size=target_dict_dim, size=target_dict_dim,
bias_attr=True, bias_attr=True,
act=SoftmaxActivation()) act=paddle.activation.Softmax(),
input=paddle.layer.full_matrix_projection(input=gru_step))
return out return out
...@@ -208,30 +200,26 @@ After training the model, we can use it to generate sequences. A common practice ...@@ -208,30 +200,26 @@ After training the model, we can use it to generate sequences. A common practice
- :code:`beam_size`: the beam size used in beam search. - :code:`beam_size`: the beam size used in beam search.
- :code:`max_length`: the maximum length of the generated sentences. - :code:`max_length`: the maximum length of the generated sentences.
* use :code:`seqtext_printer_evaluator` to print text according to index matrix and dictionary. This function needs to set:
- :code:`id_input`: the integer ID of the data, used to identify the corresponding output in the generated files.
- :code:`dict_file`: the dictionary file for converting word id to word.
- :code:`result_file`: the path of the generation result file.
The code is listed below: The code is listed below:
.. code-block:: python .. code-block:: python
group_inputs=[StaticInput(input=encoded_vector,is_seq=True), group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True)
StaticInput(input=encoded_proj,is_seq=True)] group_input2 = paddle.layer.StaticInput(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
# In generation, decoder predicts a next target word based on # In generation, decoder predicts a next target word based on
# the encoded source sequence and the last generated target word. # the encoded source sequence and the last generated target word.
# The encoded source sequence (encoder's output) must be specified by # The encoded source sequence (encoder's output) must be specified by
# StaticInput which is a read-only memory. # StaticInput which is a read-only memory.
# Here, GeneratedInputs automatically fetchs the last generated word, # Here, GeneratedInputs automatically fetchs the last generated word,
# which is initialized by a start mark, such as <s>. # which is initialized by a start mark, such as <s>.
trg_embedding = GeneratedInput( trg_embedding = paddle.layer.GeneratedInput(
size=target_dict_dim, size=target_dict_dim,
embedding_name='_target_language_embedding', embedding_name='_target_language_embedding',
embedding_size=word_vector_dim) embedding_size=word_vector_dim)
group_inputs.append(trg_embedding) group_inputs.append(trg_embedding)
beam_gen = beam_search(name=decoder_group_name, beam_gen = paddle.layer.beam_search(
name=decoder_group_name,
step=gru_decoder_with_attention, step=gru_decoder_with_attention,
input=group_inputs, input=group_inputs,
bos_id=0, # Beginnning token. bos_id=0, # Beginnning token.
...@@ -239,13 +227,9 @@ The code is listed below: ...@@ -239,13 +227,9 @@ The code is listed below:
beam_size=beam_size, beam_size=beam_size,
max_length=max_length) max_length=max_length)
seqtext_printer_evaluator(input=beam_gen, return beam_gen
id_input=data_layer(name="sent_id", size=1),
dict_file=trg_dict_path,
result_file=gen_trans_file)
outputs(beam_gen)
Notice that this generation technique is only useful for decoder like generation process. If you are working on sequence tagging tasks, please refer to :ref:`semantic_role_labeling` for more details. Notice that this generation technique is only useful for decoder like generation process. If you are working on sequence tagging tasks, please refer to `book/06.understand_sentiment <https://github.com/PaddlePaddle/book/tree/develop/06.understand_sentiment>`_ for more details.
The full configuration file is located at :code:`demo/seqToseq/seqToseq_net.py`. The full configuration file is located at `book/08.machine_translation/train.py <https://github.com/PaddlePaddle/book/blob/develop/08.machine_translation/train.py>`_ .
...@@ -84,7 +84,7 @@ no changes added to commit (use "git add" and/or "git commit -a") ...@@ -84,7 +84,7 @@ no changes added to commit (use "git add" and/or "git commit -a")
➜ docker build -t paddle:dev . ➜ docker build -t paddle:dev .
``` ```
随后可以用这个开发镜像开build PaddlePaddle的源码。比如如果要build一个不依赖GPU,但是支持AVX指令集,并且包括unit tests的PaddlePaddle,可以: 随后可以用这个开发镜像开build PaddlePaddle的源码。比如如果要build一个不依赖GPU,但是支持AVX指令集,并且包括unit tests的PaddlePaddle,可以:
```bash ```bash
➜ docker run -v $(pwd):/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=ON" paddle:dev ➜ docker run -v $(pwd):/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=ON" paddle:dev
......
...@@ -4,9 +4,9 @@ We sincerely appreciate your contributions. You can use fork and pull request ...@@ -4,9 +4,9 @@ We sincerely appreciate your contributions. You can use fork and pull request
workflow to merge your code. workflow to merge your code.
## Code Requirements ## Code Requirements
- Your code must be fully documented by - Your code comments must be fully documented by
[doxygen](http://www.stack.nl/~dimitri/doxygen/) style. [Doxygen](http://www.stack.nl/~dimitri/doxygen/) style.
- Make sure the compiler option WITH\_STYLE\_CHECK is on and the compiler - Make sure the compiler option `WITH_STYLE_CHECK` is on and the compiler
passes the code style check. passes the code style check.
- All code must have unit test. - All code must have unit test.
- Pass all unit tests. - Pass all unit tests.
...@@ -20,32 +20,25 @@ It's just that simple. ...@@ -20,32 +20,25 @@ It's just that simple.
## Clone ## Clone
Paddle is currently using [git-flow branching model](http://nvie.com/posts/a-successful-git-branching-model/). Clone remote repository.
The **develop** is the main branch, and other user's branches are feature branches.
Once you've created a fork, you can use your favorite git client to clone your ```bash
repo or just head straight to the command line: ➜ git clone https://github.com/USERNAME/Paddle
cd Paddle
```shell
# Clone your fork to your local machine
git clone --branch develop https://github.com/USERNAME/Paddle.git
```
If your repository doesn't contain **develop** branch, just create it by your own.
```shell
git clone https://github.com/USERNAME/Paddle.git Paddle
cd Paddle
git checkout -b develop # create develop branch.
git remote add upstream https://github.com/PaddlePaddle/Paddle.git # add upstream to baidu/Paddle
git pull upstream develop # update to upstream
``` ```
Then you can start to develop by making a local developement branch ## Create a local branch
Paddle is currently using [Git-flow branching model](http://nvie.com/posts/a-successful-git-branching-model/).
```shell All feature and bug fix development work should be done on a new branch, generally create new branch from `develop` branch .
git checkout -b MY_COOL_STUFF_BRANCH
```bash
➜ git checkout -b my-cool-stuff
``` ```
Before the checkout, you need to keep the current branch directory clean, otherwise the untracked file will be brought to the new branch, which can be inspected by `git status`.
## Using `pre-commit` hook ## Using `pre-commit` hook
Paddle developers use [pre-commit](http://pre-commit.com/) tool to manage git Paddle developers use [pre-commit](http://pre-commit.com/) tool to manage git
...@@ -58,89 +51,169 @@ To use [pre-commit](http://pre-commit.com/), you should install it by ...@@ -58,89 +51,169 @@ To use [pre-commit](http://pre-commit.com/), you should install it by
`pip install pre-commit`, and currently, Paddle uses `clang-format` to format `pip install pre-commit`, and currently, Paddle uses `clang-format` to format
c/cpp sources. Please make sure clang-format 3.8+ installed. c/cpp sources. Please make sure clang-format 3.8+ installed.
Then just run `pre-commit install` in your Paddle clone directory. When you Install and run it as follow:
commit your code, the pre-commit hook will check the local code if there is
```bash
➜ pip install pre-commit
➜ pre-commit install
```
When you commit your code, the pre-commit hook will check the local code if there is
anything not suitable to commit, and so on. anything not suitable to commit, and so on.
## Start to develop
In this tutorial, I delete a line in README.md and created a new file.
We can use `git status` to inspect the changes of current directory, `git diff` to see difference.
```bash
➜ git status
On branch test
Changes not staged for commit:
(use "git add <file>..." to update what will be committed)
(use "git checkout -- <file>..." to discard changes in working directory)
modified: README.md
Untracked files:
(use "git add <file>..." to include in what will be committed)
test
no changes added to commit (use "git add" and/or "git commit -a")
```
## Build and Test
We package PaddlePaddle's compile environment into a Docker image, called the develop image named `paddle:dev`, it contains all compiling tools that PaddlePaddle needs.
If you want to build the develop image, just run:
```bash
➜ docker build -t paddle:dev .
```
Then we can use the develop image to build PaddlePaddle source. For example:
```bash
➜ docker run -v $(pwd):/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=ON" paddle:dev
```
The above command will compile PaddlePaddle and create a Dockerfile for building production image. All the generated files are in the build directory. "WITH_GPU" controls if the generated production image supports GPU. "WITH_AVX" controls if the generated production image supports AVX. "WITH_TEST" controls if the unit test will be generated.
Then we can generate the production image by copying the compiled PaddlePaddle program into the image by
```bash
➜ docker build -t paddle:prod -f build/Dockerfile .
```
Run unit test finally:
```bash
➜ docker run -it -v $(pwd):/paddle paddle:dev bash -c "cd /paddle/build && ctest"
```
For more details, you can read [this doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/getstarted/build_and_install/docker_install_en.rst).
## Commit ## Commit
Commit your changes by following command lines: Next we cancel the changes to the README.md file and then commit our changes by following command lines:
```bash
➜ git checkout -- README.md
➜ git status
On branch test
Untracked files:
(use "git add <file>..." to include in what will be committed)
test
nothing added to commit but untracked files present (use "git add" to track)
➜ git add test
```
```shell We should write a description of each commit by `git commit` to allow others to know
# show the working tree status the changes in these files.
git status
# add modified files ```bash
git add xx ➜ git commit
env EDITOR=vim git commit # You can write your comments by vim/nano/emacs. CRLF end-lines remover...............................(no files to check)Skipped
yapf.................................................(no files to check)Skipped
Check for added large files..............................................Passed
Check for merge conflicts................................................Passed
Check for broken symlinks................................................Passed
Detect Private Key...................................(no files to check)Skipped
Fix End of Files.....................................(no files to check)Skipped
clang-formater.......................................(no files to check)Skipped
[my-cool-stuff c703c041] add test file
1 file changed, 0 insertions(+), 0 deletions(-)
create mode 100644 233
``` ```
The first line of commit infomation is the title. The second and later lines
are the details if any.
## Keeping Fork Up to Date ## Keeping Fork Up to Date
Before pull your request, you should sync your code from the latest PaddlePaddle. Before pull your request, you should sync your code from the latest PaddlePaddle.
To do this, you'll need to add a remote at first: To do this, you'll need to add a remote at first:
```shell ```bash
# see the current configured remote repository ➜ git remote add upstream https://github.com/PaddlePaddle/Paddle
git remote -v ➜ git remote
# add upstream repository origin
git remote add upstream https://github.com/PaddlePaddle/Paddle.git upstream
# verify the new upstream
git remote -v
``` ```
Update your fork with the latest upstream changes: Update your fork with the latest upstream changes:
```shell ```bash
git pull --rebase upstream develop ➜ git fetch upstream
➜ git pull upstream develop
``` ```
If there are no unique commits locally, git will simply perform a fast-forward.
However, if you have been making changes (in the vast majority of cases you
probably shouldn't be), you may have to deal with conflicts.
Now, your local master branch is up-to-date with everything modified upstream. Now, your local master branch is up-to-date with everything modified upstream.
## Push to GitHub ## Push to GitHub
```shell ```bash
# push to your repository in Github # push to your repository in Github
git push -u origin MY_COOL_STUFF_BRANCH # create remote branch MY_COOL_STUFF_BRANCH to origin. ➜ git push origin my-cool-stuff
``` ```
## Pull Request ## Create an issue and a Pull Request
Create an Issue to describe the problem and record its number.
Go to the page for your fork on GitHub, select your development branch, Go to the page for your fork on GitHub, select your development branch,
and click the **pull request button**. and click the `New pull request`.
## Update your pull request with the lastest version <img width="295" alt="screen shot 2017-04-26 at 9 09 28 pm" src="https://cloud.githubusercontent.com/assets/11692045/25436054/a6d98c66-2ac4-11e7-9cb1-18dd13150230.png">
During the code review, your pull request may become stale because new commits in Then select the target branch:
baidu/Paddle. GitHub allows autmotic update if there is no conflict. You can do this
by clicking the "Update Branch" button in your pull request page. However, in the case <img width="750" alt="screen shot 2017-04-26 at 9 11 52 pm" src="https://cloud.githubusercontent.com/assets/11692045/25436139/f83b1e6c-2ac4-11e7-8c0e-add499023c46.png">
of conflict, you need to do the update manually. You need to do the following on
your local repository: We can add `resolve #Issue number` in PR description to close the issue automatically after the PR is merge. More details in <https://help.github.com/articles/closing-issues-via-commit-messages/>.
```shell
git checkout MY_COOL_STUFF_BRANCH Then wait for review, if there need to modify, refer to the above steps to update the corresponding origin branch.
git pull upstream develop
# You may need to resolve the conflict according to the git prompt. ## Delete origin branch
# Make and test your code.
git push origin MY_COOL_STUFF_BRANCH After the PR is merge into the main repository, we can delete the remote branch on the PR page.
<img width="775" alt="screen shot 2017-04-26 at 9 18 24 pm" src="https://cloud.githubusercontent.com/assets/11692045/25436457/e4cdd472-2ac5-11e7-9272-badc76c4a23e.png">
Or just run:
```bash
➜ git push origin :my-cool-stuff
``` ```
Now your Pull Request is updated with the latest version.
## Revise your pull request ## Delete local branch
When you revise your pull request according to reviewer's comments, please use 'git commit' instead of 'git commit --amend' to commit your changes so that the reviewers can see the difference between the new pull requrest and the old pull request. Finally, we delete local branch:
The possible commands are ```bash
➜ git checkout develop
```shell # delete my-cool-stuff branch
git checkout MY_COOL_STUFF_BRANCH ➜ git branch -D my-cool-stuff
git pull upstream develop # update local to newest code base.
# May be some conflicts will occured.
# And develop your cool stuff
env EDITOR=vim git commit # add your revise log
git push origin MY_COOL_STUFF_BRANCH
``` ```
...@@ -17,7 +17,7 @@ function(GO_LIBRARY NAME BUILD_TYPE) ...@@ -17,7 +17,7 @@ function(GO_LIBRARY NAME BUILD_TYPE)
endif() endif()
file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go")
file(RELATIVE_PATH rel ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) file(RELATIVE_PATH rel ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
# find Paddle directory. # find Paddle directory.
get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
...@@ -26,25 +26,23 @@ function(GO_LIBRARY NAME BUILD_TYPE) ...@@ -26,25 +26,23 @@ function(GO_LIBRARY NAME BUILD_TYPE)
# automatically get all dependencies specified in the source code # automatically get all dependencies specified in the source code
# for given target. # for given target.
add_custom_target(goGet env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ${rel}/...) add_custom_target(${NAME}_goGet env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ${rel}/...)
# make a symlink that references Paddle inside $GOPATH, so go get # make a symlink that references Paddle inside $GOPATH, so go get
# will use the local changes in Paddle rather than checkout Paddle # will use the local changes in Paddle rather than checkout Paddle
# in github. # in github.
add_custom_target(copyPaddle add_custom_target(${NAME}_copyPaddle
COMMAND ln -sf ${PADDLE_DIR} ${PADDLE_IN_GOPATH}) COMMAND rm -rf ${PADDLE_IN_GOPATH}/Paddle
add_dependencies(goGet copyPaddle) COMMAND ln -sf ${PADDLE_DIR} ${PADDLE_IN_GOPATH}/Paddle)
add_dependencies(${NAME}_goGet ${NAME}_copyPaddle)
add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE}
-o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}"
${CMAKE_GO_FLAGS} ${GO_SOURCE} ${CMAKE_GO_FLAGS} ${GO_SOURCE}
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
add_custom_target(${NAME} ALL DEPENDS ${OUTPUT_DIR}/.timestamp ${ARGN}) add_custom_target(${NAME} ALL DEPENDS ${OUTPUT_DIR}/.timestamp ${ARGN})
add_dependencies(${NAME} goGet) add_dependencies(${NAME} ${NAME}_goGet)
if(NOT BUILD_TYPE STREQUAL "STATIC")
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME} DESTINATION bin)
endif()
endfunction(GO_LIBRARY) endfunction(GO_LIBRARY)
package main package main
import ( import (
"fmt"
"net" "net"
"net/http" "net/http"
"net/rpc" "net/rpc"
"os"
"path/filepath"
"strconv" "strconv"
"strings"
"time" "time"
"github.com/namsral/flag" "github.com/namsral/flag"
"github.com/PaddlePaddle/Paddle/go/master" "github.com/PaddlePaddle/Paddle/go/master"
"github.com/PaddlePaddle/recordio"
) )
func main() { func main() {
port := flag.Int("port", 8080, "port of the master server.") port := flag.Int("port", 8080, "port of the master server.")
dataset := flag.String("training_dataset", "", "dataset: comma separated path to RecordIO paths, supports golb patterns.")
faultTolerance := flag.Bool("fault_tolerance", false, "enable fault tolerance (requires etcd).") faultTolerance := flag.Bool("fault_tolerance", false, "enable fault tolerance (requires etcd).")
taskTimeoutDur := flag.Duration("task_timout_dur", 20*time.Minute, "task timout duration.") taskTimeoutDur := flag.Duration("task_timout_dur", 20*time.Minute, "task timout duration.")
taskTimeoutMax := flag.Int("task_timeout_max", 3, "max timtout count for each task before it being declared failed task.") taskTimeoutMax := flag.Int("task_timeout_max", 3, "max timtout count for each task before it being declared failed task.")
chunkPerTask := flag.Int("chunk_per_task", 10, "chunk per task.") chunkPerTask := flag.Int("chunk_per_task", 10, "chunk per task.")
flag.Parse() flag.Parse()
if *dataset == "" {
panic("no dataset specified.")
}
if *faultTolerance { if *faultTolerance {
panic("fault tolernance not implemented.") panic("fault tolernance not implemented.")
}
var chunks []master.Chunk
var paths []string
ss := strings.Split(*dataset, ",")
fmt.Println(ss)
for _, s := range ss {
match, err := filepath.Glob(s)
if err != nil {
panic(err)
}
paths = append(paths, match...)
}
if len(paths) == 0 {
panic("no valid datset specified.")
}
idx := 0
for _, path := range paths {
f, err := os.Open(path)
if err != nil {
panic(err)
}
index, err := recordio.LoadIndex(f)
if err != nil {
panic(err)
}
f.Close()
count := index.NumChunks()
for i := 0; i < count; i++ {
chunk := master.Chunk{
Idx: idx,
Path: path,
Index: *index.ChunkIndex(i),
}
chunks = append(chunks, chunk)
}
} }
s := master.NewService(chunks, *chunkPerTask, *taskTimeoutDur, *taskTimeoutMax) s := master.NewService(*chunkPerTask, *taskTimeoutDur, *taskTimeoutMax)
err := rpc.Register(s) err := rpc.Register(s)
if err != nil { if err != nil {
panic(err) panic(err)
......
...@@ -4,6 +4,8 @@ import ( ...@@ -4,6 +4,8 @@ import (
"errors" "errors"
"net/rpc" "net/rpc"
"sync" "sync"
log "github.com/sirupsen/logrus"
) )
// TODO(helin): add TCP re-connect logic // TODO(helin): add TCP re-connect logic
...@@ -21,6 +23,18 @@ func New() *Conn { ...@@ -21,6 +23,18 @@ func New() *Conn {
return c return c
} }
// Close closes the connection.
func (c *Conn) Close() error {
c.mu.Lock()
defer c.mu.Unlock()
if c.client == nil {
return nil
}
return c.client.Close()
}
// Connect connects the connection to a address. // Connect connects the connection to a address.
func (c *Conn) Connect(addr string) error { func (c *Conn) Connect(addr string) error {
c.mu.Lock() c.mu.Lock()
...@@ -50,12 +64,20 @@ func (c *Conn) Connect(addr string) error { ...@@ -50,12 +64,20 @@ func (c *Conn) Connect(addr string) error {
c.waitConn = nil c.waitConn = nil
} }
} else { } else {
err := client.Close()
if err != nil {
log.Errorln(err)
}
return errors.New("client already set from a concurrent goroutine") return errors.New("client already set from a concurrent goroutine")
} }
return nil return nil
} }
// TODO(helin): refactor Call to be able to perform given retry
// policy.
// Call make a RPC call. // Call make a RPC call.
// //
// Call will be blocked until the connection to remote RPC service // Call will be blocked until the connection to remote RPC service
......
cmake_minimum_required(VERSION 3.0)
get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PARENT_DIR}/cmake")
project(cxx_go C Go)
include(golang)
include(flags)
set(MASTER_LIB_NAME "paddle_master")
go_library(${MASTER_LIB_NAME} SHARED)
if(PROJ_ROOT)
add_custom_command(OUTPUT ${PROJ_ROOT}/python/paddle/v2/master/lib${MASTER_LIB_NAME}.so
COMMAND rm ${CMAKE_CURRENT_BINARY_DIR}/lib${MASTER_LIB_NAME}.h
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/lib${MASTER_LIB_NAME}.so ${PROJ_ROOT}/python/paddle/v2/master/
DEPENDS ${MASTER_LIB_NAME})
add_custom_target(paddle_master_shared ALL DEPENDS ${PROJ_ROOT}/python/paddle/v2/master/lib${MASTER_LIB_NAME}.so)
endif(PROJ_ROOT)
package main
/*
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#define PADDLE_MASTER_OK 0
#define PADDLE_MASTER_ERROR -1
typedef int paddle_master_client;
*/
import "C"
import (
"sync"
"unsafe"
"github.com/PaddlePaddle/Paddle/go/master"
log "github.com/sirupsen/logrus"
)
var nullPtr = unsafe.Pointer(uintptr(0))
var mu sync.Mutex
var handleMap = make(map[C.paddle_master_client]*master.Client)
var curHandle C.paddle_master_client
func add(c *master.Client) C.paddle_master_client {
mu.Lock()
defer mu.Unlock()
client := curHandle
curHandle++
handleMap[client] = c
return client
}
func get(client C.paddle_master_client) *master.Client {
mu.Lock()
defer mu.Unlock()
return handleMap[client]
}
func remove(client C.paddle_master_client) *master.Client {
mu.Lock()
defer mu.Unlock()
h := handleMap[client]
delete(handleMap, client)
return h
}
type addresser string
func (a addresser) Address() string {
return string(a)
}
//export paddle_new_master_client
func paddle_new_master_client(addr *C.char, bufSize int) C.paddle_master_client {
a := C.GoString(addr)
c := master.NewClient(addresser(a), bufSize)
return add(c)
}
//export paddle_release_master_client
func paddle_release_master_client(client C.paddle_master_client) {
remove(client)
}
//export paddle_set_dataset
func paddle_set_dataset(client C.paddle_master_client, path **C.char, size C.int) C.int {
c := get(client)
var paths []string
for i := 0; i < int(size); i++ {
ptr := (**C.char)(unsafe.Pointer(uintptr(unsafe.Pointer(path)) + uintptr(i)*unsafe.Sizeof(*path)))
str := C.GoString(*ptr)
paths = append(paths, str)
}
err := c.SetDataset(paths)
if err != nil {
log.Errorln(err)
return C.PADDLE_MASTER_ERROR
}
return C.PADDLE_MASTER_OK
}
//export paddle_next_record
func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int {
c := get(client)
r := c.NextRecord()
if len(r) == 0 {
*record = (*C.uchar)(nullPtr)
return 0
}
size := C.size_t(len(r))
*record = (*C.uchar)(C.malloc(size))
C.memcpy(unsafe.Pointer(*record), unsafe.Pointer(&r[0]), size)
return C.int(size)
}
//export mem_free
func mem_free(p unsafe.Pointer) {
// "free" may be a better name for this function, but doing so
// will cause calling any function of this library from Python
// ctypes hanging.
C.free(p)
}
func main() {}
package master
import (
"os"
"time"
"github.com/PaddlePaddle/Paddle/go/connection"
"github.com/PaddlePaddle/recordio"
log "github.com/sirupsen/logrus"
)
// Addresser provide the address of the master server.
type Addresser interface {
Address() string
}
// Client is the client of the master server.
type Client struct {
conn *connection.Conn
ch chan []byte
}
// NewClient creates a new Client.
//
// bufSize is the record buffer size. NextRecord will read from this
// buffer.
func NewClient(addr Addresser, bufSize int) *Client {
c := &Client{}
c.conn = connection.New()
c.ch = make(chan []byte, bufSize)
go c.monitorMaster(addr)
go c.getRecords()
return c
}
func (c *Client) getRecords() {
for {
t, err := c.getTask()
if err != nil {
// TODO(helin): wait before move on with next
// getTask call.
log.Errorln(err)
continue
}
for _, chunk := range t.Chunks {
f, err := os.Open(chunk.Path)
if err != nil {
log.Errorln(err)
continue
}
s := recordio.NewRangeScanner(f, &chunk.Index, -1, -1)
for s.Scan() {
c.ch <- s.Record()
}
if s.Err() != nil {
log.Errorln(err, chunk.Path)
}
err = f.Close()
if err != nil {
log.Errorln(err)
}
}
// We treat a task as finished whenever the last data
// instance of the task is read. This is not exactly
// correct, but a reasonable approximation.
c.taskFinished(t.ID)
}
}
func (c *Client) monitorMaster(addr Addresser) {
lastMaster := ""
monitor := func() {
// get the lastest address of the master server,
// connect to the new address once address changed.
curMaster := addr.Address()
if curMaster != lastMaster {
if curMaster == "" {
err := c.conn.Close()
if err != nil {
log.Errorln(err)
}
} else {
err := c.conn.Connect(curMaster)
if err != nil {
log.Errorln(err)
// connect to addr failed, set
// to last known addr in order
// to retry next time.
curMaster = lastMaster
}
}
}
lastMaster = curMaster
}
monitor()
ticker := time.NewTicker(10 * time.Second)
for _ = range ticker.C {
monitor()
}
}
// SetDataset set dataset for the master server to dispatch.
//
// SetDataset can be call multiple times from different nodes. But
// only the first call will be honored.
func (c *Client) SetDataset(globPaths []string) error {
return c.conn.Call("Service.SetDataset", globPaths, nil)
}
// getTask gets a new task from the master server.
func (c *Client) getTask() (Task, error) {
var t Task
err := c.conn.Call("Service.GetTask", 0, &t)
return t, err
}
// TaskFinished tells the master server a task is finished.
func (c *Client) taskFinished(taskID int) error {
return c.conn.Call("Service.TaskFinished", taskID, nil)
}
// NextRecord returns next record in the dataset.
//
// NextRecord will block until the next record is available. It is
// thread-safe.
func (c *Client) NextRecord() []byte {
return <-c.ch
}
package master
import (
"fmt"
"net"
"net/http"
"net/rpc"
"os"
"strconv"
"strings"
"testing"
"time"
log "github.com/sirupsen/logrus"
"github.com/PaddlePaddle/Paddle/go/connection"
"github.com/PaddlePaddle/recordio"
)
const (
totalTask = 20
chunkPerTask = 10
)
func init() {
log.SetLevel(log.ErrorLevel)
}
type TestAddresser string
func (a TestAddresser) Address() string {
return string(a)
}
func TestGetFinishTask(t *testing.T) {
const path = "/tmp/master_client_test_0"
l, err := net.Listen("tcp", ":0")
if err != nil {
panic(err)
}
ss := strings.Split(l.Addr().String(), ":")
p, err := strconv.Atoi(ss[len(ss)-1])
if err != nil {
panic(err)
}
go func(l net.Listener) {
s := NewService(chunkPerTask, time.Second, 1)
server := rpc.NewServer()
err := server.Register(s)
if err != nil {
panic(err)
}
mux := http.NewServeMux()
mux.Handle(rpc.DefaultRPCPath, server)
err = http.Serve(l, mux)
if err != nil {
panic(err)
}
}(l)
f, err := os.Create(path)
if err != nil {
panic(err)
}
for i := 0; i < totalTask*chunkPerTask; i++ {
w := recordio.NewWriter(f, -1, -1)
w.Write(nil)
// call Close to force RecordIO writing a chunk.
w.Close()
}
f.Close()
// Manually intialize client to avoid calling c.getRecords()
c := &Client{}
c.conn = connection.New()
go c.monitorMaster(TestAddresser(fmt.Sprintf(":%d", p)))
c.SetDataset([]string{path})
checkOnePass := func(i int) {
var tasks []Task
for idx := 0; idx < totalTask; idx++ {
task, err := c.getTask()
if err != nil {
t.Fatalf("Error: %v, pass: %d\n", err, i)
}
tasks = append(tasks, task)
}
_, err = c.getTask()
if err == nil {
t.Fatalf("Should get error, pass: %d\n", i)
}
err = c.taskFinished(tasks[0].ID)
if err != nil {
t.Fatalf("Error: %v, pass: %d\n", err, i)
}
tasks = tasks[1:]
task, err := c.getTask()
if err != nil {
t.Fatal(err)
}
tasks = append(tasks, task)
for _, task := range tasks {
err = c.taskFinished(task.ID)
if err != nil {
t.Fatalf("Error: %v, pass: %d\n", err, i)
}
}
}
for i := 0; i < 10; i++ {
checkOnePass(i)
}
}
package master_test
import (
"fmt"
"net"
"net/http"
"net/rpc"
"os"
"strconv"
"strings"
"testing"
"time"
"github.com/PaddlePaddle/Paddle/go/master"
"github.com/PaddlePaddle/recordio"
)
func TestNextRecord(t *testing.T) {
const (
path = "/tmp/master_client_TestFull"
total = 50
)
l, err := net.Listen("tcp", ":0")
if err != nil {
panic(err)
}
ss := strings.Split(l.Addr().String(), ":")
p, err := strconv.Atoi(ss[len(ss)-1])
if err != nil {
panic(err)
}
go func(l net.Listener) {
s := master.NewService(10, time.Second, 1)
server := rpc.NewServer()
err := server.Register(s)
if err != nil {
panic(err)
}
mux := http.NewServeMux()
mux.Handle(rpc.DefaultRPCPath, server)
err = http.Serve(l, mux)
if err != nil {
panic(err)
}
}(l)
f, err := os.Create(path)
if err != nil {
panic(err)
}
w := recordio.NewWriter(f, -1, -1)
for i := 0; i < total; i++ {
w.Write([]byte{byte(i)})
}
w.Close()
f.Close()
c := master.NewClient(master.TestAddresser(fmt.Sprintf(":%d", p)), 10)
c.SetDataset([]string{path})
for pass := 0; pass < 50; pass++ {
received := make(map[byte]bool)
for i := 0; i < total; i++ {
r := c.NextRecord()
if len(r) != 1 {
t.Fatal("Length should be 1.", r)
}
if received[r[0]] {
t.Fatal("Received duplicate.", received, r)
}
received[r[0]] = true
}
}
}
...@@ -2,29 +2,25 @@ package master ...@@ -2,29 +2,25 @@ package master
import ( import (
"errors" "errors"
"log" "os"
"path/filepath"
"sync" "sync"
"time" "time"
"github.com/PaddlePaddle/recordio" log "github.com/sirupsen/logrus"
)
const (
targetTaskCount = 300
)
// errors "github.com/PaddlePaddle/recordio"
var (
ErrNoMoreTask = errors.New("no more task for current pass")
ErrPendingTaskNotFound = errors.New("pending task not found")
) )
// Service is the master server service. // Service is the master server service.
type Service struct { type Service struct {
chunksPerTask int
timeoutDur time.Duration timeoutDur time.Duration
timeoutMax int timeoutMax int
ready chan struct{}
mu sync.Mutex mu sync.Mutex
initDone bool
taskQueues taskQueues taskQueues taskQueues
} }
...@@ -55,7 +51,6 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry { ...@@ -55,7 +51,6 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry {
if len(cur.Task.Chunks) > 0 { if len(cur.Task.Chunks) > 0 {
cur.Task.ID = id cur.Task.ID = id
id++
result = append(result, cur) result = append(result, cur)
} }
...@@ -63,21 +58,21 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry { ...@@ -63,21 +58,21 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry {
} }
// NewService creates a new service. // NewService creates a new service.
func NewService(chunks []Chunk, chunksPerTask int, timeoutDur time.Duration, timeoutMax int) *Service { func NewService(chunksPerTask int, timeoutDur time.Duration, timeoutMax int) *Service {
s := &Service{} s := &Service{}
s.chunksPerTask = chunksPerTask
s.timeoutDur = timeoutDur s.timeoutDur = timeoutDur
s.timeoutMax = timeoutMax s.timeoutMax = timeoutMax
s.taskQueues = taskQueues{} s.taskQueues = taskQueues{}
s.taskQueues.Pending = make(map[int]taskEntry) s.taskQueues.Pending = make(map[int]taskEntry)
s.taskQueues.Todo = partition(chunks, chunksPerTask) s.ready = make(chan struct{})
return s return s
} }
// Chunk is a chunk of data consisted of several data instances. // Chunk is a chunk of data consisted of several data instances.
type Chunk struct { type Chunk struct {
Idx int // index of the chunk within the file
Path string Path string
Index recordio.Index // block index Index recordio.Index // chunk index
} }
// Task is the basic unit of data instances assigned to trainers. // Task is the basic unit of data instances assigned to trainers.
...@@ -105,25 +100,87 @@ func (s *Service) snapshot() error { ...@@ -105,25 +100,87 @@ func (s *Service) snapshot() error {
return nil return nil
} }
// GetTask gets a new task from the service. func readChunks(globPaths []string) ([]Chunk, error) {
func (s *Service) GetTask(dummy int, task *Task) error { var chunks []Chunk
var paths []string
for _, s := range globPaths {
match, err := filepath.Glob(s)
if err != nil {
return nil, err
}
paths = append(paths, match...)
}
if len(paths) == 0 {
return nil, errors.New("no valid dataset specified")
}
for _, path := range paths {
f, err := os.Open(path)
if err != nil {
return nil, err
}
index, err := recordio.LoadIndex(f)
if err != nil {
return nil, err
}
err = f.Close()
if err != nil {
return nil, err
}
count := index.NumChunks()
for i := 0; i < count; i++ {
chunk := Chunk{
Path: path,
Index: *index.ChunkIndex(i),
}
chunks = append(chunks, chunk)
}
}
return chunks, nil
}
// SetDataset sets dataset to dispatch for the master server.
//
// SetDataset can be call multiple times. But only the first call will
// be honored.
func (s *Service) SetDataset(globPaths []string, dummy *int) error {
if len(globPaths) == 0 {
return errors.New("no dataset specified")
}
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
if s.initDone {
// Already initialized. All trainer will call
// SetDataset, but we only handle the first one. Treat
// other calls as successful but do nothing.
return nil
}
if len(s.taskQueues.Todo) == 0 { chunks, err := readChunks(globPaths)
return ErrNoMoreTask if err != nil {
return err
} }
t := s.taskQueues.Todo[0] s.taskQueues.Todo = partition(chunks, s.chunksPerTask)
t.Epoch++
s.taskQueues.Todo = s.taskQueues.Todo[1:] err = s.snapshot()
s.taskQueues.Pending[t.Task.ID] = t
err := s.snapshot()
if err != nil { if err != nil {
log.Errorln(err)
return err return err
} }
time.AfterFunc(s.timeoutDur, func(taskID int, epoch int) func() { close(s.ready)
s.initDone = true
return nil
}
func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() {
return func() { return func() {
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
...@@ -142,7 +199,7 @@ func (s *Service) GetTask(dummy int, task *Task) error { ...@@ -142,7 +199,7 @@ func (s *Service) GetTask(dummy int, task *Task) error {
defer func() { defer func() {
err := s.snapshot() err := s.snapshot()
if err != nil { if err != nil {
log.Println(err) log.Errorln(err)
} }
}() }()
...@@ -150,29 +207,108 @@ func (s *Service) GetTask(dummy int, task *Task) error { ...@@ -150,29 +207,108 @@ func (s *Service) GetTask(dummy int, task *Task) error {
t.NumTimeout++ t.NumTimeout++
if t.NumTimeout > s.timeoutMax { if t.NumTimeout > s.timeoutMax {
log.Warningf("Task %v timed out %d times, discard.\n", t.Task, t.NumTimeout)
s.taskQueues.Failed = append(s.taskQueues.Failed, t.Task) s.taskQueues.Failed = append(s.taskQueues.Failed, t.Task)
return return
} }
log.Warningf("Task %v timed out %d times, retry.\n", t.Task, t.NumTimeout)
s.taskQueues.Todo = append(s.taskQueues.Todo, t) s.taskQueues.Todo = append(s.taskQueues.Todo, t)
} }
}(t.Task.ID, t.Epoch)) }
// must be called with lock held.
func (s *Service) logFields() log.Fields {
return log.Fields{
"todoLen": len(s.taskQueues.Todo),
"pendingLen": len(s.taskQueues.Pending),
"doneLen": len(s.taskQueues.Done),
"failedLen": len(s.taskQueues.Failed),
}
}
// GetTask gets a new task from the service.
func (s *Service) GetTask(dummy int, task *Task) error {
select {
case <-s.ready:
}
s.mu.Lock()
defer s.mu.Unlock()
if len(s.taskQueues.Todo) == 0 {
if len(s.taskQueues.Done) == 0 {
if len(s.taskQueues.Pending) == 0 {
err := errors.New("all task failed")
log.WithFields(s.logFields()).Warningln("All tasks failed.")
return err
}
// TODO(helin): client need to retry in this
// error case. Gotcha: RPC client can't
// compare returned error with predefined
// errors like io.EOF, because the error
// instance deserialized from RPC is a
// different instance than the error defined
// in package. So we need to figure out a way
// for client to check this error correctly.
err := errors.New("no more available task")
log.WithFields(s.logFields()).Warningln("No more available task.")
return err
}
s.taskQueues.Todo = s.taskQueues.Done
s.taskQueues.Done = nil
log.WithFields(s.logFields()).Infoln("No more todo task, but trainer is requesting task to do. Move all done task to todo.")
}
t := s.taskQueues.Todo[0]
t.Epoch++
s.taskQueues.Todo = s.taskQueues.Todo[1:]
s.taskQueues.Pending[t.Task.ID] = t
err := s.snapshot()
if err != nil {
return err
}
*task = t.Task
log.WithFields(s.logFields()).Infof("Task #%d dispatched.", task.ID)
time.AfterFunc(s.timeoutDur, s.checkTimeoutFunc(t.Task.ID, t.Epoch))
return nil return nil
} }
// TaskFinished tell the service that a task is finished. // TaskFinished tell the service that a task is finished.
func (s *Service) TaskFinished(taskID int, dummy *int) error { func (s *Service) TaskFinished(taskID int, dummy *int) error {
select {
case <-s.ready:
}
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
t, ok := s.taskQueues.Pending[taskID] t, ok := s.taskQueues.Pending[taskID]
if !ok { if !ok {
return ErrPendingTaskNotFound err := errors.New("pending task not found")
log.WithFields(s.logFields()).Warningln("Pending task #%d not found.", taskID)
return err
} }
// task finished, reset timeout // task finished, reset timeout
t.NumTimeout = 0 t.NumTimeout = 0
s.taskQueues.Done = append(s.taskQueues.Done, t) s.taskQueues.Done = append(s.taskQueues.Done, t)
delete(s.taskQueues.Pending, taskID) delete(s.taskQueues.Pending, taskID)
return s.snapshot()
log.WithFields(s.logFields()).Infof("Task #%d finished.", taskID)
if len(s.taskQueues.Pending) == 0 && len(s.taskQueues.Todo) == 0 {
log.WithFields(s.logFields()).Infoln("No more todo and pending task, start a new pass.")
s.taskQueues.Todo = append(s.taskQueues.Todo, s.taskQueues.Done...)
s.taskQueues.Done = nil
}
err := s.snapshot()
if err != nil {
log.Errorln(err)
}
return err
} }
...@@ -9,5 +9,15 @@ project(cxx_go C Go) ...@@ -9,5 +9,15 @@ project(cxx_go C Go)
include(golang) include(golang)
include(flags) include(flags)
go_library(client STATIC) go_library(paddle_pserver_cclient STATIC)
if(PROJ_ROOT)
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/trainer/libpaddle_pserver_cclient.a
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/libpaddle_pserver_cclient.h ${PROJ_ROOT}/paddle/trainer/
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/libpaddle_pserver_cclient.a ${PROJ_ROOT}/paddle/trainer/
WORKING_DIRECTORY ${PROJ_ROOT}/paddle
DEPENDS paddle_pserver_cclient)
add_custom_target(paddle_pserver_cclient_lib ALL DEPENDS ${PROJ_ROOT}/paddle/trainer/libpaddle_pserver_cclient.a)
endif(PROJ_ROOT)
add_subdirectory(test) add_subdirectory(test)
package main package main
/* /*
#include <stdlib.h>
#include <string.h> #include <string.h>
typedef enum { typedef enum {
PADDLE_ELEMENT_TYPE_INT32 = 0, PADDLE_ELEMENT_TYPE_INT32 = 0,
...@@ -19,39 +18,27 @@ typedef struct { ...@@ -19,39 +18,27 @@ typedef struct {
int content_len; int content_len;
} paddle_parameter, paddle_gradient; } paddle_parameter, paddle_gradient;
static inline void paddle_release_param(paddle_parameter* param) { typedef int paddle_pserver_client;
if (param != NULL) { #define PSERVER_ERROR -1
if (param->name != NULL) { #define PSERVER_OK 0
free(param->name);
}
if (param->content != NULL) {
free(param->content);
}
free(param);
}
}
typedef int client;
*/ */
import "C" import "C"
import ( import (
"log"
"strings" "strings"
"sync" "sync"
"unsafe" "unsafe"
"github.com/PaddlePaddle/Paddle/go/pserver" "github.com/PaddlePaddle/Paddle/go/pserver"
log "github.com/sirupsen/logrus"
) )
var nullPtr = unsafe.Pointer(uintptr(0)) var nullPtr = unsafe.Pointer(uintptr(0))
var mu sync.Mutex var mu sync.Mutex
var handleMap = make(map[C.client]*pserver.Client) var handleMap = make(map[C.paddle_pserver_client]*pserver.Client)
var curHandle C.client var curHandle C.paddle_pserver_client
func add(c *pserver.Client) C.client { func add(c *pserver.Client) C.paddle_pserver_client {
mu.Lock() mu.Lock()
defer mu.Unlock() defer mu.Unlock()
client := curHandle client := curHandle
...@@ -60,13 +47,13 @@ func add(c *pserver.Client) C.client { ...@@ -60,13 +47,13 @@ func add(c *pserver.Client) C.client {
return client return client
} }
func get(client C.client) *pserver.Client { func get(client C.paddle_pserver_client) *pserver.Client {
mu.Lock() mu.Lock()
defer mu.Unlock() defer mu.Unlock()
return handleMap[client] return handleMap[client]
} }
func remove(client C.client) *pserver.Client { func remove(client C.paddle_pserver_client) *pserver.Client {
mu.Lock() mu.Lock()
defer mu.Unlock() defer mu.Unlock()
h := handleMap[client] h := handleMap[client]
...@@ -100,7 +87,7 @@ func (l lister) List() []pserver.Server { ...@@ -100,7 +87,7 @@ func (l lister) List() []pserver.Server {
} }
//export paddle_new_pserver_client //export paddle_new_pserver_client
func paddle_new_pserver_client(addrs *C.char, selected int) C.client { func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_client {
a := C.GoString(addrs) a := C.GoString(addrs)
as := strings.Split(a, ",") as := strings.Split(a, ",")
servers := make([]pserver.Server, len(as)) servers := make([]pserver.Server, len(as))
...@@ -113,27 +100,27 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.client { ...@@ -113,27 +100,27 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.client {
} }
//export paddle_new_etcd_pserver_client //export paddle_new_etcd_pserver_client
func paddle_new_etcd_pserver_client(etcd_addr *C.char) C.client { func paddle_new_etcd_pserver_client(etcd_addr *C.char) C.paddle_pserver_client {
// TODO(helin): fault tolerant pserver client using etcd. // TODO(helin): fault tolerant pserver client using etcd.
panic("not implemented.") panic("not implemented.")
} }
//export paddle_pserver_client_release //export paddle_pserver_client_release
func paddle_pserver_client_release(client C.client) { func paddle_pserver_client_release(client C.paddle_pserver_client) {
remove(client) remove(client)
} }
//export paddle_begin_init_params //export paddle_begin_init_params
func paddle_begin_init_params(client C.client) C.int { func paddle_begin_init_params(client C.paddle_pserver_client) C.int {
c := get(client) c := get(client)
if selected := c.BeginInitParams(); selected { if selected := c.BeginInitParams(); selected {
return 1 return 1
} }
return 0 return C.PSERVER_OK
} }
//export paddle_init_param //export paddle_init_param
func paddle_init_param(client C.client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int { func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int {
et := pserver.ElementType(param.element_type) et := pserver.ElementType(param.element_type)
name := C.GoString(param.name) name := C.GoString(param.name)
content := cArrayToSlice(unsafe.Pointer(param.content), int(param.content_len)) content := cArrayToSlice(unsafe.Pointer(param.content), int(param.content_len))
...@@ -143,31 +130,41 @@ func paddle_init_param(client C.client, param C.paddle_parameter, param_config u ...@@ -143,31 +130,41 @@ func paddle_init_param(client C.client, param C.paddle_parameter, param_config u
} }
c := get(client) c := get(client)
err := c.InitParam(pc) err := c.InitParam(pc)
if err != nil { if err != nil {
log.Println(err) if err.Error() == pserver.AlreadyInitialized {
return -1 log.Warningf("parameter %s already initialized, treat paddle_init_param as sucessful.\n", name)
return C.PSERVER_OK
}
log.Errorln(err)
return C.PSERVER_ERROR
} }
return 0 return C.PSERVER_OK
} }
//export paddle_finish_init_params //export paddle_finish_init_params
func paddle_finish_init_params(client C.client) C.int { func paddle_finish_init_params(client C.paddle_pserver_client) C.int {
c := get(client) c := get(client)
err := c.FinishInitParams() err := c.FinishInitParams()
if err != nil { if err != nil {
log.Println(err) if err.Error() == pserver.AlreadyInitialized {
return -1 log.Warningln("parameters already initialized, treat paddle_finish_init_params as sucessful.")
return C.PSERVER_OK
} }
return 0 log.Errorln(err)
return C.PSERVER_ERROR
}
return C.PSERVER_OK
} }
//export paddle_send_grads //export paddle_send_grads
func paddle_send_grads(client C.client, grads *C.paddle_gradient, total C.int) C.int { func paddle_send_grads(client C.paddle_pserver_client, grads **C.paddle_gradient, total C.int) C.int {
var gs []pserver.Gradient var gs []pserver.Gradient
for i := 0; i < int(total); i++ { for i := 0; i < int(total); i++ {
grad := (*C.paddle_gradient)(unsafe.Pointer((uintptr(unsafe.Pointer(grads)) + uintptr(i)*unsafe.Sizeof(*grads)))) grad := *(**C.paddle_gradient)(unsafe.Pointer((uintptr(unsafe.Pointer(grads)) + uintptr(i)*unsafe.Sizeof(*grads))))
et := pserver.ElementType(grad.element_type) et := pserver.ElementType(grad.element_type)
name := C.GoString(grad.name) name := C.GoString(grad.name)
content := cArrayToSlice(unsafe.Pointer(grad.content), int(grad.content_len)) content := cArrayToSlice(unsafe.Pointer(grad.content), int(grad.content_len))
...@@ -177,84 +174,82 @@ func paddle_send_grads(client C.client, grads *C.paddle_gradient, total C.int) C ...@@ -177,84 +174,82 @@ func paddle_send_grads(client C.client, grads *C.paddle_gradient, total C.int) C
c := get(client) c := get(client)
err := c.SendGrads(gs) err := c.SendGrads(gs)
if err != nil { if err != nil {
log.Println(err) log.Errorln(err)
return -1 return C.PSERVER_ERROR
} }
return 0 return C.PSERVER_OK
} }
//export paddle_get_params //export paddle_get_params
func paddle_get_params(client C.client, names **C.char, dst **C.paddle_parameter, total C.int) C.int { func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter, total C.int) C.int {
var ns []string var ns []string
for i := 0; i < int(total); i++ { for i := 0; i < int(total); i++ {
name := *(**C.char)(unsafe.Pointer((uintptr(unsafe.Pointer(names)) + uintptr(i)*unsafe.Sizeof(*names)))) param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst))))
ns = append(ns, C.GoString(name)) ns = append(ns, C.GoString(param.name))
} }
c := get(client) c := get(client)
ps, err := c.GetParams(ns) ps, err := c.GetParams(ns)
if err != nil { if err != nil {
log.Println(err) log.Errorln(err)
return -1 return C.PSERVER_ERROR
} }
for i := 0; i < int(total); i++ { if len(ps) != len(ns) {
if i >= len(ps) { pn := make([]string, len(ps))
break for i, p := range ps {
pn[i] = p.Name
}
log.Errorf("pserver returned wrong number of parameters. Requested: %s, returned: %s.\n", strings.Join(pn, ", "), strings.Join(ns, ", "))
return C.PSERVER_ERROR
}
for i := range ps {
if ns[i] != ps[i].Name {
pn := make([]string, len(ps))
for i, p := range ps {
pn[i] = p.Name
}
log.Errorf("pserver returned wrong parameters, or not in requested order. Requested: %s, returned: %s.\n", strings.Join(pn, ", "), strings.Join(ns, ", "))
return C.PSERVER_ERROR
}
} }
for i := 0; i < int(total); i++ {
p := ps[i] p := ps[i]
param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst)))) param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst))))
nameReady := false
contentAllocated := false
if unsafe.Pointer(param) == nullPtr { if unsafe.Pointer(param) == nullPtr {
param = (*C.paddle_parameter)(C.calloc(1, C.size_t(unsafe.Sizeof(*param)))) log.Errorln("must pre-allocate parameter.")
} else { return C.PSERVER_ERROR
if unsafe.Pointer(param.name) != nullPtr {
if n := C.GoString(param.name); n != p.Name {
log.Println("Warning: the pre-allocated parameter name does not match the parameter name, it will be freed.", n, p.Name)
C.free(unsafe.Pointer(param.name))
} else {
nameReady = true
}
} }
if unsafe.Pointer(param.content) != nullPtr { if unsafe.Pointer(param.content) != nullPtr {
if int(param.content_len) == len(p.Content) { if int(param.content_len) != len(p.Content) {
contentAllocated = true log.Errorf("the pre-allocated content len does not match parameter content len. Pre-allocated len: %d, returned len: %d", param.content_len, len(p.Content))
} else { return C.PSERVER_ERROR
log.Println("Warning: the pre-allocated content len does not match parameter content len, the pre-allocated content will be freed.", param.content_len, len(p.Content))
C.free(unsafe.Pointer(param.content))
}
} }
} }
if !nameReady {
param.name = C.CString(p.Name)
}
if !contentAllocated {
param.content = (*C.uchar)(C.malloc(C.size_t(len(p.Content))))
}
C.memcpy(unsafe.Pointer(param.content), unsafe.Pointer(&p.Content[0]), C.size_t(len(p.Content))) C.memcpy(unsafe.Pointer(param.content), unsafe.Pointer(&p.Content[0]), C.size_t(len(p.Content)))
param.content_len = C.int(len(p.Content)) param.content_len = C.int(len(p.Content))
param.element_type = C.paddle_element_type(p.ElementType) param.element_type = C.paddle_element_type(p.ElementType)
} }
return 0 return C.PSERVER_OK
} }
//export paddle_save_model //export paddle_save_model
func paddle_save_model(client C.client, path *C.char) C.int { func paddle_save_model(client C.paddle_pserver_client, path *C.char) C.int {
p := C.GoString(path) p := C.GoString(path)
c := get(client) c := get(client)
err := c.Save(p) err := c.Save(p)
if err != nil { if err != nil {
log.Println(err) log.Errorln(err)
return -1 return C.PSERVER_ERROR
} }
return 0 return C.PSERVER_OK
} }
func main() {} // Required but ignored func main() {} // Required but ignored
cmake_minimum_required(VERSION 3.0) cmake_minimum_required(VERSION 3.0)
include_directories(${CMAKE_BINARY_DIR})
add_executable(main main.c) add_executable(main main.c)
add_dependencies(main client) add_dependencies(main paddle_pserver_cclient)
add_executable(test_cclient test_cclient.c)
add_dependencies(test_cclient paddle_pserver_cclient)
if(APPLE) if(APPLE)
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
else()
set(CMAKE_EXE_LINKER_FLAGS "-pthread")
endif() endif()
target_link_libraries(main ${CMAKE_BINARY_DIR}/libclient.a)
if(PROJ_ROOT)
include_directories(${CMAKE_CURRENT_BINARY_DIR}/..)
target_link_libraries(main ${CMAKE_CURRENT_BINARY_DIR}/../libpaddle_pserver_cclient.a pthread)
target_link_libraries(test_cclient ${CMAKE_CURRENT_BINARY_DIR}/../libpaddle_pserver_cclient.a pthread)
else(PROJ_ROOT)
include_directories(${CMAKE_BINARY_DIR})
target_link_libraries(main ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread)
target_link_libraries(test_cclient ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread)
endif(PROJ_ROOT)
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include "libclient.h" #include "libpaddle_pserver_cclient.h"
void fail() { // TODO(helin): Fix: gtest using cmake is not working, using this
// TODO(helin): fix: gtest using cmake is not working, using this // hacky way for now.
// hacky way for now. #define fail() \
printf("test failed.\n"); fprintf(stderr, "info: %s:%d: ", __FILE__, __LINE__); \
exit(-1); exit(-1);
void sendGrads(paddle_pserver_client c) {
unsigned char grad_a[2000] = {2};
unsigned char grad_b[3000] = {3};
paddle_gradient grad1 = {
"param_a", PADDLE_ELEMENT_TYPE_FLOAT32, grad_a, 2000};
paddle_gradient grad2 = {
"param_b", PADDLE_ELEMENT_TYPE_FLOAT32, grad_b, 3000};
paddle_gradient* grads[2] = {&grad1, &grad2};
if (paddle_send_grads(c, grads, 2)) {
fail();
}
}
void getParams(paddle_pserver_client c) {
paddle_parameter param_a;
paddle_parameter param_b;
char name_a[] = "param_a";
char name_b[] = "param_b";
// Must pre-allocate the prameter content before calling paddle_get_params.
unsigned char content_a[2000] = {};
unsigned char content_b[3000] = {};
param_a.element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
param_a.name = name_a;
param_a.content = content_a;
param_a.content_len = 2000;
param_b.element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
param_b.name = name_b;
param_b.content = content_b;
param_b.content_len = 3000;
paddle_parameter* params[2] = {&param_a, &param_b};
if (paddle_get_params(c, params, 2)) {
fail();
}
} }
int main() { int main() {
char addr[] = "localhost:3000"; char addr[] = "localhost:3000";
client c = paddle_new_pserver_client(addr, 1); paddle_pserver_client c = paddle_new_pserver_client(addr, 1);
retry: retry:
if (paddle_begin_init_params(c)) { if (paddle_begin_init_params(c)) {
paddle_parameter param; paddle_parameter param;
char name_a[] = "param_a"; char name_a[] = "param_a";
char name_b[] = "param_b"; char name_b[] = "param_b";
unsigned char content[] = {0x00, 0x11, 0x22}; unsigned char content_a[2000] = {1};
unsigned char content_b[3000] = {0};
param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
param.name = name_a; param.name = name_a;
param.content = content; param.content = content_a;
param.content_len = 3; param.content_len = 2000;
if (paddle_init_param(c, param, NULL, 0) != 0) { int error = paddle_init_param(c, param, NULL, 0);
if (error != 0) {
goto retry; goto retry;
} }
param.element_type = PADDLE_ELEMENT_TYPE_INT32;
param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
param.name = name_b; param.name = name_b;
param.content = content; param.content = content_b;
param.content_len = 3; param.content_len = 3000;
if (paddle_init_param(c, param, NULL, 0) != 0) { error = paddle_init_param(c, param, NULL, 0);
if (error != 0) {
goto retry; goto retry;
} }
if (paddle_finish_init_params(c) != 0) {
goto retry;
}
} else {
fail();
}
unsigned char content[] = {0x00, 0x11, 0x22};
paddle_gradient grads[2] = {
{"param_a", PADDLE_ELEMENT_TYPE_INT32, content, 3},
{"param_b", PADDLE_ELEMENT_TYPE_FLOAT32, content, 3}};
if (!paddle_send_grads(c, grads, 2)) { error = paddle_finish_init_params(c);
fail(); if (error != 0) {
goto retry;
} }
paddle_parameter* params[2] = {NULL, NULL};
char* names[] = {"param_a", "param_b"};
if (!paddle_get_params(c, names, params, 2)) {
fail();
} }
// get parameters again by reusing the allocated parameter buffers. int i;
if (!paddle_get_params(c, names, params, 2)) { for (i = 0; i < 100; i++) {
fail(); sendGrads(c);
getParams(c);
} }
paddle_release_param(params[0]); if (paddle_save_model(c, "/tmp/")) {
paddle_release_param(params[1]);
if (!paddle_save_model(c, "/tmp/")) {
fail(); fail();
} }
......
#include <stdio.h>
#include <stdlib.h>
#include "libpaddle_pserver_cclient.h"
typedef float real;
void fail() {
// TODO(helin): fix: gtest using cmake is not working, using this
// hacky way for now.
printf("test failed.\n");
exit(-1);
}
void print_parameter(paddle_gradient* param) {
if (param == NULL) {
printf("param is NULL!!\n");
} else {
printf("==== parameter ====\n");
printf("name: %s\n", param->name);
printf("content_len: %d\n", param->content_len);
printf("content_type: %d\n", param->element_type);
int i;
for (i = 0; i < param->content_len / (int)sizeof(real); ++i) {
printf("%f ", ((float*)param->content)[i]);
}
printf("\n\n");
}
}
int main() {
char addr[] = "localhost:3000";
paddle_pserver_client c = paddle_new_pserver_client(addr, 1);
char* names[] = {"param_a", "param_b"};
retry:
printf("init parameter to pserver:\n");
real param_content1[] = {0.1, 0.2, 0.3};
real param_content2[] = {0.4, 0.5, 0.6};
paddle_parameter** params =
(paddle_parameter**)malloc(sizeof(paddle_parameter*) * 2);
params[0] = (paddle_parameter*)malloc(sizeof(paddle_parameter));
params[0]->name = names[0];
params[0]->content = (unsigned char*)param_content1;
params[0]->content_len = 3 * sizeof(real);
params[0]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
params[1] = (paddle_parameter*)malloc(sizeof(paddle_parameter));
params[1]->name = names[1];
params[1]->content = (unsigned char*)param_content2;
params[1]->content_len = 3 * sizeof(real);
params[1]->element_type = PADDLE_ELEMENT_TYPE_INT32;
if (paddle_begin_init_params(c)) {
if (paddle_init_param(c, *params[0], NULL, 0) != 0) {
goto retry;
}
if (paddle_init_param(c, *params[1], NULL, 0) != 0) {
goto retry;
}
if (paddle_finish_init_params(c) != 0) {
goto retry;
}
} else {
fail();
}
printf("get inited parameters from pserver:\n");
// get parameters again by reusing the allocated parameter buffers.
if (paddle_get_params(c, params, 2) != 0) {
fail();
}
print_parameter(params[0]);
print_parameter(params[1]);
printf("send gradient to pserver:\n");
real gradient_content1[] = {0.01, 0.02, 0.03};
real gradinet_content2[] = {0.04, 0.05, 0.06};
paddle_gradient** grads =
(paddle_gradient**)malloc(sizeof(paddle_gradient*) * 2);
grads[0] = (paddle_gradient*)malloc(sizeof(paddle_gradient));
grads[0]->name = names[0];
grads[0]->content = (unsigned char*)gradient_content1;
grads[0]->content_len = 3 * sizeof(real);
grads[0]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
grads[1] = (paddle_gradient*)malloc(sizeof(paddle_gradient));
grads[1]->name = names[1];
grads[1]->content = (unsigned char*)gradinet_content2;
grads[1]->content_len = 3 * sizeof(real);
grads[1]->element_type = PADDLE_ELEMENT_TYPE_INT32;
printf("print gradient sent to pserver:\n");
print_parameter(grads[0]);
print_parameter(grads[1]);
if (paddle_send_grads(c, grads, 2) != 0) {
fail();
}
printf("get updated parameters from pserver:\n");
// get parameters again by reusing the allocated parameter buffers.
if (paddle_get_params(c, params, 2) != 0) {
fail();
}
print_parameter(params[0]);
print_parameter(params[1]);
if (paddle_save_model(c, "/tmp/") != 0) {
fail();
}
return 0;
}
...@@ -71,11 +71,6 @@ def main(): ...@@ -71,11 +71,6 @@ def main():
#predict = convolutional_neural_network(images) #predict = convolutional_neural_network(images)
cost = paddle.layer.classification_cost(input=predict, label=label) cost = paddle.layer.classification_cost(input=predict, label=label)
try:
with gzip.open('params.tar.gz', 'r') as f:
parameters = paddle.parameters.Parameters.from_tar(f)
except IOError:
parameters = paddle.parameters.create(cost) parameters = paddle.parameters.create(cost)
optimizer = paddle.optimizer.Momentum( optimizer = paddle.optimizer.Momentum(
...@@ -85,7 +80,9 @@ def main(): ...@@ -85,7 +80,9 @@ def main():
trainer = paddle.trainer.SGD(cost=cost, trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters, parameters=parameters,
update_equation=optimizer) update_equation=optimizer,
is_local=False,
pserver_spec="localhost:3000")
lists = [] lists = []
...@@ -95,9 +92,6 @@ def main(): ...@@ -95,9 +92,6 @@ def main():
print "Pass %d, Batch %d, Cost %f, %s" % ( print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics) event.pass_id, event.batch_id, event.cost, event.metrics)
with gzip.open('params.tar.gz', 'w') as f:
parameters.to_tar(f)
elif isinstance(event, paddle.event.EndPass): elif isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=paddle.batch( result = trainer.test(reader=paddle.batch(
paddle.dataset.mnist.test(), batch_size=128)) paddle.dataset.mnist.test(), batch_size=128))
......
...@@ -24,7 +24,9 @@ def main(): ...@@ -24,7 +24,9 @@ def main():
trainer = paddle.trainer.SGD(cost=cost, trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters, parameters=parameters,
update_equation=optimizer) update_equation=optimizer,
is_local=False,
pserver_spec="localhost:3000")
# event_handler to print training and testing info # event_handler to print training and testing info
def event_handler(event): def event_handler(event):
......
...@@ -2,11 +2,11 @@ package pserver ...@@ -2,11 +2,11 @@ package pserver
import ( import (
"hash/fnv" "hash/fnv"
"log"
"sort" "sort"
"time" "time"
"github.com/PaddlePaddle/Paddle/go/pserver/internal/connection" "github.com/PaddlePaddle/Paddle/go/connection"
log "github.com/sirupsen/logrus"
) )
// TODO(helin): add RPC call retry logic // TODO(helin): add RPC call retry logic
...@@ -47,7 +47,7 @@ func NewClient(l Lister, pserverNum int, sel Selector) *Client { ...@@ -47,7 +47,7 @@ func NewClient(l Lister, pserverNum int, sel Selector) *Client {
// monitorPservers monitors pserver addresses, and updates connection // monitorPservers monitors pserver addresses, and updates connection
// when the address changes. // when the address changes.
func (c *Client) monitorPservers(l Lister, pserverNum int) { func (c *Client) monitorPservers(l Lister, pserverNum int) {
knownServers := make([]Server, pserverNum) lastServers := make([]Server, pserverNum)
ticker := time.NewTicker(10 * time.Second) ticker := time.NewTicker(10 * time.Second)
monitor := func() { monitor := func() {
curServers := make([]Server, pserverNum) curServers := make([]Server, pserverNum)
...@@ -56,25 +56,37 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) { ...@@ -56,25 +56,37 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) {
curServers[l.Index] = l curServers[l.Index] = l
} }
for i := range knownServers { for i := range lastServers {
if knownServers[i].Addr != curServers[i].Addr { if lastServers[i].Addr == curServers[i].Addr {
continue
}
if curServers[i].Addr == "" {
err := c.pservers[i].Close()
if err != nil {
log.Errorln(err)
}
continue
}
err := c.pservers[i].Connect(curServers[i].Addr) err := c.pservers[i].Connect(curServers[i].Addr)
if err != nil { if err != nil {
log.Println(err) log.Errorln(err)
// connect to addr failed, set // connect to addr failed, set
// to last known addr in order // to last known addr in order
// to retry next time. // to retry next time.
curServers[i].Addr = knownServers[i].Addr curServers[i].Addr = lastServers[i].Addr
}
} }
} }
knownServers = curServers lastServers = curServers
} }
monitor() monitor()
for _ = range ticker.C { for range ticker.C {
monitor() monitor()
} }
} }
...@@ -93,16 +105,14 @@ func (c *Client) BeginInitParams() bool { ...@@ -93,16 +105,14 @@ func (c *Client) BeginInitParams() bool {
// InitParam initializes the parameter on parameter servers. // InitParam initializes the parameter on parameter servers.
func (c *Client) InitParam(paramWithConfigs ParameterWithConfig) error { func (c *Client) InitParam(paramWithConfigs ParameterWithConfig) error {
var dummy int return c.pservers[c.partition(paramWithConfigs.Param.Name)].Call("Service.InitParam", paramWithConfigs, nil)
return c.pservers[c.partition(paramWithConfigs.Param.Name)].Call("Service.InitParam", paramWithConfigs, &dummy)
} }
// FinishInitParams tells parameter servers client has sent all // FinishInitParams tells parameter servers client has sent all
// parameters to parameter servers as initialization. // parameters to parameter servers as initialization.
func (c *Client) FinishInitParams() error { func (c *Client) FinishInitParams() error {
for _, p := range c.pservers { for _, p := range c.pservers {
var dummy int err := p.Call("Service.FinishInitParams", 0, nil)
err := p.Call("Service.FinishInitParams", dummy, &dummy)
if err != nil { if err != nil {
return err return err
} }
...@@ -116,8 +126,7 @@ func (c *Client) SendGrads(grads []Gradient) error { ...@@ -116,8 +126,7 @@ func (c *Client) SendGrads(grads []Gradient) error {
errCh := make(chan error, len(grads)) errCh := make(chan error, len(grads))
for _, g := range grads { for _, g := range grads {
go func(g Gradient) { go func(g Gradient) {
var dummy int err := c.pservers[c.partition(g.Name)].Call("Service.SendGrad", g, nil)
err := c.pservers[c.partition(g.Name)].Call("Service.SendGrad", g, &dummy)
errCh <- err errCh <- err
}(g) }(g)
} }
...@@ -196,8 +205,7 @@ func (c *Client) Save(path string) error { ...@@ -196,8 +205,7 @@ func (c *Client) Save(path string) error {
errCh := make(chan error, len(c.pservers)) errCh := make(chan error, len(c.pservers))
for _, p := range c.pservers { for _, p := range c.pservers {
var dummy int err := p.Call("Service.Save", path, nil)
err := p.Call("Service.Save", path, &dummy)
errCh <- err errCh <- err
} }
......
...@@ -117,7 +117,7 @@ func TestClientFull(t *testing.T) { ...@@ -117,7 +117,7 @@ func TestClientFull(t *testing.T) {
for i := range params { for i := range params {
if names[i] != params[i].Name { if names[i] != params[i].Name {
t.Fatalf("order of returned parameter does not required: parameter name: %s, required name: %s", names[i], params[i]) t.Fatalf("order of returned parameter does not required: parameter name: %s, required name: %s", names[i], params[i].Name)
} }
} }
} }
...@@ -32,7 +32,13 @@ int update_SGD(void* optimizer, ...@@ -32,7 +32,13 @@ int update_SGD(void* optimizer,
const void* gradient, const void* gradient,
int num_bytes) { int num_bytes) {
SGD_optimizer* o = (SGD_optimizer*)optimizer; SGD_optimizer* o = (SGD_optimizer*)optimizer;
// TODO float* parameter = (float*)buffer;
float* grad = (float*)gradient;
int i;
for (i = 0; i < num_bytes / sizeof(float); ++i) {
parameter[i] -= o->learning_rate * grad[i];
}
return 0; return 0;
} }
......
...@@ -9,8 +9,10 @@ import ( ...@@ -9,8 +9,10 @@ import (
// ElementType is the type of elements of a Parameter. // ElementType is the type of elements of a Parameter.
type ElementType int type ElementType int
var ErrAlreadyInitialized = errors.New("pserver already initialized") const (
var ErrUninitialized = errors.New("pserver not fully initialized") AlreadyInitialized = "pserver already initialized"
Uninitialized = "pserver not fully initialized"
)
// Supported element types // Supported element types
const ( const (
...@@ -49,7 +51,7 @@ type Service struct { ...@@ -49,7 +51,7 @@ type Service struct {
// NewService creates a new service. // NewService creates a new service.
func NewService() *Service { func NewService() *Service {
s := &Service{opt: newOptimizer(sgd, 0.01)} s := &Service{opt: newOptimizer(sgd, 0.005)}
s.paramMap = make(map[string]Parameter) s.paramMap = make(map[string]Parameter)
s.initialized = make(chan struct{}) s.initialized = make(chan struct{})
return s return s
...@@ -59,7 +61,7 @@ func NewService() *Service { ...@@ -59,7 +61,7 @@ func NewService() *Service {
func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) error { func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) error {
select { select {
case <-s.initialized: case <-s.initialized:
return ErrAlreadyInitialized return errors.New(AlreadyInitialized)
default: default:
} }
...@@ -80,7 +82,7 @@ func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) er ...@@ -80,7 +82,7 @@ func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) er
func (s *Service) FinishInitParams(dummy0 int, dummy1 *int) error { func (s *Service) FinishInitParams(dummy0 int, dummy1 *int) error {
select { select {
case <-s.initialized: case <-s.initialized:
return ErrAlreadyInitialized return errors.New(AlreadyInitialized)
default: default:
} }
...@@ -94,7 +96,7 @@ func (s *Service) SendGrad(g Gradient, dummy *int) error { ...@@ -94,7 +96,7 @@ func (s *Service) SendGrad(g Gradient, dummy *int) error {
select { select {
case <-s.initialized: case <-s.initialized:
default: default:
return ErrUninitialized return errors.New(Uninitialized)
} }
s.mu.Lock() s.mu.Lock()
......
...@@ -15,8 +15,7 @@ func TestFull(t *testing.T) { ...@@ -15,8 +15,7 @@ func TestFull(t *testing.T) {
p.Name = "param_a" p.Name = "param_a"
p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0}
p.ElementType = pserver.Int32 p.ElementType = pserver.Int32
var dummy int err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil)
err := s.InitParam(pserver.ParameterWithConfig{p, nil}, &dummy)
if err != nil { if err != nil {
t.FailNow() t.FailNow()
} }
...@@ -25,12 +24,12 @@ func TestFull(t *testing.T) { ...@@ -25,12 +24,12 @@ func TestFull(t *testing.T) {
p1.Name = "param_b" p1.Name = "param_b"
p1.Content = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} p1.Content = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
p1.ElementType = pserver.Float32 p1.ElementType = pserver.Float32
err = s.InitParam(pserver.ParameterWithConfig{p1, nil}, &dummy) err = s.InitParam(pserver.ParameterWithConfig{Param: p1, Config: nil}, nil)
if err != nil { if err != nil {
t.FailNow() t.FailNow()
} }
err = s.FinishInitParams(0, &dummy) err = s.FinishInitParams(0, nil)
if err != nil { if err != nil {
t.FailNow() t.FailNow()
} }
...@@ -46,11 +45,11 @@ func TestFull(t *testing.T) { ...@@ -46,11 +45,11 @@ func TestFull(t *testing.T) {
} }
g1, g2 := pserver.Gradient(p1), pserver.Gradient(p) g1, g2 := pserver.Gradient(p1), pserver.Gradient(p)
err = s.SendGrad(g1, &dummy) err = s.SendGrad(g1, nil)
if err != nil { if err != nil {
t.FailNow() t.FailNow()
} }
err = s.SendGrad(g2, &dummy) err = s.SendGrad(g2, nil)
if err != nil { if err != nil {
t.FailNow() t.FailNow()
...@@ -74,23 +73,21 @@ func TestFull(t *testing.T) { ...@@ -74,23 +73,21 @@ func TestFull(t *testing.T) {
func TestMultipleInit(t *testing.T) { func TestMultipleInit(t *testing.T) {
s := pserver.NewService() s := pserver.NewService()
var dummy int err := s.FinishInitParams(0, nil)
err := s.FinishInitParams(0, &dummy)
if err != nil { if err != nil {
t.FailNow() t.FailNow()
} }
err = s.FinishInitParams(0, &dummy) err = s.FinishInitParams(0, nil)
if err != pserver.ErrAlreadyInitialized { if err.Error() != pserver.AlreadyInitialized {
t.FailNow() t.FailNow()
} }
} }
func TestUninitialized(t *testing.T) { func TestUninitialized(t *testing.T) {
s := pserver.NewService() s := pserver.NewService()
var dummy int err := s.SendGrad(pserver.Gradient{}, nil)
err := s.SendGrad(pserver.Gradient{}, &dummy) if err.Error() != pserver.Uninitialized {
if err != pserver.ErrUninitialized {
t.FailNow() t.FailNow()
} }
} }
...@@ -98,13 +95,14 @@ func TestUninitialized(t *testing.T) { ...@@ -98,13 +95,14 @@ func TestUninitialized(t *testing.T) {
func TestBlockUntilInitialized(t *testing.T) { func TestBlockUntilInitialized(t *testing.T) {
s := pserver.NewService() s := pserver.NewService()
ch := make(chan struct{}, 2) ch := make(chan struct{}, 2)
errCh := make(chan error, 2)
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(1) wg.Add(1)
go func() { go func() {
var param pserver.Parameter var param pserver.Parameter
err := s.GetParam("param_a", &param) err := s.GetParam("param_a", &param)
if err != nil { if err != nil {
t.FailNow() errCh <- err
} }
wg.Done() wg.Done()
ch <- struct{}{} ch <- struct{}{}
...@@ -112,10 +110,9 @@ func TestBlockUntilInitialized(t *testing.T) { ...@@ -112,10 +110,9 @@ func TestBlockUntilInitialized(t *testing.T) {
wg.Add(1) wg.Add(1)
go func() { go func() {
var dummy int err := s.Save("", nil)
err := s.Save("", &dummy)
if err != nil { if err != nil {
t.FailNow() errCh <- err
} }
wg.Done() wg.Done()
ch <- struct{}{} ch <- struct{}{}
...@@ -127,6 +124,8 @@ func TestBlockUntilInitialized(t *testing.T) { ...@@ -127,6 +124,8 @@ func TestBlockUntilInitialized(t *testing.T) {
case <-ch: case <-ch:
// some function returned before initialization is completed. // some function returned before initialization is completed.
t.FailNow() t.FailNow()
case <-errCh:
t.FailNow()
default: default:
} }
...@@ -134,13 +133,12 @@ func TestBlockUntilInitialized(t *testing.T) { ...@@ -134,13 +133,12 @@ func TestBlockUntilInitialized(t *testing.T) {
p.Name = "param_a" p.Name = "param_a"
p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0}
p.ElementType = pserver.Int32 p.ElementType = pserver.Int32
var dummy int err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil)
err := s.InitParam(pserver.ParameterWithConfig{p, nil}, &dummy)
if err != nil { if err != nil {
t.FailNow() t.FailNow()
} }
err = s.FinishInitParams(0, &dummy) err = s.FinishInitParams(0, nil)
if err != nil { if err != nil {
t.FailNow() t.FailNow()
} }
......
...@@ -9,6 +9,7 @@ add_subdirectory(pserver) ...@@ -9,6 +9,7 @@ add_subdirectory(pserver)
add_subdirectory(trainer) add_subdirectory(trainer)
add_subdirectory(scripts) add_subdirectory(scripts)
add_subdirectory(optimizer) add_subdirectory(optimizer)
add_subdirectory(strings)
# Do not build go directory until go cmake is working smoothly. # Do not build go directory until go cmake is working smoothly.
# if(CMAKE_Go_COMPILER) # if(CMAKE_Go_COMPILER)
......
...@@ -16,7 +16,7 @@ set(API_HEADER ...@@ -16,7 +16,7 @@ set(API_HEADER
Internal.h) Internal.h)
add_library(paddle_api STATIC ${API_SOURCES}) add_library(paddle_api STATIC ${API_SOURCES})
add_dependencies(paddle_api gen_proto_cpp) add_dependencies(paddle_api gen_proto_cpp paddle_pserver_cclient_lib)
INCLUDE(${SWIG_USE_FILE}) INCLUDE(${SWIG_USE_FILE})
INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle) INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle)
...@@ -41,10 +41,11 @@ SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS ...@@ -41,10 +41,11 @@ SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS
paddle_network paddle_network
paddle_proto paddle_proto
${external_project_dependencies} ${external_project_dependencies}
${RDMA_LIBS}
) )
IF(APPLE) IF(APPLE)
SET(MACOS_LD_FLAGS "-undefined dynamic_lookup -Wl,-all_load") SET(MACOS_LD_FLAGS "-undefined dynamic_lookup -Wl,-all_load -framework CoreFoundation -framework Security")
ELSE(APPLE) ELSE(APPLE)
SET(START_GROUP "-Xlinker -start-group") SET(START_GROUP "-Xlinker -start-group")
SET(END_GROUP "-Xlinker -end-group") SET(END_GROUP "-Xlinker -end-group")
...@@ -73,6 +74,7 @@ SWIG_LINK_LIBRARIES(swig_paddle ...@@ -73,6 +74,7 @@ SWIG_LINK_LIBRARIES(swig_paddle
${CMAKE_DL_LIBS} ${CMAKE_DL_LIBS}
${EXTERNAL_LIBS} ${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT} ${CMAKE_THREAD_LIBS_INIT}
${RDMA_LD_FLAGS}
${START_END} ${START_END}
) )
......
...@@ -179,6 +179,7 @@ namespace std { ...@@ -179,6 +179,7 @@ namespace std {
%newobject ParameterOptimizer::needSpecialTraversal; %newobject ParameterOptimizer::needSpecialTraversal;
%newobject ParameterUpdater::createLocalUpdater; %newobject ParameterUpdater::createLocalUpdater;
%newobject ParameterUpdater::createRemoteUpdater; %newobject ParameterUpdater::createRemoteUpdater;
%newobject ParameterUpdater::createNewRemoteUpdater;
%feature("director") UpdateCallback; %feature("director") UpdateCallback;
%feature("autodoc", 1); // To generate method stub, for code hint in ide %feature("autodoc", 1); // To generate method stub, for code hint in ide
......
...@@ -841,6 +841,8 @@ public: ...@@ -841,6 +841,8 @@ public:
static ParameterUpdater* createRemoteUpdater(OptimizationConfig* config, static ParameterUpdater* createRemoteUpdater(OptimizationConfig* config,
int passCount, int passCount,
bool useSparseUpdater); bool useSparseUpdater);
static ParameterUpdater* createNewRemoteUpdater(
OptimizationConfig* config, const std::string pserverSpec);
~ParameterUpdater(); ~ParameterUpdater();
/** /**
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#include "PaddleAPI.h" #include "PaddleAPI.h"
#include "PaddleAPIPrivate.h" #include "PaddleAPIPrivate.h"
#include "paddle/trainer/NewRemoteParameterUpdater.h"
#include "paddle/trainer/RemoteParameterUpdater.h" #include "paddle/trainer/RemoteParameterUpdater.h"
#include "paddle/trainer/ThreadParameterUpdater.h" #include "paddle/trainer/ThreadParameterUpdater.h"
...@@ -28,6 +29,14 @@ ParameterUpdater *ParameterUpdater::createLocalUpdater( ...@@ -28,6 +29,14 @@ ParameterUpdater *ParameterUpdater::createLocalUpdater(
return updater; return updater;
} }
ParameterUpdater *ParameterUpdater::createNewRemoteUpdater(
OptimizationConfig *config, const std::string pserverSpec) {
auto updater = new ParameterUpdater();
updater->m->updater.reset(new paddle::NewRemoteParameterUpdater(
config->m->getConfig(), pserverSpec));
return updater;
}
ParameterUpdater *ParameterUpdater::createRemoteUpdater( ParameterUpdater *ParameterUpdater::createRemoteUpdater(
OptimizationConfig *config, int passCount, bool useSparseUpdater) { OptimizationConfig *config, int passCount, bool useSparseUpdater) {
auto updater = new ParameterUpdater(); auto updater = new ParameterUpdater();
......
...@@ -17,10 +17,9 @@ limitations under the License. */ ...@@ -17,10 +17,9 @@ limitations under the License. */
#include <stdio.h> #include <stdio.h>
#include "hl_base.h" #include "hl_base.h"
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#include "hl_neon_matrix_kernel.cuh" #ifndef __CUDA_ARCH__
#else #include "hl_cpu_matrix_kernel_detail.cuh"
#include "hl_sse_matrix_kernel.cuh"
#endif #endif
/** /**
...@@ -114,35 +113,6 @@ void hl_cpu_apply_quaternary_op(Op op, ...@@ -114,35 +113,6 @@ void hl_cpu_apply_quaternary_op(Op op,
} }
} }
template <class Agg, class Op, class Saver>
void hl_matrix_row_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst, int ld,
real *A, int lda) {
for (int i = 0; i < dimM; i++) {
real tmp = agg.init();
for (int j = 0; j < dimN; j++) {
tmp = agg(tmp, op(A[i * lda + j]));
}
dst[i*ld] = sv(dst[i*ld], tmp);
}
}
template <class Agg, class Op, class Saver>
void hl_matrix_row_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst, int ld,
real *A, int lda,
real *B, int ldb) {
for (int i = 0; i < dimM; i++) {
real tmp = agg.init();
for (int j = 0; j < dimN; j++) {
tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j]));
}
dst[i*ld] = sv(dst[i*ld], tmp);
}
}
template <class Agg, class Op, class Saver> template <class Agg, class Op, class Saver>
void hl_cpu_matrix_row_op(Agg agg, Op op, Saver sv, void hl_cpu_matrix_row_op(Agg agg, Op op, Saver sv,
int dimM, int dimN, int dimM, int dimN,
......
...@@ -13,26 +13,11 @@ See the License for the specific language governing permissions and ...@@ -13,26 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_SSE_MATRIX_KERNEL_CUH_ #ifndef HL_MATRIX_KERNEL_DETAIL_CUH_
#define HL_SSE_MATRIX_KERNEL_CUH_ #define HL_MATRIX_KERNEL_DETAIL_CUH_
#include "hl_matrix_type.cuh" #include "hl_matrix_type.cuh"
#define VECTOR_SIZE 16
#ifndef PADDLE_TYPE_DOUBLE
/* number of float in vector */
#define VECTOR_LEN 4
#define VECTOR_SET _mm_set_ps1
#else
#if defined(__APPLE__) || defined(__OSX__)
#define _mm_set_pd1 _mm_set1_pd
#endif
/* number of double in vector */
#define VECTOR_LEN 2
#define VECTOR_SET _mm_set_pd1
#endif
inline bool hl_check_align(size_t size) { inline bool hl_check_align(size_t size) {
return !(size & (VECTOR_SIZE - 1)); return !(size & (VECTOR_SIZE - 1));
} }
...@@ -41,27 +26,63 @@ inline bool hl_check_align(void *ptr) { ...@@ -41,27 +26,63 @@ inline bool hl_check_align(void *ptr) {
return hl_check_align(reinterpret_cast<size_t>(ptr)); return hl_check_align(reinterpret_cast<size_t>(ptr));
} }
#ifndef PADDLE_TYPE_DOUBLE template <class Agg, class Op, class Saver>
template <class Agg> void hl_matrix_row_op(Agg agg, Op op, Saver sv,
inline real hl_agg_op(Agg agg, vecType mm) { int dimM, int dimN,
__m128 lo = _mm_unpacklo_ps(mm, mm); real *dst, int ld,
__m128 hi = _mm_unpackhi_ps(mm, mm); real *A, int lda) {
__m128 tmp1 = agg.vecOp(lo, hi); for (int i = 0; i < dimM; i++) {
__m128 tmp2 = _mm_movehl_ps(tmp1, tmp1); real tmp = agg.init();
__m128 ret = agg.vecOp(tmp1, tmp2); for (int j = 0; j < dimN; j++) {
tmp = agg(tmp, op(A[i * lda + j]));
}
dst[i*ld] = sv(dst[i*ld], tmp);
}
}
return _mm_cvtss_f32(ret); template <class Agg, class Op, class Saver>
void hl_matrix_row_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst, int ld,
real *A, int lda,
real *B, int ldb) {
for (int i = 0; i < dimM; i++) {
real tmp = agg.init();
for (int j = 0; j < dimN; j++) {
tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j]));
}
dst[i*ld] = sv(dst[i*ld], tmp);
}
}
template <class Agg, class Op, class Saver>
void hl_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda) {
for (int j = 0; j < dimN; j++) {
real tmp = agg.init();
for (int i = 0; i < dimM; i++) {
tmp = agg(tmp, op(A[i * lda + j]));
}
dst[j] = sv(dst[j], tmp);
}
} }
#else
template <class Agg> template <class Agg, class Op, class Saver>
inline real hl_agg_op(Agg agg, vecType mm) { void hl_matrix_column_op(Agg agg, Op op, Saver sv,
__m128d lo = _mm_unpacklo_pd(mm, mm); int dimM, int dimN,
__m128d hi = _mm_unpackhi_pd(mm, mm); real *dst,
__m128d ret = agg.vecOp(lo, hi); real *A, int lda,
real *B, int ldb) {
return _mm_cvtsd_f64(ret); for (int j = 0; j < dimN; j++) {
real tmp = agg.init();
for (int i = 0; i < dimM; i++) {
tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j]));
}
dst[j] = sv(dst[j], tmp);
}
} }
#endif
template <class Agg, class Op, class Saver> template <class Agg, class Op, class Saver>
void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv, void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv,
...@@ -118,35 +139,6 @@ void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv, ...@@ -118,35 +139,6 @@ void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv,
} }
} }
template <class Agg, class Op, class Saver>
void hl_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda) {
for (int j = 0; j < dimN; j++) {
real tmp = agg.init();
for (int i = 0; i < dimM; i++) {
tmp = agg(tmp, op(A[i * lda + j]));
}
dst[j] = sv(dst[j], tmp);
}
}
template <class Agg, class Op, class Saver>
void hl_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda,
real *B, int ldb) {
for (int j = 0; j < dimN; j++) {
real tmp = agg.init();
for (int i = 0; i < dimM; i++) {
tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j]));
}
dst[j] = sv(dst[j], tmp);
}
}
/* /*
* MaxRow greater than or equal dimN * MaxRow greater than or equal dimN
* dimN is multiples of VECTOR_LEN * dimN is multiples of VECTOR_LEN
...@@ -315,4 +307,4 @@ void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, ...@@ -315,4 +307,4 @@ void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv,
} }
} }
#endif /* HL_SSE_MATRIX_KERNEL_CUH_ */ #endif /* HL_MATRIX_KERNEL_DETAIL_CUH_ */
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_CPU_SCALAR_CUH_
#define HL_CPU_SCALAR_CUH_
#define VECTOR_SIMD false
#define VECTOR_SET hl_vec_set
#ifndef PADDLE_TYPE_DOUBLE
/* size of float */
#define VECTOR_SIZE 4
#else
/* size of double */
#define VECTOR_SIZE 8
#endif
typedef real vecType;
/* Consider a real as a vector */
#define VECTOR_LEN 1
template <class Agg>
inline real hl_agg_op(Agg agg, vecType mm) {
return mm;
}
INLINE real hl_vec_set(const real r) {
return r;
}
INLINE real hl_vec_classification_error(const real a,
const real b,
const real p,
const real r) {
return ((a > p) == (b > p)) ? 0.0f : 1.0f;
}
#endif // HL_CPU_SCALAR_CUH_
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_CPU_SIMD_NEON_CUH_
#define HL_CPU_SIMD_NEON_CUH_
#include <arm_neon.h>
#define VECTOR_SIMD true
#define VECTOR_SIZE 16
#define VECTOR_SET hl_vec_set
#ifndef PADDLE_TYPE_DOUBLE
typedef float32x4_t vecType;
/* number of float in vector */
#define VECTOR_LEN 4
template <class Agg>
inline real hl_agg_op(Agg agg, vecType mm) {
float32x4_t rev = vrev64q_f32(mm);
float32x4_t tmp1 = agg.vecOp(rev, rev);
float32x2_t lo = vget_high_f32(rev);
float32x2_t hi = vget_low_f32(rev);
float32x4_t tmp2 = vcombine_f32(hi, lo);
float32x4_t ret = agg.vecOp(tmp1, tmp2);
return vgetq_lane_f32(ret, 0);
}
inline float32x4_t hl_vec_set(const real f) {
return vdupq_n_f32(f);
}
inline float32x4_t hl_vec_classification_error(const float32x4_t a,
const float32x4_t b,
const float32x4_t p,
const float32x4_t r) {
uint32x4_t tmp1 = vcgtq_f32(a, p);
uint32x4_t tmp2 = vcgtq_f32(b, p);
uint32x4_t tmp3 = veorq_u32(tmp1, tmp2);
return vcvtq_f32_u32(vandq_u32(tmp3, vcvtq_u32_f32(r)));
}
#else
#ifdef __aarch64__
typedef float64x2_t vecType;
/* number of float in vector */
#define VECTOR_LEN 2
#define VECTOR_SET vdupq_n_f64
#error To be implemented
#else
#error NEON instructions does not support double precision
#endif // __aarch64__
#endif
#endif // HL_CPU_SIMD_NEON_CUH_
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_CPU_SIMD_SSE_CUH_
#define HL_CPU_SIMD_SSE_CUH_
#include <mmintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#define VECTOR_SIMD true
#define VECTOR_SIZE 16
#define VECTOR_SET hl_vec_set
#ifndef PADDLE_TYPE_DOUBLE
typedef __m128 vecType;
/* number of float in vector */
#define VECTOR_LEN 4
template <class Agg>
inline real hl_agg_op(Agg agg, vecType mm) {
__m128 lo = _mm_unpacklo_ps(mm, mm);
__m128 hi = _mm_unpackhi_ps(mm, mm);
__m128 tmp1 = agg.vecOp(lo, hi);
__m128 tmp2 = _mm_movehl_ps(tmp1, tmp1);
__m128 ret = agg.vecOp(tmp1, tmp2);
return _mm_cvtss_f32(ret);
}
inline __m128 hl_vec_set(const real f) {
return _mm_set_ps1(f);
}
inline __m128 hl_vec_classification_error(const __m128 a,
const __m128 b,
const __m128 p,
const __m128 r) {
__m128 tmp1 = _mm_cmpgt_ps(a, p);
__m128 tmp2 = _mm_cmpgt_ps(b, p);
__m128 tmp3 = _mm_xor_ps(tmp1, tmp2);
return _mm_and_ps(tmp3, r);
}
#else
typedef __m128d vecType;
/* number of double in vector */
#define VECTOR_LEN 2
template <class Agg>
inline real hl_agg_op(Agg agg, vecType mm) {
__m128d lo = _mm_unpacklo_pd(mm, mm);
__m128d hi = _mm_unpackhi_pd(mm, mm);
__m128d ret = agg.vecOp(lo, hi);
return _mm_cvtsd_f64(ret);
}
inline __m128d hl_vec_set(const real d) {
#if defined(__APPLE__) || defined(__OSX__)
return _mm_set1_pd(d);
#else
return _mm_set_pd1(d);
#endif
}
inline __m128d hl_vec_classification_error(const __m128d a,
const __m128d b,
const __m128d p,
const __m128d r) {
__m128d tmp1 = _mm_cmpgt_pd(a, p);
__m128d tmp2 = _mm_cmpgt_pd(b, p);
__m128d tmp3 = _mm_xor_pd(tmp1, tmp2);
return _mm_and_pd(tmp3, r);
}
#endif
#endif // HL_CPU_SIMD_SSE_CUH_
...@@ -18,26 +18,6 @@ limitations under the License. */ ...@@ -18,26 +18,6 @@ limitations under the License. */
#include "hl_matrix_type.cuh" #include "hl_matrix_type.cuh"
#ifdef __CUDA_ARCH__
/**
* CUDA kernel inline function
*/
#define INLINE __device__ inline
#else
/**
* CPP inline function
*/
#define INLINE inline
#endif
#ifndef PADDLE_TYPE_DOUBLE
#define DEVICE_FMAX fmaxf
#define DEVICE_FMIN fminf
#else
#define DEVICE_FMAX fmax
#define DEVICE_FMIN fmin
#endif
class BaseOp { class BaseOp {
public: public:
static const bool sse = false; static const bool sse = false;
...@@ -66,10 +46,8 @@ typedef BaseOp SSESquaredDiff; ...@@ -66,10 +46,8 @@ typedef BaseOp SSESquaredDiff;
typedef BaseOp SSEFirst; typedef BaseOp SSEFirst;
typedef BaseOp SSESecond; typedef BaseOp SSESecond;
typedef BaseOp SSEClassificationError; typedef BaseOp SSEClassificationError;
#elif defined(__ARM__NEON__) || defined(__ARM_NEON)
#include "hl_matrix_base_neon.cuh"
#else #else
#include "hl_matrix_base_sse.cuh" #include "hl_matrix_base_detail.cuh"
#endif #endif
namespace aggregate { namespace aggregate {
......
...@@ -12,32 +12,34 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,32 +12,34 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_MATRIX_BASE_DETAIL_CUH_
#define HL_MATRIX_BASE_DETAIL_CUH_
#ifndef HL_MATRIX_BASE_NEON_CUH_ #include "hl_matrix_type.cuh"
#define HL_MATRIX_BASE_NEON_CUH_ #include "hl_tensor_ops.h"
namespace aggregate { namespace aggregate {
class SSESum { class SSESum {
public: public:
static const bool sse = true; static const bool sse = VECTOR_SIMD;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { INLINE vecType vecOp(const vecType a, const vecType b) const {
return vaddq_f32(a, b); return hppl::binary::add<vecType>()(a, b);
} }
}; };
class SSEMax { class SSEMax {
public: public:
static const bool sse = true; static const bool sse = VECTOR_SIMD;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { INLINE vecType vecOp(const vecType a, const vecType b) const {
return vmaxq_f32(a, b); return hppl::binary::max<vecType>()(a, b);
} }
}; };
class SSEMin { class SSEMin {
public: public:
static const bool sse = true; static const bool sse = VECTOR_SIMD;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { INLINE vecType vecOp(const vecType a, const vecType b) const {
return vminq_f32(a, b); return hppl::binary::min<vecType>()(a, b);
} }
}; };
} // namespace aggregate } // namespace aggregate
...@@ -46,8 +48,8 @@ namespace base { ...@@ -46,8 +48,8 @@ namespace base {
namespace unary { namespace unary {
class SSEIdentity { class SSEIdentity {
public: public:
static const bool sse = true; static const bool sse = VECTOR_SIMD;
INLINE float32x4_t vecOp(const float32x4_t a) const { INLINE vecType vecOp(const vecType a) const {
return a; return a;
} }
}; };
...@@ -56,106 +58,96 @@ public: ...@@ -56,106 +58,96 @@ public:
namespace binary { namespace binary {
class SSEAdd { class SSEAdd {
public: public:
static const bool sse = true; static const bool sse = VECTOR_SIMD;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { INLINE vecType vecOp(const vecType a, const vecType b) const {
return vaddq_f32(a, b); return hppl::binary::add<vecType>()(a, b);
} }
}; };
class SSEAdd2 { class SSEAdd2 {
public: public:
static const bool sse = true; static const bool sse = VECTOR_SIMD;
const real p1; const real p1;
const real p2; const real p2;
float32x4_t mp1; vecType mp1;
float32x4_t mp2; vecType mp2;
public: public:
SSEAdd2(const real s1, const real s2) : p1(s1), p2(s2) { SSEAdd2(const real s1, const real s2) : p1(s1), p2(s2) {
mp1 = vdupq_n_f32(p1); mp1 = hl_vec_set(p1);
mp2 = vdupq_n_f32(p2); mp2 = hl_vec_set(p2);
} }
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { INLINE vecType vecOp(const vecType a, const vecType b) const {
float32x4_t tmp1, tmp2; return hppl::binary::add_scale<vecType>(mp1, mp2)(a, b);
tmp1 = vmulq_f32(mp1, a);
tmp2 = vmulq_f32(mp2, b);
return vaddq_f32(tmp1, tmp2);
} }
}; };
class SSESub { class SSESub {
public: public:
static const bool sse = true; static const bool sse = VECTOR_SIMD;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { INLINE vecType vecOp(const vecType a, const vecType b) const {
return vsubq_f32(a, b); return hppl::binary::sub<vecType>()(a, b);
} }
}; };
class SSEMul { class SSEMul {
public: public:
static const bool sse = true; static const bool sse = VECTOR_SIMD;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { INLINE vecType vecOp(const vecType a, const vecType b) const {
return vmulq_f32(a, b); return hppl::binary::mul<vecType>()(a, b);
} }
}; };
class SSEDiv { class SSEDiv {
public: public:
static const bool sse = true; static const bool sse = VECTOR_SIMD;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { INLINE vecType vecOp(const vecType a, const vecType b) const {
float32x4_t tmp; return hppl::binary::div<vecType>()(a, b);
tmp = vrecpeq_f32(b);
return vmulq_f32(a, tmp);
} }
}; };
class SSESquaredDiff { class SSESquaredDiff {
public: public:
static const bool sse = true; static const bool sse = VECTOR_SIMD;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { INLINE vecType vecOp(const vecType a, const vecType b) const {
float32x4_t tmp; vecType tmp = hppl::binary::sub<vecType>()(a, b);
tmp = vsubq_f32(a, b); return hppl::binary::mul<vecType>()(tmp, tmp);
return vmulq_f32(tmp, tmp);
} }
}; };
class SSEFirst { class SSEFirst {
public: public:
static const bool sse = true; static const bool sse = VECTOR_SIMD;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { INLINE vecType vecOp(const vecType a, const vecType b) const {
return a; return a;
} }
}; };
class SSESecond { class SSESecond {
public: public:
static const bool sse = true; static const bool sse = VECTOR_SIMD;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { INLINE vecType vecOp(const vecType a, const vecType b) const {
return b; return b;
} }
}; };
class SSEClassificationError { class SSEClassificationError {
public: public:
static const bool sse = true; static const bool sse = VECTOR_SIMD;
const real p; const real p;
float32x4_t mp; vecType mp;
uint32x4_t result; vecType result;
public: public:
explicit SSEClassificationError(const real s) : p(s) { explicit SSEClassificationError(const real s) : p(s) {
mp = vdupq_n_f32(p); mp = hl_vec_set(p);
result = vdupq_n_u32(1); result = hl_vec_set(1.0f);
} }
// TODO: to be check INLINE vecType vecOp(const vecType a, const vecType b) const {
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { return hl_vec_classification_error(a, b, mp, result);
uint32x4_t tmp1 = vcgtq_f32(a, mp);
uint32x4_t tmp2 = vcgtq_f32(b, mp);
uint32x4_t tmp3 = veorq_u32(tmp1, tmp2);
return vcvtq_f32_u32(vandq_u32(tmp3, result));
} }
}; };
} // namespace binary } // namespace binary
} // namespace base } // namespace base
#endif /* HL_MATRIX_BASE_NEON_CUH_ */ #endif /* HL_MATRIX_BASE_DETAIL_CUH_ */
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_MATRIX_BASE_SSE_CUH_
#define HL_MATRIX_BASE_SSE_CUH_
namespace aggregate {
class SSESum {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return _mm_add_ps(a, b);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return _mm_add_pd(a, b);
}
};
class SSEMax {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return _mm_max_ps(a, b);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return _mm_max_pd(a, b);
}
};
class SSEMin {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return _mm_min_ps(a, b);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return _mm_min_pd(a, b);
}
};
} // namespace aggregate
namespace base {
namespace unary {
class SSEIdentity {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a) const {
return a;
}
INLINE __m128d vecOp(const __m128d a) const {
return a;
}
};
} // namespace unary
namespace binary {
class SSEAdd {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return _mm_add_ps(a, b);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return _mm_add_pd(a, b);
}
};
class SSEAdd2 {
public:
static const bool sse = true;
const real p1;
const real p2;
union {__m128 f; __m128d d;} mp1;
union {__m128 f; __m128d d;} mp2;
public:
SSEAdd2(const real s1, const real s2) : p1(s1), p2(s2) {
if (sizeof(real) == sizeof(float)) {
mp1.f = _mm_set1_ps(p1);
mp2.f = _mm_set1_ps(p2);
} else {
mp1.d = _mm_set1_pd(p1);
mp2.d = _mm_set1_pd(p2);
}
}
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
__m128 tmp1, tmp2;
tmp1 = _mm_mul_ps(mp1.f, a);
tmp2 = _mm_mul_ps(mp2.f, b);
return _mm_add_ps(tmp1, tmp2);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
__m128d tmp1, tmp2;
tmp1 = _mm_mul_pd(mp1.d, a);
tmp2 = _mm_mul_pd(mp2.d, b);
return _mm_add_pd(tmp1, tmp2);
}
};
class SSESub {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return _mm_sub_ps(a, b);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return _mm_sub_pd(a, b);
}
};
class SSEMul {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return _mm_mul_ps(a, b);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return _mm_mul_pd(a, b);
}
};
class SSEDiv {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return _mm_div_ps(a, b);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return _mm_div_pd(a, b);
}
};
class SSESquaredDiff {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return _mm_mul_ps(_mm_sub_ps(a, b), _mm_sub_ps(a, b));
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return _mm_mul_pd(_mm_sub_pd(a, b), _mm_sub_pd(a, b));
}
};
class SSEFirst {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return a;
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return a;
}
};
class SSESecond {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return b;
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return b;
}
};
class SSEClassificationError {
public:
static const bool sse = true;
const real p;
union {__m128 f; __m128d d;} mp;
union {__m128 f; __m128d d;} result;
public:
explicit SSEClassificationError(const real s) : p(s) {
if (sizeof(real) == sizeof(float)) {
mp.f = _mm_set1_ps(p);
result.f = _mm_set1_ps(1.0f);
} else {
mp.d = _mm_set1_pd(p);
result.d = _mm_set1_pd(1.0);
}
}
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
__m128 tmp1 = _mm_cmpgt_ps(a, mp.f);
__m128 tmp2 = _mm_cmpgt_ps(b, mp.f);
__m128 tmp3 = _mm_xor_ps(tmp1, tmp2);
return _mm_and_ps(tmp3, result.f);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
__m128d tmp1 = _mm_cmpgt_pd(a, mp.d);
__m128d tmp2 = _mm_cmpgt_pd(b, mp.d);
__m128d tmp3 = _mm_xor_pd(tmp1, tmp2);
return _mm_and_pd(tmp3, result.d);
}
};
} // namespace binary
} // namespace base
#endif /* HL_MATRIX_BASE_SSE_CUH_ */
...@@ -17,35 +17,35 @@ limitations under the License. */ ...@@ -17,35 +17,35 @@ limitations under the License. */
#include "hl_base.h" #include "hl_base.h"
#if defined(__CUDA_ARCH__) #ifdef __CUDA_ARCH__
/**
* CUDA kernel inline function
*/
#define INLINE __device__ inline
#else
/**
* CPP inline function
*/
#define INLINE inline
#endif
#ifdef __CUDA_ARCH__
#include <vector_types.h> #include <vector_types.h>
#ifndef PADDLE_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
typedef float4 vecType; typedef float4 vecType;
#else #else
typedef double2 vecType; typedef double2 vecType;
#endif #endif
#elif (defined __ARM_NEON) || (defined __ARM_NEON__) #elif defined(__SSE3__)
#include <arm_neon.h> #include "hl_cpu_simd_sse.cuh"
#ifndef PADDLE_TYPE_DOUBLE #define PADDLE_USE_SSE3
typedef float32x4_t vecType; #elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && !defined(__NVCC__)
#else // Currently nvcc does not support neon intrinsic.
#error NEON instructions does not support double precision // TODO: Extract simd intrinsic implementation from .cu files.
#endif #include "hl_cpu_simd_neon.cuh"
#define PADDLE_USE_NEON
#else #else
#include <mmintrin.h> #include "hl_cpu_scalar.cuh"
#include <xmmintrin.h>
#include <emmintrin.h>
#ifndef PADDLE_TYPE_DOUBLE
typedef __m128 vecType;
#else
typedef __m128d vecType;
#endif
#endif
#ifdef __CUDA_ARCH__
#define INLINE __device__ inline
#else
#define INLINE inline
#endif #endif
#endif // HL_MATRIX_TYPE_CUH_ #endif // HL_MATRIX_TYPE_CUH_
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_NEON_MATRIX_KERNEL_CUH_
#define HL_NEON_MATRIX_KERNEL_CUH_
#include "hl_matrix_type.cuh"
#define VECTOR_SIZE 16
/* number of float in vector */
#define VECTOR_LEN 4
#define VECTOR_SET vdupq_n_f32
inline bool hl_check_align(size_t size) {
return !(size & (VECTOR_SIZE - 1));
}
inline bool hl_check_align(void *ptr) {
return hl_check_align(reinterpret_cast<size_t>(ptr));
}
template <class Agg>
inline real hl_agg_op(Agg agg, vecType mm) {
float32x4_t rev = vrev64q_f32(mm);
float32x4_t tmp1 = agg.vecOp(rev, rev);
float32x2_t lo = vget_high_f32(rev);
float32x2_t hi = vget_low_f32(rev);
float32x4_t tmp2 = vcombine_f32(hi, lo);
float32x4_t ret = agg.vecOp(tmp1, tmp2);
return vgetq_lane_f32(ret, 0);
}
template <class Agg, class Op, class Saver>
void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst, int ld,
real *A, int lda) {
for (int i = 0; i < dimM; i++, A += lda) {
vecType mm = VECTOR_SET(agg.init());
vecType *a = (vecType*)(A);
for (int j = 0; j < dimN / VECTOR_LEN; j++, a++) {
mm = agg.vecOp(mm, op.vecOp(*a));
}
int rem = dimN % VECTOR_LEN;
if (rem) {
real tmp = hl_agg_op(agg, mm);
real *a = A + (dimN / VECTOR_LEN) * VECTOR_LEN;
for (int j = 0; j < rem; j++) {
tmp = agg(tmp, op(a[j]));
}
dst[i*ld] = sv(dst[i*ld], tmp);
} else {
dst[i*ld] = sv(dst[i*ld], hl_agg_op(agg, mm));
}
}
}
template <class Agg, class Op, class Saver>
void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst, int ld,
real *A, int lda,
real *B, int ldb) {
for (int i = 0; i < dimM; i++, A += lda, B += ldb) {
vecType mm = VECTOR_SET(agg.init());
vecType *a = (vecType*)(A);
vecType *b = (vecType*)(B);
for (int j = 0; j < dimN / VECTOR_LEN; j++, a++, b++) {
mm = agg.vecOp(mm, op.vecOp(*a, *b));
}
int rem = dimN % VECTOR_LEN;
if (rem) {
real tmp = hl_agg_op(agg, mm);
real *a = A + (dimN / VECTOR_LEN) * VECTOR_LEN;
real *b = B + (dimN / VECTOR_LEN) * VECTOR_LEN;
for (int j = 0; j < rem; j++) {
tmp = agg(tmp, op(a[j], b[j]));
}
dst[i*ld] = sv(dst[i*ld], tmp);
} else {
dst[i*ld] = sv(dst[i*ld], hl_agg_op(agg, mm));
}
}
}
template <class Agg, class Op, class Saver>
void hl_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda) {
for (int j = 0; j < dimN; j++) {
real tmp = agg.init();
for (int i = 0; i < dimM; i++) {
tmp = agg(tmp, op(A[i * lda + j]));
}
dst[j] = sv(dst[j], tmp);
}
}
template <class Agg, class Op, class Saver>
void hl_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda,
real *B, int ldb) {
for (int j = 0; j < dimN; j++) {
real tmp = agg.init();
for (int i = 0; i < dimM; i++) {
tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j]));
}
dst[j] = sv(dst[j], tmp);
}
}
/*
* MaxRow greater than or equal dimN
* dimN is multiples of VECTOR_LEN
* so rem <= MaxRow / VECTOR_LEN
*/
template <int MaxRow, class Agg, class Op, class Saver>
void hl_sse_column_op_with_rem(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda) {
vecType mm[MaxRow / VECTOR_LEN];
for (int n = 0; n < MaxRow / VECTOR_LEN; n++) {
mm[n] = VECTOR_SET(agg.init());
}
for (int i = 0; i < dimM; i++) {
vecType *a = (vecType*)(A + i * lda);
for (int n = 0; n < dimN / VECTOR_LEN; n++) {
mm[n] = agg.vecOp(mm[n], op.vecOp(a[n]));
}
}
vecType *result = (vecType*)(dst);
for (int n = 0; n < dimN / VECTOR_LEN; n++) {
result[n] = sv.vecOp(result[n], mm[n]);
}
int rem = dimN % VECTOR_LEN;
if (rem) {
A += (dimN / VECTOR_LEN) * VECTOR_LEN;
dst += (dimN / VECTOR_LEN) * VECTOR_LEN;
hl_matrix_column_op(agg, op, sv, dimM, rem, dst, A, lda);
}
}
/*
* dimN is multiples of VECTOR_LEN
* dimN greater than Step
*/
template <int Step, class Agg, class Op, class Saver>
void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda) {
for (int j = 0; j < dimN / Step; j++, dst += Step, A += Step) {
vecType mm[Step / VECTOR_LEN];
for (int n = 0; n < Step / VECTOR_LEN; n++) {
mm[n] = VECTOR_SET(agg.init());
}
for (int i = 0; i < dimM; i++) {
vecType *a = (vecType*)(A + i * lda);
for (int n = 0; n < Step / VECTOR_LEN; n++) {
mm[n] = agg.vecOp(mm[n], op.vecOp(a[n]));
}
}
vecType *result = (vecType*)(dst);
for (int n = 0; n < Step / VECTOR_LEN; n++) {
result[n] = sv.vecOp(result[n], mm[n]);
}
}
int remRow = dimN % Step;
if (remRow) {
hl_sse_column_op_with_rem<Step>(agg, op, sv, dimM, remRow, dst, A, lda);
}
}
template <class Agg, class Op, class Saver>
void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda) {
if (dimN <= 16) {
hl_sse_matrix_column_op<16>(agg, op, sv, dimM, dimN, dst, A, lda);
} else if (dimN <= 32) {
hl_sse_matrix_column_op<32>(agg, op, sv, dimM, dimN, dst, A, lda);
} else if (dimN <= 1024 || dimM <= 512) {
hl_sse_matrix_column_op<64>(agg, op, sv, dimM, dimN, dst, A, lda);
} else {
hl_sse_matrix_column_op<1024>(agg, op, sv, dimM, dimN, dst, A, lda);
}
}
template <int MaxRow, class Agg, class Op, class Saver>
void hl_sse_column_op_with_rem(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda,
real *B, int ldb) {
vecType mm[MaxRow / VECTOR_LEN];
for (int n = 0; n < MaxRow / VECTOR_LEN; n++) {
mm[n] = VECTOR_SET(agg.init());
}
for (int i = 0; i < dimM; i++) {
vecType *a = (vecType*)(A + i * lda);
vecType *b = (vecType*)(B + i * ldb);
for (int n = 0; n < dimN / VECTOR_LEN; n++) {
mm[n] = agg.vecOp(mm[n], op.vecOp(a[n], b[n]));
}
}
vecType *result = (vecType*)(dst);
for (int n = 0; n < dimN / VECTOR_LEN; n++) {
result[n] = sv.vecOp(result[n], mm[n]);
}
int rem = dimN % VECTOR_LEN;
if (rem) {
A += (dimN / VECTOR_LEN) * VECTOR_LEN;
B += (dimN / VECTOR_LEN) * VECTOR_LEN;
dst += (dimN / VECTOR_LEN) * VECTOR_LEN;
hl_matrix_column_op(agg, op, sv, dimM, rem, dst, A, lda, B, ldb);
}
}
template <int Step, class Agg, class Op, class Saver>
void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda,
real *B, int ldb) {
for (int j = 0; j < dimN / Step; j++, dst += Step, A += Step, B += Step) {
vecType mm[Step / VECTOR_LEN];
for (int n = 0; n < Step / VECTOR_LEN; n++) {
mm[n] = VECTOR_SET(agg.init());
}
for (int i = 0; i < dimM; i++) {
vecType *a = (vecType*)(A + i * lda);
vecType *b = (vecType*)(B + i * ldb);
for (int n = 0; n < Step / VECTOR_LEN; n++) {
mm[n] = agg.vecOp(mm[n], op.vecOp(a[n], b[n]));
}
}
vecType *result = (vecType*)(dst);
for (int n = 0; n < Step / VECTOR_LEN; n++) {
result[n] = sv.vecOp(result[n], mm[n]);
}
}
int remRow = dimN % Step;
if (remRow) {
hl_sse_column_op_with_rem<Step>(
agg, op, sv, dimM, remRow, dst, A, lda, B, ldb);
}
}
template <class Agg, class Op, class Saver>
void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda,
real *B, int ldb) {
if (dimN <= 16) {
hl_sse_matrix_column_op<16>(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb);
} else if (dimN <= 32) {
hl_sse_matrix_column_op<32>(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb);
} else if (dimN <= 1024 || dimM <= 512) {
hl_sse_matrix_column_op<64>(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb);
} else {
hl_sse_matrix_column_op<1024>(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb);
}
}
#endif /* HL_NEON_MATRIX_KERNEL_CUH_ */
...@@ -328,6 +328,208 @@ public: ...@@ -328,6 +328,208 @@ public:
INLINE T operator()(const T a, const T b) const { return a < b ? b : a; } INLINE T operator()(const T a, const T b) const { return a < b ? b : a; }
}; };
#ifdef PADDLE_USE_SSE3
#ifndef PADDLE_TYPE_DOUBLE
template <>
class add<__m128> {
public:
INLINE __m128 operator()(const __m128 a, const __m128 b) const {
return _mm_add_ps(a, b);
}
};
template <>
class add_scale<__m128> {
private:
const __m128 p1;
const __m128 p2;
public:
INLINE add_scale(const __m128 s1, const __m128 s2) : p1(s1), p2(s2) {}
INLINE __m128 operator()(const __m128 a, const __m128 b) const {
return _mm_add_ps(_mm_mul_ps(p1, a), _mm_mul_ps(p2, b));
}
};
template <>
class sub<__m128> {
public:
INLINE __m128 operator()(const __m128 a, const __m128 b) const {
return _mm_sub_ps(a, b);
}
};
template <>
class mul<__m128> {
public:
INLINE __m128 operator()(const __m128 a, const __m128 b) const {
return _mm_mul_ps(a, b);
}
};
template <>
class div<__m128> {
public:
INLINE __m128 operator()(const __m128 a, const __m128 b) const {
return _mm_div_ps(a, b);
}
};
template <>
class min<__m128> {
public:
INLINE __m128 operator()(const __m128 a, const __m128 b) const {
return _mm_min_ps(a, b);
}
};
template <>
class max<__m128> {
public:
INLINE __m128 operator()(const __m128 a, const __m128 b) const {
return _mm_max_ps(a, b);
}
};
#else
template <>
class add<__m128d> {
public:
INLINE __m128d operator()(const __m128d a, const __m128d b) const {
return _mm_add_pd(a, b);
}
};
template <>
class add_scale<__m128d> {
private:
const __m128d p1;
const __m128d p2;
public:
INLINE add_scale(const __m128d s1, const __m128d s2) : p1(s1), p2(s2) {}
INLINE __m128d operator()(const __m128d a, const __m128d b) const {
return _mm_add_pd(_mm_mul_pd(p1, a), _mm_mul_pd(p2, b));
}
};
template <>
class sub<__m128d> {
public:
INLINE __m128d operator()(const __m128d a, const __m128d b) const {
return _mm_sub_pd(a, b);
}
};
template <>
class mul<__m128d> {
public:
INLINE __m128d operator()(const __m128d a, const __m128d b) const {
return _mm_mul_pd(a, b);
}
};
template <>
class div<__m128d> {
public:
INLINE __m128d operator()(const __m128d a, const __m128d b) const {
return _mm_div_pd(a, b);
}
};
template <>
class min<__m128d> {
public:
INLINE __m128d operator()(const __m128d a, const __m128d b) const {
return _mm_min_pd(a, b);
}
};
template <>
class max<__m128d> {
public:
INLINE __m128d operator()(const __m128d a, const __m128d b) const {
return _mm_max_pd(a, b);
}
};
#endif // PADDLE_TYPE_DOUBLE
#endif // PADDLE_USE_SSE3
#ifdef PADDLE_USE_NEON
#ifndef PADDLE_TYPE_DOUBLE
template <>
class add<float32x4_t> {
public:
INLINE float32x4_t operator()(const float32x4_t a,
const float32x4_t b) const {
return vmulq_f32(a, b);
}
};
template <>
class add_scale<float32x4_t> {
private:
const float32x4_t p1;
const float32x4_t p2;
public:
INLINE add_scale(const float32x4_t s1, const float32x4_t s2)
: p1(s1), p2(s2) {}
INLINE float32x4_t operator()(const float32x4_t a,
const float32x4_t b) const {
return vaddq_f32(vmulq_f32(p1, a), vmulq_f32(p2, b));
}
};
template <>
class sub<float32x4_t> {
public:
INLINE float32x4_t operator()(const float32x4_t a,
const float32x4_t b) const {
return vsubq_f32(a, b);
}
};
template <>
class mul<float32x4_t> {
public:
INLINE float32x4_t operator()(const float32x4_t a,
const float32x4_t b) const {
return vmulq_f32(a, b);
}
};
template <>
class div<float32x4_t> {
public:
INLINE float32x4_t operator()(const float32x4_t a,
const float32x4_t b) const {
float32x4_t tmp = vrecpeq_f32(b);
return vmulq_f32(a, tmp);
}
};
template <>
class min<float32x4_t> {
public:
INLINE float32x4_t operator()(const float32x4_t a,
const float32x4_t b) const {
return vminq_f32(a, b);
}
};
template <>
class max<float32x4_t> {
public:
INLINE float32x4_t operator()(const float32x4_t a,
const float32x4_t b) const {
return vmaxq_f32(a, b);
}
};
#else
#error To be implemented
#endif // PADDLE_TYPE_DOUBLE
#endif // PADDLE_USE_NEON
} // namespace binary } // namespace binary
} // namespace hppl } // namespace hppl
......
...@@ -14,8 +14,8 @@ add_library(paddle_function STATIC ${cpp_files} ${cu_objs}) ...@@ -14,8 +14,8 @@ add_library(paddle_function STATIC ${cpp_files} ${cu_objs})
add_dependencies(paddle_function ${external_project_dependencies}) add_dependencies(paddle_function ${external_project_dependencies})
add_dependencies(paddle_function gen_proto_cpp) add_dependencies(paddle_function gen_proto_cpp)
if(WITH_GPU)
if(WITH_TESTING) if(WITH_TESTING)
if(WITH_GPU)
# TODO: # TODO:
# file(GLOB test_files . *OpTest.cpp) # file(GLOB test_files . *OpTest.cpp)
# add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files}) # add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files})
...@@ -28,7 +28,10 @@ if(WITH_TESTING) ...@@ -28,7 +28,10 @@ if(WITH_TESTING)
add_simple_unittest(PadOpTest) add_simple_unittest(PadOpTest)
add_simple_unittest(MulOpTest) add_simple_unittest(MulOpTest)
add_simple_unittest(CosSimOpTest) add_simple_unittest(CosSimOpTest)
add_simple_unittest(RowConvOpTest)
endif() endif()
add_simple_unittest(ConvOpTest)
endif() endif()
add_style_check_target(paddle_function ${h_files}) add_style_check_target(paddle_function ${h_files})
......
...@@ -28,7 +28,7 @@ void testMatrixProjectionForward(int context_start, ...@@ -28,7 +28,7 @@ void testMatrixProjectionForward(int context_start,
std::max(0, (int)(context_start + context_length - 1)); std::max(0, (int)(context_start + context_length - 1));
if (pad == 0) is_padding = false; if (pad == 0) is_padding = false;
FunctionCompare test( CpuGpuFuncCompare test(
"ContextProjectionForward", "ContextProjectionForward",
FuncConfig() FuncConfig()
.set("context_length", context_length) .set("context_length", context_length)
...@@ -60,7 +60,7 @@ void testMatrixProjectionBackward(int context_start, ...@@ -60,7 +60,7 @@ void testMatrixProjectionBackward(int context_start,
std::max(0, (int)(context_start + context_length - 1)); std::max(0, (int)(context_start + context_length - 1));
if (pad == 0) is_padding = false; if (pad == 0) is_padding = false;
FunctionCompare test( CpuGpuFuncCompare test(
"ContextProjectionBackward", "ContextProjectionBackward",
FuncConfig() FuncConfig()
.set("context_length", context_length) .set("context_length", context_length)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Function.h"
namespace paddle {
/*
* \brief Based on the ConvFunctionBase class, the forward calculation,
* backward input calculation and backward filter calculation
* of convolution operations can be implemented.
*
* Arguments of forward and backward calculation:
* 1. Forward calculation of convolution.
* inputs = {INPUT, FILTER}, outputs = {OUTPUT}
* The first and second input arguments are input image and filter data.
* The output argument is output image.
*
* 2. Backward input calculation of convolution.
* inputs = {OUTPUT_GRAD, FILTER}, outputs = {INPUT_GRAD}
* The first and second input arguments are output grad image
* and filter data.
* The output argument is input grad image.
*
* 3. Backward filter calculation of convolution.
* inputs = {OUTPUT_GRAD, INPUT}, outputs = {FILTER_GRAD}
* The first and second input arguments are output grad image
* and input image.
* The output argument is filter grad.
*
* Arguments format of input, filter and output:
* 1. Input image, output image, input image gradient, output image gradient
* are all NCHW format. Where N is batch size, C is the number of channels,
* H and W is the height and width of image or image gradient.
*
* 2. The format of the filter data is MCHW, where M is the number of output
* image channels, C is the number of input image channels,
* H and W is height and width of filter.
*
* If `groups` is greater than 1, the filter's data format should be GMCHW,
* where G is the `groups`, and G * M is the number of output image
* channels, G * C is the number of input image channels,
* H and W is height and width of filter.
*/
class ConvFunctionBase : public FunctionBase {
public:
void init(const FuncConfig& config) override {
// function arguments
strides_ = config.get<std::vector<size_t>>("strides");
paddings_ = config.get<std::vector<size_t>>("paddings");
groups_ = config.get<size_t>("groups");
// number of inputs and outputs
numInputs_ = 2;
numOutputs_ = 1;
}
virtual void calc(const BufferArgs& inputs, const BufferArgs& outputs) {}
// input can be INPUT and INPUT_GRAD
// filter can be FILTER and FILTER_GRAD
// output can be OUTPUT and OUTPUT_GRAD
void check(const TensorShape& input,
const TensorShape& filter,
const TensorShape& output) {
// inputs and outputs arguments should be 4-dimensional.
CHECK_EQ(input.ndims(), (size_t)4);
CHECK_EQ(output.ndims(), (size_t)4);
// The batchSize of the input needs to be equal to
// the batchSize of the output.
CHECK_EQ(input[0], output[0]);
if (filter.ndims() == (size_t)4) {
// If the filter's dimension is 4, groups convolution is not supported.
CHECK_EQ(groups_, (size_t)1);
// The input and output channel dimensions are the second and first
// dimensions of the filter shape.
CHECK_EQ(input[1], filter[1]);
CHECK_EQ(output[1], filter[0]);
} else {
// filter argument should be 5-dimensional.
CHECK_EQ(filter.ndims(), (size_t)5);
// The first dimension of the filter is the size of the group
CHECK_EQ(filter[0], groups_);
// The input and output channel dimensions are the third and second
// dimensions of the filter shape.
CHECK_EQ(input[1], filter[2] * groups_);
CHECK_EQ(output[1], filter[1] * groups_);
}
}
protected:
size_t getFilterHeight(const TensorShape& filter) const {
return filter[filter.ndims() - 2];
}
size_t getFilterWidth(const TensorShape& filter) const {
return filter[filter.ndims() - 1];
}
std::vector<size_t> strides_;
std::vector<size_t> paddings_;
/// Group size, refer to grouped convolution in
/// Alex Krizhevsky's paper: when group=2, the first half of the
/// filters are only connected to the first half of the input channels,
/// and the second half only connected to the second half.
size_t groups_;
inline int strideH() const { return strides_[0]; }
inline int strideW() const { return strides_[1]; }
inline int paddingH() const { return paddings_[0]; }
inline int paddingW() const { return paddings_[1]; }
// A temporary memory in convolution calculation.
MemoryHandlePtr memory_;
template <DeviceType Device>
void resizeBuffer(size_t newSize) {
if (!memory_ || newSize * sizeof(real) > memory_->getAllocSize()) {
if (Device == DEVICE_TYPE_CPU) {
memory_ = std::make_shared<CpuMemoryHandle>(newSize * sizeof(real));
} else {
memory_ = std::make_shared<GpuMemoryHandle>(newSize * sizeof(real));
}
}
}
};
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "Function.h"
#include "FunctionTest.h"
namespace paddle {
enum TestType {
kForwardTest = 0,
kBackwardInputTest = 1,
kBackwardFilterTest = 2,
};
template <DeviceType DType1, DeviceType DType2>
class ConvolutionTest {
public:
ConvolutionTest(const std::string& conv1,
const std::string& conv2,
TestType type,
std::string algo = "auto") {
for (size_t batchSize : {1, 32}) {
for (size_t inputSize : {7, 14, 54}) {
for (size_t filterSize : {1, 3, 5}) {
for (size_t inputChannels : {3, 64}) {
for (size_t outputChannels : {3, 64, 128}) {
if (inputChannels < outputChannels) break;
for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) {
if (padding >= filterSize) break;
size_t outputSize =
(inputSize - filterSize + 2 * padding + stride) / stride;
VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputSize
<< " inputWidth=" << inputSize
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterSize
<< " filterWidth=" << filterSize
<< " outputHeight=" << outputSize
<< " outputWidth=" << outputSize
<< " stride=" << stride << " padding=" << padding;
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)1)
.set("algo", algo));
TensorShape input{
batchSize, inputChannels, inputSize, inputSize};
TensorShape filter{
outputChannels, inputChannels, filterSize, filterSize};
TensorShape output{
batchSize, outputChannels, outputSize, outputSize};
if (type == kForwardTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.run();
} else if (type == kBackwardInputTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO);
test.run();
} else if (type == kBackwardFilterTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.run();
}
}
}
}
}
}
}
}
}
};
// Mainly used to test cases where the height and width (input, filter)
// are not equal.
template <DeviceType DType1, DeviceType DType2>
class ConvolutionTest2 {
public:
ConvolutionTest2(const std::string& conv1,
const std::string& conv2,
TestType type,
std::string algo = "auto") {
for (size_t batchSize : {16}) {
for (size_t inputHeight : {7, 31}) {
for (size_t inputWidth : {10, 54}) {
for (size_t filterHeight : {1, 5}) {
for (size_t filterWidth : {3, 7}) {
for (size_t inputChannels : {7}) {
for (size_t outputChannels : {32}) {
size_t stride = 1;
size_t padding = 0;
size_t outputHeight =
(inputHeight - filterHeight + 2 * padding + stride) /
stride;
size_t outputWidth =
(inputWidth - filterWidth + 2 * padding + stride) /
stride;
VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputHeight
<< " inputWidth=" << inputWidth
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterHeight
<< " filterWidth=" << filterWidth
<< " outputHeight=" << outputHeight
<< " outputWidth=" << outputWidth
<< " stride=" << stride << " padding=" << padding;
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)1)
.set("algo", algo));
TensorShape input{
batchSize, inputChannels, inputHeight, inputWidth};
TensorShape filter{
outputChannels, inputChannels, filterHeight, filterWidth};
TensorShape output{
batchSize, outputChannels, outputHeight, outputWidth};
if (type == kForwardTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.run();
} else if (type == kBackwardInputTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO);
test.run();
} else if (type == kBackwardFilterTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.run();
}
}
}
}
}
}
}
}
}
};
TEST(Forward, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test(
"NaiveConv-CPU", "GemmConv-CPU", kForwardTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test2(
"NaiveConv-CPU", "GemmConv-CPU", kForwardTest);
}
#ifndef PADDLE_ONLY_CPU
TEST(Forward, GEMM2) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConv-CPU", "GemmConv-GPU", kForwardTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConv-CPU", "GemmConv-GPU", kForwardTest);
}
TEST(BackwardInput, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConvGradInput-CPU", "GemmConvGradInput-GPU", kBackwardInputTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConvGradInput-CPU", "GemmConvGradInput-GPU", kBackwardInputTest);
}
TEST(BackwardFilter, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", kBackwardFilterTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", kBackwardFilterTest);
}
#endif
} // namespace paddle
...@@ -22,7 +22,7 @@ void testCosSimForward(size_t height_x, ...@@ -22,7 +22,7 @@ void testCosSimForward(size_t height_x,
size_t height_y, size_t height_y,
size_t width, size_t width,
real scale) { real scale) {
FunctionCompare test("CosSimForward", FuncConfig().set("scale", scale)); CpuGpuFuncCompare test("CosSimForward", FuncConfig().set("scale", scale));
// prepare input arguments // prepare input arguments
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, width})); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, width}));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_y, width})); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_y, width}));
...@@ -36,7 +36,7 @@ void testCosSimBackward(size_t height_x, ...@@ -36,7 +36,7 @@ void testCosSimBackward(size_t height_x,
size_t height_y, size_t height_y,
size_t width, size_t width,
real scale) { real scale) {
FunctionCompare test("CosSimBackward", FuncConfig().set("scale", scale)); CpuGpuFuncCompare test("CosSimBackward", FuncConfig().set("scale", scale));
// prepare input arguments // prepare input arguments
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1})); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1}));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1})); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1}));
......
...@@ -28,7 +28,7 @@ TEST(CrossMapNormal, real) { ...@@ -28,7 +28,7 @@ TEST(CrossMapNormal, real) {
<< " size=" << size; << " size=" << size;
// init Test object // init Test object
FunctionCompare test("CrossMapNormal", CpuGpuFuncCompare test("CrossMapNormal",
FuncConfig() FuncConfig()
.set("size", size) .set("size", size)
.set("scale", (real)1.5) .set("scale", (real)1.5)
...@@ -57,7 +57,7 @@ TEST(CrossMapNormalGrad, real) { ...@@ -57,7 +57,7 @@ TEST(CrossMapNormalGrad, real) {
<< " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
<< " size=" << size; << " size=" << size;
FunctionCompare test("CrossMapNormalGrad", CpuGpuFuncCompare test("CrossMapNormalGrad",
FuncConfig() FuncConfig()
.set("size", size) .set("size", size)
.set("scale", (real)1.5) .set("scale", (real)1.5)
......
...@@ -22,14 +22,62 @@ namespace paddle { ...@@ -22,14 +22,62 @@ namespace paddle {
typedef std::shared_ptr<BufferArg> BufferArgPtr; typedef std::shared_ptr<BufferArg> BufferArgPtr;
namespace test {
template <DeviceType DType>
struct Allocator;
template <>
struct Allocator<DEVICE_TYPE_CPU> {
using type = CpuMemoryHandle;
};
template <>
struct Allocator<DEVICE_TYPE_GPU> {
using type = GpuMemoryHandle;
};
// Copy argument1 to argument2
template <DeviceType DType1, DeviceType DType2>
class CopyArgument {
public:
void operator()(const BufferArg& arg1, BufferArg& arg2) {
CHECK_EQ(arg1.valueType(), arg2.valueType());
CHECK_LE(arg1.shape().getElements(), arg2.shape().getElements());
if (arg1.valueType() == VALUE_TYPE_INT32) {
IVectorPtr vector1 =
IVector::create((int*)arg1.data(),
arg1.shape().getElements(),
DType1 == DEVICE_TYPE_CPU ? false : true);
IVectorPtr vector2 =
IVector::create((int*)arg2.data(),
arg2.shape().getElements(),
DType2 == DEVICE_TYPE_CPU ? false : true);
vector2->copyFrom(*vector1);
} else {
VectorPtr vector1 =
Vector::create((real*)arg1.data(),
arg1.shape().getElements(),
DType1 == DEVICE_TYPE_CPU ? false : true);
VectorPtr vector2 =
Vector::create((real*)arg2.data(),
arg2.shape().getElements(),
DType2 == DEVICE_TYPE_CPU ? false : true);
vector2->copyFrom(*vector1);
}
}
};
} // namespace test
/** /**
* \brief A class for comparing CPU and GPU implementations of Function. * \brief A class for comparing two Functions of different implementations.
* * For example, can be used to compare the CPU and GPU implementation
* of the function is consistent.
* *
* Use case: * Use case:
* // Initializes a test object, the corresponding cpu and gpu Function * // Initializes a test object, the corresponding cpu and gpu Function
* // are constructed according to FunctionName and FuncConfig. * // are constructed according to FunctionName and FuncConfig.
* FunctionCompare test(FunctionName, FuncConfig); * CpuGpuFuncCompare test(FunctionName, FuncConfig);
* // Prepare inputs and outputs arguments. * // Prepare inputs and outputs arguments.
* // Here the input and output can not contain real data, * // Here the input and output can not contain real data,
* // only contains the argument type and shape. * // only contains the argument type and shape.
...@@ -45,28 +93,38 @@ typedef std::shared_ptr<BufferArg> BufferArgPtr; ...@@ -45,28 +93,38 @@ typedef std::shared_ptr<BufferArg> BufferArgPtr;
* // Compares CPU and GPU calculation results for consistency. * // Compares CPU and GPU calculation results for consistency.
* test.run(); * test.run();
*/ */
class FunctionCompare { template <DeviceType DType1, DeviceType DType2>
class Compare2Function {
public: public:
FunctionCompare(const std::string& name, const FuncConfig& config) typedef typename test::Allocator<DType1>::type Allocator1;
: cpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-CPU")), typedef typename test::Allocator<DType2>::type Allocator2;
gpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-GPU")) { typedef typename Tensor<real, DType1>::Vector Vector1;
cpuFunc_->init(config); typedef typename Tensor<real, DType2>::Vector Vector2;
gpuFunc_->init(config); typedef typename Tensor<real, DType1>::SparseMatrix SparseMatrix1;
typedef typename Tensor<real, DType2>::SparseMatrix SparseMatrix2;
Compare2Function(const std::string& name1,
const std::string& name2,
const FuncConfig& config)
: function1_(FunctionBase::funcRegistrar_.createByType(name1)),
function2_(FunctionBase::funcRegistrar_.createByType(name2)) {
function1_->init(config);
function2_->init(config);
} }
~FunctionCompare() {} ~Compare2Function() {}
// input need only contains shape, do not contains data. // input need only contains shape, do not contains data.
void addInputs(const BufferArg& input) { void addInputs(const BufferArg& input) {
size_t size = size_t size =
input.shape().getElements() * sizeOfValuType(input.valueType()); input.shape().getElements() * sizeOfValuType(input.valueType());
cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size)); func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size)); func2Memory_.emplace_back(std::make_shared<Allocator2>(size));
cpuInputs_.emplace_back(std::make_shared<BufferArg>( func1Inputs_.emplace_back(std::make_shared<BufferArg>(
cpuMemory_.back()->getBuf(), input.valueType(), input.shape())); func1Memory_.back()->getBuf(), input.valueType(), input.shape()));
gpuInputs_.emplace_back(std::make_shared<BufferArg>( func2Inputs_.emplace_back(std::make_shared<BufferArg>(
gpuMemory_.back()->getBuf(), input.valueType(), input.shape())); func2Memory_.back()->getBuf(), input.valueType(), input.shape()));
} }
// assume one copy of sequence is shared by different SequenceArgs // assume one copy of sequence is shared by different SequenceArgs
...@@ -75,62 +133,57 @@ public: ...@@ -75,62 +133,57 @@ public:
size_t batchSize = input.shape()[0]; size_t batchSize = input.shape()[0];
size_t numSeqs = batchSize / 10 + 1; size_t numSeqs = batchSize / 10 + 1;
size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32); size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32);
cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(sizeId)); func1Memory_.emplace_back(std::make_shared<Allocator1>(sizeId));
gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(sizeId)); func2Memory_.emplace_back(std::make_shared<Allocator2>(sizeId));
cpuSeq_ = std::make_shared<SequenceIdArg>(cpuMemory_.back()->getBuf(), seq1_ = std::make_shared<SequenceIdArg>(func1Memory_.back()->getBuf(),
TensorShape{numSeqs + 1}); TensorShape{numSeqs + 1});
gpuSeq_ = std::make_shared<SequenceIdArg>(gpuMemory_.back()->getBuf(), seq2_ = std::make_shared<SequenceIdArg>(func2Memory_.back()->getBuf(),
TensorShape{numSeqs + 1}); TensorShape{numSeqs + 1});
/// init sequence Id /// init sequence Id
initArg(*cpuSeq_, batchSize); initArg(*seq1_, batchSize);
// todo(tianbing), delete it copyArg_(*seq1_, *seq2_);
CHECK_EQ(cpuSeq_->shape().getElements(), cpuSeq_->numSeqs() + 1);
CpuIVector cpuSeq(cpuSeq_->shape().getElements(), (int*)cpuSeq_->data());
GpuIVector gpuSeq(gpuSeq_->shape().getElements(), (int*)gpuSeq_->data());
gpuSeq.copyFrom(cpuSeq);
} }
void addInputs(const SequenceArg& input) { void addInputs(const SequenceArg& input) {
CHECK_EQ(input.shape().ndims(), 2UL); CHECK_EQ(input.shape().ndims(), 2UL);
size_t batchSize = input.shape()[0]; size_t batchSize = input.shape()[0];
if (!cpuSeq_ || !gpuSeq_) { // sequence not exist if (!seq1_ || !seq2_) { // sequence not exist
addSequence(SequenceIdArg(TensorShape{batchSize})); addSequence(SequenceIdArg(TensorShape{batchSize}));
} }
size_t size = size_t size =
input.shape().getElements() * sizeOfValuType(input.valueType()); input.shape().getElements() * sizeOfValuType(input.valueType());
cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size)); func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size)); func2Memory_.emplace_back(std::make_shared<Allocator2>(size));
/// SequenceArg /// SequenceArg
cpuInputs_.emplace_back( func1Inputs_.emplace_back(
std::make_shared<SequenceArg>(cpuMemory_.back()->getBuf(), std::make_shared<SequenceArg>(func1Memory_.back()->getBuf(),
input.valueType(), input.valueType(),
input.shape(), input.shape(),
*cpuSeq_)); *seq1_));
gpuInputs_.emplace_back( func2Inputs_.emplace_back(
std::make_shared<SequenceArg>(gpuMemory_.back()->getBuf(), std::make_shared<SequenceArg>(func2Memory_.back()->getBuf(),
input.valueType(), input.valueType(),
input.shape(), input.shape(),
*gpuSeq_)); *seq2_));
} }
// output need only contains shape, do not contains data. // output need only contains shape, do not contains data.
void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) { void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) {
size_t size = size_t size =
output.shape().getElements() * sizeOfValuType(output.valueType()); output.shape().getElements() * sizeOfValuType(output.valueType());
cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size)); func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size)); func2Memory_.emplace_back(std::make_shared<Allocator2>(size));
cpuOutputs_.emplace_back( func1Outputs_.emplace_back(
std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(), std::make_shared<BufferArg>(func1Memory_.back()->getBuf(),
output.valueType(), output.valueType(),
output.shape(), output.shape(),
argType)); argType));
gpuOutputs_.emplace_back( func2Outputs_.emplace_back(
std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(), std::make_shared<BufferArg>(func2Memory_.back()->getBuf(),
output.valueType(), output.valueType(),
output.shape(), output.shape(),
argType)); argType));
...@@ -138,14 +191,14 @@ public: ...@@ -138,14 +191,14 @@ public:
/// add and init output sparse matrix /// add and init output sparse matrix
void addOutputs(const SparseMatrixArg& output, ArgType argType = ASSIGN_TO) { void addOutputs(const SparseMatrixArg& output, ArgType argType = ASSIGN_TO) {
cpuSparse_ = std::make_shared<CpuSparseMatrix>( sparse1_ = std::make_shared<SparseMatrix1>(
output.shape()[0], output.shape()[0],
output.shape()[1], output.shape()[1],
output.nnz(), output.nnz(),
static_cast<SparseValueType>(output.dataType()), static_cast<SparseValueType>(output.dataType()),
static_cast<SparseFormat>(output.dataFormat())); static_cast<SparseFormat>(output.dataFormat()));
gpuSparse_ = std::make_shared<GpuSparseMatrix>( sparse2_ = std::make_shared<SparseMatrix2>(
output.shape()[0], output.shape()[0],
output.shape()[1], output.shape()[1],
output.nnz(), output.nnz(),
...@@ -154,52 +207,52 @@ public: ...@@ -154,52 +207,52 @@ public:
/// init sparse matrix /// init sparse matrix
hl_stream_t stream(HPPL_STREAM_1); hl_stream_t stream(HPPL_STREAM_1);
cpuSparse_->randomizeUniform(); sparse1_->randomizeUniform();
gpuSparse_->copyFrom(*cpuSparse_, stream); sparse2_->copyFrom(*sparse1_, stream);
hl_stream_synchronize(stream); hl_stream_synchronize(stream);
cpuOutputs_.emplace_back( func1Outputs_.emplace_back(
std::make_shared<SparseMatrixArg>(*cpuSparse_, argType)); std::make_shared<SparseMatrixArg>(*sparse1_, argType));
gpuOutputs_.emplace_back( func2Outputs_.emplace_back(
std::make_shared<SparseMatrixArg>(*gpuSparse_, argType)); std::make_shared<SparseMatrixArg>(*sparse2_, argType));
} }
void addOutputs(const SequenceArg& output, ArgType argType = ASSIGN_TO) { void addOutputs(const SequenceArg& output, ArgType argType = ASSIGN_TO) {
CHECK_EQ(output.shape().ndims(), 2UL); CHECK_EQ(output.shape().ndims(), 2UL);
size_t batchSize = output.shape()[0]; size_t batchSize = output.shape()[0];
if (!cpuSeq_ || !gpuSeq_) { // sequence not exist if (!seq1_ || !seq2_) { // sequence not exist
addSequence(SequenceIdArg(TensorShape{batchSize})); addSequence(SequenceIdArg(TensorShape{batchSize}));
} }
size_t size = size_t size =
output.shape().getElements() * sizeOfValuType(output.valueType()); output.shape().getElements() * sizeOfValuType(output.valueType());
cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size)); func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size)); func2Memory_.emplace_back(std::make_shared<Allocator2>(size));
/// SequenceArg /// SequenceArg
cpuOutputs_.emplace_back( func1Outputs_.emplace_back(
std::make_shared<SequenceArg>(cpuMemory_.back()->getBuf(), std::make_shared<SequenceArg>(func1Memory_.back()->getBuf(),
output.valueType(), output.valueType(),
output.shape(), output.shape(),
*cpuSeq_, *seq1_,
argType)); argType));
gpuOutputs_.emplace_back( func2Outputs_.emplace_back(
std::make_shared<SequenceArg>(gpuMemory_.back()->getBuf(), std::make_shared<SequenceArg>(func2Memory_.back()->getBuf(),
output.valueType(), output.valueType(),
output.shape(), output.shape(),
*gpuSeq_, *seq2_,
argType)); argType));
} }
void addInputs(const SparseMatrixArg& input) { void addInputs(const SparseMatrixArg& input) {
cpuSparse_ = std::make_shared<CpuSparseMatrix>( sparse1_ = std::make_shared<SparseMatrix1>(
input.shape()[0], input.shape()[0],
input.shape()[1], input.shape()[1],
input.nnz(), input.nnz(),
static_cast<SparseValueType>(input.dataType()), static_cast<SparseValueType>(input.dataType()),
static_cast<SparseFormat>(input.dataFormat())); static_cast<SparseFormat>(input.dataFormat()));
gpuSparse_ = std::make_shared<GpuSparseMatrix>( sparse2_ = std::make_shared<SparseMatrix2>(
input.shape()[0], input.shape()[0],
input.shape()[1], input.shape()[1],
input.nnz(), input.nnz(),
...@@ -208,12 +261,12 @@ public: ...@@ -208,12 +261,12 @@ public:
/// init sparse matrix /// init sparse matrix
hl_stream_t stream(HPPL_STREAM_1); hl_stream_t stream(HPPL_STREAM_1);
cpuSparse_->randomizeUniform(); sparse1_->randomizeUniform();
gpuSparse_->copyFrom(*cpuSparse_, stream); sparse2_->copyFrom(*sparse1_, stream);
hl_stream_synchronize(stream); hl_stream_synchronize(stream);
cpuInputs_.emplace_back(std::make_shared<SparseMatrixArg>(*cpuSparse_)); func1Inputs_.emplace_back(std::make_shared<SparseMatrixArg>(*sparse1_));
gpuInputs_.emplace_back(std::make_shared<SparseMatrixArg>(*gpuSparse_)); func2Inputs_.emplace_back(std::make_shared<SparseMatrixArg>(*sparse2_));
} }
void run() { void run() {
...@@ -236,27 +289,27 @@ public: ...@@ -236,27 +289,27 @@ public:
function->calc(inArgs, outArgs); function->calc(inArgs, outArgs);
}; };
callFunction(cpuFunc_.get(), cpuInputs_, cpuOutputs_); callFunction(function1_.get(), func1Inputs_, func1Outputs_);
callFunction(gpuFunc_.get(), gpuInputs_, gpuOutputs_); callFunction(function2_.get(), func2Inputs_, func2Outputs_);
// check outputs // check outputs
compareOutputs(); compareOutputs();
} }
std::shared_ptr<FunctionBase> getCpuFunction() const { return cpuFunc_; } std::shared_ptr<FunctionBase> getFunction1() const { return function1_; }
std::shared_ptr<FunctionBase> getGpuFunction() const { return gpuFunc_; } std::shared_ptr<FunctionBase> getFunction2() const { return function2_; }
protected: protected:
// only init cpu argument, gpu argument copy from cpu argument. // only init cpu argument, gpu argument copy from cpu argument.
void initArg(BufferArg& arg) { void initArg(BufferArg& arg) {
CpuVector vector(arg.shape().getElements(), (real*)arg.data()); Vector1 vector(arg.shape().getElements(), (real*)arg.data());
vector.uniform(0.001, 1); vector.uniform(0.001, 1);
} }
void initArg(SequenceArg& arg) { void initArg(SequenceArg& arg) {
/// init only matrix /// init only matrix
CpuVector vector(arg.shape().getElements(), (real*)arg.data()); Vector1 vector(arg.shape().getElements(), (real*)arg.data());
vector.uniform(0.001, 1); vector.uniform(0.001, 1);
} }
...@@ -276,73 +329,72 @@ protected: ...@@ -276,73 +329,72 @@ protected:
} }
void initInputs() { void initInputs() {
for (size_t i = 0; i < cpuInputs_.size(); i++) { for (size_t i = 0; i < func1Inputs_.size(); i++) {
if (cpuInputs_[i]->isSparseArg()) { if (func1Inputs_[i]->isSparseArg()) {
continue; /// sparse matrix already init continue; /// sparse matrix already init
} }
if (cpuInputs_[i]->isSequenceArg()) { if (func1Inputs_[i]->isSequenceArg()) {
initArg(dynamic_cast<SequenceArg&>(*cpuInputs_[i])); initArg(dynamic_cast<SequenceArg&>(*func1Inputs_[i]));
} else { } else {
initArg(*cpuInputs_[i]); initArg(*func1Inputs_[i]);
} }
// TODO: Need a BufferCopy used to copy from one BufferArg to another.
CpuVector cpuVector(cpuInputs_[i]->shape().getElements(),
(real*)cpuInputs_[i]->data());
GpuVector gpuVector(gpuInputs_[i]->shape().getElements(),
(real*)gpuInputs_[i]->data());
gpuVector.copyFrom(cpuVector); copyArg_(*func1Inputs_[i], *func2Inputs_[i]);
} }
} }
void initOutputs() { void initOutputs() {
for (size_t i = 0; i < cpuOutputs_.size(); i++) { for (size_t i = 0; i < func1Outputs_.size(); i++) {
if (cpuOutputs_[i]->isSparseArg()) { if (func1Outputs_[i]->isSparseArg()) {
continue; /// sparse matrix already init continue; /// sparse matrix already init
} }
if (cpuOutputs_[i]->isSequenceArg()) { if (func1Outputs_[i]->isSequenceArg()) {
initArg(dynamic_cast<SequenceArg&>(*cpuOutputs_[i])); initArg(dynamic_cast<SequenceArg&>(*func1Outputs_[i]));
} else { } else {
initArg(*cpuOutputs_[i]); initArg(*func1Outputs_[i]);
} }
// TODO: Need a BufferCopy used to copy from one BufferArg to another. copyArg_(*func1Outputs_[i], *func2Outputs_[i]);
CpuVector cpuVector(cpuOutputs_[i]->shape().getElements(),
(real*)cpuOutputs_[i]->data());
GpuVector gpuVector(gpuOutputs_[i]->shape().getElements(),
(real*)gpuOutputs_[i]->data());
gpuVector.copyFrom(cpuVector);
} }
} }
void compareOutputs() { void compareOutputs() {
for (size_t i = 0; i < cpuOutputs_.size(); i++) { for (size_t i = 0; i < func1Outputs_.size(); i++) {
// TODO, Need a BufferCheck used to compare the two buffers. // TODO, Need a BufferCheck used to compare the two buffers.
const auto cpu = cpuOutputs_[i]; const auto cpu = func1Outputs_[i];
const auto gpu = gpuOutputs_[i]; const auto gpu = func2Outputs_[i];
CHECK_EQ(cpu->numElements(), gpu->numElements()); CHECK_EQ(cpu->numElements(), gpu->numElements());
CpuVector cpuVector(cpu->numElements(), (real*)cpu->data()); Vector1 cpuVector(cpu->numElements(), (real*)cpu->data());
GpuVector gpuVector(gpu->numElements(), (real*)gpu->data()); Vector2 gpuVector(gpu->numElements(), (real*)gpu->data());
autotest::TensorCheckErr(cpuVector, gpuVector); autotest::TensorCheckErr(cpuVector, gpuVector);
} }
} }
protected: protected:
std::shared_ptr<FunctionBase> cpuFunc_; std::shared_ptr<FunctionBase> function1_;
std::shared_ptr<FunctionBase> gpuFunc_; std::shared_ptr<FunctionBase> function2_;
std::vector<CpuMemHandlePtr> cpuMemory_; std::vector<std::shared_ptr<Allocator1>> func1Memory_;
std::vector<GpuMemHandlePtr> gpuMemory_; std::vector<std::shared_ptr<Allocator2>> func2Memory_;
std::vector<BufferArgPtr> cpuInputs_; std::vector<BufferArgPtr> func1Inputs_;
std::vector<BufferArgPtr> cpuOutputs_; std::vector<BufferArgPtr> func1Outputs_;
std::vector<BufferArgPtr> gpuInputs_; std::vector<BufferArgPtr> func2Inputs_;
std::vector<BufferArgPtr> gpuOutputs_; std::vector<BufferArgPtr> func2Outputs_;
std::shared_ptr<CpuSparseMatrix> cpuSparse_; std::shared_ptr<SparseMatrix1> sparse1_;
std::shared_ptr<GpuSparseMatrix> gpuSparse_; std::shared_ptr<SparseMatrix2> sparse2_;
std::shared_ptr<SequenceIdArg> cpuSeq_; std::shared_ptr<SequenceIdArg> seq1_;
std::shared_ptr<SequenceIdArg> gpuSeq_; std::shared_ptr<SequenceIdArg> seq2_;
test::CopyArgument<DType1, DType2> copyArg_;
};
class CpuGpuFuncCompare
: public Compare2Function<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> {
public:
CpuGpuFuncCompare(const std::string& name, const FuncConfig& config)
: Compare2Function(name + "-CPU", name + "-GPU", config) {}
~CpuGpuFuncCompare() {}
}; };
} // namespace paddle } // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "GemmConvOp.h"
#include "GemmFunctor.h"
#include "paddle/math/MemoryHandle.h"
namespace paddle {
/*
* imData = [input_channels, input_height, input_width]
* colData = [input_channels, filter_height, filter_width,
* output_height, output_width]
*/
template <class T>
class Im2ColFunctor<DEVICE_TYPE_CPU, T> {
public:
void operator()(const T* imData,
int inputChannels,
int inputHeight,
int inputWidth,
int filterHeight,
int filterWidth,
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth,
int outputHeight,
int outputWidth,
T* colData) {
int channelsCol = inputChannels * filterHeight * filterWidth;
for (int c = 0; c < channelsCol; ++c) {
int wOffset = c % filterWidth;
int hOffset = (c / filterWidth) % filterHeight;
int c_im = c / filterWidth / filterHeight;
for (int h = 0; h < outputHeight; ++h) {
for (int w = 0; w < outputWidth; ++w) {
int imRowIdx = h * strideHeight + hOffset;
int imColIdx = w * strideWidth + wOffset;
if ((imRowIdx - paddingHeight) < 0 ||
(imRowIdx - paddingHeight) >= inputHeight ||
(imColIdx - paddingWidth) < 0 ||
(imColIdx - paddingWidth) >= inputWidth) {
colData[(c * outputHeight + h) * outputWidth + w] = T(0);
} else {
imRowIdx += c_im * inputHeight - paddingHeight;
imColIdx -= paddingWidth;
colData[(c * outputHeight + h) * outputWidth + w] =
imData[imRowIdx * inputWidth + imColIdx];
}
}
}
}
}
};
template <class T>
class Col2ImFunctor<DEVICE_TYPE_CPU, T> {
public:
void operator()(const T* colData,
int inputChannels,
int inputHeight,
int inputWidth,
int filterHeight,
int filterWidth,
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth,
int outputHeight,
int outputWidth,
T* imData) {
int channelsCol = inputChannels * filterHeight * filterWidth;
for (int c = 0; c < channelsCol; ++c) {
int wOffset = c % filterWidth;
int hOffset = (c / filterWidth) % filterHeight;
int c_im = c / filterWidth / filterHeight;
for (int h = 0; h < outputHeight; ++h) {
for (int w = 0; w < outputWidth; ++w) {
int imRowIdx = h * strideHeight + hOffset;
int imColIdx = w * strideWidth + wOffset;
if ((imRowIdx - paddingHeight) >= 0 &&
(imRowIdx - paddingHeight) < inputHeight &&
(imColIdx - paddingWidth) >= 0 &&
(imColIdx - paddingWidth) < inputWidth) {
imRowIdx += c_im * inputHeight - paddingHeight;
imColIdx -= paddingWidth;
imData[imRowIdx * inputWidth + imColIdx] +=
colData[(c * outputHeight + h) * outputWidth + w];
}
}
}
}
}
};
/*
* \brief Forward calculation of convolution.
*/
template <DeviceType Device>
class GemmConvFunction : public ConvFunctionBase {
public:
void init(const FuncConfig& config) override {
ConvFunctionBase::init(config);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
// TODO(hedaoyuan): Need to define some index macros,
// to avoid useing 0 and 1.
const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape();
check(input, filter, output);
real beta;
if (outputs[0].getArgType() == ADD_TO) {
beta = 1.0;
} else {
beta = 0.0;
}
size_t batchSize = input[0];
size_t inputChannels = input[1];
size_t inputHeight = input[2];
size_t inputWidth = input[3];
size_t filterHeight = getFilterHeight(filter);
size_t filterWidth = getFilterWidth(filter);
size_t outputChannels = output[1];
size_t outputHeight = output[2];
size_t outputWidth = output[3];
real* inputData = inputs[0].data<real>();
real* filterData = inputs[1].data<real>();
real* outputData = outputs[0].data<real>();
size_t size = inputChannels / groups_ * filterHeight * filterWidth *
outputHeight * outputWidth;
resizeBuffer<Device>(size);
real* colData = reinterpret_cast<real*>(memory_->getBuf());
Im2ColFunctor<Device, real> im2col;
GemmFunctor<Device, real> gemm;
size_t inputOffset = (inputChannels / groups_) * inputHeight * inputWidth;
size_t outputOffset =
(outputChannels / groups_) * outputHeight * outputWidth;
size_t filterOffset = filter.getElements() / groups_;
for (size_t i = 0; i < batchSize; i++) {
for (size_t g = 0; g < groups_; g++) {
im2col(inputData + g * inputOffset,
inputChannels / groups_,
inputHeight,
inputWidth,
filterHeight,
filterWidth,
strideH(),
strideW(),
paddingH(),
paddingW(),
outputHeight,
outputWidth,
colData);
int M = outputChannels / groups_;
int N = outputHeight * outputWidth;
int K = inputChannels / groups_ * filterHeight * filterWidth;
gemm(CblasNoTrans,
CblasNoTrans,
M,
N,
K,
1.0f,
filterData + g * filterOffset,
K,
colData,
N,
beta,
outputData + g * outputOffset,
N);
}
inputData += inputChannels * inputHeight * inputWidth;
outputData += outputChannels * outputHeight * outputWidth;
}
}
};
/*
* \brief Backward input calculation of convolution.
*/
template <DeviceType Device>
class GemmConvGradInputFunction : public ConvFunctionBase {
public:
void init(const FuncConfig& config) override {
ConvFunctionBase::init(config);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
// Since the implementation of Col2ImFunctor is ADD_TO,
// this function only supports ADD_TO mode.
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
const TensorShape& output = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& input = outputs[0].shape();
check(input, filter, output);
size_t batchSize = input[0];
size_t inputChannels = input[1];
size_t inputHeight = input[2];
size_t inputWidth = input[3];
size_t filterHeight = getFilterHeight(filter);
size_t filterWidth = getFilterWidth(filter);
size_t outputChannels = output[1];
size_t outputHeight = output[2];
size_t outputWidth = output[3];
real* outputGrad = inputs[0].data<real>();
real* filterData = inputs[1].data<real>();
real* inputGrad = outputs[0].data<real>();
size_t size = inputChannels / groups_ * filterHeight * filterWidth *
outputHeight * outputWidth;
resizeBuffer<Device>(size);
real* colData = reinterpret_cast<real*>(memory_->getBuf());
Col2ImFunctor<Device, real> col2im;
GemmFunctor<Device, real> gemm;
size_t inputOffset = (inputChannels / groups_) * inputHeight * inputWidth;
size_t outputOffset =
(outputChannels / groups_) * outputHeight * outputWidth;
size_t filterOffset = filter.getElements() / groups_;
for (size_t i = 0; i < batchSize; i++) {
for (size_t g = 0; g < groups_; g++) {
int K = outputChannels / groups_;
int N = outputHeight * outputWidth;
int M = inputChannels / groups_ * filterHeight * filterWidth;
gemm(CblasTrans,
CblasNoTrans,
M,
N,
K,
1.0f,
filterData + g * filterOffset,
M,
outputGrad + g * outputOffset,
N,
0.0f,
colData,
N);
col2im(colData,
inputChannels / groups_,
inputHeight,
inputWidth,
filterHeight,
filterWidth,
strideH(),
strideW(),
paddingH(),
paddingW(),
outputHeight,
outputWidth,
inputGrad + g * inputOffset);
}
inputGrad += inputChannels * inputHeight * inputWidth;
outputGrad += outputChannels * outputHeight * outputWidth;
}
}
};
/*
* \brief Backward filter calculation of convolution.
*/
template <DeviceType Device>
class GemmConvGradFilterFunction : public ConvFunctionBase {
public:
void init(const FuncConfig& config) override {
ConvFunctionBase::init(config);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
const TensorShape& output = inputs[0].shape();
const TensorShape& input = inputs[1].shape();
const TensorShape& filter = outputs[0].shape();
check(input, filter, output);
real beta;
if (outputs[0].getArgType() == ADD_TO) {
beta = 1.0;
} else {
beta = 0.0;
}
size_t batchSize = input[0];
size_t inputChannels = input[1];
size_t inputHeight = input[2];
size_t inputWidth = input[3];
size_t filterHeight = getFilterHeight(filter);
size_t filterWidth = getFilterWidth(filter);
size_t outputChannels = output[1];
size_t outputHeight = output[2];
size_t outputWidth = output[3];
real* outputGrad = inputs[0].data<real>();
real* inputData = inputs[1].data<real>();
real* filterGrad = outputs[0].data<real>();
size_t size = inputChannels / groups_ * filterHeight * filterWidth *
outputHeight * outputWidth;
resizeBuffer<Device>(size);
real* colData = reinterpret_cast<real*>(memory_->getBuf());
Im2ColFunctor<Device, real> im2col;
GemmFunctor<Device, real> gemm;
size_t inputOffset = (inputChannels / groups_) * inputHeight * inputWidth;
size_t outputOffset =
(outputChannels / groups_) * outputHeight * outputWidth;
size_t filterOffset = filter.getElements() / groups_;
for (size_t i = 0; i < batchSize; i++) {
for (size_t g = 0; g < groups_; g++) {
im2col(inputData + g * inputOffset,
inputChannels / groups_,
inputHeight,
inputWidth,
filterHeight,
filterWidth,
strideH(),
strideW(),
paddingH(),
paddingW(),
outputHeight,
outputWidth,
colData);
int M = outputChannels / groups_;
int K = outputHeight * outputWidth;
int N = inputChannels / groups_ * filterHeight * filterWidth;
gemm(CblasNoTrans,
CblasTrans,
M,
N,
K,
1.0f,
outputGrad + g * outputOffset,
K,
colData,
K,
i == 0 ? beta : 1.0f,
filterGrad + g * filterOffset,
N);
}
inputData += inputChannels * inputHeight * inputWidth;
outputGrad += outputChannels * outputHeight * outputWidth;
}
}
};
REGISTER_TYPED_FUNC(GemmConv, CPU, GemmConvFunction);
REGISTER_TYPED_FUNC(GemmConvGradInput, CPU, GemmConvGradInputFunction);
REGISTER_TYPED_FUNC(GemmConvGradFilter, CPU, GemmConvGradFilterFunction);
#ifndef PADDLE_ONLY_CPU
REGISTER_TYPED_FUNC(GemmConv, GPU, GemmConvFunction);
REGISTER_TYPED_FUNC(GemmConvGradInput, GPU, GemmConvGradInputFunction);
REGISTER_TYPED_FUNC(GemmConvGradFilter, GPU, GemmConvGradFilterFunction);
#endif
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "ConvOp.h"
namespace paddle {
/*
* imData = [input_channels, input_height, input_width]
* colData = [input_channels, filter_height, filter_width,
* output_height, output_width]
*/
template <DeviceType Device, class T>
class Im2ColFunctor {
public:
void operator()(const T* imData,
int inputChannels,
int inputHeight,
int inputWidth,
int filterHeight,
int filterWidth,
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth,
int outputHeight,
int outputWidth,
T* colData);
};
template <DeviceType Device, class T>
class Col2ImFunctor {
public:
void operator()(const T* colData,
int inputChannels,
int inputHeight,
int inputWidth,
int filterHeight,
int filterWidth,
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth,
int outputHeight,
int outputWidth,
T* imData);
};
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ConvOp.h"
#include "GemmConvOp.h"
namespace paddle {
template<class T>
__global__
void im2col(const T* data_im, int numOuts, int height, int width,
int blockH, int blockW,
int strideH, int strideW,
int paddingH, int paddingW,
int height_col, int width_col,
T* data_col) {
int index =
(blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
if (index < numOuts) {
int w_out = index % width_col;
index /= width_col;
int h_out = index % height_col;
int channel_in = index / height_col;
int channel_out = channel_in * blockH * blockW;
int h_in = h_out * strideH;
int w_in = w_out * strideW;
data_col += (channel_out * height_col + h_out) * width_col + w_out;
for (int i = 0; i < blockH; ++i) {
for (int j = 0; j < blockW; ++j) {
int rIdx = int(h_in+i);
int cIdx = int(w_in+j);
if ((rIdx-(int)paddingH) >= (int)height ||
(rIdx-(int)paddingH) < 0 ||
(cIdx-(int)paddingW) >= (int)width ||
(cIdx-(int)paddingW) < 0) {
*data_col = 0;
} else {
rIdx = rIdx + channel_in*height - paddingH;
cIdx = cIdx - paddingW;
*data_col = data_im[rIdx* width + cIdx];
}
data_col += height_col * width_col;
}
}
}
}
template <class T>
class Im2ColFunctor<DEVICE_TYPE_GPU, T> {
public:
void operator()(const T* imData,
int inputChannels,
int inputHeight,
int inputWidth,
int filterHeight,
int filterWidth,
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth,
int outputHeight,
int outputWidth,
T* colData) {
int numKernels = inputChannels * outputHeight * outputWidth;
int blocks = (numKernels + 1024 -1) / 1024;
int blockX = 512;
int blockY = (blocks + 512 - 1) / 512;
dim3 threads(1024, 1);
dim3 grid(blockX, blockY);
im2col<T><<< grid, threads, 0, STREAM_DEFAULT >>>
(imData, numKernels, inputHeight, inputWidth, filterHeight, filterWidth,
strideHeight, strideWidth, paddingHeight, paddingWidth,
outputHeight, outputWidth, colData);
CHECK_SYNC("Im2ColFunctor GPU failed");
}
};
template<class T>
__global__
void col2im(size_t n, const T* data_col, size_t height,
size_t width, size_t channels,
size_t blockH, size_t blockW,
size_t strideH, size_t strideW,
size_t paddingH, size_t paddingW,
size_t height_col, size_t width_col,
T* data_im) {
size_t index =
(blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
if (index < n) {
T val = 0;
int w = int(index % width);
int h = int((index / width) % height);
int c = int(index / (width * height));
if ((w - (int)paddingW) >= 0 &&
(w - (int)paddingW) < (width-2 * paddingW) &&
(h - (int)paddingH) >= 0 &&
(h - paddingH) < (height - 2 * paddingH)) {
// compute the start and end of the output
int w_col_start =
(w < (int)blockW) ? 0 : (w - int(blockW)) / (int)strideW + 1;
int w_col_end =
min((int)(w / (int)strideW + 1), (int)(width_col));
int h_col_start =
(h < (int)blockH) ? 0 : (h - (int)blockH) / (int)strideH + 1;
int h_col_end = min(int(h / strideH + 1), int(height_col));
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
// the col location: [c * width * height + h_out, w_out]
int c_col = int(c * blockH* blockW) + \
(h - h_col * (int)strideH) * (int)blockW +
(w - w_col * (int)strideW);
val += data_col[(c_col * height_col + h_col) * width_col + w_col];
}
}
h -= paddingH;
w -= paddingW;
data_im[c*((width-2*paddingW) * (height-2*paddingH)) +
h*(width-2*paddingW) + w] += val;
}
}
}
template <class T>
class Col2ImFunctor<DEVICE_TYPE_GPU, T> {
public:
void operator()(const T* colData,
int inputChannels,
int inputHeight,
int inputWidth,
int filterHeight,
int filterWidth,
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth,
int outputHeight,
int outputWidth,
T* imData) {
size_t numKernels = inputChannels * (inputHeight + 2*paddingHeight)
* (inputWidth + 2*paddingWidth);
size_t blocks = (numKernels + 1024 -1) / 1024;
size_t blockX = 512;
size_t blockY = (blocks+512-1)/512;
dim3 threads(1024, 1);
dim3 grid(blockX, blockY);
// To avoid involving atomic operations, we will launch one kernel per
// bottom dimension, and then in the kernel add up the top dimensions.
col2im<T><<< grid, threads, 0, STREAM_DEFAULT >>>
(numKernels,
colData,
inputHeight + 2*paddingHeight,
inputWidth + 2*paddingWidth,
inputChannels,
filterHeight,
filterWidth,
strideHeight,
strideWidth,
paddingHeight,
paddingWidth,
outputHeight,
outputWidth,
imData);
CHECK_SYNC("Col2ImFunctor GPU failed");
}
};
template class Im2ColFunctor<DEVICE_TYPE_GPU, float>;
template class Im2ColFunctor<DEVICE_TYPE_GPU, double>;
template class Col2ImFunctor<DEVICE_TYPE_GPU, float>;
template class Col2ImFunctor<DEVICE_TYPE_GPU, double>;
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/math/MathFunctions.h"
namespace paddle {
// TODO(hedaoyuan): Since the hl_matrix_mul interface does not conform to the
// cblas_dgemm interface's parameter format, it is necessary to introduce
// GemmFunctor as a new interface. Later, when considering the implementation
// of MatMulFunction, we need to consider the reconstruction of hl_matrix_mul
// interface.
template <DeviceType Device, class T>
class GemmFunctor {
public:
void operator()(const CBLAS_TRANSPOSE transA,
const CBLAS_TRANSPOSE TransB,
const int M,
const int N,
const int K,
const T alpha,
const T* A,
const int lda,
const T* B,
const int ldb,
const T beta,
T* C,
const int ldc);
};
template <class T>
class GemmFunctor<DEVICE_TYPE_CPU, T> {
public:
void operator()(const CBLAS_TRANSPOSE transA,
const CBLAS_TRANSPOSE TransB,
const int M,
const int N,
const int K,
const T alpha,
const T* A,
const int lda,
const T* B,
const int ldb,
const T beta,
T* C,
const int ldc) {
gemm<T>(transA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
}
};
template <class T>
class GemmFunctor<DEVICE_TYPE_GPU, T> {
public:
void operator()(const CBLAS_TRANSPOSE transA,
const CBLAS_TRANSPOSE TransB,
const int M,
const int N,
const int K,
const T alpha,
const T* A,
const int lda,
const T* B,
const int ldb,
const T beta,
T* C,
const int ldc) {
hl_matrix_mul((T*)A,
transA == CblasNoTrans ? HPPL_OP_N : HPPL_OP_T,
(T*)B,
TransB == CblasNoTrans ? HPPL_OP_N : HPPL_OP_T,
C,
M,
N,
K,
alpha,
beta,
lda,
ldb,
ldc);
}
};
} // namespace paddle
...@@ -35,7 +35,7 @@ void testFuncDDDMatrix( ...@@ -35,7 +35,7 @@ void testFuncDDDMatrix(
size_t heightC = dimM; size_t heightC = dimM;
size_t widthC = dimN; size_t widthC = dimN;
// init Test object // init Test object
FunctionCompare test( CpuGpuFuncCompare test(
"MulOp", FuncConfig().set("aTrans", transa).set("bTrans", transb)); "MulOp", FuncConfig().set("aTrans", transa).set("bTrans", transb));
// prepare input arguments // prepare input arguments
/// matrix A : HA * WA /// matrix A : HA * WA
...@@ -81,8 +81,8 @@ void testFuncDSparseDMatrix( ...@@ -81,8 +81,8 @@ void testFuncDSparseDMatrix(
size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real scaleT = 1.0; real scaleT = 1.0;
// init Test object // init Test object
FunctionCompare test("MulOp", CpuGpuFuncCompare test(
FuncConfig().set("aTrans", false).set("bTrans", false)); "MulOp", FuncConfig().set("aTrans", false).set("bTrans", false));
// prepare input arguments // prepare input arguments
/// sparse matrix A : M * K /// sparse matrix A : M * K
test.addInputs(SparseMatrixArg( test.addInputs(SparseMatrixArg(
...@@ -126,8 +126,8 @@ void testFuncDDSparseMatrix( ...@@ -126,8 +126,8 @@ void testFuncDDSparseMatrix(
size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real scaleT = 1.0; real scaleT = 1.0;
// init Test object // init Test object
FunctionCompare test("MulOp", CpuGpuFuncCompare test(
FuncConfig().set("aTrans", false).set("bTrans", false)); "MulOp", FuncConfig().set("aTrans", false).set("bTrans", false));
// prepare input arguments // prepare input arguments
/// matrix A : M * K /// matrix A : M * K
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK})); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK}));
...@@ -172,8 +172,8 @@ void testFuncSparseDDMatrix( ...@@ -172,8 +172,8 @@ void testFuncSparseDDMatrix(
size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
real scaleT = 1.0; real scaleT = 1.0;
// init Test object // init Test object
FunctionCompare test("MulOp", CpuGpuFuncCompare test(
FuncConfig().set("aTrans", false).set("bTrans", false)); "MulOp", FuncConfig().set("aTrans", false).set("bTrans", false));
// prepare input arguments // prepare input arguments
/// matrix A : M * K /// matrix A : M * K
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK})); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK}));
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ConvOp.h"
namespace paddle {
/*
* The three arguments are stored in memory in row major order.
* inputData = [batchSize, inputChannels, inputHeight, inputWidth]
* filterData = [outputChannels, inputChannels, filterHeight, filterWidth]
* outputData = [batchSize, outputChannels, outputHeight, outputWidth]
*/
template <class T>
class NaiveConvFunctor {
public:
void operator()(const T* inputData,
size_t batchSize,
size_t inputChannels,
size_t inputHeight,
size_t inputWidth,
const T* filterData,
size_t filterHeight,
size_t filterWidth,
T* outputData,
size_t outputChannels,
size_t outputHeight,
size_t outputWidth,
size_t paddingH,
size_t paddingW,
size_t strideH,
size_t strideW) {
for (size_t batch = 0; batch < batchSize; batch++) {
for (size_t outC = 0; outC < outputChannels; outC++) {
for (size_t outH = 0; outH < outputHeight; outH++) {
for (size_t outW = 0; outW < outputWidth; outW++) {
const int inStartH = (outH * strideH) - paddingH;
const int inStartW = (outW * strideW) - paddingW;
T outValue = (T)0;
for (size_t inC = 0; inC < inputChannels; inC++) {
for (size_t fH = 0; fH < filterHeight; fH++) {
for (size_t fW = 0; fW < filterWidth; fW++) {
T inValue;
const int inH = inStartH + fH;
const int inW = inStartW + fW;
if ((inH >= 0 && inH < inputHeight) &&
(inW >= 0 && inW < inputWidth)) {
size_t offsetInput =
batch * inputChannels * inputHeight * inputWidth +
inC * inputHeight * inputWidth + inH * inputWidth + inW;
inValue = inputData[offsetInput];
} else {
inValue = (T)0;
}
size_t offsetFilter =
outC * inputChannels * filterHeight * filterWidth +
inC * filterHeight * filterWidth + fH * filterWidth + fW;
T filterValue = filterData[offsetFilter];
outValue += (inValue * filterValue);
}
}
}
size_t offset =
batch * outputChannels * outputHeight * outputWidth +
outC * outputHeight * outputWidth + outH * outputWidth + outW;
outputData[offset] = outValue;
}
}
}
}
}
};
template <DeviceType Device>
class NaiveConvFunction : public ConvFunctionBase {
public:
void init(const FuncConfig& config) override {
ConvFunctionBase::init(config);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape();
check(input, filter, output);
CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
size_t batchSize = inputs[0].shape()[0];
size_t inputChannels = inputs[0].shape()[1];
size_t inputHeight = inputs[0].shape()[2];
size_t inputWidth = inputs[0].shape()[3];
size_t filterHeight = inputs[1].shape()[2];
size_t filterWidth = inputs[1].shape()[3];
size_t outputChannels = outputs[0].shape()[1];
size_t outputHeight = outputs[0].shape()[2];
size_t outputWidth = outputs[0].shape()[3];
real* inputData = inputs[0].data<real>();
real* filterData = inputs[1].data<real>();
real* outputData = outputs[0].data<real>();
NaiveConvFunctor<real> conv;
conv(inputData,
batchSize,
inputChannels,
inputHeight,
inputWidth,
filterData,
filterHeight,
filterWidth,
outputData,
outputChannels,
outputHeight,
outputWidth,
paddingH(),
paddingW(),
strideH(),
strideW());
}
};
REGISTER_TYPED_FUNC(NaiveConv, CPU, NaiveConvFunction);
} // namespace paddle
...@@ -25,7 +25,7 @@ TEST(Pad, real) { ...@@ -25,7 +25,7 @@ TEST(Pad, real) {
VLOG(3) << " numSamples=" << numSamples << " channels=" << channels VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
<< " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW; << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW;
for (bool test_grad : {false, true}) { for (bool test_grad : {false, true}) {
FunctionCompare compare( CpuGpuFuncCompare compare(
test_grad ? "PadGrad" : "Pad", test_grad ? "PadGrad" : "Pad",
FuncConfig() FuncConfig()
.set<std::vector<uint32_t>>("channel", {2, 3}) .set<std::vector<uint32_t>>("channel", {2, 3})
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "RowConvOp.h"
#include <iostream>
#include "paddle/math/Vector.h"
namespace paddle {
template <>
void RowConv<DEVICE_TYPE_CPU>(CpuMatrix& out,
const CpuMatrix& in,
const CpuMatrix& filter,
const CpuIVector& seq) {
const int* starts = seq.getData();
const size_t numSeq = seq.getSize() - 1;
const size_t contextLength = filter.getHeight();
for (size_t i = 0; i < numSeq; ++i) {
size_t begin = starts[i];
size_t end = starts[i + 1];
for (size_t j = begin; j < end; ++j) {
MatrixPtr x;
MatrixPtr w;
if ((j + contextLength) < end) {
x = (const_cast<CpuMatrix&>(in)).subMatrix(j, contextLength);
w = (const_cast<CpuMatrix&>(filter)).subMatrix(0, contextLength);
} else {
x = (const_cast<CpuMatrix&>(in)).subMatrix(j, end - j);
w = (const_cast<CpuMatrix&>(filter)).subMatrix(0, end - j);
}
MatrixPtr y = out.subMatrix(j, 1);
y->addDotMulVMM(*x, *w);
}
}
}
template <>
void RowConvGrad<DEVICE_TYPE_CPU>(const CpuMatrix& outG,
const CpuMatrix& in,
const CpuMatrix& filter,
CpuMatrix& inG,
CpuMatrix& filterG,
const CpuIVector& seq) {
// gradient w.r.t filter
const int* starts = seq.getData();
const size_t numSeq = seq.getSize() - 1;
const size_t contextLength = filter.getHeight();
if (filterG) {
for (size_t i = 0; i < numSeq; ++i) {
size_t begin = starts[i];
size_t end = starts[i + 1];
size_t steps = end - begin;
for (size_t j = 0; j < contextLength && (begin + j) < end; ++j) {
MatrixPtr x =
(const_cast<CpuMatrix&>(in)).subMatrix(begin + j, steps - j);
MatrixPtr dy =
(const_cast<CpuMatrix&>(outG)).subMatrix(begin, steps - j);
MatrixPtr dw = filterG.subMatrix(j, 1);
dw->addDotMulVMM(*dy, *x);
}
}
}
// gradient w.r.t input feature
if (inG) {
for (size_t i = 0; i < numSeq; ++i) {
size_t begin = starts[i];
size_t end = starts[i + 1];
size_t steps = end - begin;
for (size_t j = 0; j < steps; ++j) {
MatrixPtr dx = inG.subMatrix(begin + j, 1);
for (size_t t = 0; t < contextLength; ++t) {
if (int(j - t) >= 0) {
MatrixPtr dy =
(const_cast<CpuMatrix&>(outG)).subMatrix(begin + j - t, 1);
MatrixPtr w = (const_cast<CpuMatrix&>(filter)).subMatrix(t, 1);
dx->addDotMul(*dy, *w, 1.0, 1.0);
}
}
}
}
}
}
/**
* \brief The row convolution is called lookahead convolution. It is firstly
* introduced in deep-speech2 system. The bidirectional RNN that learns
* representation for a sequence by performing a forward and a backward pass
* through the entire sequence. However, unlike unidirectional RNNs,
* bidirectional RNNs are challenging to deploy in an online and low-latency
* setting. The lookahead convolution incorporates information from future
* subsequences in a computationally efficient manner to improve unidirectional
* recurrent neural networks.
*
* The connection of row convolution is different form the 1D sequence
* convolution. Assumed that, the future context-length is k, that is to say,
* it can get the output at timestep t by using the the input feature from t-th
* timestep to (t+k)-th timestep. Assumed that the hidden dim of input
* activations are d, the activations r_t for the new layer at time-step t are:
*
*
* -- k + 1
* r(t,i) = > W(i,j) * h(t+j-1, i), for (1 <= i <= d)
* -- j = 1
*
*
* The weight shape is: (k + 1) x d
* Function Arguments:
*
* \param inputs[0] The input activations.
* \param inputs[0] The filter (or weight) and shape is (k+1) x d.
* \param outputs[1] The output activations.
*
* [1] Dario Amodei, etc. Deep Speech 2 : End-to-End Speech Recognition in
* English
* and Mandarin. https://arxiv.org/abs/1512.02595
*/
template <DeviceType Device>
class RowConvFunc : public FunctionBase {
public:
void init(const FuncConfig& config) override {}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
// check
CHECK_EQ(2UL, inputs.size());
CHECK_EQ(1UL, outputs.size());
// TODO(qingqing): support ASSIGN_TO.
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
<< "SequenceArg required here.";
const auto in = dynamic_cast<const SequenceArg&>(inputs[0]);
auto out = dynamic_cast<const SequenceArg&>(outputs[0]);
auto w = inputs[1];
CHECK(in.data() && out.data() && in.getSequenceId().data());
CHECK_EQ(in.shape().ndims(), 2UL);
CHECK(in.shape() == out.shape());
CHECK_EQ(w.shape()[1], in.shape()[1]);
auto outMat = out.matrix<Device>();
const auto inMat = in.matrix<Device>();
const auto wMat = w.matrix<Device>();
const auto seqId = in.getSequenceId().vector<int, Device>();
RowConv<Device>(outMat, inMat, wMat, seqId);
}
};
/**
* \brief The backward of row convolution function. This function calculated
* the gradient w.r.t filter and the gradient w.r.t input activations(or data).
*
* Argument in this Function:
*
* \param inputs[0] The gradient w.r.t output activations.
* \param inputs[1] The input activations.
* \param inputs[2] The filter (or weight) and shape is (k+1) x d.
* \param outputs[0] The gradient w.r.t input activations.
* \param outputs[1] The gradient w.r.r filter.
*
* Abbreviation:
* w.r.t: with respect to.
*/
template <DeviceType Device>
class RowConvGradFunc : public FunctionBase {
// TODO(qingqing): split into RowConvDataFunc and RowConvWeightFunc
public:
void init(const FuncConfig& config) override {}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
// check
CHECK_EQ(3UL, inputs.size());
CHECK_EQ(2UL, outputs.size());
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
CHECK_EQ(outputs[1].getArgType(), ADD_TO);
CHECK(inputs[0].isSequenceArg() && inputs[1].isSequenceArg() &&
outputs[0].isSequenceArg())
<< "SequenceArg required here.";
const auto outGrad = dynamic_cast<const SequenceArg&>(inputs[0]);
const auto in = dynamic_cast<const SequenceArg&>(inputs[1]);
const auto w = inputs[2];
auto inGrad = dynamic_cast<const SequenceArg&>(outputs[0]);
auto wGrad = outputs[1];
CHECK_EQ(in.shape().ndims(), 2UL);
CHECK(in.shape() == inGrad.shape());
CHECK(in.shape() == outGrad.shape());
CHECK_EQ(wGrad.shape()[1], in.shape()[1]);
const auto outGMat = outGrad.matrix<Device>();
const auto inMat = in.matrix<Device>();
const auto wMat = w.matrix<Device>();
auto inGMat = inGrad.data()
? inGrad.matrix<Device>()
: typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
auto wGMat = wGrad.data()
? wGrad.matrix<Device>()
: typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
const auto seqId = in.getSequenceId().vector<int, Device>();
RowConvGrad<Device>(outGMat, inMat, wMat, inGMat, wGMat, seqId);
}
};
REGISTER_TYPED_FUNC(RowConv, CPU, RowConvFunc);
REGISTER_TYPED_FUNC(RowConvGrad, CPU, RowConvGradFunc);
#ifndef PADDLE_ONLY_CPU
REGISTER_TYPED_FUNC(RowConv, GPU, RowConvFunc);
REGISTER_TYPED_FUNC(RowConvGrad, GPU, RowConvGradFunc);
#endif
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Function.h"
namespace paddle {
/**
* \brief The forward of row convolution.
*
* \param[out] out The output data and shape is h x d. h is the sum of
* time steps of all samples in one mini-batch.
* \param[in] in The input data and shape is h x d.
* \param[in] filter The filter and shape is k x d. The lookahead step
* number plus one equals k.
* \param[in] seq The sequence start positions.
*
*/
template <DeviceType DType>
void RowConv(typename Tensor<real, DType>::Matrix& out,
const typename Tensor<real, DType>::Matrix& in,
const typename Tensor<real, DType>::Matrix& filter,
const typename Tensor<int, DType>::Vector& seq);
/**
* \brief The backward of row convolution.
*
* \param[in] outG The gradient w.r.t output data.
* \param[in] in The input data.
* \param[in] filter The filter.
* \param[out] inG The gradient w.r.t input data.
* \param[out] filterG The gradient w.r.t filter.
* \param[in] seq The sequence start positions.
*
*/
template <DeviceType DType>
void RowConvGrad(const typename Tensor<real, DType>::Matrix& outG,
const typename Tensor<real, DType>::Matrix& in,
const typename Tensor<real, DType>::Matrix& filter,
typename Tensor<real, DType>::Matrix& inG,
typename Tensor<real, DType>::Matrix& filterG,
const typename Tensor<int, DType>::Vector& seq);
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_base.h"
#include "RowConvOp.h"
namespace paddle {
template<int BLOCK_H, int BLOCK_W>
__global__ void KeRowConv(real* y, const real* x, const real* w,
const int* starts, const int height, const int width,
const int numSeq, const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int blky = blockDim.y;
const int gidx = blockIdx.x * blockDim.x;
__shared__ real sw[BLOCK_H][BLOCK_W];
for (int i = tidy; i < context; i += blky) {
sw[i][tidx] = gidx + tidx < width ? w[i*width + gidx + tidx] : 0.0;
}
__syncthreads();
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
for (int j = tidy; j < steps; j += blky) {
real sum = 0;
int off = (start + j) * width;
for (int t = 0; t < context; ++t) {
if ((start + j + t) < end) {
int xoff = off + t * width;
real xVal = gidx + tidx < width ? x[xoff + gidx + tidx] : 0.0;
sum += sw[t][tidx] * xVal;
}
}
if (gidx + tidx < width) {
y[off + gidx + tidx] += sum;
}
}
}
}
__global__ void KeRowConv2(real* y, const real* x, const real* w,
const int* starts, const int height, const int width,
const int numSeq, const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int blky = blockDim.y;
const int gidx = blockIdx.x * blockDim.x;
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
for (int j = tidy; j < steps; j += blky) {
int off = (start + j) * width;
real sum = 0;
for (int t = 0; t < context && (start + j + t) < end; ++t) {
int xoff = off + t * width;
real xd = gidx + tidx < width ? x[xoff + gidx + tidx] : 0.0;
real wd = gidx + tidx < width ? w[t * width + gidx + tidx] : 0.0;
sum += wd * xd;
}
if (gidx + tidx < width) {
y[off + gidx + tidx] += sum;
}
}
}
}
template <>
void RowConv<DEVICE_TYPE_GPU>(GpuMatrix& out,
const GpuMatrix& in,
const GpuMatrix& filter,
const GpuIVector& seq) {
const size_t numSeq = seq.getSize() - 1;
const size_t contextLength = filter.getHeight();
const size_t height = in.getHeight();
const size_t width = in.getWidth();
real* y = out.getData();
const real* x = in.getData();
const real* w = filter.getData();
const int* starts = seq.getData();
dim3 dimBlock(32, 32);
dim3 dimGrid(DIVUP(width, dimBlock.x), 1);
if (contextLength <= 32) {
KeRowConv<32, 32><<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>>
(y, x, w, starts, height, width, numSeq, contextLength);
} else {
KeRowConv2<<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>>
(y, x, w, starts, height, width, numSeq, contextLength);
}
CHECK_SYNC("RowConv");
}
template<int BLOCK_H, int BLOCK_W, int CONTEXT>
__global__ void KeRowConvBwWeight(real* dw, const real* x, const real* dy,
const int* starts, const int height, const int width, const int numSeq,
const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int blky = blockDim.y;
const int gidx = blockIdx.x * blockDim.x;
__shared__ real sh_x[BLOCK_W][BLOCK_H];
__shared__ real sh_dy[BLOCK_W][BLOCK_H + CONTEXT - 1];
__shared__ real sh_dw[CONTEXT][BLOCK_W];
if (tidy < context) {
sh_dw[tidy][tidx] = 0.0;
}
__syncthreads();
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
const int size = ((steps + BLOCK_H - 1)/BLOCK_H) * BLOCK_H;
for (int j = tidy; j < size; j += BLOCK_H) {
int xoff = gidx + tidx;
int yoff = start + j;
// transpose
sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0;
sh_dy[tidx][tidy + context - 1] = (xoff < width && yoff < end) ? dy[yoff * width + xoff] : 0.0;
__syncthreads();
if (tidy < (context - 1)) {
yoff = yoff - context + 1;
sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ? dy[yoff * width + xoff] : 0.0;
}
__syncthreads();
for (int t = 0; t < context; t++) {
real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx + context - 1 - t];
__syncthreads();
// warp size and blockDim.x is 32.
val += __shfl_down(val, 16);
val += __shfl_down(val, 8);
val += __shfl_down(val, 4);
val += __shfl_down(val, 2);
val += __shfl_down(val, 1);
__syncthreads();
if (tidx == 0) {
sh_dw[t][tidy] += val;
}
__syncthreads();
}
}
}
for (int t = tidy; (t < context) && ((gidx + tidx) < width); t += blky) {
dw[t * width + gidx + tidx] += sh_dw[t][tidx];
}
}
template<int BLOCK_H, int BLOCK_W>
__global__ void KeRowConvBwWeight2(real* dw, const real* x, const real* dy,
const int* starts, const int height, const int width, const int numSeq,
const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int gidx = blockIdx.x * blockDim.x;
__shared__ real sh_x[BLOCK_H][BLOCK_W];
__shared__ real sh_dy[BLOCK_H][BLOCK_W];
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
const int size = ((steps + BLOCK_H - 1)/BLOCK_H) * BLOCK_H;
for (int j = tidy; j < size; j += BLOCK_H) {
int xoff = gidx + tidx;
int yoff = start + j;
// transpose
sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0;
__syncthreads();
for (int t = 0; t < context; t++) {
sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start && yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0;
__syncthreads();
real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx];
__syncthreads();
// warp size and blockDim.x is 32.
val += __shfl_down(val, 16);
val += __shfl_down(val, 8);
val += __shfl_down(val, 4);
val += __shfl_down(val, 2);
val += __shfl_down(val, 1);
__syncthreads();
if (tidx == 0 && (gidx + tidy) < width) {
dw[t*width + gidx + tidy] += val;
}
}
}
}
}
template<int BLOCK_H, int BLOCK_W>
__global__ void KeRowConvBwData(real* dx, const real* w, const real* dy,
const int* starts, const int height, const int width, const int numSeq,
const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int blky = blockDim.y;
const int gidx = blockIdx.x * blockDim.x;
__shared__ real sw[BLOCK_H][BLOCK_W];
for (int i = tidy; i < context; i += blky) {
sw[i][tidx] = gidx + tidx < width ? w[i*width + gidx + tidx] : 0.0;
}
__syncthreads();
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
for (int j = tidy; j < steps; j += blky) {
real sum = 0;
int off = (start + j) * width;
for (int t = 0; t < context && (j - t) >= 0; ++t) {
int dyOff = off - t * width;
real dyVal = gidx + tidx < width ? dy[dyOff + gidx + tidx] : 0.0;
sum += sw[t][tidx] * dyVal;
}
if (gidx + tidx < width) {
dx[off + gidx + tidx] += sum;
}
}
}
}
__global__ void KeRowConvBwData2(real* dx, const real* w, const real* dy,
const int* starts, const int height, const int width, const int numSeq,
const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int blky = blockDim.y;
const int gidx = blockIdx.x * blockDim.x;
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
for (int j = tidy; j < steps; j += blky) {
real sum = 0;
int off = (start + j) * width;
for (int t = 0; t < context && (j - t) >= 0; ++t) {
int dyOff = off - t * width;
real dyVal = gidx + tidx < width ? dy[dyOff + gidx + tidx] : 0.0;
real wVal = gidx + tidx < width ? w[t * width + gidx + tidx] : 0.0;
sum += wVal * dyVal;
}
if (gidx + tidx < width) {
dx[off + gidx + tidx] += sum;
}
}
}
}
template <>
void RowConvGrad<DEVICE_TYPE_GPU>(const GpuMatrix& outG,
const GpuMatrix& in,
const GpuMatrix& filter,
GpuMatrix& inG,
GpuMatrix& filterG,
const GpuIVector& seq) {
const size_t numSeq = seq.getSize() - 1;
const size_t contextLength = filter.getHeight();
const size_t height = in.getHeight();
const size_t width = in.getWidth();
const real* dy = outG.getData();
const real* x = in.getData();
const real* w = filter.getData();
const int* starts = seq.getData();
if (filterG) {
dim3 dimBlock(32, 32);
dim3 dimGrid(DIVUP(width, dimBlock.x), 1);
real* dw = filterG.getData();
if (contextLength <= 32) {
KeRowConvBwWeight<32, 32, 32>
<<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>>
(dw, x, dy, starts, height, width, numSeq, contextLength);
} else {
KeRowConvBwWeight2<32, 32>
<<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>>
(dw, x, dy, starts, height, width, numSeq, contextLength);
}
}
if (inG) {
real* dx = inG.getData();
dim3 dimBlock2(32, 32);
dim3 dimGrid2(DIVUP(width, dimBlock2.x), 1);
if (contextLength <= 64) {
KeRowConvBwData<32, 64>
<<<dimGrid2, dimBlock2, 0, STREAM_DEFAULT>>>
(dx, w, dy, starts, height, width, numSeq, contextLength);
} else {
KeRowConvBwData2
<<<dimGrid2, dimBlock2, 0, STREAM_DEFAULT>>>
(dx, w, dy, starts, height, width, numSeq, contextLength);
}
}
CHECK_SYNC("RowConvGrad");
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "FunctionTest.h"
namespace paddle {
void testRowConvFw(size_t batchSize, size_t dim, size_t contextLength) {
CpuGpuFuncCompare test("RowConv", FuncConfig());
test.addSequence(SequenceIdArg(TensorShape{batchSize}));
test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{contextLength, dim}));
test.addOutputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}),
ADD_TO);
test.run();
}
void testRowConvBw(size_t batchSize, size_t dim, size_t contextLength) {
CpuGpuFuncCompare test("RowConvGrad", FuncConfig());
test.addSequence(SequenceIdArg(TensorShape{batchSize}));
test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}));
test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{contextLength, dim}));
test.addOutputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}),
ADD_TO);
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{contextLength, dim}),
ADD_TO);
test.run();
}
TEST(RowConv, real) {
for (size_t numSamples : {17, 129, 2020}) {
for (size_t dim : {16, 512, 2560}) {
for (size_t context : {3, 19, 65}) {
VLOG(3) << " numSamples=" << numSamples << " dim=" << dim
<< " context length=" << context;
testRowConvFw(numSamples, dim, context);
testRowConvBw(numSamples, dim, context);
}
}
}
}
} // namespace paddle
...@@ -118,11 +118,7 @@ size_t ConvBaseLayer::calOutputSize() { ...@@ -118,11 +118,7 @@ size_t ConvBaseLayer::calOutputSize() {
layerSize = outH[0] * outW[0] * size_t(numFilters_); layerSize = outH[0] * outW[0] * size_t(numFilters_);
}; };
if (isDeconv_) {
setLayerSize(outputH_, outputW_, imgSizeH_, imgSizeW_);
} else {
setLayerSize(imgSizeH_, imgSizeW_, outputH_, outputW_); setLayerSize(imgSizeH_, imgSizeW_, outputH_, outputW_);
}
return layerSize; return layerSize;
} }
......
...@@ -70,14 +70,8 @@ void CudnnConvBaseLayer::forward(PassType passType) { ...@@ -70,14 +70,8 @@ void CudnnConvBaseLayer::forward(PassType passType) {
if (biases_) { if (biases_) {
REGISTER_TIMER_INFO("CudnnConvBiasTimer", getName().c_str()); REGISTER_TIMER_INFO("CudnnConvBiasTimer", getName().c_str());
int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
int outH, outW; int outH = outputH_[0];
if (isDeconv_) { int outW = outputW_[0];
outH = imgSizeH_[0];
outW = imgSizeW_[0];
} else {
outH = outputH_[0];
outW = outputW_[0];
}
hl_tensor_reshape(outputDesc_, hl_tensor_reshape(outputDesc_,
batchSize, batchSize,
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "DetectionUtil.h"
namespace paddle {
size_t appendWithPermute(const Matrix& inMatrix,
size_t height,
size_t width,
size_t outTotalSize,
size_t outOffset,
size_t batchSize,
Matrix& outMatrix,
PermMode permMode) {
CHECK_EQ(inMatrix.useGpu(), outMatrix.useGpu());
bool useGpu = inMatrix.useGpu();
if (permMode == kNCHWToNHWC) {
size_t inElementCnt = inMatrix.getElementCnt();
size_t channels = inElementCnt / (height * width * batchSize);
size_t imgSize = height * width;
for (size_t i = 0; i < batchSize; ++i) {
size_t offset = i * (outTotalSize / batchSize) + outOffset;
const MatrixPtr inTmp = Matrix::create(
const_cast<real*>(inMatrix.getData()) + i * channels * imgSize,
channels,
imgSize,
false,
useGpu);
MatrixPtr outTmp =
Matrix::create(const_cast<real*>(outMatrix.getData()) + offset,
imgSize,
channels,
false,
useGpu);
inTmp->transpose(outTmp, false);
}
return channels * imgSize;
} else {
LOG(FATAL) << "Unkown permute mode";
}
}
size_t decomposeWithPermute(const Matrix& inMatrix,
size_t height,
size_t width,
size_t inTotalSize,
size_t inOffset,
size_t batchSize,
Matrix& outMatrix,
PermMode permMode) {
CHECK_EQ(inMatrix.useGpu(), outMatrix.useGpu());
bool useGpu = inMatrix.useGpu();
if (permMode == kNHWCToNCHW) {
size_t outElementCnt = outMatrix.getElementCnt();
size_t channels = outElementCnt / (height * width * batchSize);
size_t imgSize = height * width;
for (size_t i = 0; i < batchSize; ++i) {
size_t offset = i * (inTotalSize / batchSize) + inOffset;
const MatrixPtr inTmp =
Matrix::create(const_cast<real*>(inMatrix.getData()) + offset,
imgSize,
channels,
false,
useGpu);
MatrixPtr outTmp = Matrix::create(
const_cast<real*>(outMatrix.getData()) + i * channels * imgSize,
channels,
imgSize,
false,
useGpu);
inTmp->transpose(outTmp, false);
}
return channels * imgSize;
} else {
LOG(FATAL) << "Unkown permute mode";
}
}
real jaccardOverlap(const NormalizedBBox& bbox1, const NormalizedBBox& bbox2) {
if (bbox2.xMin > bbox1.xMax || bbox2.xMax < bbox1.xMin ||
bbox2.yMin > bbox1.yMax || bbox2.yMax < bbox1.yMin) {
return 0.0;
} else {
real interXMin = std::max(bbox1.xMin, bbox2.xMin);
real interYMin = std::max(bbox1.yMin, bbox2.yMin);
real interXMax = std::min(bbox1.xMax, bbox2.xMax);
real interYMax = std::min(bbox1.yMax, bbox2.yMax);
real interWidth = interXMax - interXMin;
real interHeight = interYMax - interYMin;
real interArea = interWidth * interHeight;
real bboxArea1 = bbox1.getArea();
real bboxArea2 = bbox2.getArea();
return interArea / (bboxArea1 + bboxArea2 - interArea);
}
}
void encodeBBoxWithVar(const NormalizedBBox& priorBBox,
const vector<real>& priorBBoxVar,
const NormalizedBBox& gtBBox,
vector<real>& outVec) {
real priorBBoxWidth = priorBBox.getWidth();
real priorBBoxHeight = priorBBox.getHeight();
real priorBBoxCenterX = priorBBox.getCenterX();
real priorBBoxCenterY = priorBBox.getCenterY();
real gtBBoxWidth = gtBBox.getWidth();
real gtBBoxHeight = gtBBox.getHeight();
real gtBBoxCenterX = gtBBox.getCenterX();
real gtBBoxCenterY = gtBBox.getCenterY();
outVec.clear();
outVec.push_back((gtBBoxCenterX - priorBBoxCenterX) / priorBBoxWidth /
priorBBoxVar[0]);
outVec.push_back((gtBBoxCenterY - priorBBoxCenterY) / priorBBoxHeight /
priorBBoxVar[1]);
outVec.push_back(std::log(std::fabs(gtBBoxWidth / priorBBoxWidth)) /
priorBBoxVar[2]);
outVec.push_back(std::log(std::fabs(gtBBoxHeight / priorBBoxHeight)) /
priorBBoxVar[3]);
}
NormalizedBBox decodeBBoxWithVar(const NormalizedBBox& priorBBox,
const vector<real>& priorBBoxVar,
const vector<real>& locPredData) {
real priorBBoxWidth = priorBBox.getWidth();
real priorBBoxHeight = priorBBox.getHeight();
real priorBBoxCenterX = priorBBox.getCenterX();
real priorBBoxCenterY = priorBBox.getCenterY();
real decodedBBoxCenterX =
priorBBoxVar[0] * locPredData[0] * priorBBoxWidth + priorBBoxCenterX;
real decodedBBoxCenterY =
priorBBoxVar[1] * locPredData[1] * priorBBoxHeight + priorBBoxCenterY;
real decodedBBoxWidth =
std::exp(priorBBoxVar[2] * locPredData[2]) * priorBBoxWidth;
real decodedBBoxHeight =
std::exp(priorBBoxVar[3] * locPredData[3]) * priorBBoxHeight;
NormalizedBBox decodedBBox;
decodedBBox.xMin = decodedBBoxCenterX - decodedBBoxWidth / 2;
decodedBBox.yMin = decodedBBoxCenterY - decodedBBoxHeight / 2;
decodedBBox.xMax = decodedBBoxCenterX + decodedBBoxWidth / 2;
decodedBBox.yMax = decodedBBoxCenterY + decodedBBoxHeight / 2;
return decodedBBox;
}
void getBBoxFromPriorData(const real* priorData,
const size_t numBBoxes,
vector<NormalizedBBox>& bboxVec) {
size_t outOffset = bboxVec.size();
bboxVec.resize(bboxVec.size() + numBBoxes);
for (size_t i = 0; i < numBBoxes; ++i) {
NormalizedBBox bbox;
bbox.xMin = *(priorData + i * 8);
bbox.yMin = *(priorData + i * 8 + 1);
bbox.xMax = *(priorData + i * 8 + 2);
bbox.yMax = *(priorData + i * 8 + 3);
bboxVec[outOffset + i] = bbox;
}
}
void getBBoxVarFromPriorData(const real* priorData,
const size_t num,
vector<vector<real>>& varVec) {
size_t outOffset = varVec.size();
varVec.resize(varVec.size() + num);
for (size_t i = 0; i < num; ++i) {
vector<real> var;
var.push_back(*(priorData + i * 8 + 4));
var.push_back(*(priorData + i * 8 + 5));
var.push_back(*(priorData + i * 8 + 6));
var.push_back(*(priorData + i * 8 + 7));
varVec[outOffset + i] = var;
}
}
void getBBoxFromLabelData(const real* labelData,
const size_t numBBoxes,
vector<NormalizedBBox>& bboxVec) {
size_t outOffset = bboxVec.size();
bboxVec.resize(bboxVec.size() + numBBoxes);
for (size_t i = 0; i < numBBoxes; ++i) {
NormalizedBBox bbox;
bbox.xMin = *(labelData + i * 6 + 1);
bbox.yMin = *(labelData + i * 6 + 2);
bbox.xMax = *(labelData + i * 6 + 3);
bbox.yMax = *(labelData + i * 6 + 4);
real isDifficult = *(labelData + i * 6 + 5);
if (std::abs(isDifficult - 0.0) < 1e-6)
bbox.isDifficult = false;
else
bbox.isDifficult = true;
bboxVec[outOffset + i] = bbox;
}
}
void getBBoxFromDetectData(const real* detectData,
const size_t numBBoxes,
vector<real>& labelVec,
vector<real>& scoreVec,
vector<NormalizedBBox>& bboxVec) {
size_t outOffset = bboxVec.size();
labelVec.resize(outOffset + numBBoxes);
scoreVec.resize(outOffset + numBBoxes);
bboxVec.resize(outOffset + numBBoxes);
for (size_t i = 0; i < numBBoxes; ++i) {
labelVec[outOffset + i] = *(detectData + i * 7 + 1);
scoreVec[outOffset + i] = *(detectData + i * 7 + 2);
NormalizedBBox bbox;
bbox.xMin = *(detectData + i * 7 + 3);
bbox.yMin = *(detectData + i * 7 + 4);
bbox.xMax = *(detectData + i * 7 + 5);
bbox.yMax = *(detectData + i * 7 + 6);
bboxVec[outOffset + i] = bbox;
}
}
void matchBBox(const vector<NormalizedBBox>& priorBBoxes,
const vector<NormalizedBBox>& gtBBoxes,
real overlapThreshold,
vector<int>* matchIndices,
vector<real>* matchOverlaps) {
map<size_t, map<size_t, real>> overlaps;
size_t numPriors = priorBBoxes.size();
size_t numGTs = gtBBoxes.size();
matchIndices->clear();
matchIndices->resize(numPriors, -1);
matchOverlaps->clear();
matchOverlaps->resize(numPriors, 0.0);
// Store the positive overlap between predictions and ground truth
for (size_t i = 0; i < numPriors; ++i) {
for (size_t j = 0; j < numGTs; ++j) {
real overlap = jaccardOverlap(priorBBoxes[i], gtBBoxes[j]);
if (overlap > 1e-6) {
(*matchOverlaps)[i] = std::max((*matchOverlaps)[i], overlap);
overlaps[i][j] = overlap;
}
}
}
// Bipartite matching
vector<int> gtPool;
for (size_t i = 0; i < numGTs; ++i) {
gtPool.push_back(i);
}
while (gtPool.size() > 0) {
// Find the most overlapped gt and corresponding predictions
int maxPriorIdx = -1;
int maxGTIdx = -1;
real maxOverlap = -1.0;
for (map<size_t, map<size_t, real>>::iterator it = overlaps.begin();
it != overlaps.end();
++it) {
size_t i = it->first;
if ((*matchIndices)[i] != -1) {
// The prediction already has matched ground truth or is ignored
continue;
}
for (size_t p = 0; p < gtPool.size(); ++p) {
int j = gtPool[p];
if (it->second.find(j) == it->second.end()) {
// No overlap between the i-th prediction and j-th ground truth
continue;
}
// Find the maximum overlapped pair
if (it->second[j] > maxOverlap) {
maxPriorIdx = (int)i;
maxGTIdx = (int)j;
maxOverlap = it->second[j];
}
}
}
if (maxPriorIdx == -1) {
break;
} else {
(*matchIndices)[maxPriorIdx] = maxGTIdx;
(*matchOverlaps)[maxPriorIdx] = maxOverlap;
gtPool.erase(std::find(gtPool.begin(), gtPool.end(), maxGTIdx));
}
}
// Get most overlaped for the rest prediction bboxes
for (map<size_t, map<size_t, real>>::iterator it = overlaps.begin();
it != overlaps.end();
++it) {
size_t i = it->first;
if ((*matchIndices)[i] != -1) {
// The prediction already has matched ground truth or is ignored
continue;
}
int maxGTIdx = -1;
real maxOverlap = -1;
for (size_t j = 0; j < numGTs; ++j) {
if (it->second.find(j) == it->second.end()) {
// No overlap between the i-th prediction and j-th ground truth
continue;
}
// Find the maximum overlapped pair
real overlap = it->second[j];
if (overlap > maxOverlap && overlap >= overlapThreshold) {
maxGTIdx = j;
maxOverlap = overlap;
}
}
if (maxGTIdx != -1) {
(*matchIndices)[i] = maxGTIdx;
(*matchOverlaps)[i] = maxOverlap;
}
}
}
pair<size_t, size_t> generateMatchIndices(
const Matrix& priorValue,
const size_t numPriorBBoxes,
const Matrix& gtValue,
const int* gtStartPosPtr,
const size_t seqNum,
const vector<vector<real>>& maxConfScore,
const size_t batchSize,
const real overlapThreshold,
const real negOverlapThreshold,
const size_t negPosRatio,
vector<vector<int>>* matchIndicesVecPtr,
vector<vector<int>>* negIndicesVecPtr) {
vector<NormalizedBBox> priorBBoxes; // share same prior bboxes
getBBoxFromPriorData(priorValue.getData(), numPriorBBoxes, priorBBoxes);
size_t totalPos = 0;
size_t totalNeg = 0;
for (size_t n = 0; n < batchSize; ++n) {
vector<int> matchIndices;
vector<int> negIndices;
vector<real> matchOverlaps;
matchIndices.resize(numPriorBBoxes, -1);
matchOverlaps.resize(numPriorBBoxes, 0.0);
size_t numGTBBoxes = 0;
if (n < seqNum) numGTBBoxes = gtStartPosPtr[n + 1] - gtStartPosPtr[n];
if (!numGTBBoxes) {
matchIndicesVecPtr->push_back(matchIndices);
negIndicesVecPtr->push_back(negIndices);
continue;
}
vector<NormalizedBBox> gtBBoxes;
getBBoxFromLabelData(
gtValue.getData() + gtStartPosPtr[n] * 6, numGTBBoxes, gtBBoxes);
matchBBox(
priorBBoxes, gtBBoxes, overlapThreshold, &matchIndices, &matchOverlaps);
size_t numPos = 0;
size_t numNeg = 0;
for (size_t i = 0; i < matchIndices.size(); ++i)
if (matchIndices[i] != -1) ++numPos;
totalPos += numPos;
vector<pair<real, size_t>> scoresIndices;
for (size_t i = 0; i < matchIndices.size(); ++i)
if (matchIndices[i] == -1 && matchOverlaps[i] < negOverlapThreshold) {
scoresIndices.push_back(std::make_pair(maxConfScore[n][i], i));
++numNeg;
}
numNeg = std::min(static_cast<size_t>(numPos * negPosRatio), numNeg);
std::sort(scoresIndices.begin(),
scoresIndices.end(),
sortScorePairDescend<size_t>);
for (size_t i = 0; i < numNeg; ++i)
negIndices.push_back(scoresIndices[i].second);
totalNeg += numNeg;
matchIndicesVecPtr->push_back(matchIndices);
negIndicesVecPtr->push_back(negIndices);
}
return std::make_pair(totalPos, totalNeg);
}
void getMaxConfidenceScores(const real* confData,
const size_t batchSize,
const size_t numPriorBBoxes,
const size_t numClasses,
const size_t backgroundId,
vector<vector<real>>* maxConfScoreVecPtr) {
maxConfScoreVecPtr->clear();
for (size_t i = 0; i < batchSize; ++i) {
vector<real> maxConfScore;
for (size_t j = 0; j < numPriorBBoxes; ++j) {
int offset = j * numClasses;
real maxVal = -FLT_MAX;
real maxPosVal = -FLT_MAX;
real maxScore = 0.0;
for (size_t c = 0; c < numClasses; ++c) {
maxVal = std::max<real>(confData[offset + c], maxVal);
if (c != backgroundId)
maxPosVal = std::max<real>(confData[offset + c], maxPosVal);
}
real sum = 0.0;
for (size_t c = 0; c < numClasses; ++c)
sum += std::exp(confData[offset + c] - maxVal);
maxScore = std::exp(maxPosVal - maxVal) / sum;
maxConfScore.push_back(maxScore);
}
confData += numPriorBBoxes * numClasses;
maxConfScoreVecPtr->push_back(maxConfScore);
}
}
template <typename T>
bool sortScorePairDescend(const pair<real, T>& pair1,
const pair<real, T>& pair2) {
return pair1.first > pair2.first;
}
template <>
bool sortScorePairDescend(const pair<real, NormalizedBBox>& pair1,
const pair<real, NormalizedBBox>& pair2) {
return pair1.first > pair2.first;
}
void applyNMSFast(const vector<NormalizedBBox>& bboxes,
const real* confScoreData,
size_t classIdx,
size_t topK,
real confThreshold,
real nmsThreshold,
size_t numPriorBBoxes,
size_t numClasses,
vector<size_t>* indices) {
vector<pair<real, size_t>> scores;
for (size_t i = 0; i < numPriorBBoxes; ++i) {
size_t confOffset = i * numClasses + classIdx;
if (confScoreData[confOffset] > confThreshold)
scores.push_back(std::make_pair(confScoreData[confOffset], i));
}
std::stable_sort(scores.begin(), scores.end(), sortScorePairDescend<size_t>);
if (topK > 0 && topK < scores.size()) scores.resize(topK);
while (scores.size() > 0) {
const size_t idx = scores.front().second;
bool keep = true;
for (size_t i = 0; i < indices->size(); ++i) {
if (keep) {
const size_t savedIdx = (*indices)[i];
real overlap = jaccardOverlap(bboxes[idx], bboxes[savedIdx]);
keep = overlap <= nmsThreshold;
} else {
break;
}
}
if (keep) indices->push_back(idx);
scores.erase(scores.begin());
}
}
size_t getDetectionIndices(
const real* confData,
const size_t numPriorBBoxes,
const size_t numClasses,
const size_t backgroundId,
const size_t batchSize,
const size_t confThreshold,
const size_t nmsTopK,
const real nmsThreshold,
const size_t keepTopK,
const vector<vector<NormalizedBBox>>& allDecodedBBoxes,
vector<map<size_t, vector<size_t>>>* allDetectionIndices) {
size_t totalKeepNum = 0;
for (size_t n = 0; n < batchSize; ++n) {
const vector<NormalizedBBox>& decodedBBoxes = allDecodedBBoxes[n];
size_t numDetected = 0;
map<size_t, vector<size_t>> indices;
size_t confOffset = n * numPriorBBoxes * numClasses;
for (size_t c = 0; c < numClasses; ++c) {
if (c == backgroundId) continue;
applyNMSFast(decodedBBoxes,
confData + confOffset,
c,
nmsTopK,
confThreshold,
nmsThreshold,
numPriorBBoxes,
numClasses,
&(indices[c]));
numDetected += indices[c].size();
}
if (keepTopK > 0 && numDetected > keepTopK) {
vector<pair<real, pair<size_t, size_t>>> scoreIndexPairs;
for (size_t c = 0; c < numClasses; ++c) {
const vector<size_t>& labelIndices = indices[c];
for (size_t i = 0; i < labelIndices.size(); ++i) {
size_t idx = labelIndices[i];
scoreIndexPairs.push_back(
std::make_pair((confData + confOffset)[idx * numClasses + c],
std::make_pair(c, idx)));
}
}
std::sort(scoreIndexPairs.begin(),
scoreIndexPairs.end(),
sortScorePairDescend<pair<size_t, size_t>>);
scoreIndexPairs.resize(keepTopK);
map<size_t, vector<size_t>> newIndices;
for (size_t i = 0; i < scoreIndexPairs.size(); ++i) {
size_t label = scoreIndexPairs[i].second.first;
size_t idx = scoreIndexPairs[i].second.second;
newIndices[label].push_back(idx);
}
allDetectionIndices->push_back(newIndices);
totalKeepNum += keepTopK;
} else {
allDetectionIndices->push_back(indices);
totalKeepNum += numDetected;
}
}
return totalKeepNum;
}
void getDetectionOutput(const real* confData,
const size_t numKept,
const size_t numPriorBBoxes,
const size_t numClasses,
const size_t batchSize,
const vector<map<size_t, vector<size_t>>>& allIndices,
const vector<vector<NormalizedBBox>>& allDecodedBBoxes,
Matrix& out) {
MatrixPtr outBuffer;
Matrix::resizeOrCreate(outBuffer, numKept, 7, false, false);
real* bufferData = outBuffer->getData();
size_t count = 0;
for (size_t n = 0; n < batchSize; ++n) {
for (map<size_t, vector<size_t>>::const_iterator it = allIndices[n].begin();
it != allIndices[n].end();
++it) {
size_t label = it->first;
const vector<size_t>& indices = it->second;
const vector<NormalizedBBox>& decodedBBoxes = allDecodedBBoxes[n];
for (size_t i = 0; i < indices.size(); ++i) {
size_t idx = indices[i];
size_t confOffset = n * numPriorBBoxes * numClasses + idx * numClasses;
bufferData[count * 7] = n;
bufferData[count * 7 + 1] = label;
bufferData[count * 7 + 2] = (confData + confOffset)[label];
NormalizedBBox clippedBBox = clipBBox(decodedBBoxes[idx]);
bufferData[count * 7 + 3] = clippedBBox.xMin;
bufferData[count * 7 + 4] = clippedBBox.yMin;
bufferData[count * 7 + 5] = clippedBBox.xMax;
bufferData[count * 7 + 6] = clippedBBox.yMax;
++count;
}
}
}
out.copyFrom(bufferData, numKept * 7);
}
NormalizedBBox clipBBox(const NormalizedBBox& bbox) {
real realOne = static_cast<real>(1.0);
real realZero = static_cast<real>(0.0);
NormalizedBBox clippedBBox;
clippedBBox.xMin = std::max(std::min(bbox.xMin, realOne), realZero);
clippedBBox.yMin = std::max(std::min(bbox.yMin, realOne), realZero);
clippedBBox.xMax = std::max(std::min(bbox.xMax, realOne), realZero);
clippedBBox.yMax = std::max(std::min(bbox.yMax, realOne), realZero);
return clippedBBox;
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <float.h>
#include <algorithm>
#include <vector>
#include "paddle/math/Matrix.h"
using std::vector;
using std::pair;
using std::map;
namespace paddle {
template <typename T>
struct BBoxBase {
BBoxBase(T xMin, T yMin, T xMax, T yMax)
: xMin(xMin), yMin(yMin), xMax(xMax), yMax(yMax), isDifficult(false) {}
BBoxBase() {}
T getWidth() const { return xMax - xMin; }
T getHeight() const { return yMax - yMin; }
T getCenterX() const { return (xMin + xMax) / 2; }
T getCenterY() const { return (yMin + yMax) / 2; }
T getArea() const { return getWidth() * getHeight(); }
// coordinate of bounding box
T xMin;
T yMin;
T xMax;
T yMax;
// whether difficult object (e.g. object with heavy occlusion is difficult)
bool isDifficult;
};
struct NormalizedBBox : BBoxBase<real> {
NormalizedBBox() : BBoxBase<real>() {}
};
enum PermMode { kNCHWToNHWC, kNHWCToNCHW };
/**
* @brief First permute input maxtrix then append to output matrix
*/
size_t appendWithPermute(const Matrix& inMatrix,
size_t height,
size_t width,
size_t outTotalSize,
size_t outOffset,
size_t batchSize,
Matrix& outMatrix,
PermMode permMode);
/**
* @brief First permute input maxtrix then decompose to output
*/
size_t decomposeWithPermute(const Matrix& inMatrix,
size_t height,
size_t width,
size_t totalSize,
size_t offset,
size_t batchSize,
Matrix& outMatrix,
PermMode permMode);
/**
* @brief Compute jaccard overlap between two bboxes.
* @param bbox1 The first bbox
* @param bbox2 The second bbox
*/
real jaccardOverlap(const NormalizedBBox& bbox1, const NormalizedBBox& bbox2);
/**
* @brief Compute offset parameters between prior bbox and ground truth bbox
* and variances of prior bbox are considered
* @param priorBBox Input prior bbox
* @param priorBBoxVar Variance parameters of prior bbox
* @param gtBBox Groundtruth bbox
* @param outVec Output vector
*/
void encodeBBoxWithVar(const NormalizedBBox& priorBBox,
const vector<real>& priorBBoxVar,
const NormalizedBBox& gtBBox,
vector<real>& outVec);
/**
* @brief Decode prior bbox with offset parameters
* and variances of prior bbox are considered
* @param priorBBox Prior bbox to be decoded
* @param priorBBoxVar Variance parameters of prior bbox
* @param locPredData Offset parameters
*/
NormalizedBBox decodeBBoxWithVar(const NormalizedBBox& priorBBox,
const vector<real>& priorBBoxVar,
const vector<real>& locPredData);
/**
* @brief Extract bboxes from prior matrix, the layout is
* xmin1 | ymin1 | xmax1 | ymax1 | xmin1Var | ymin1Var | xmax1Var | ymax1Var ...
* @param priorData Matrix of prior value
* @param numBBoxes Number of bbox to be extracted
* @param bboxVec Append to the vector
*/
void getBBoxFromPriorData(const real* priorData,
const size_t numBBoxes,
vector<NormalizedBBox>& bboxVec);
/**
* @brief Extract labels, scores and bboxes from detection matrix, the layout is
* imageId | label | score | xmin | ymin | xmax | ymax
* @param detectData Matrix of detection value
* @param numBBoxes Number of bbox to be extracted
* @param labelVec Label of bbox
* @param scoreVec Score of bbox
* @param bboxVec Append to the vector
*/
void getBBoxFromDetectData(const real* detectData,
const size_t numBBoxes,
vector<real>& labelVec,
vector<real>& scoreVec,
vector<NormalizedBBox>& bboxVec);
/**
* @brief Extract variances from prior matrix, the layout is
* xmin1 | ymin1 | xmax1 | ymax1 | xmin1Var | ymin1Var | xmax1Var | ymax1Var ...
* @param priorData Matrix of prior value
* @param num Number to be extracted
* @param varVec Append to the vector
*/
void getBBoxVarFromPriorData(const real* priorData,
const size_t num,
vector<vector<real>>& varVec);
/**
* @brief Extract bboxes from label matrix, the layout is
* class1_1 | xmin1_1 | ymin1_1 | xmax1_1 | ymax1_1 | difficult1_1 | ...
* @param labelData Matrix of label value
* @param numBBoxes Number to be extracted
* @param bboxVec Append to the vector
*/
void getBBoxFromLabelData(const real* labelData,
const size_t numBBoxes,
vector<NormalizedBBox>& bboxVec);
/**
* @brief Match prior bbox to groundtruth bbox, the strategy is:
1. Find the most overlaped bbox pair (prior and groundtruth)
2. For rest of prior bboxes find the most overlaped groundtruth bbox
* @param priorBBoxes prior bbox
* @param gtBBoxes groundtruth bbox
* @param overlapThreshold Low boundary of overlap (judge whether matched)
* @param matchIndices For each prior bbox, groundtruth bbox index if matched
otherwise -1
* @param matchOverlaps For each prior bbox, overap with all groundtruth bboxes
*/
void matchBBox(const vector<NormalizedBBox>& priorBBoxes,
const vector<NormalizedBBox>& gtBBoxes,
real overlapThreshold,
vector<int>* matchIndices,
vector<real>* matchOverlaps);
/**
* @brief Generate positive bboxes and negative bboxes,
|positive bboxes|/|negative bboxes| is negPosRatio
* @param priorValue Prior value
* @param numPriorBBoxes Number of prior bbox
* @param gtValue Groundtruth value
* @param gtStartPosPtr Since groundtruth value stored as sequence type,
this parameter indicates start position of each record
* @param seqNum Number of sequence
* @param maxConfScore Classification score for prior bbox, used to mine
negative examples
* @param batchSize Image number
* @param overlapThreshold Low boundary of overap
* @param negOverlapThreshold Upper boundary of overap (judge negative example)
* @param negPosRatio Control number of negative bboxes
* @param matchIndicesVecPtr Save indices of matched prior bbox
* @param negIndicesVecPtr Save indices of negative prior bbox
*/
pair<size_t, size_t> generateMatchIndices(
const Matrix& priorValue,
const size_t numPriorBBoxes,
const Matrix& gtValue,
const int* gtStartPosPtr,
const size_t seqNum,
const vector<vector<real>>& maxConfScore,
const size_t batchSize,
const real overlapThreshold,
const real negOverlapThreshold,
const size_t negPosRatio,
vector<vector<int>>* matchIndicesVecPtr,
vector<vector<int>>* negIndicesVecPtr);
/**
* @brief Get max confidence score for each prior bbox
* @param confData Confidence scores, layout is
* class1 score | class2 score | ... | classN score ...
* @param batchSize Image number
* @param numPriorBBoxes Prior bbox number
* @param numClasses Classes number
* @param backgroundId Background id
* @param maxConfScoreVecPtr Ouput
*/
void getMaxConfidenceScores(const real* confData,
const size_t batchSize,
const size_t numPriorBBoxes,
const size_t numClasses,
const size_t backgroundId,
vector<vector<real>>* maxConfScoreVecPtr);
template <typename T>
bool sortScorePairDescend(const pair<real, T>& pair1,
const pair<real, T>& pair2);
template <>
bool sortScorePairDescend(const pair<real, NormalizedBBox>& pair1,
const pair<real, NormalizedBBox>& pair2);
/**
* @brief Do NMS for bboxes to remove duplicated bboxes
* @param bboxes BBoxes to apply NMS
* @param confScoreData Confidence scores
* @param classIdx Class to do NMS
* @param topK Number to keep
* @param confThreshold Low boundary of confidence score
* @param nmsThreshold Threshold of overlap
* @param numPriorBBoxes Total number of prior bboxes
* @param numClasses Total class number
* @param indices Indices of high quality bboxes
*/
void applyNMSFast(const vector<NormalizedBBox>& bboxes,
const real* confScoreData,
size_t classIdx,
size_t topK,
real confThreshold,
real nmsThreshold,
size_t numPriorBBoxes,
size_t numClasses,
vector<size_t>* indices);
/**
* @brief Get detection results which satify requirements
* @param numPriorBBoxes Prior bbox number
* @param numClasses Class number
* @param backgroundId Background class
* @param batchSize Image number
* @param confThreshold Threshold of class confidence
* @param nmsTopK Used in NMS operation to keep top k bbox
* @param nmsThreshold Used in NMS, threshold of overlap
* @param keepTopK How many bboxes keeped in an image
* @param allDecodedBBoxes Decoded bboxes for all images
* @param allDetectionIndices Save detection bbox indices
*/
size_t getDetectionIndices(
const real* confData,
const size_t numPriorBBoxes,
const size_t numClasses,
const size_t backgroundId,
const size_t batchSize,
const size_t confThreshold,
const size_t nmsTopK,
const real nmsThreshold,
const size_t keepTopK,
const vector<vector<NormalizedBBox>>& allDecodedBBoxes,
vector<map<size_t, vector<size_t>>>* allDetectionIndices);
/**
* @brief Get detection results
* @param confData Confidence scores
* @param numPriorBBoxes Prior bbox number
* @param numClasses Class number
* @param batchSize Image number
* @param allIndices Indices of predicted bboxes
* @param allDecodedBBoxes BBoxes decoded
* @param out Output matrix
* image number | label | confidence score | xMin | yMin | xMax | yMax
*/
void getDetectionOutput(const real* confData,
const size_t numKept,
const size_t numPriorBBoxes,
const size_t numClasses,
const size_t batchSize,
const vector<map<size_t, vector<size_t>>>& allIndices,
const vector<vector<NormalizedBBox>>& allDecodedBBoxes,
Matrix& out);
NormalizedBBox clipBBox(const NormalizedBBox& bbox);
} // namespace paddle
...@@ -22,26 +22,8 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap, ...@@ -22,26 +22,8 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap,
/* Initialize the basic convolutional parent class */ /* Initialize the basic convolutional parent class */
ConvBaseLayer::init(layerMap, parameterMap); ConvBaseLayer::init(layerMap, parameterMap);
/* The class fields channels_ and numFilters_ are the same as in the config
* i.e., channels_ is the for the input and numFilters_ is for the output
*
* But in order for the variables in convTrans having the same semantic
* meaning as in conv, we need to swap channels_ and numFilters here for
* convTrans, and in other functions too.
* */
/* Initialize the projection */
for (auto &inputConfig : config_.inputs()) { for (auto &inputConfig : config_.inputs()) {
const ConvConfig &conf = inputConfig.conv_conf(); const ConvConfig &conf = inputConfig.conv_conf();
int numFilters = isDeconv_ ? conf.channels() : numFilters_;
subM_.push_back(numFilters / conf.groups());
subN_.push_back(conf.output_x() *
(conf.has_output_y() ? conf.output_y() : conf.output_x()));
int channel = isDeconv_ ? numFilters_ : conf.channels();
subK_.push_back(
channel * conf.filter_size() *
(conf.has_filter_size_y() ? conf.filter_size_y() : conf.filter_size()) /
conf.groups());
/* Consistent caffe mode for multiple input */ /* Consistent caffe mode for multiple input */
caffeMode_ = conf.caffe_mode(); caffeMode_ = conf.caffe_mode();
} }
...@@ -54,17 +36,9 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap, ...@@ -54,17 +36,9 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap,
size_t ExpandConvBaseLayer::getOutputSize() { size_t ExpandConvBaseLayer::getOutputSize() {
CHECK_NE(inputLayers_.size(), 0UL); CHECK_NE(inputLayers_.size(), 0UL);
size_t layerSize = ConvBaseLayer::calOutputSize(); size_t layerSize = ConvBaseLayer::calOutputSize();
subN_.clear();
for (size_t i = 0; i < inputLayers_.size(); i++) {
subN_.push_back(outputH_[i] * outputW_[i]);
}
return layerSize; return layerSize;
} }
void ExpandConvBaseLayer::resetExpandInput(size_t height, size_t width) {
Matrix::resizeOrCreate(expandInput_, height, width, false, useGpu_);
}
void ExpandConvBaseLayer::addSharedBias() { void ExpandConvBaseLayer::addSharedBias() {
size_t mapW = getOutputSize() / numFilters_; size_t mapW = getOutputSize() / numFilters_;
size_t mapH = getOutputValue()->getElementCnt() / mapW; size_t mapH = getOutputValue()->getElementCnt() / mapW;
...@@ -101,173 +75,6 @@ void ExpandConvBaseLayer::addUnsharedBias() { ...@@ -101,173 +75,6 @@ void ExpandConvBaseLayer::addUnsharedBias() {
outValue->addBias(*bias, 1.0f); outValue->addBias(*bias, 1.0f);
} }
void ExpandConvBaseLayer::expandOneFrame(MatrixPtr image,
size_t startIdx,
int inIdx) {
int channel = isDeconv_ ? numFilters_ : channels_[inIdx];
resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]);
CHECK_EQ(image->getWidth(),
static_cast<size_t>(imgSizeH_[inIdx] * imgSizeW_[inIdx] * channel));
real *imgData = image->getData() + startIdx * image->getWidth();
MatrixPtr imageTmp =
Matrix::create(imgData,
1,
imgSizeH_[inIdx] * imgSizeW_[inIdx] * channel,
false,
useGpu_);
expandInput_->convExpand(*imageTmp,
imgSizeH_[inIdx],
imgSizeW_[inIdx],
channel,
filterSizeY_[inIdx],
filterSize_[inIdx],
strideY_[inIdx],
stride_[inIdx],
paddingY_[inIdx],
padding_[inIdx],
outputH_[inIdx],
outputW_[inIdx]);
imageTmp->clear();
}
void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image,
MatrixPtr out,
int inIdx,
int startIdx) {
int subM = subM_[inIdx];
int subN = subN_[inIdx];
int subK = subK_[inIdx];
expandOneFrame(image, startIdx, inIdx);
int numFilters = isDeconv_ ? channels_[inIdx] : numFilters_;
real *outData = out->getData() + startIdx * subN * numFilters;
real *wgtData = weights_[inIdx]->getW()->getData();
real *expInData = expandInput_->getData();
for (int g = 0; g < groups_[inIdx]; ++g) {
MatrixPtr A =
Matrix::create(wgtData, subM, subK, false, useGpu_); // mark transpose
MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_);
MatrixPtr C = Matrix::create(outData, subM, subN, false, useGpu_);
C->mul(*A, *B, 1, 1);
A->clear();
B->clear();
C->clear();
wgtData += subK * subM;
expInData += subK * subN;
outData += subM * subN;
}
}
void ExpandConvBaseLayer::bpropActs(MatrixPtr out,
MatrixPtr image,
int inpIdx) {
int channel = isDeconv_ ? numFilters_ : channels_[inpIdx];
int subM = subM_[inpIdx];
int subN = subN_[inpIdx];
int subK = subK_[inpIdx];
size_t batchSize = image->getHeight();
/* reset the expand-grad memory */
resetExpandInput(subK * groups_[inpIdx], subN);
real *localGradData = out->getData();
real *tgtGradData = image->getData();
for (size_t n = 0; n < batchSize; n++) {
real *wgtData = weights_[inpIdx]->getW()->getData();
real *expandInData = expandInput_->getData();
for (int g = 0; g < groups_[inpIdx]; g++) {
// create temporary matrix
MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_);
MatrixPtr B = Matrix::create(localGradData, subM, subN, false, useGpu_);
MatrixPtr A = Matrix::create(wgtData, subM, subK, true, useGpu_);
C->mul(*A, *B); // mul
// clear the temporary matrix
A->clear();
B->clear();
C->clear();
expandInData += subK * subN;
localGradData += subM * subN;
wgtData += subK * subM;
}
// shrink one frame outGrad
MatrixPtr oneGradTmp = Matrix::create(
expandInput_->getData(), subK * groups_[inpIdx], subN, false, useGpu_);
MatrixPtr vTmp =
Matrix::create(tgtGradData,
1,
imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel,
false,
useGpu_);
vTmp->convShrink(*oneGradTmp,
imgSizeH_[inpIdx],
imgSizeW_[inpIdx],
channel,
filterSizeY_[inpIdx],
filterSize_[inpIdx],
strideY_[inpIdx],
stride_[inpIdx],
paddingY_[inpIdx],
padding_[inpIdx],
outputH_[inpIdx],
outputW_[inpIdx],
1.0f,
1.0f);
vTmp->clear();
oneGradTmp->clear();
// move the data-pointer
tgtGradData += imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel;
}
}
void ExpandConvBaseLayer::bpropWeights(MatrixPtr image,
MatrixPtr out,
int inpIdx) {
MatrixPtr weightGrad = weights_[inpIdx]->getWGrad();
int subM = subM_[inpIdx];
int subN = subN_[inpIdx];
int subK = subK_[inpIdx];
size_t batchSize = image->getHeight();
resetExpandInput(subK * groups_[inpIdx], subN);
real *gradData = out->getData();
for (size_t n = 0; n < batchSize; n++) { // frame by frame
// expand
expandOneFrame(image, n, inpIdx);
real *wGradData = weightGrad->getData();
real *expandInData = expandInput_->getData();
// expand-mul one-group by one
for (int g = 0; g < groups_[inpIdx]; g++) {
MatrixPtr A = Matrix::create(expandInData, subK, subN, true, useGpu_);
MatrixPtr B = Matrix::create(gradData, subM, subN, false, useGpu_);
MatrixPtr C = Matrix::create(wGradData, subM, subK, false, useGpu_);
C->mul(*B, *A, 1, 1);
A->clear();
B->clear();
C->clear();
gradData += subM * subN;
wGradData += subK * subM;
expandInData += subK * subN;
}
}
}
void ExpandConvBaseLayer::bpropSharedBias(MatrixPtr biases, MatrixPtr v) { void ExpandConvBaseLayer::bpropSharedBias(MatrixPtr biases, MatrixPtr v) {
size_t mapW = getOutputSize() / numFilters_; size_t mapW = getOutputSize() / numFilters_;
size_t mapH = v->getElementCnt() / mapW; size_t mapH = v->getElementCnt() / mapW;
......
...@@ -26,19 +26,6 @@ namespace paddle { ...@@ -26,19 +26,6 @@ namespace paddle {
*/ */
class ExpandConvBaseLayer : public ConvBaseLayer { class ExpandConvBaseLayer : public ConvBaseLayer {
protected: protected:
/// For expand convolution.
/// subM_ = numFilters_ / groups_.
IntV subM_;
/// subN_ = outputH_ * outputW_.
IntV subN_;
/// subK_ = channels_ * filterPixels_ * groups_.
IntV subK_;
/*The expandInput_ and transOutValue_ are used for CPU expand conv calc
* Expand one sample at a time. shape:
* (numChannels * filterPixels_, outputSizeH * outputSizeW)
* */
MatrixPtr expandInput_;
/// The transpose of output, which is an auxiliary matrix. /// The transpose of output, which is an auxiliary matrix.
MatrixPtr transOutValue_; MatrixPtr transOutValue_;
...@@ -52,10 +39,6 @@ public: ...@@ -52,10 +39,6 @@ public:
const ParameterMap& parameterMap) override; const ParameterMap& parameterMap) override;
size_t getOutputSize(); size_t getOutputSize();
/**
* Create or resize expandInput_.
*/
void resetExpandInput(size_t height, size_t width);
/** /**
* Add shared bias. * Add shared bias.
...@@ -66,20 +49,9 @@ public: ...@@ -66,20 +49,9 @@ public:
* Add unshared bias. * Add unshared bias.
*/ */
void addUnsharedBias(); void addUnsharedBias();
/**
* Expand one input sample.
*/
void expandOneFrame(MatrixPtr image, size_t startIdx, int inIdx);
/**
* Expand one input sample and perform matrix multiplication.
*/
void expandFwdOnce(MatrixPtr image, MatrixPtr out, int inIdx, int startIdx);
void bpropSharedBias(MatrixPtr biases, MatrixPtr v); void bpropSharedBias(MatrixPtr biases, MatrixPtr v);
void bpropBiases(MatrixPtr v); void bpropBiases(MatrixPtr v);
void bpropWeights(MatrixPtr image, MatrixPtr out, int inpIdx);
void bpropActs(MatrixPtr image, MatrixPtr out, int inpIdx);
}; };
} // namespace paddle } // namespace paddle
...@@ -18,32 +18,94 @@ limitations under the License. */ ...@@ -18,32 +18,94 @@ limitations under the License. */
namespace paddle { namespace paddle {
/*
* The calculation of the exconvt(convolution transpose (deconv) operation)
* is a swap of forward and backward of the calculation of exconv.
* */
REGISTER_LAYER(exconv, ExpandConvLayer); REGISTER_LAYER(exconv, ExpandConvLayer);
REGISTER_LAYER(exconvt, ExpandConvLayer);
bool ExpandConvLayer::init(const LayerMap &layerMap, bool ExpandConvLayer::init(const LayerMap &layerMap,
const ParameterMap &parameterMap) { const ParameterMap &parameterMap) {
/* Initialize the basic convolutional parent class */ /* Initialize the basic convolutional parent class */
ExpandConvBaseLayer::init(layerMap, parameterMap); ExpandConvBaseLayer::init(layerMap, parameterMap);
size_t numInputs = config_.inputs_size();
inputShape_.resize(numInputs);
filterShape_.resize(numInputs);
outputShape_.resize(numInputs);
for (int i = 0; i < config_.inputs_size(); i++) {
std::vector<size_t> paddings = {(size_t)paddingY_[i], (size_t)padding_[i]};
std::vector<size_t> strides = {(size_t)strideY_[i], (size_t)stride_[i]};
createFunction(forward_,
!isDeconv_ ? "GemmConv" : "GemmConvGradInput",
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)groups_[i]));
createFunction(backward_,
!isDeconv_ ? "GemmConvGradInput" : "GemmConv",
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)groups_[i]));
createFunction(backward_,
"GemmConvGradFilter",
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)groups_[i]));
}
return true; return true;
} }
// i is the index of input layers
#define BACKWARD_INPUT(i, inputs, outputs) \
backward_[2 * i]->calc(inputs, outputs)
#define BACKWARD_FILTER(i, inputs, outputs) \
backward_[2 * i + 1]->calc(inputs, outputs)
void ExpandConvLayer::forward(PassType passType) { void ExpandConvLayer::forward(PassType passType) {
Layer::forward(passType); Layer::forward(passType);
/* malloc memory for the output_ if necessary */ size_t batchSize = inputLayers_[0]->getOutputValue()->getHeight();
int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
resetOutput(batchSize, getOutputSize()); resetOutput(batchSize, getOutputSize());
MatrixPtr image = nullptr; // Calculate the shape of the input, output, and filter.
MatrixPtr outV = getOutputValue();
for (size_t i = 0; i < inputLayers_.size(); ++i) { for (size_t i = 0; i < inputLayers_.size(); ++i) {
LayerPtr prevLayer = getPrev(i); inputShape_[i] = TensorShape({(size_t)batchSize,
image = prevLayer->getOutputValue(); (size_t)channels_[i],
for (size_t off = 0; off < image->getHeight(); off++) { (size_t)imgSizeH_[i],
REGISTER_TIMER_INFO("expandFwdOnce", getName().c_str()); (size_t)imgSizeW_[i]});
expandFwdOnce(image, outV, i, off); filterShape_[i] =
TensorShape({(size_t)groups_[i],
!isDeconv_ ? (size_t)numFilters_ / groups_[i]
: (size_t)channels_[i] / groups_[i],
!isDeconv_ ? (size_t)channels_[i] / groups_[i]
: (size_t)numFilters_ / groups_[i],
(size_t)filterSizeY_[i],
(size_t)filterSize_[i]});
outputShape_[i] = TensorShape({(size_t)batchSize,
(size_t)numFilters_,
(size_t)outputH_[i],
(size_t)outputW_[i]});
} }
// Calculate the output value.
for (size_t i = 0; i < inputLayers_.size(); ++i) {
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*getInputValue(i), inputShape_[i]);
inputs.addArg(*weights_[i]->getW(), filterShape_[i]);
outputs.addArg(*getOutputValue(),
outputShape_[i],
!isDeconv_ && i == 0 ? ASSIGN_TO : ADD_TO);
forward_[i]->calc(inputs, outputs);
} }
/* add the bias-vector */ /* add the bias-vector */
if (biases_.get()) { if (biases_.get()) {
if (sharedBiases_) { if (sharedBiases_) {
...@@ -67,14 +129,30 @@ void ExpandConvLayer::backward(const UpdateCallback &callback) { ...@@ -67,14 +129,30 @@ void ExpandConvLayer::backward(const UpdateCallback &callback) {
biases_->getParameterPtr()->incUpdate(callback); biases_->getParameterPtr()->incUpdate(callback);
} }
// Calculate the input grad and filter grad.
for (size_t i = 0; i < inputLayers_.size(); ++i) { for (size_t i = 0; i < inputLayers_.size(); ++i) {
/* First, calculate the input layers error */ if (getInputGrad(i)) {
if (getPrev(i)->getOutputGrad()) { BufferArgs inputs;
bpropActs(outGrad, getPrev(i)->getOutputGrad(), i); BufferArgs outputs;
inputs.addArg(*getOutputGrad(), outputShape_[i]);
inputs.addArg(*weights_[i]->getW(), filterShape_[i]);
outputs.addArg(*getInputGrad(i), inputShape_[i], ADD_TO);
BACKWARD_INPUT(i, inputs, outputs);
} }
if (weights_[i]->getWGrad()) { if (weights_[i]->getWGrad()) {
/* Then, calculate the W-gradient for the current layer */ BufferArgs inputs;
bpropWeights(getPrev(i)->getOutputValue(), outGrad, i); BufferArgs outputs;
if (!isDeconv_) {
inputs.addArg(*getOutputGrad(), outputShape_[i]);
inputs.addArg(*getInputValue(i), inputShape_[i]);
} else {
inputs.addArg(*getInputValue(i), inputShape_[i]);
inputs.addArg(*getOutputGrad(), outputShape_[i]);
}
outputs.addArg(*weights_[i]->getWGrad(), filterShape_[i], ADD_TO);
BACKWARD_FILTER(i, inputs, outputs);
/* Increasing the number of gradient */ /* Increasing the number of gradient */
weights_[i]->getParameterPtr()->incUpdate(callback); weights_[i]->getParameterPtr()->incUpdate(callback);
} }
......
...@@ -40,6 +40,11 @@ public: ...@@ -40,6 +40,11 @@ public:
void forward(PassType passType) override; void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override; void backward(const UpdateCallback& callback) override;
protected:
std::vector<TensorShape> inputShape_;
std::vector<TensorShape> filterShape_;
std::vector<TensorShape> outputShape_;
}; };
} // namespace paddle } // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ExpandConvTransLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
/* The implementation of the convTransLayer is basically a swap of forward and
* backward of the original convLayer.
* The variable naming follows the convention of the convLayer.
* */
namespace paddle {
REGISTER_LAYER(exconvt, ExpandConvTransLayer);
bool ExpandConvTransLayer::init(const LayerMap &layerMap,
const ParameterMap &parameterMap) {
/* Initialize the basic convolutional parent class */
ExpandConvBaseLayer::init(layerMap, parameterMap);
return true;
}
void ExpandConvTransLayer::forward(PassType passType) {
Layer::forward(passType);
/* malloc memory for the output_ if necessary */
int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
resetOutput(batchSize, getOutputSize());
MatrixPtr output = nullptr;
for (size_t i = 0; i < inputLayers_.size(); ++i) {
LayerPtr prevLayer = getPrev(i);
output = prevLayer->getOutputValue();
REGISTER_TIMER_INFO("shrinkFwd", getName().c_str());
bpropActs(output, getOutputValue(), i);
}
/* add the bias-vector */
if (biases_.get()) {
if (sharedBiases_) {
addSharedBias();
} else {
addUnsharedBias();
}
}
/* activation */
forwardActivation();
}
void ExpandConvTransLayer::backward(const UpdateCallback &callback) {
backwardActivation();
MatrixPtr imageGrad = getOutputGrad();
if (biases_ && biases_->getWGrad()) {
bpropBiases(imageGrad);
/* Increasing the number of gradient */
biases_->getParameterPtr()->incUpdate(callback);
}
for (size_t i = 0; i < inputLayers_.size(); ++i) {
/* First, calculate the input layers error */
for (size_t off = 0; off < imageGrad->getHeight(); off++) {
if (getPrev(i)->getOutputGrad()) {
expandFwdOnce(imageGrad, getPrev(i)->getOutputGrad(), i, off);
}
}
if (weights_[i]->getWGrad()) {
/* Then, calculate the W-gradient for the current layer */
bpropWeights(imageGrad, getPrev(i)->getOutputValue(), i);
/* Increasing the number of gradient */
weights_[i]->getParameterPtr()->incUpdate(callback);
}
}
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "RowConvLayer.h"
#include "paddle/utils/Stat.h"
namespace paddle {
REGISTER_LAYER(row_conv, RowConvLayer);
bool RowConvLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
/* Initialize the basic parent class */
Layer::init(layerMap, parameterMap);
contexLength_ = config_.inputs(0).row_conv_conf().context_length();
CHECK_EQ(inputLayers_.size(), 1UL);
weight_.reset(new Weight(contexLength_, getSize(), parameters_[0]));
createFunction(forward_, "RowConv", FuncConfig());
createFunction(backward_, "RowConvGrad", FuncConfig());
return true;
}
void RowConvLayer::forward(PassType passType) {
Layer::forward(passType);
MatrixPtr input = getInputValue(0);
size_t height = input->getHeight();
size_t width = input->getWidth();
CHECK_EQ(width, getSize());
resetOutput(height, width);
const auto startPos = getInput(0).sequenceStartPositions->getVector(useGpu_);
MatrixPtr w = weight_->getW();
wDims_ = TensorShape({w->getHeight(), w->getWidth()});
MatrixPtr outV = getOutputValue();
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*getInputValue(0), *startPos);
inputs.addArg(*w, wDims_);
outputs.addArg(*getOutputValue(), *startPos, ADD_TO);
{
REGISTER_TIMER_INFO("RowConvForward", getName().c_str());
forward_[0]->calc(inputs, outputs);
}
/* activation */ {
REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
forwardActivation();
}
}
void RowConvLayer::backward(const UpdateCallback& callback) {
/* Do derivation */ {
REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
backwardActivation();
}
const auto startPos = getInput(0).sequenceStartPositions->getVector(useGpu_);
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*getOutputGrad(), *startPos);
inputs.addArg(*getInputValue(0), *startPos);
inputs.addArg(*weight_->getW(), wDims_);
MatrixPtr inGrad = getInputGrad(0);
MatrixPtr wGrad = weight_->getWGrad();
size_t h = getInputValue(0)->getHeight();
size_t w = getInputValue(0)->getWidth();
outputs.addArg(
inGrad ? (*inGrad) : *(Matrix::create(nullptr, h, w, false, useGpu_)),
*startPos,
ADD_TO);
outputs.addArg(
wGrad ? (*wGrad)
: *(Matrix::create(nullptr, contexLength_, w, false, useGpu_)),
wDims_,
ADD_TO);
{
REGISTER_TIMER_INFO("RowConvBackward", getName().c_str());
backward_[0]->calc(inputs, outputs);
}
{
REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
weight_->getParameterPtr()->incUpdate(callback);
}
}
} // namespace paddle
...@@ -14,31 +14,31 @@ limitations under the License. */ ...@@ -14,31 +14,31 @@ limitations under the License. */
#pragma once #pragma once
#include <vector> #include "Layer.h"
#include "ExpandConvBaseLayer.h"
#include "paddle/math/Matrix.h"
namespace paddle { namespace paddle {
/** /**
* @brief A subclass of convolution layer. * \brief Row Convolution Layer.
* This layer expands input and use matrix multiplication to
* calculate convolution transpose (deconv) operation.
*
* The config file api is img_conv_layer with flag trans=True.
*/ */
class ExpandConvTransLayer : public ExpandConvBaseLayer { class RowConvLayer : public Layer {
public: public:
explicit ExpandConvTransLayer(const LayerConfig& config) explicit RowConvLayer(const LayerConfig& config) : Layer(config) {}
: ExpandConvBaseLayer(config) {}
~ExpandConvTransLayer() {} ~RowConvLayer() {}
bool init(const LayerMap& layerMap, bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override; const ParameterMap& parameterMap) override;
void forward(PassType passType) override; void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override; void backward(const UpdateCallback& callback = nullptr) override;
};
protected:
// Row convolution weight, context_lenght_ * fan_out.
// fan_out is the size of output feature.
std::unique_ptr<Weight> weight_;
// The step number to look ahead plus one equals contexLength_.
size_t contexLength_;
TensorShape wDims_;
};
} // namespace paddle } // namespace paddle
...@@ -17,7 +17,6 @@ limitations under the License. */ ...@@ -17,7 +17,6 @@ limitations under the License. */
#include <vector> #include <vector>
#include "ModelConfig.pb.h" #include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h" #include "paddle/gserver/layers/DataLayer.h"
#include "paddle/gserver/layers/ExpandConvTransLayer.h"
#include "paddle/trainer/Trainer.h" #include "paddle/trainer/Trainer.h"
#include "paddle/utils/GlobalConstants.h" #include "paddle/utils/GlobalConstants.h"
......
...@@ -17,7 +17,6 @@ limitations under the License. */ ...@@ -17,7 +17,6 @@ limitations under the License. */
#include <vector> #include <vector>
#include "ModelConfig.pb.h" #include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h" #include "paddle/gserver/layers/DataLayer.h"
#include "paddle/gserver/layers/ExpandConvTransLayer.h"
#include "paddle/math/MathUtils.h" #include "paddle/math/MathUtils.h"
#include "paddle/trainer/Trainer.h" #include "paddle/trainer/Trainer.h"
#include "paddle/utils/GlobalConstants.h" #include "paddle/utils/GlobalConstants.h"
......
...@@ -17,7 +17,6 @@ limitations under the License. */ ...@@ -17,7 +17,6 @@ limitations under the License. */
#include <vector> #include <vector>
#include "ModelConfig.pb.h" #include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h" #include "paddle/gserver/layers/DataLayer.h"
#include "paddle/gserver/layers/ExpandConvTransLayer.h"
#include "paddle/math/MathUtils.h" #include "paddle/math/MathUtils.h"
#include "paddle/trainer/Trainer.h" #include "paddle/trainer/Trainer.h"
#include "paddle/utils/GlobalConstants.h" #include "paddle/utils/GlobalConstants.h"
......
...@@ -1705,6 +1705,26 @@ TEST(Layer, TransLayer) { ...@@ -1705,6 +1705,26 @@ TEST(Layer, TransLayer) {
} }
} }
TEST(Layer, RowConvLayer) {
const int context = 3;
const int size = 512;
TestConfig config;
config.layerConfig.set_type("row_conv");
config.layerConfig.set_size(size);
config.layerConfig.set_active_type("sigmoid");
config.inputDefs.push_back(
{INPUT_SEQUENCE_DATA, "layer_0", size, context * size});
LayerInputConfig* input = config.layerConfig.add_inputs();
RowConvConfig* conv = input->mutable_row_conv_conf();
conv->set_context_length(context);
for (auto useGpu : {false, true}) {
testLayerGrad(config, "row_conv", 100, false, useGpu, false);
}
}
int main(int argc, char** argv) { int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
initMain(argc, argv); initMain(argc, argv);
......
...@@ -632,7 +632,7 @@ void Argument::printValueString(std::ostream& stream, ...@@ -632,7 +632,7 @@ void Argument::printValueString(std::ostream& stream,
const std::string& prefix) const { const std::string& prefix) const {
std::unordered_map<std::string, std::string> out; std::unordered_map<std::string, std::string> out;
getValueString(&out); getValueString(&out);
for (auto field : {"value", "id", "sequence pos", "sub-sequence pos"}) { for (auto field : {"value", "ids", "sequence pos", "sub-sequence pos"}) {
auto it = out.find(field); auto it = out.find(field);
if (it != out.end()) { if (it != out.end()) {
stream << prefix << field << ":\n" << it->second; stream << prefix << field << ":\n" << it->second;
......
...@@ -42,7 +42,7 @@ TEST(Argument, poolSequenceWithStride) { ...@@ -42,7 +42,7 @@ TEST(Argument, poolSequenceWithStride) {
CHECK_EQ(outStart[3], 4); CHECK_EQ(outStart[3], 4);
CHECK_EQ(outStart[4], 7); CHECK_EQ(outStart[4], 7);
CHECK_EQ(stridePositions->getSize(), 8); CHECK_EQ(stridePositions->getSize(), 8UL);
auto result = reversed ? strideResultReversed : strideResult; auto result = reversed ? strideResultReversed : strideResult;
for (int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++) {
CHECK_EQ(stridePositions->getData()[i], result[i]); CHECK_EQ(stridePositions->getData()[i], result[i]);
......
...@@ -383,20 +383,23 @@ void SocketClient::TcpClient(const std::string &serverAddr, int serverPort) { ...@@ -383,20 +383,23 @@ void SocketClient::TcpClient(const std::string &serverAddr, int serverPort) {
setOption(sockfd); setOption(sockfd);
/// Now connect to the server /// Now connect to the server
int retry_second = 0; int retry_count = 0;
int error = 0;
do { do {
error = connect(sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr)); if (connect(sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr)) == 0) {
if (error == ECONNREFUSED) { break;
}
if (errno == ECONNREFUSED) {
LOG(WARNING) << "connection refused by pserver, try again!"; LOG(WARNING) << "connection refused by pserver, try again!";
if (retry_second++ >= 7) { if (retry_count++ >= 7) {
LOG(FATAL) << "connection refused by pserver, maybe pserver failed!"; LOG(FATAL) << "connection refused by pserver, maybe pserver failed!";
} }
std::this_thread::sleep_for(std::chrono::seconds(1)); std::this_thread::sleep_for(std::chrono::seconds(1));
} else { } else {
PCHECK(error >= 0) << "ERROR connecting to " << serverAddr; PCHECK(errno != 0) << "ERROR connecting to " << serverAddr << ":"
<< serverPort << "errorno: " << errno;
} }
} while (error == ECONNREFUSED); } while (errno == ECONNREFUSED);
channel_.reset(new SocketChannel(sockfd, serverAddr)); channel_.reset(new SocketChannel(sockfd, serverAddr));
tcpRdma_ = F_TCP; tcpRdma_ = F_TCP;
......
...@@ -58,7 +58,7 @@ EOF ...@@ -58,7 +58,7 @@ EOF
make -j `nproc` make -j `nproc`
if [ ${WITH_TESTING:-OFF} == "ON" ] && [ ${RUN_TEST:-OFF} == "ON" ] ; then if [ ${WITH_TESTING:-OFF} == "ON" ] && [ ${RUN_TEST:-OFF} == "ON" ] ; then
pip uninstall -y py-paddle paddle || true pip uninstall -y py-paddle paddle || true
ctest -V ctest --output-on-failure
fi fi
......
cc_library(stringpiece SRCS stringpiece.cc)
cc_test(stringpiece_test SRCS stringpiece_test.cc DEPS stringpiece glog gflags)
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "paddle/strings/stringpiece.h"
#include <string.h>
#include <algorithm>
#include <iosfwd>
#include <stdexcept>
namespace paddle {
StringPiece::StringPiece() : data_(NULL), size_(0) {}
StringPiece::StringPiece(const char* d, size_t n) : data_(d), size_(n) {
if (d == NULL && n != 0)
throw std::invalid_argument(
"StringPiece requires len to be 0 for NULL data");
}
StringPiece::StringPiece(const char* s) : data_(s) {
size_ = (s == NULL) ? 0 : strlen(s);
}
StringPiece::StringPiece(const std::string& s)
: data_(s.data()), size_(s.size()) {}
char StringPiece::operator[](size_t n) const {
if (n >= len())
throw std::invalid_argument("index out of StringPiece length");
return data_[n];
}
int Compare(StringPiece a, StringPiece b) {
const size_t min_len = (a.len() < b.len()) ? a.len() : b.len();
int r = memcmp(a.data(), b.data(), min_len);
if (r == 0) {
if (a.len() < b.len())
return -1;
else if (a.len() > b.len())
return 1;
}
return r;
}
bool operator==(StringPiece x, StringPiece y) {
return ((x.len() == y.len()) &&
(x.data() == y.data() || memcmp(x.data(), y.data(), x.len()) == 0));
}
bool operator!=(StringPiece x, StringPiece y) { return !(x == y); }
bool operator<(StringPiece x, StringPiece y) { return Compare(x, y) < 0; }
bool operator>(StringPiece x, StringPiece y) { return Compare(x, y) > 0; }
bool operator<=(StringPiece x, StringPiece y) { return Compare(x, y) <= 0; }
bool operator>=(StringPiece x, StringPiece y) { return Compare(x, y) >= 0; }
bool HasPrefix(StringPiece s, StringPiece x) {
return ((s.len() >= x.len()) && (memcmp(s.data(), x.data(), x.len()) == 0));
}
bool HasSuffix(StringPiece s, StringPiece x) {
return ((s.len() >= x.len()) &&
(memcmp(s.data() + (s.len() - x.len()), x.data(), x.len()) == 0));
}
StringPiece SkipPrefix(StringPiece s, size_t n) {
if (n > s.len())
throw std::invalid_argument("Skip distance larger than StringPiece length");
return StringPiece(s.data() + n, s.len() - n);
}
StringPiece SkipSuffix(StringPiece s, size_t n) {
if (n > s.len())
throw std::invalid_argument("Skip distance larger than StringPiece length");
return StringPiece(s.data(), s.len() - n);
}
StringPiece TrimPrefix(StringPiece s, StringPiece x) {
return HasPrefix(s, x) ? SkipPrefix(s, x.len()) : s;
}
StringPiece TrimSuffix(StringPiece s, StringPiece x) {
return HasSuffix(s, x) ? SkipSuffix(s, x.len()) : s;
}
bool Contains(StringPiece s, StringPiece sub) {
return std::search(s.begin(), s.end(), sub.begin(), sub.end()) != s.end();
}
size_t Index(StringPiece s, StringPiece sub) {
auto e = std::search(s.begin(), s.end(), sub.begin(), sub.end());
return e != s.end() ? e - s.data() : StringPiece::npos;
}
size_t Find(StringPiece s, char c, size_t pos) {
if (pos >= s.len()) {
return StringPiece::npos;
}
const char* result =
reinterpret_cast<const char*>(memchr(s.data() + pos, c, s.len() - pos));
return result != nullptr ? result - s.data() : StringPiece::npos;
}
size_t RFind(StringPiece s, char c, size_t pos) {
if (s.len() == 0) return StringPiece::npos;
for (const char* p = s.data() + std::min(pos, s.len() - 1); p >= s.data();
p--) {
if (*p == c) {
return p - s.data();
}
}
return StringPiece::npos;
}
StringPiece SubStr(StringPiece s, size_t pos, size_t n) {
if (pos > s.len()) pos = s.len();
if (n > s.len() - pos) n = s.len() - pos;
return StringPiece(s.data() + pos, n);
}
std::ostream& operator<<(std::ostream& o, StringPiece piece) {
return o << piece.ToString();
}
} // namespace paddle
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#pragma once
#include <ostream>
#include <string>
namespace paddle {
// StringPiece points into a std::string object but doesn't own the
// string. It is for efficient access to strings. Like Go's string
// type. Not that StringPiece doesn't mutate the underlying string,
// so it is thread-safe given that the underlying string doesn't
// change. Because StringPiece contains a little data members, and
// its syntax is simple as it doesn't own/manage the string, it is
// cheap to construct StringPieces and pass them around.
class StringPiece {
public:
static const size_t npos = static_cast<size_t>(-1);
// We provide non-explicit singleton constructors so users can
// pass in a "const char*" or a "string" wherever a "StringPiece"
// is expected. These contructors ensure that if data_ is NULL,
// size_ is 0.
StringPiece();
StringPiece(const char* d, size_t n);
StringPiece(const char* d);
StringPiece(const std::string& s);
const char* data() const { return data_; }
size_t len() const { return size_; }
char operator[](size_t n) const;
// StringPiece doesn't own the string, so both iterator and const
// iterator are const char* indeed.
typedef const char* const_iterator;
typedef const char* iterator;
iterator begin() const { return data_; }
iterator end() const { return data_ + size_; }
// Return a string that contains the copy of the referenced data.
std::string ToString() const { return std::string(data_, size_); }
private:
const char* data_;
size_t size_;
// Intentionally copyable
};
int Compare(StringPiece a, StringPiece b);
bool operator==(StringPiece x, StringPiece y);
bool operator!=(StringPiece x, StringPiece y);
bool operator<(StringPiece x, StringPiece y);
bool operator>(StringPiece x, StringPiece y);
bool operator<=(StringPiece x, StringPiece y);
bool operator>=(StringPiece x, StringPiece y);
bool HasPrefix(StringPiece s, StringPiece prefix);
bool HasSuffix(StringPiece s, StringPiece suffix);
StringPiece SkipPrefix(StringPiece s, size_t n);
StringPiece SkipSuffix(StringPiece s, size_t n);
// Skip the prefix (or suffix) if it matches with the string.
StringPiece TrimPrefix(StringPiece s, StringPiece prefix);
StringPiece TrimSuffix(StringPiece s, StringPiece suffix);
// Returns if s contains sub. Any s except for empty s contains an
// empty sub.
bool Contains(StringPiece s, StringPiece sub);
// Return the first occurrence of sub in s, or npos. If both s and
// sub is empty, it returns npos; otherwise, if only sub is empty, it
// returns 0.
size_t Index(StringPiece s, StringPiece sub);
// Return the first occurrence of c in s[pos:end], or npos.
size_t Find(StringPiece s, char c, size_t pos);
// Search range is [0..pos] inclusive. If pos == npos, search everything.
size_t RFind(StringPiece s, char c, size_t pos);
StringPiece SubStr(StringPiece s, size_t pos, size_t n);
// allow StringPiece to be logged
std::ostream& operator<<(std::ostream& o, StringPiece piece);
} // namespace paddle
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "paddle/strings/stringpiece.h"
#include <sstream>
#include "gtest/gtest.h"
TEST(StringPiece, Construct) {
{
paddle::StringPiece s;
EXPECT_EQ(NULL, s.data());
EXPECT_EQ(0U, s.len());
}
{ EXPECT_THROW(paddle::StringPiece s(NULL, 10000U), std::invalid_argument); }
{
paddle::StringPiece s(NULL);
EXPECT_EQ(0U, s.len());
}
{
std::string a;
EXPECT_EQ(0U, a.size());
paddle::StringPiece s(a);
EXPECT_EQ(0U, s.len());
}
}
TEST(StringPiece, CopyAndAssign) {
paddle::StringPiece empty;
EXPECT_EQ(0U, empty.len());
paddle::StringPiece a("hello");
paddle::StringPiece b = a;
EXPECT_EQ(b.len(), strlen("hello"));
EXPECT_EQ(a, b);
std::string storage("hello");
paddle::StringPiece c(storage);
EXPECT_EQ(a, c);
EXPECT_NE(a.data(), c.data());
}
TEST(StringPiece, Compare) {
{
paddle::StringPiece a("hello");
paddle::StringPiece b("world");
EXPECT_TRUE(a != b);
EXPECT_FALSE(a == b);
EXPECT_TRUE(a < b);
EXPECT_TRUE(a <= b);
EXPECT_FALSE(a > b);
EXPECT_FALSE(a >= b);
EXPECT_LT(Compare(a, b), 0);
EXPECT_GT(Compare(b, a), 0);
}
{
paddle::StringPiece a, b;
EXPECT_TRUE(a == b);
EXPECT_FALSE(a != b);
EXPECT_FALSE(a < b);
EXPECT_FALSE(a > b);
EXPECT_TRUE(a <= b);
EXPECT_TRUE(a >= b);
EXPECT_EQ(0, Compare(a, b));
EXPECT_EQ(0, Compare(b, a));
}
}
TEST(StringPiece, ToString) {
{
paddle::StringPiece s;
EXPECT_EQ(std::string(""), s.ToString());
}
{
paddle::StringPiece s(NULL);
EXPECT_EQ(std::string(""), s.ToString());
}
{
paddle::StringPiece s("hello");
EXPECT_EQ(std::string("hello"), s.ToString());
}
}
TEST(StringPiece, HasPrefixSuffix) {
using paddle::HasPrefix;
using paddle::HasSuffix;
{
paddle::StringPiece s;
EXPECT_FALSE(HasPrefix(s, "something"));
EXPECT_TRUE(HasPrefix(s, ""));
EXPECT_FALSE(HasSuffix(s, "something"));
EXPECT_TRUE(HasSuffix(s, ""));
}
{
paddle::StringPiece s("app");
EXPECT_TRUE(HasPrefix(s, ""));
EXPECT_TRUE(HasPrefix(s, "a"));
EXPECT_TRUE(HasPrefix(s, "ap"));
EXPECT_TRUE(HasPrefix(s, "app"));
EXPECT_TRUE(HasSuffix(s, ""));
EXPECT_TRUE(HasSuffix(s, "p"));
EXPECT_TRUE(HasSuffix(s, "pp"));
EXPECT_TRUE(HasSuffix(s, "app"));
}
}
TEST(StringPiece, SkipPrefixSuffix) {
using paddle::SkipPrefix;
using paddle::SkipSuffix;
{
paddle::StringPiece s;
EXPECT_EQ("", SkipPrefix(s, 0));
EXPECT_THROW(SkipPrefix(s, 1), std::invalid_argument);
EXPECT_EQ("", SkipSuffix(s, 0));
EXPECT_THROW(SkipSuffix(s, 1), std::invalid_argument);
}
{
paddle::StringPiece s("app");
EXPECT_EQ("app", SkipPrefix(s, 0));
EXPECT_EQ("pp", SkipPrefix(s, 1));
EXPECT_EQ("p", SkipPrefix(s, 2));
EXPECT_EQ("", SkipPrefix(s, 3));
EXPECT_THROW(SkipPrefix(s, 4), std::invalid_argument);
EXPECT_EQ("app", SkipSuffix(s, 0));
EXPECT_EQ("ap", SkipSuffix(s, 1));
EXPECT_EQ("a", SkipSuffix(s, 2));
EXPECT_EQ("", SkipSuffix(s, 3));
EXPECT_THROW(SkipSuffix(s, 4), std::invalid_argument);
}
}
TEST(StringPiece, TrimPrefixSuffix) {
using paddle::TrimPrefix;
using paddle::TrimSuffix;
{
paddle::StringPiece s;
EXPECT_EQ("", TrimPrefix(s, ""));
EXPECT_EQ("", TrimPrefix(s, "something"));
EXPECT_EQ("", TrimSuffix(s, ""));
EXPECT_EQ("", TrimSuffix(s, "something"));
}
{
paddle::StringPiece s("app");
EXPECT_EQ("app", TrimPrefix(s, ""));
EXPECT_EQ("pp", TrimPrefix(s, "a"));
EXPECT_EQ("p", TrimPrefix(s, "ap"));
EXPECT_EQ("", TrimPrefix(s, "app"));
EXPECT_EQ("app", TrimPrefix(s, "something"));
EXPECT_EQ("app", TrimSuffix(s, ""));
EXPECT_EQ("ap", TrimSuffix(s, "p"));
EXPECT_EQ("a", TrimSuffix(s, "pp"));
EXPECT_EQ("", TrimSuffix(s, "app"));
EXPECT_EQ("app", TrimSuffix(s, "something"));
}
}
TEST(StringPiece, Contains) {
using paddle::Contains;
{
paddle::StringPiece s;
EXPECT_FALSE(Contains(s, ""));
EXPECT_FALSE(Contains(s, "something"));
}
{
paddle::StringPiece s("app");
EXPECT_TRUE(Contains(s, ""));
EXPECT_TRUE(Contains(s, "a"));
EXPECT_TRUE(Contains(s, "p"));
EXPECT_TRUE(Contains(s, "ap"));
EXPECT_TRUE(Contains(s, "pp"));
EXPECT_TRUE(Contains(s, "app"));
EXPECT_FALSE(Contains(s, "something"));
}
}
TEST(StringPiece, Index) {
using paddle::Index;
auto npos = paddle::StringPiece::npos;
{
paddle::StringPiece s;
EXPECT_EQ(npos, Index(s, ""));
EXPECT_EQ(npos, Index(s, "something"));
}
{
paddle::StringPiece s("app");
EXPECT_EQ(0U, Index(s, ""));
EXPECT_EQ(0U, Index(s, "a"));
EXPECT_EQ(1U, Index(s, "p"));
EXPECT_EQ(0U, Index(s, "ap"));
EXPECT_EQ(1U, Index(s, "pp"));
EXPECT_EQ(0U, Index(s, "app"));
EXPECT_EQ(npos, Index(s, "something"));
}
}
TEST(StringPiece, Find) {
using paddle::Find;
auto npos = paddle::StringPiece::npos;
{
paddle::StringPiece s;
EXPECT_EQ(npos, Find(s, 'a', 0U));
}
{
paddle::StringPiece s("app");
EXPECT_EQ(0U, Find(s, 'a', 0U));
EXPECT_EQ(1U, Find(s, 'p', 0U));
EXPECT_EQ(1U, Find(s, 'p', 1U));
EXPECT_EQ(2U, Find(s, 'p', 2U));
EXPECT_EQ(npos, Find(s, 'z', 2U));
}
}
TEST(StringPiece, RFind) {
using paddle::RFind;
auto npos = paddle::StringPiece::npos;
{
paddle::StringPiece s;
EXPECT_EQ(npos, RFind(s, 'a', 0U));
}
{
paddle::StringPiece s("app");
EXPECT_EQ(2U, RFind(s, 'p', 2U));
EXPECT_EQ(0U, RFind(s, 'a', 2U));
EXPECT_EQ(1U, RFind(s, 'p', 1U));
EXPECT_EQ(0U, RFind(s, 'a', 0));
EXPECT_EQ(npos, RFind(s, 'z', 2U));
}
}
TEST(StringPiece, SubStr) {
using paddle::SubStr;
{
paddle::StringPiece s;
EXPECT_EQ("", SubStr(s, 0, 0));
EXPECT_EQ("", SubStr(s, 0, 1));
EXPECT_EQ("", SubStr(s, 1, 0));
}
{
paddle::StringPiece s("app");
EXPECT_EQ("", SubStr(s, 0, 0));
EXPECT_EQ("", SubStr(s, 1, 0));
EXPECT_EQ("", SubStr(s, 2, 0));
EXPECT_EQ("", SubStr(s, 3, 0));
EXPECT_EQ("a", SubStr(s, 0, 1));
EXPECT_EQ("p", SubStr(s, 1, 1));
EXPECT_EQ("p", SubStr(s, 2, 1));
EXPECT_EQ("", SubStr(s, 3, 1));
EXPECT_EQ("ap", SubStr(s, 0, 2));
EXPECT_EQ("pp", SubStr(s, 1, 2));
EXPECT_EQ("p", SubStr(s, 2, 2));
EXPECT_EQ("", SubStr(s, 3, 2));
EXPECT_EQ("app", SubStr(s, 0, 3));
EXPECT_EQ("pp", SubStr(s, 1, 3));
EXPECT_EQ("p", SubStr(s, 2, 3));
EXPECT_EQ("", SubStr(s, 3, 3));
}
}
TEST(StringPiece, StreamOutput) {
using paddle::StringPiece;
std::stringstream o;
o << StringPiece();
EXPECT_EQ("", o.str());
o << StringPiece("hello");
EXPECT_EQ("hello", o.str());
o << StringPiece();
EXPECT_EQ("hello", o.str());
}
...@@ -4,6 +4,7 @@ set(TRAINER_SOURCES ...@@ -4,6 +4,7 @@ set(TRAINER_SOURCES
ParameterUpdater.cpp ParameterUpdater.cpp
ParamUtil.cpp ParamUtil.cpp
RemoteParameterUpdater.cpp RemoteParameterUpdater.cpp
NewRemoteParameterUpdater.cpp
Tester.cpp Tester.cpp
Trainer.cpp Trainer.cpp
TrainerInternal.cpp TrainerInternal.cpp
...@@ -16,6 +17,7 @@ set(TRAINER_HEADERS ...@@ -16,6 +17,7 @@ set(TRAINER_HEADERS
ParameterUpdater.h ParameterUpdater.h
ParamUtil.h ParamUtil.h
RemoteParameterUpdater.h RemoteParameterUpdater.h
NewRemoteParameterUpdater.h
Tester.h Tester.h
TesterConfig.h TesterConfig.h
Trainer.h Trainer.h
...@@ -32,7 +34,7 @@ add_style_check_target(paddle_trainer_lib ...@@ -32,7 +34,7 @@ add_style_check_target(paddle_trainer_lib
add_style_check_target(paddle_trainer_lib add_style_check_target(paddle_trainer_lib
${TRAINER_HEADERS}) ${TRAINER_HEADERS})
add_dependencies(paddle_trainer_lib add_dependencies(paddle_trainer_lib
gen_proto_cpp) gen_proto_cpp paddle_pserver_cclient_lib)
macro(add_paddle_exe TARGET_NAME) macro(add_paddle_exe TARGET_NAME)
add_executable(${TARGET_NAME} ${ARGN}) add_executable(${TARGET_NAME} ${ARGN})
...@@ -56,3 +58,10 @@ install(TARGETS paddle_trainer paddle_merge_model ...@@ -56,3 +58,10 @@ install(TARGETS paddle_trainer paddle_merge_model
set_target_properties(paddle_trainer PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) set_target_properties(paddle_trainer PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE)
set_target_properties(paddle_merge_model PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) set_target_properties(paddle_merge_model PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE)
if(APPLE)
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
endif()
target_link_libraries(paddle_trainer ${CMAKE_CURRENT_SOURCE_DIR}/libpaddle_pserver_cclient.a)
target_link_libraries(paddle_trainer_lib ${CMAKE_CURRENT_SOURCE_DIR}/libpaddle_pserver_cclient.a)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "NewRemoteParameterUpdater.h"
#include "Trainer.h"
#include "paddle/utils/Stat.h"
DECLARE_int32(trainer_id);
DECLARE_string(save_dir);
namespace paddle {
NewRemoteParameterUpdater::NewRemoteParameterUpdater(
const OptimizationConfig &config, const std::string pserverSpec)
: parameterClient_(-1),
newParameters_(nullptr),
newGradients_(nullptr),
pserverSpec_(pserverSpec) {}
void NewRemoteParameterUpdater::init(
const std::vector<ParameterPtr> &parameters) {
ParameterUpdater::init(parameters);
for (auto &para : parameters_) {
para->getBuf(PARAMETER_VALUE)->zeroMem();
para->getBuf(PARAMETER_GRADIENT)->zeroMem();
}
// create parameter server client.
parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(),
FLAGS_trainer_id == 0);
// init new parameter and gradient.
newParameters_ = initNewParameter(PARAMETER_VALUE);
newGradients_ = initNewParameter(PARAMETER_GRADIENT);
// init parameter, one trainer will get the opportunity to int parameter and
// send them to parameter server. Others will get the initialized parameter
// from parameter server
if (paddle_begin_init_params(parameterClient_)) {
LOG(INFO) << "paddle_begin_init_params start";
for (int i = 0; i < parameterSize(); ++i) {
auto paramConfig = parameters_[i]->getConfig();
std::string bytes = paramConfig.SerializeAsString();
const char *array = bytes.data();
int size = (int)bytes.size();
paddle_init_param(
parameterClient_, *newParameters_[i], (void *)array, size);
}
paddle_finish_init_params(parameterClient_);
LOG(INFO) << "paddle_begin_init_params done";
} else {
paddle_get_params(parameterClient_, newParameters_, parameterSize());
}
LOG(INFO) << "NewRemoteParameterUpdater initialized";
}
void NewRemoteParameterUpdater::updateImpl(Parameter *para) {}
void NewRemoteParameterUpdater::finishBatch(real cost) {
// send gradient to parameter server.
paddle_send_grads(parameterClient_, newGradients_, parameterSize());
// get the updated parameter from parameterClient.
paddle_get_params(parameterClient_, newParameters_, parameterSize());
// clear gradient after update parameter.
for (auto &para : parameters_) {
para->getBuf(PARAMETER_GRADIENT)->zeroMem();
}
}
void NewRemoteParameterUpdater::startPass() {}
bool NewRemoteParameterUpdater::finishPass() { return true; }
}
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <functional>
#include <thread>
#include "ParameterUpdater.h"
#include "libpaddle_pserver_cclient.h"
#include "paddle/pserver/ParameterClient2.h"
#include "paddle/utils/Queue.h"
#include "paddle/utils/Util.h"
namespace paddle {
/**
* New remote parameter updater for dense parameters that use cclient of go.
*/
class NewRemoteParameterUpdater : public ParameterUpdater {
public:
NewRemoteParameterUpdater(const OptimizationConfig& config,
const std::string pserverSpec);
~NewRemoteParameterUpdater() {
releaseNewParameter(newParameters_);
releaseNewParameter(newGradients_);
if (parameterClient_ >= 0) paddle_pserver_client_release(parameterClient_);
}
/**
* initialize the internal parameter client and itself.
*/
virtual void init(const std::vector<ParameterPtr>& parameters);
/**
* @brief start batch
*
* @note one batch training exhibits stateful feature to help
* to do performance tuning, sgd optimization if necessary.
*/
virtual PassType startBatch(int64_t batchSize) { return PASS_TRAIN; }
/**
* send parameters to pservers and get returned parameters
* from all pservers if necessary.
*/
virtual void finishBatch(real cost);
virtual void startPass();
virtual bool finishPass();
protected:
/**
* work need to do after finishBatch
*/
virtual void updateImpl(Parameter* para);
private:
int parameterSize() { return (int)parameters_.size(); }
/**
* init parameter of go paddle pserver cclient.
* @param new_params
* @param type
*/
paddle_parameter** initNewParameter(ParameterType type) {
paddle_parameter** new_params =
(paddle_parameter**)malloc(sizeof(paddle_parameter*) * parameterSize());
for (int i = 0; i < parameterSize(); ++i) {
new_params[i] = (paddle_parameter*)malloc(sizeof(paddle_parameter));
memset(new_params[i], 0, sizeof(paddle_parameter));
}
for (int i = 0; i < parameterSize(); ++i) {
ParameterPtr param = parameters_[i];
new_params[i]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
new_params[i]->name = (char*)param->getName().c_str();
new_params[i]->content =
(unsigned char*)(param->getBuf(type).get()->getData());
new_params[i]->content_len =
(int)param->getBuf(type).get()->getSize() * sizeof(real);
}
return new_params;
}
void releaseNewParameter(paddle_parameter** newParams) {
if (newParams != nullptr) {
for (int i = 0; i < parameterSize(); ++i) {
free(newParams[i]);
}
free(newParams);
}
}
protected:
/// internal parameter client object for exchanging data with pserver
paddle_pserver_client parameterClient_;
/// the parameters for new pserver client
paddle_parameter** newParameters_;
/// the gradinets for new pserver client
paddle_parameter** newGradients_;
/// the specification of parameter server "host1:port,host1:port"
std::string pserverSpec_;
};
} // namespace paddle
...@@ -194,6 +194,10 @@ message MaxOutConfig { ...@@ -194,6 +194,10 @@ message MaxOutConfig {
required uint32 groups = 2; required uint32 groups = 2;
} }
message RowConvConfig {
required uint32 context_length = 1;
}
message ProjectionConfig { message ProjectionConfig {
required string type = 1; required string type = 1;
required string name = 2; required string name = 2;
...@@ -279,6 +283,7 @@ message LayerInputConfig { ...@@ -279,6 +283,7 @@ message LayerInputConfig {
optional SppConfig spp_conf = 12; optional SppConfig spp_conf = 12;
optional PriorBoxConfig priorbox_conf = 13; optional PriorBoxConfig priorbox_conf = 13;
optional PadConfig pad_conf = 14; optional PadConfig pad_conf = 14;
optional RowConvConfig row_conv_conf = 15;
} }
message LayerConfig { message LayerConfig {
......
...@@ -18,7 +18,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ...@@ -18,7 +18,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp
DEPENDS gen_proto_py ${PY_FILES} ${external_project_dependencies}) DEPENDS gen_proto_py ${PY_FILES} ${external_project_dependencies} paddle_master_shared)
add_custom_target(paddle_python ALL DEPENDS add_custom_target(paddle_python ALL DEPENDS
${OUTPUT_DIR}/.timestamp) ${OUTPUT_DIR}/.timestamp)
......
...@@ -73,7 +73,6 @@ To use this from paddle_trainer, paddle_trainer should be called with ...@@ -73,7 +73,6 @@ To use this from paddle_trainer, paddle_trainer should be called with
--config_args=extension_module_name=[MODULE_NAME] --config_args=extension_module_name=[MODULE_NAME]
''' '''
import copy import copy
import logging import logging
import os import os
...@@ -127,6 +126,7 @@ def init_config_environment( ...@@ -127,6 +126,7 @@ def init_config_environment(
g_config=TrainerConfig(), g_config=TrainerConfig(),
g_layer_map={}, g_layer_map={},
g_parameter_map={}, g_parameter_map={},
g_parameter_initializer_map={},
g_extended_config_funcs={}, g_extended_config_funcs={},
# store command args of paddle_trainer # store command args of paddle_trainer
...@@ -440,8 +440,7 @@ def model_type(name): ...@@ -440,8 +440,7 @@ def model_type(name):
@config_class @config_class
class Bias(Cfg): class Bias(Cfg):
def __init__( def __init__(self,
self,
parameter_name=None, parameter_name=None,
learning_rate=None, learning_rate=None,
momentum=None, momentum=None,
...@@ -455,7 +454,8 @@ class Bias(Cfg): ...@@ -455,7 +454,8 @@ class Bias(Cfg):
sparse_remote_update=None, sparse_remote_update=None,
gradient_clipping_threshold=None, gradient_clipping_threshold=None,
is_static=None, is_static=None,
is_shared=None, ): is_shared=None,
initializer=None):
self.add_keys(locals()) self.add_keys(locals())
...@@ -466,6 +466,7 @@ class Input(Cfg): ...@@ -466,6 +466,7 @@ class Input(Cfg):
self, self,
input_layer_name, input_layer_name,
parameter_name=None, parameter_name=None,
initializer=None,
learning_rate=None, learning_rate=None,
momentum=None, momentum=None,
decay_rate=None, decay_rate=None,
...@@ -522,6 +523,7 @@ class Projection(Input): ...@@ -522,6 +523,7 @@ class Projection(Input):
initial_std=None, initial_std=None,
initial_strategy=None, initial_strategy=None,
initial_smart=None, initial_smart=None,
initializer=None,
num_batches_regularization=None, num_batches_regularization=None,
sparse_remote_update=None, sparse_remote_update=None,
sparse_update=None, sparse_update=None,
...@@ -1480,7 +1482,8 @@ class LayerBase(object): ...@@ -1480,7 +1482,8 @@ class LayerBase(object):
gradient_clipping_threshold=bias. gradient_clipping_threshold=bias.
gradient_clipping_threshold, gradient_clipping_threshold,
is_static=bias.is_static, is_static=bias.is_static,
is_shared=bias.is_shared, ) is_shared=bias.is_shared,
initializer=bias.initializer)
if for_self: if for_self:
self.config.bias_parameter_name = bias.parameter_name self.config.bias_parameter_name = bias.parameter_name
else: else:
...@@ -1537,7 +1540,8 @@ class LayerBase(object): ...@@ -1537,7 +1540,8 @@ class LayerBase(object):
format=format, format=format,
is_static=input_config.is_static, is_static=input_config.is_static,
is_shared=input_config.is_shared, is_shared=input_config.is_shared,
update_hooks=input_config.update_hooks) update_hooks=input_config.update_hooks,
initializer=input_config.initializer)
def set_layer_size(self, size): def set_layer_size(self, size):
if self.config.size == 0: if self.config.size == 0:
...@@ -1731,9 +1735,10 @@ class ParameterReluLayer(LayerBase): ...@@ -1731,9 +1735,10 @@ class ParameterReluLayer(LayerBase):
def __init__(self, name, inputs, partial_sum=1, **args): def __init__(self, name, inputs, partial_sum=1, **args):
super(ParameterReluLayer, self).__init__( super(ParameterReluLayer, self).__init__(
name, self.layer_type, 0, inputs=inputs, **args) name, self.layer_type, 0, inputs=inputs, **args)
config_assert(len(self.inputs) == 1)
config_assert(self.input_layer.size % partial_sum == 0)
input_layer = self.get_input_layer(0) input_layer = self.get_input_layer(0)
config_assert(len(self.inputs) == 1, "prelu layer has only one input.")
config_assert(input_layer.size % partial_sum == 0,
"a wrong setting for partial_sum")
self.set_layer_size(input_layer.size) self.set_layer_size(input_layer.size)
self.create_input_parameter(0, input_layer.size / partial_sum) self.create_input_parameter(0, input_layer.size / partial_sum)
...@@ -2081,6 +2086,23 @@ class MaxOutLayer(LayerBase): ...@@ -2081,6 +2086,23 @@ class MaxOutLayer(LayerBase):
g_layer_map[input_layer.name].width, out_channels) g_layer_map[input_layer.name].width, out_channels)
@config_layer('row_conv')
class RowConvLayer(LayerBase):
def __init__(self, name, inputs, context_length, **xargs):
super(RowConvLayer, self).__init__(
name, 'maxout', 0, inputs=inputs, **xargs)
config_assert(
len(self.inputs) == 1,
'TransLayer must have one and only one input')
input_layer = self.get_input_layer(0)
row_conv_conf = self.config.inputs[0].row_conv_conf
row_conv_conf.context_length = context_length
self.set_layer_size(input_layer.size)
psize = context_length * input_layer.size
dims = [context_length, input_layer.size]
self.create_input_parameter(0, psize, dims)
# key: cost type # key: cost type
# value: cost class # value: cost class
g_cost_map = {} g_cost_map = {}
...@@ -3204,7 +3226,8 @@ def Parameter(name, ...@@ -3204,7 +3226,8 @@ def Parameter(name,
need_compact=None, need_compact=None,
is_static=None, is_static=None,
is_shared=None, is_shared=None,
update_hooks=None): update_hooks=None,
initializer=None):
config_assert(name not in g_parameter_map, config_assert(name not in g_parameter_map,
'Duplicated parameter name: ' + name) 'Duplicated parameter name: ' + name)
...@@ -3292,6 +3315,11 @@ def Parameter(name, ...@@ -3292,6 +3315,11 @@ def Parameter(name,
para.update_hooks.extend(update_hooks) para.update_hooks.extend(update_hooks)
g_parameter_map[name] = para g_parameter_map[name] = para
if initializer is not None:
config_assert(
callable(initializer),
"parameter initializer should be a callable object")
g_parameter_initializer_map[name] = initializer
@config_func @config_func
...@@ -3546,11 +3574,7 @@ def update_g_config(): ...@@ -3546,11 +3574,7 @@ def update_g_config():
return g_config return g_config
def begin_parse(config_arg_str=''): def begin_parse():
'''
@param config_arg_str: a string of the form var1=val1,var2=val2. It will be
passed to config script as a dictionary CONFIG_ARGS
'''
init_config_environment() init_config_environment()
for hook in _parse_config_hooks: for hook in _parse_config_hooks:
hook() hook()
...@@ -3568,8 +3592,12 @@ def begin_parse(config_arg_str=''): ...@@ -3568,8 +3592,12 @@ def begin_parse(config_arg_str=''):
def parse_config(trainer_config, config_arg_str): def parse_config(trainer_config, config_arg_str):
begin_parse(config_arg_str) '''
@param config_arg_str: a string of the form var1=val1,var2=val2. It will be
passed to config script as a dictionary CONFIG_ARGS
'''
begin_parse()
config_args = {} config_args = {}
if config_arg_str: if config_arg_str:
......
...@@ -95,6 +95,10 @@ class ParameterAttribute(object): ...@@ -95,6 +95,10 @@ class ParameterAttribute(object):
:param sparse_update: Enable sparse update for this parameter. It will :param sparse_update: Enable sparse update for this parameter. It will
enable both local and remote sparse update. enable both local and remote sparse update.
:type sparse_update: bool :type sparse_update: bool
:param initializer: If not None, it should be a callable object which accepts
a parameter name and returns numpy array for the initial
value of the parameter
:param initializer: callable object
""" """
def __init__(self, def __init__(self,
...@@ -109,7 +113,8 @@ class ParameterAttribute(object): ...@@ -109,7 +113,8 @@ class ParameterAttribute(object):
learning_rate=None, learning_rate=None,
momentum=None, momentum=None,
gradient_clipping_threshold=None, gradient_clipping_threshold=None,
sparse_update=False): sparse_update=False,
initializer=None):
self.attr = {} self.attr = {}
if is_static: if is_static:
...@@ -161,6 +166,8 @@ class ParameterAttribute(object): ...@@ -161,6 +166,8 @@ class ParameterAttribute(object):
is_compatible_with(gradient_clipping_threshold, float): is_compatible_with(gradient_clipping_threshold, float):
self.attr['gradient_clipping_threshold'] = \ self.attr['gradient_clipping_threshold'] = \
gradient_clipping_threshold gradient_clipping_threshold
if initializer is not None:
self.attr['initializer'] = initializer
def set_default_parameter_name(self, name): def set_default_parameter_name(self, name):
""" """
......
...@@ -31,31 +31,31 @@ except ImportError: ...@@ -31,31 +31,31 @@ except ImportError:
import copy import copy
__all__ = [ __all__ = [
"full_matrix_projection", 'full_matrix_projection',
"AggregateLevel", 'AggregateLevel',
"ExpandLevel", 'ExpandLevel',
"identity_projection", 'identity_projection',
"dotmul_projection", 'dotmul_projection',
"dotmul_operator", 'dotmul_operator',
"repeat_layer", 'repeat_layer',
"seq_reshape_layer", 'seq_reshape_layer',
"table_projection", 'table_projection',
"mixed_layer", 'mixed_layer',
"data_layer", 'data_layer',
"embedding_layer", 'embedding_layer',
"fc_layer", 'fc_layer',
"grumemory", 'grumemory',
"pooling_layer", 'pooling_layer',
"lstmemory", 'lstmemory',
"last_seq", 'last_seq',
"first_seq", 'first_seq',
"cos_sim", 'cos_sim',
"hsigmoid", 'hsigmoid',
"conv_projection", 'conv_projection',
"mse_cost", 'mse_cost',
"regression_cost", 'regression_cost',
'classification_cost', 'classification_cost',
"LayerOutput", 'LayerOutput',
'img_conv_layer', 'img_conv_layer',
'img_pool_layer', 'img_pool_layer',
'batch_norm_layer', 'batch_norm_layer',
...@@ -111,6 +111,7 @@ __all__ = [ ...@@ -111,6 +111,7 @@ __all__ = [
'block_expand_layer', 'block_expand_layer',
'maxout_layer', 'maxout_layer',
'out_prod_layer', 'out_prod_layer',
'printer_layer',
'print_layer', 'print_layer',
'priorbox_layer', 'priorbox_layer',
'cross_channel_norm_layer', 'cross_channel_norm_layer',
...@@ -120,6 +121,9 @@ __all__ = [ ...@@ -120,6 +121,9 @@ __all__ = [
'smooth_l1_cost', 'smooth_l1_cost',
'layer_support', 'layer_support',
'multiplex_layer', 'multiplex_layer',
'row_conv_layer',
'dropout_layer',
'prelu_layer',
] ]
...@@ -128,26 +132,26 @@ class LayerType(object): ...@@ -128,26 +132,26 @@ class LayerType(object):
Layer type enumerations. Layer type enumerations.
""" """
DATA = "data" DATA = 'data'
MIXED_LAYER = "mixed" MIXED_LAYER = 'mixed'
LSTMEMORY = "lstmemory" LSTMEMORY = 'lstmemory'
GRUMEMORY = "gated_recurrent" GRUMEMORY = 'gated_recurrent'
SEQUENCE_LAST_INSTANCE = "seqlastins" SEQUENCE_LAST_INSTANCE = 'seqlastins'
SEQUENCE_FIRST_INSTANCE = "seqfirstins" SEQUENCE_FIRST_INSTANCE = 'seqfirstins'
SEQUENCE_RESHAPE = "seqreshape" SEQUENCE_RESHAPE = 'seqreshape'
POOLING_MAX = "max" POOLING_MAX = 'max'
POOLING_AVG = 'average' POOLING_AVG = 'average'
FC_LAYER = "fc" FC_LAYER = 'fc'
COST = 'cost' COST = 'cost'
COSINE_SIM_VEC = 'cos_vm' COSINE_SIM_VEC = 'cos_vm'
COSINE_SIM = 'cos' COSINE_SIM = 'cos'
HSIGMOID = 'hsigmoid' HSIGMOID = 'hsigmoid'
CONV_LAYER = "conv" CONV_LAYER = 'conv'
CONVTRANS_LAYER = "convt" CONVTRANS_LAYER = 'convt'
EXCONV_LAYER = "exconv" EXCONV_LAYER = 'exconv'
EXCONVTRANS_LAYER = "exconvt" EXCONVTRANS_LAYER = 'exconvt'
CUDNNCONV_LAYER = "cudnn_conv" CUDNNCONV_LAYER = 'cudnn_conv'
POOL_LAYER = "pool" POOL_LAYER = 'pool'
BATCH_NORM_LAYER = 'batch_norm' BATCH_NORM_LAYER = 'batch_norm'
NORM_LAYER = 'norm' NORM_LAYER = 'norm'
SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm' SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm'
...@@ -187,25 +191,28 @@ class LayerType(object): ...@@ -187,25 +191,28 @@ class LayerType(object):
SPP_LAYER = "spp" SPP_LAYER = "spp"
PAD_LAYER = "pad" PAD_LAYER = "pad"
MULTIPLEX_LAYER = "multiplex" MULTIPLEX_LAYER = "multiplex"
ROW_CONV_LAYER = "row_conv"
PRINT_LAYER = "print" PRINT_LAYER = 'print'
PRIORBOX_LAYER = "priorbox" PRIORBOX_LAYER = 'priorbox'
CTC_LAYER = "ctc" CTC_LAYER = 'ctc'
WARP_CTC_LAYER = "warp_ctc" WARP_CTC_LAYER = 'warp_ctc'
CRF_LAYER = "crf" CRF_LAYER = 'crf'
CRF_DECODING_LAYER = "crf_decoding" CRF_DECODING_LAYER = 'crf_decoding'
NCE_LAYER = 'nce' NCE_LAYER = 'nce'
RANK_COST = "rank-cost" RANK_COST = 'rank-cost'
LAMBDA_COST = "lambda_cost" LAMBDA_COST = 'lambda_cost'
HUBER = "huber" HUBER = 'huber'
CROSS_ENTROPY = "multi-class-cross-entropy" CROSS_ENTROPY = 'multi-class-cross-entropy'
CROSS_ENTROPY_WITH_SELFNORM = "multi_class_cross_entropy_with_selfnorm" CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm'
SOFT_BIN_CLASS_CROSS_ENTROPY = "soft_binary_class_cross_entropy" SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy'
MULTI_BIN_LABEL_CROSS_ENTROPY = "multi_binary_label_cross_entropy" MULTI_BIN_LABEL_CROSS_ENTROPY = 'multi_binary_label_cross_entropy'
SUM_COST = "sum_cost" SUM_COST = 'sum_cost'
SMOOTH_L1 = "smooth_l1" SMOOTH_L1 = 'smooth_l1'
PRELU = 'prelu'
@staticmethod @staticmethod
def is_layer_type(type_name): def is_layer_type(type_name):
...@@ -969,7 +976,7 @@ def fc_layer(input, ...@@ -969,7 +976,7 @@ def fc_layer(input,
@wrap_name_default("print") @wrap_name_default("print")
def print_layer(input, name=None): def printer_layer(input, name=None):
""" """
Print the output value of input layers. This layer is useful for debugging. Print the output value of input layers. This layer is useful for debugging.
...@@ -991,6 +998,13 @@ def print_layer(input, name=None): ...@@ -991,6 +998,13 @@ def print_layer(input, name=None):
inputs=[l.name for l in input], ) inputs=[l.name for l in input], )
# this layer don't return anything, can not be input of other layer. # this layer don't return anything, can not be input of other layer.
# Keep print_layer for compatibility with V1 API.
# 'print_layer' does not work for V2 API because it will be changed to
# 'print' for V2 API. But 'print' is a reserved key word in python.
print_layer = printer_layer
@wrap_name_default("priorbox") @wrap_name_default("priorbox")
def priorbox_layer(input, def priorbox_layer(input,
...@@ -2916,11 +2930,11 @@ def memory(name, ...@@ -2916,11 +2930,11 @@ def memory(name,
to specify the layer needs to be remembered as the following: to specify the layer needs to be remembered as the following:
.. code-block:: python .. code-block:: python
mem = memory(size=256) mem = memory(size=256)
state = fc_layer(input=mem, size=256) state = fc_layer(input=mem, size=256)
mem.set_input(mem) mem.set_input(mem)
:param name: the name of the layer which this memory remembers. :param name: the name of the layer which this memory remembers.
If name is None, user should call set_input() to specify the If name is None, user should call set_input() to specify the
name of the layer which this memory remembers. name of the layer which this memory remembers.
...@@ -3407,7 +3421,7 @@ def recurrent_group(step, ...@@ -3407,7 +3421,7 @@ def recurrent_group(step,
else, for training or testing, one of the input type must else, for training or testing, one of the input type must
be LayerOutput. be LayerOutput.
: type is_generating: bool :type is_generating: bool
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -3760,7 +3774,6 @@ def beam_search(step, ...@@ -3760,7 +3774,6 @@ def beam_search(step,
assert generated_input_index != -1 assert generated_input_index != -1
gipt = input[generated_input_index] gipt = input[generated_input_index]
assert isinstance(gipt, BaseGeneratedInput)
gipt.bos_id = bos_id gipt.bos_id = bos_id
gipt.eos_id = eos_id gipt.eos_id = eos_id
...@@ -3780,7 +3793,6 @@ def beam_search(step, ...@@ -3780,7 +3793,6 @@ def beam_search(step,
predict = gipt.after_real_step(step(*args)) predict = gipt.after_real_step(step(*args))
eos_layer(input=predict, eos_id=eos_id, name=eos_name) eos_layer(input=predict, eos_id=eos_id, name=eos_name)
return predict return predict
tmp = recurrent_group( tmp = recurrent_group(
...@@ -3814,7 +3826,7 @@ def mse_cost(input, label, weight=None, name=None, coeff=1.0, layer_attr=None): ...@@ -3814,7 +3826,7 @@ def mse_cost(input, label, weight=None, name=None, coeff=1.0, layer_attr=None):
.. math:: .. math::
\frac{1}{N}\sum_{i=1}^N(t_i-y_i)^2 \\frac{1}{N}\sum_{i=1}^N(t_i-y_i)^2
:param name: layer name. :param name: layer name.
:type name: basestring :type name: basestring
...@@ -3852,7 +3864,6 @@ def classification_cost(input, ...@@ -3852,7 +3864,6 @@ def classification_cost(input,
label, label,
weight=None, weight=None,
name=None, name=None,
top_k=None,
evaluator=classification_error_evaluator, evaluator=classification_error_evaluator,
layer_attr=None): layer_attr=None):
""" """
...@@ -3867,8 +3878,6 @@ def classification_cost(input, ...@@ -3867,8 +3878,6 @@ def classification_cost(input,
:param weight: The weight affects the cost, namely the scale of cost. :param weight: The weight affects the cost, namely the scale of cost.
It is an optional argument. It is an optional argument.
:type weight: LayerOutput :type weight: LayerOutput
:param top_k: number k in top-k error rate
:type top_k: int
:param evaluator: Evaluator method. :param evaluator: Evaluator method.
:param layer_attr: layer's extra attribute. :param layer_attr: layer's extra attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
...@@ -3896,7 +3905,7 @@ def classification_cost(input, ...@@ -3896,7 +3905,7 @@ def classification_cost(input,
assert isinstance(e.for_classification, bool) assert isinstance(e.for_classification, bool)
assert e.for_classification assert e.for_classification
e(name=e.__name__, input=input, label=label, weight=weight, top_k=top_k) e(name=e.__name__, input=input, label=label, weight=weight)
if not isinstance(evaluator, collections.Sequence): if not isinstance(evaluator, collections.Sequence):
evaluator = [evaluator] evaluator = [evaluator]
...@@ -4717,7 +4726,7 @@ def ctc_layer(input, ...@@ -4717,7 +4726,7 @@ def ctc_layer(input,
fc_layer with softmax activation, should be num_classes + 1. The size of ctc_layer fc_layer with softmax activation, should be num_classes + 1. The size of ctc_layer
should also be num_classes + 1. should also be num_classes + 1.
The simple usage: The example usage is:
.. code-block:: python .. code-block:: python
...@@ -4769,27 +4778,42 @@ def warp_ctc_layer(input, ...@@ -4769,27 +4778,42 @@ def warp_ctc_layer(input,
layer_attr=None): layer_attr=None):
""" """
A layer intergrating the open-source `warp-ctc A layer intergrating the open-source `warp-ctc
<https://github.com/baidu-research/warp-ctc>` library, which is used in <https://github.com/baidu-research/warp-ctc>`_ library, which is used in
`Deep Speech 2: End-toEnd Speech Recognition in English and Mandarin `Deep Speech 2: End-toEnd Speech Recognition in English and Mandarin
<https://arxiv.org/pdf/1512.02595v1.pdf>`, to compute Connectionist Temporal <https://arxiv.org/pdf/1512.02595v1.pdf>`_, to compute Connectionist Temporal
Classification (CTC) loss. Classification (CTC) loss. Besides, another `warp-ctc
<https://github.com/gangliao/warp-ctc>`_ repository, which is forked from
the official one, is maintained to enable more compiling options. During the
building process, PaddlePaddle will clone the source codes, build and
install it to :code:`third_party/install/warpctc` directory.
To use warp_ctc layer, you need to specify the path of :code:`libwarpctc.so`,
using following methods:
1. Set it in :code:`paddle.init` (python api) or :code:`paddle_init` (c api),
such as :code:`paddle.init(use_gpu=True,
warpctc_dir=your_paddle_source_dir/third_party/install/warpctc/lib)`.
2. Set environment variable LD_LIBRARY_PATH on Linux or DYLD_LIBRARY_PATH
on Mac OS. For instance, :code:`export
LD_LIBRARY_PATH=your_paddle_source_dir/third_party/install/warpctc/lib:$LD_LIBRARY_PATH`.
More details of CTC can be found by referring to `Connectionist Temporal More details of CTC can be found by referring to `Connectionist Temporal
Classification: Labelling Unsegmented Sequence Data with Recurrent Classification: Labelling Unsegmented Sequence Data with Recurrent
Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/ Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/
icml2006_GravesFGS06.pdf>`_ icml2006_GravesFGS06.pdf>`_.
Note: Note:
- Let num_classes represent the category number. Considering the 'blank' - Let num_classes represent the category number. Considering the 'blank'
label needed by CTC, you need to use (num_classes + 1) as the input label needed by CTC, you need to use (num_classes + 1) as the input size.
size. Thus, the size of both warp_ctc_layer and 'input' layer should Thus, the size of both warp_ctc layer and 'input' layer should be set to
be set to num_classes + 1. num_classes + 1.
- You can set 'blank' to any value ranged in [0, num_classes], which - You can set 'blank' to any value ranged in [0, num_classes], which
should be consistent as that used in your labels. should be consistent as that used in your labels.
- As a native 'softmax' activation is interated to the warp-ctc library, - As a native 'softmax' activation is interated to the warp-ctc library,
'linear' activation is expected instead in the 'input' layer. 'linear' activation is expected instead in the 'input' layer.
The simple usage: The example usage is:
.. code-block:: python .. code-block:: python
...@@ -4850,7 +4874,7 @@ def crf_layer(input, ...@@ -4850,7 +4874,7 @@ def crf_layer(input,
A layer for calculating the cost of sequential conditional random A layer for calculating the cost of sequential conditional random
field model. field model.
The simple usage: The example usage is:
.. code-block:: python .. code-block:: python
...@@ -4924,7 +4948,7 @@ def crf_decoding_layer(input, ...@@ -4924,7 +4948,7 @@ def crf_decoding_layer(input,
this layer will also calculate error. output.value[i] is 1 for incorrect this layer will also calculate error. output.value[i] is 1 for incorrect
decoding or 0 for correct decoding. decoding or 0 for correct decoding.
The simple usage: The example usage is:
.. code-block:: python .. code-block:: python
...@@ -5117,7 +5141,7 @@ def rank_cost(left, ...@@ -5117,7 +5141,7 @@ def rank_cost(left,
- :math:`o_i` and :math:`o_j`: the left output and right output. - :math:`o_i` and :math:`o_j`: the left output and right output.
Their dimension is one. Their dimension is one.
The simple usage: The example usage is:
.. code-block:: python .. code-block:: python
...@@ -5174,7 +5198,7 @@ def lambda_cost(input, ...@@ -5174,7 +5198,7 @@ def lambda_cost(input,
""" """
lambdaCost for lambdaRank LTR approach. lambdaCost for lambdaRank LTR approach.
The simple usage: The example usage is:
.. code-block:: python .. code-block:: python
...@@ -5232,6 +5256,8 @@ def cross_entropy(input, ...@@ -5232,6 +5256,8 @@ def cross_entropy(input,
""" """
A loss layer for multi class entropy. A loss layer for multi class entropy.
The example usage is:
.. code-block:: python .. code-block:: python
cost = cross_entropy(input=input_layer, cost = cross_entropy(input=input_layer,
...@@ -5278,6 +5304,8 @@ def cross_entropy_with_selfnorm(input, ...@@ -5278,6 +5304,8 @@ def cross_entropy_with_selfnorm(input,
A loss layer for multi class entropy with selfnorm. A loss layer for multi class entropy with selfnorm.
Input should be a vector of positive numbers, without normalization. Input should be a vector of positive numbers, without normalization.
The example usage is:
.. code-block:: python .. code-block:: python
cost = cross_entropy_with_selfnorm(input=input_layer, cost = cross_entropy_with_selfnorm(input=input_layer,
...@@ -5319,6 +5347,8 @@ def sum_cost(input, name=None, layer_attr=None): ...@@ -5319,6 +5347,8 @@ def sum_cost(input, name=None, layer_attr=None):
""" """
A loss layer which calculate the sum of the input as loss A loss layer which calculate the sum of the input as loss
The example usage is:
.. code-block:: python .. code-block:: python
cost = sum_cost(input=input_layer) cost = sum_cost(input=input_layer)
...@@ -5348,6 +5378,8 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None): ...@@ -5348,6 +5378,8 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
""" """
A loss layer for huber loss. A loss layer for huber loss.
The example usage is:
.. code-block:: python .. code-block:: python
cost = huber_cost(input=input_layer, cost = huber_cost(input=input_layer,
...@@ -5388,6 +5420,8 @@ def multi_binary_label_cross_entropy(input, ...@@ -5388,6 +5420,8 @@ def multi_binary_label_cross_entropy(input,
""" """
A loss layer for multi binary label cross entropy. A loss layer for multi binary label cross entropy.
The example usage is:
.. code-block:: python .. code-block:: python
cost = multi_binary_label_cross_entropy(input=input_layer, cost = multi_binary_label_cross_entropy(input=input_layer,
...@@ -5447,6 +5481,8 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None): ...@@ -5447,6 +5481,8 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
More details can be found by referring to `Fast R-CNN More details can be found by referring to `Fast R-CNN
<https://arxiv.org/pdf/1504.08083v2.pdf>`_ <https://arxiv.org/pdf/1504.08083v2.pdf>`_
The example usage is:
.. code-block:: python .. code-block:: python
cost = smooth_l1_cost(input=input_layer, cost = smooth_l1_cost(input=input_layer,
...@@ -5496,6 +5532,8 @@ def multiplex_layer(input, name=None, layer_attr=None): ...@@ -5496,6 +5532,8 @@ def multiplex_layer(input, name=None, layer_attr=None):
where, y is output. :math:`x_{k}` is the k-th input layer and where, y is output. :math:`x_{k}` is the k-th input layer and
:math:`k = x_{0}[i] + 1`. :math:`k = x_{0}[i] + 1`.
The example usage is:
.. code-block:: python .. code-block:: python
maxid = multiplex_layer(input=layers) maxid = multiplex_layer(input=layers)
...@@ -5528,3 +5566,155 @@ def multiplex_layer(input, name=None, layer_attr=None): ...@@ -5528,3 +5566,155 @@ def multiplex_layer(input, name=None, layer_attr=None):
layer_type=LayerType.MULTIPLEX_LAYER, layer_type=LayerType.MULTIPLEX_LAYER,
parents=input, parents=input,
size=l.config.size) size=l.config.size)
@wrap_name_default("dropout")
def dropout_layer(input, dropout_rate, name=None):
"""
@TODO(yuyang18): Add comments.
:param name:
:param input:
:param dropout_rate:
:return:
"""
return addto_layer(
name=name,
input=input,
act=LinearActivation(),
bias_attr=False,
layer_attr=ExtraAttr(drop_rate=dropout_rate))
@wrap_name_default()
@wrap_act_default(act=LinearActivation())
@wrap_param_attr_default()
@layer_support(DROPOUT)
def row_conv_layer(input,
context_len,
act=None,
name=None,
param_attr=None,
layer_attr=None):
"""
The row convolution is called lookahead convolution. It is firstly
introduced in paper of `Deep Speech 2: End-toEnd Speech Recognition
in English and Mandarin <https://arxiv.org/pdf/1512.02595v1.pdf>`_ .
The bidirectional RNN that learns representation for a sequence by
performing a forward and a backward pass through the entire sequence.
However, unlike unidirectional RNNs, bidirectional RNNs are challenging
to deploy in an online and low-latency setting. The lookahead convolution
incorporates information from future subsequences in a computationally
efficient manner to improve unidirectional recurrent neural networks.
The connection of row convolution is different form the 1D sequence
convolution. Assumed that, the future context-length is k, that is to say,
it can get the output at timestep t by using the the input feature from t-th
timestep to (t+k+1)-th timestep. Assumed that the hidden dim of input
activations are d, the activations r_t for the new layer at time-step t are:
.. math::
r_{t,r} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}}
\quad \text{for} \quad (1 \leq i \leq d)
Note:
The `context_len` is `k + 1`. That is to say, the lookahead step
number plus one equals context_len.
.. code-block:: python
row_conv = row_conv_layer(input=input_layer, context_len=3)
:param input: The input layer.
:type input: LayerOutput
:param context_len: The context length equals the lookahead step number
plus one.
:type context_len: int
:param act: Activation Type. Default is linear activation.
:type act: BaseActivation
:param param_attr: The Parameter Attribute. If None, the parameter will be
initialized smartly. It's better set it by yourself.
:type param_attr: ParameterAttribute
:param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute|None
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert isinstance(input, LayerOutput)
assert context_len > 0, "the context_len must be greatet than 0."
Layer(
inputs=[Input(input.name, **param_attr.attr)],
name=name,
context_length=context_len,
type=LayerType.ROW_CONV_LAYER,
active_type=act.name,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name, LayerType.ROW_CONV_LAYER, input, activation=act, size=input.size)
@layer_support()
@wrap_name_default()
@wrap_param_attr_default()
def prelu_layer(input,
name=None,
partial_sum=1,
param_attr=None,
layer_attr=None):
"""
The Parameter Relu activation that actives outputs with a learnable weight.
Reference:
Delving Deep into Rectifiers: Surpassing Human-Level Performance on
ImageNet Classification http://arxiv.org/pdf/1502.01852v1.pdf
.. math::
z_i &\\quad if \\quad z_i > 0 \\\\
a_i * z_i &\\quad \\mathrm{otherwise}
The example usage is:
.. code-block:: python
prelu = prelu_layer(input=layers, partial_sum=1)
:param name: Name of this layer.
:type name: basestring
:param input: The input layer.
:type input: LayerOutput
:param partial_sum: this parameter makes a group of inputs share a same weight.
- partial_sum = 1, indicates the element-wise activation: each element has a weight.
- partial_sum = number of elements in one channel, indicates the channel-wise activation, elements in a channel share a same weight.
- partial_sum = number of outputs, indicates all elements share a same weight.
:type partial_sum: int
:param param_attr: The parameter attribute. See ParameterAttribute for details.
:type param_attr: ParameterAttribute|None
:param layer_attr: Extra layer configurations. Default is None.
:type layer_attr: ExtraLayerAttribute|None
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert isinstance(input, LayerOutput), 'prelu_layer only accepts one input'
assert isinstance(param_attr, ParameterAttribute)
l = Layer(
name=name,
type=LayerType.PRELU,
inputs=Input(input.name, **param_attr.attr),
partial_sum=partial_sum,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name=name,
layer_type=LayerType.PRELU,
parents=input,
size=l.config.size)
...@@ -26,10 +26,10 @@ from paddle.trainer.config_parser import * ...@@ -26,10 +26,10 @@ from paddle.trainer.config_parser import *
__all__ = [ __all__ = [
'sequence_conv_pool', 'simple_lstm', "simple_img_conv_pool", 'sequence_conv_pool', 'simple_lstm', "simple_img_conv_pool",
"img_conv_bn_pool", 'dropout_layer', 'lstmemory_group', 'lstmemory_unit', "img_conv_bn_pool", 'lstmemory_group', 'lstmemory_unit', 'small_vgg',
'small_vgg', 'img_conv_group', 'vgg_16_network', 'gru_unit', 'gru_group', 'img_conv_group', 'vgg_16_network', 'gru_unit', 'gru_group', 'simple_gru',
'simple_gru', 'simple_attention', 'simple_gru2', 'bidirectional_gru', 'simple_attention', 'simple_gru2', 'bidirectional_gru', 'text_conv_pool',
'text_conv_pool', 'bidirectional_lstm', 'inputs', 'outputs' 'bidirectional_lstm', 'inputs', 'outputs'
] ]
###################################################### ######################################################
...@@ -1366,29 +1366,6 @@ def simple_attention(encoded_sequence, ...@@ -1366,29 +1366,6 @@ def simple_attention(encoded_sequence,
input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name) input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name)
############################################################################
# Miscs #
############################################################################
@wrap_name_default("dropout")
def dropout_layer(input, dropout_rate, name=None):
"""
@TODO(yuyang18): Add comments.
:param name:
:param input:
:param dropout_rate:
:return:
"""
return addto_layer(
name=name,
input=input,
act=LinearActivation(),
bias_attr=False,
layer_attr=ExtraAttr(drop_rate=dropout_rate))
def inputs(layers, *args): def inputs(layers, *args):
""" """
Declare the inputs of network. The order of input should be as same as Declare the inputs of network. The order of input should be as same as
......
...@@ -5,6 +5,7 @@ last_first_seq test_expand_layer test_ntm_layers test_hsigmoid ...@@ -5,6 +5,7 @@ last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer) test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv)
export whole_configs=(test_split_datasource) export whole_configs=(test_split_datasource)
type: "nn"
layers {
name: "input"
type: "data"
size: 300
active_type: ""
}
layers {
name: "__prelu_layer_0__"
type: "prelu"
size: 300
active_type: ""
inputs {
input_layer_name: "input"
input_parameter_name: "___prelu_layer_0__.w0"
}
}
parameters {
name: "___prelu_layer_0__.w0"
size: 300
initial_mean: 0.0
initial_std: 0.057735026919
initial_strategy: 0
initial_smart: true
}
input_layer_names: "input"
output_layer_names: "__prelu_layer_0__"
sub_models {
name: "root"
layer_names: "input"
layer_names: "__prelu_layer_0__"
input_layer_names: "input"
output_layer_names: "__prelu_layer_0__"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "data"
type: "data"
size: 2560
active_type: ""
}
layers {
name: "__row_conv_layer_0__"
type: "maxout"
size: 2560
active_type: "relu"
inputs {
input_layer_name: "data"
input_parameter_name: "___row_conv_layer_0__.w0"
row_conv_conf {
context_length: 19
}
}
}
parameters {
name: "___row_conv_layer_0__.w0"
size: 48640
initial_mean: 0.0
initial_std: 0.229415733871
dims: 19
dims: 2560
initial_strategy: 0
initial_smart: true
}
input_layer_names: "data"
output_layer_names: "__row_conv_layer_0__"
sub_models {
name: "root"
layer_names: "data"
layer_names: "__row_conv_layer_0__"
input_layer_names: "data"
output_layer_names: "__row_conv_layer_0__"
is_recurrent_layer_group: false
}
from paddle.trainer_config_helpers import *
data = data_layer(name='input', size=300)
prelu = prelu_layer(input=data)
outputs(prelu)
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
data = data_layer(name='data', size=2560)
row_conv = row_conv_layer(input=data, context_len=19, act=ReluActivation())
outputs(row_conv)
...@@ -12,7 +12,7 @@ from paddle.trainer.config_parser import logger ...@@ -12,7 +12,7 @@ from paddle.trainer.config_parser import logger
try: try:
import cv2 import cv2
except ImportError: except ImportError:
logger.warning("OpenCV2 is not installed, using PIL to prcoess") logger.warning("OpenCV2 is not installed, using PIL to process")
cv2 = None cv2 = None
__all__ = ["CvTransformer", "PILTransformer", "MultiProcessImageTransformer"] __all__ = ["CvTransformer", "PILTransformer", "MultiProcessImageTransformer"]
......
...@@ -26,6 +26,7 @@ import evaluator ...@@ -26,6 +26,7 @@ import evaluator
from . import dataset from . import dataset
from . import reader from . import reader
from . import plot from . import plot
from . import master
import attr import attr
import op import op
import pooling import pooling
...@@ -37,9 +38,26 @@ import plot ...@@ -37,9 +38,26 @@ import plot
import image import image
__all__ = [ __all__ = [
'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', 'optimizer',
'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader', 'layer',
'topology', 'networks', 'infer', 'plot', 'evaluator', 'image' 'activation',
'parameters',
'init',
'trainer',
'event',
'data_type',
'attr',
'pooling',
'data_feeder',
'dataset',
'reader',
'topology',
'networks',
'infer',
'plot',
'evaluator',
'image',
'master',
] ]
......
...@@ -149,3 +149,57 @@ def cluster_files_reader(files_pattern, ...@@ -149,3 +149,57 @@ def cluster_files_reader(files_pattern,
yield line yield line
return reader return reader
def convert(output_path,
reader,
num_shards,
name_prefix,
max_lines_to_shuffle=1000):
import recordio
import cPickle as pickle
import random
"""
Convert data from reader to recordio format files.
:param output_path: directory in which output files will be saved.
:param reader: a data reader, from which the convert program will read data instances.
:param num_shards: the number of shards that the dataset will be partitioned into.
:param name_prefix: the name prefix of generated files.
:param max_lines_to_shuffle: the max lines numbers to shuffle before writing.
"""
assert num_shards >= 1
assert max_lines_to_shuffle >= 1
def open_writers():
w = []
for i in range(0, num_shards):
n = "%s/%s-%05d-of-%05d" % (output_path, name_prefix, i,
num_shards - 1)
w.append(recordio.writer(n))
return w
def close_writers(w):
for i in range(0, num_shards):
w[i].close()
def write_data(w, lines):
random.shuffle(lines)
for i, d in enumerate(lines):
d = pickle.dumps(d, pickle.HIGHEST_PROTOCOL)
w[i % num_shards].write(d)
w = open_writers()
lines = []
for i, d in enumerate(reader()):
lines.append(d)
if i % max_lines_to_shuffle == 0 and i >= max_lines_to_shuffle:
write_data(w, lines)
lines = []
continue
write_data(w, lines)
close_writers(w)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module will download dataset from
http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html
and parse train/test set intopaddle reader creators.
This set contains images of flowers belonging to 102 different categories.
The images were acquired by searching the web and taking pictures. There are a
minimum of 40 images for each category.
The database was used in:
Nilsback, M-E. and Zisserman, A. Automated flower classification over a large
number of classes.Proceedings of the Indian Conference on Computer Vision,
Graphics and Image Processing (2008)
http://www.robots.ox.ac.uk/~vgg/publications/papers/nilsback08.{pdf,ps.gz}.
"""
import cPickle
import itertools
from common import download
import tarfile
import scipy.io as scio
from paddle.v2.image import *
import os
import numpy as np
import paddle.v2 as paddle
from multiprocessing import cpu_count
__all__ = ['train', 'test', 'valid']
DATA_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz'
LABEL_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat'
SETID_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/setid.mat'
DATA_MD5 = '52808999861908f626f3c1f4e79d11fa'
LABEL_MD5 = 'e0620be6f572b9609742df49c70aed4d'
SETID_MD5 = 'a5357ecc9cb78c4bef273ce3793fc85c'
def default_mapper(sample):
'''
map image bytes data to type needed by model input layer
'''
img, label = sample
img = paddle.image.load_image_bytes(img)
img = paddle.image.simple_transform(img, 256, 224, True)
return img.flatten().astype('float32'), label
def reader_creator(data_file,
label_file,
setid_file,
dataset_name,
mapper=default_mapper,
buffered_size=1024):
'''
1. read images from tar file and
merge images into batch files in 102flowers.tgz_batch/
2. get a reader to read sample from batch file
:param data_file: downloaded data file
:type data_file: string
:param label_file: downloaded label file
:type label_file: string
:param setid_file: downloaded setid file containing information
about how to split dataset
:type setid_file: string
:param dataset_name: data set name (tstid|trnid|valid)
:type dataset_name: string
:param mapper: a function to map image bytes data to type
needed by model input layer
:type mapper: callable
:param buffered_size: the size of buffer used to process images
:type buffered_size: int
:return: data reader
:rtype: callable
'''
labels = scio.loadmat(label_file)['labels'][0]
indexes = scio.loadmat(setid_file)[dataset_name][0]
img2label = {}
for i in indexes:
img = "jpg/image_%05d.jpg" % i
img2label[img] = labels[i - 1]
file_list = batch_images_from_tar(data_file, dataset_name, img2label)
def reader():
for file in open(file_list):
file = file.strip()
batch = None
with open(file, 'r') as f:
batch = cPickle.load(f)
data = batch['data']
labels = batch['label']
for sample, label in itertools.izip(data, batch['label']):
yield sample, int(label)
return paddle.reader.xmap_readers(mapper, reader,
cpu_count(), buffered_size)
def train(mapper=default_mapper, buffered_size=1024):
'''
Create flowers training set reader.
It returns a reader, each sample in the reader is
image pixels in [0, 1] and label in [1, 102]
translated from original color image by steps:
1. resize to 256*256
2. random crop to 224*224
3. flatten
:param mapper: a function to map sample.
:type mapper: callable
:param buffered_size: the size of buffer used to process images
:type buffered_size: int
:return: train data reader
:rtype: callable
'''
return reader_creator(
download(DATA_URL, 'flowers', DATA_MD5),
download(LABEL_URL, 'flowers', LABEL_MD5),
download(SETID_URL, 'flowers', SETID_MD5), 'trnid', mapper,
buffered_size)
def test(mapper=default_mapper, buffered_size=1024):
'''
Create flowers test set reader.
It returns a reader, each sample in the reader is
image pixels in [0, 1] and label in [1, 102]
translated from original color image by steps:
1. resize to 256*256
2. random crop to 224*224
3. flatten
:param mapper: a function to map sample.
:type mapper: callable
:param buffered_size: the size of buffer used to process images
:type buffered_size: int
:return: test data reader
:rtype: callable
'''
return reader_creator(
download(DATA_URL, 'flowers', DATA_MD5),
download(LABEL_URL, 'flowers', LABEL_MD5),
download(SETID_URL, 'flowers', SETID_MD5), 'tstid', mapper,
buffered_size)
def valid(mapper=default_mapper, buffered_size=1024):
'''
Create flowers validation set reader.
It returns a reader, each sample in the reader is
image pixels in [0, 1] and label in [1, 102]
translated from original color image by steps:
1. resize to 256*256
2. random crop to 224*224
3. flatten
:param mapper: a function to map sample.
:type mapper: callable
:param buffered_size: the size of buffer used to process images
:type buffered_size: int
:return: test data reader
:rtype: callable
'''
return reader_creator(
download(DATA_URL, 'flowers', DATA_MD5),
download(LABEL_URL, 'flowers', LABEL_MD5),
download(SETID_URL, 'flowers', SETID_MD5), 'valid', mapper,
buffered_size)
def fetch():
download(DATA_URL, 'flowers', DATA_MD5)
download(LABEL_URL, 'flowers', LABEL_MD5)
download(SETID_URL, 'flowers', SETID_MD5)
...@@ -57,6 +57,38 @@ class TestCommon(unittest.TestCase): ...@@ -57,6 +57,38 @@ class TestCommon(unittest.TestCase):
for idx, e in enumerate(reader()): for idx, e in enumerate(reader()):
self.assertEqual(e, str("0")) self.assertEqual(e, str("0"))
def test_convert(self):
record_num = 10
num_shards = 4
def test_reader():
def reader():
for x in xrange(record_num):
yield x
return reader
path = tempfile.mkdtemp()
paddle.v2.dataset.common.convert(path,
test_reader(), num_shards,
'random_images')
files = glob.glob(path + '/random_images-*')
self.assertEqual(len(files), num_shards)
recs = []
for i in range(0, num_shards):
n = "%s/random_images-%05d-of-%05d" % (path, i, num_shards - 1)
r = recordio.reader(n)
while True:
d = r.read()
if d is None:
break
recs.append(d)
recs.sort()
self.assertEqual(total, record_num)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
...@@ -14,26 +11,41 @@ ...@@ -14,26 +11,41 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
Print model parameters in last model
Usage:
python evaluate_model.py
"""
import numpy as np
import os
def load(file_name):
with open(file_name, 'rb') as f:
f.read(16) # skip header for float type.
return np.fromfile(f, dtype=np.float32)
def main(): import paddle.v2.dataset.flowers
print 'w=%.6f, b=%.6f from pass 29' % (load('output/pass-00029/w'), import unittest
load('output/pass-00029/b'))
class TestFlowers(unittest.TestCase):
def check_reader(self, reader):
sum = 0
label = 0
size = 224 * 224 * 3
for l in reader():
self.assertEqual(l[0].size, size)
if l[1] > label:
label = l[1]
sum += 1
return sum, label
def test_train(self):
instances, max_label_value = self.check_reader(
paddle.v2.dataset.flowers.train())
self.assertEqual(instances, 1020)
self.assertEqual(max_label_value, 102)
def test_test(self):
instances, max_label_value = self.check_reader(
paddle.v2.dataset.flowers.test())
self.assertEqual(instances, 6149)
self.assertEqual(max_label_value, 102)
def test_valid(self):
instances, max_label_value = self.check_reader(
paddle.v2.dataset.flowers.valid())
self.assertEqual(instances, 1020)
self.assertEqual(max_label_value, 102)
if __name__ == '__main__': if __name__ == '__main__':
main() unittest.main()
import numpy as np import numpy as np
try: try:
import cv2 import cv2
except: except ImportError:
print( cv2 = None
"import cv2 error, please install opencv-python: pip install opencv-python" import os
) import tarfile
import cPickle
__all__ = [ __all__ = [
"load_image", "resize_short", "to_chw", "center_crop", "random_crop", "load_image_bytes", "load_image", "resize_short", "to_chw", "center_crop",
"left_right_flip", "simple_transform", "load_and_transform" "random_crop", "left_right_flip", "simple_transform", "load_and_transform",
"batch_images_from_tar"
] ]
""" """
This file contains some common interfaces for image preprocess. This file contains some common interfaces for image preprocess.
...@@ -28,6 +30,90 @@ the image layout as follows. ...@@ -28,6 +30,90 @@ the image layout as follows.
""" """
def batch_images_from_tar(data_file,
dataset_name,
img2label,
num_per_batch=1024):
"""
Read images from tar file and batch them into batch file.
param data_file: path of image tar file
type data_file: string
param dataset_name: 'train','test' or 'valid'
type dataset_name: string
param img2label: a dic with image file name as key
and image's label as value
type img2label: dic
param num_per_batch: image number per batch file
type num_per_batch: int
return: path of list file containing paths of batch file
rtype: string
"""
batch_dir = data_file + "_batch"
out_path = "%s/%s" % (batch_dir, dataset_name)
meta_file = "%s/%s.txt" % (batch_dir, dataset_name)
if os.path.exists(out_path):
return meta_file
else:
os.makedirs(out_path)
tf = tarfile.open(data_file)
mems = tf.getmembers()
data = []
labels = []
file_id = 0
for mem in mems:
if mem.name in img2label:
data.append(tf.extractfile(mem).read())
labels.append(img2label[mem.name])
if len(data) == num_per_batch:
output = {}
output['label'] = labels
output['data'] = data
cPickle.dump(
output,
open('%s/batch_%d' % (out_path, file_id), 'w'),
protocol=cPickle.HIGHEST_PROTOCOL)
file_id += 1
data = []
labels = []
if len(data) > 0:
output = {}
output['label'] = labels
output['data'] = data
cPickle.dump(
output,
open('%s/batch_%d' % (out_path, file_id), 'w'),
protocol=cPickle.HIGHEST_PROTOCOL)
with open(meta_file, 'a') as meta:
for file in os.listdir(out_path):
meta.write(os.path.abspath("%s/%s" % (out_path, file)) + "\n")
return meta_file
def load_image_bytes(bytes, is_color=True):
"""
Load an color or gray image from bytes array.
Example usage:
.. code-block:: python
with open('cat.jpg') as f:
im = load_image_bytes(f.read())
:param bytes: the input image bytes array.
:type file: str
:param is_color: If set is_color True, it will load and
return a color image. Otherwise, it will
load and return a gray image.
"""
flag = 1 if is_color else 0
file_bytes = np.asarray(bytearray(bytes), dtype=np.uint8)
img = cv2.imdecode(file_bytes, flag)
return img
def load_image(file, is_color=True): def load_image(file, is_color=True):
""" """
Load an color or gray image from the file path. Load an color or gray image from the file path.
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
""" """
`paddle.v2.layer` is a part of model config packages in paddle.v2. In API v2, `paddle.v2.layer` is a part of model config packages in paddle.v2. In API v2,
we want to make Paddle a plain Python package. The model config package defined we want to make Paddle a plain Python package. The model config package defines
the way how to configure a neural network topology in Paddle Python code. the way how to configure a neural network topology in Paddle Python code.
The primary usage shows below. The primary usage shows below.
...@@ -30,7 +30,6 @@ The primary usage shows below. ...@@ -30,7 +30,6 @@ The primary usage shows below.
# use prediction instance where needed. # use prediction instance where needed.
parameters = paddle.parameters.create(cost) parameters = paddle.parameters.create(cost)
""" """
import collections import collections
import copy import copy
import re import re
...@@ -44,9 +43,10 @@ __all__ = ['data', 'parse_network'] ...@@ -44,9 +43,10 @@ __all__ = ['data', 'parse_network']
def __need_to_keep__(name): def __need_to_keep__(name):
if name in ['StaticInput', 'LayerType', 'layer_support']: return name in [
return False 'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType',
return True 'layer_support'
]
def __need_to_wrap__(name): def __need_to_wrap__(name):
...@@ -54,6 +54,8 @@ def __need_to_wrap__(name): ...@@ -54,6 +54,8 @@ def __need_to_wrap__(name):
def __convert_name__(inname): def __convert_name__(inname):
if __need_to_keep__(inname):
return inname
if inname == 'maxid_layer': if inname == 'maxid_layer':
return 'max_id' return 'max_id'
elif inname.endswith('memory') or inname.endswith( elif inname.endswith('memory') or inname.endswith(
...@@ -74,8 +76,6 @@ def __convert_name__(inname): ...@@ -74,8 +76,6 @@ def __convert_name__(inname):
for name in v1_layers.__all__: for name in v1_layers.__all__:
obj = getattr(v1_layers, name) obj = getattr(v1_layers, name)
if not __need_to_keep__(name):
continue
new_name = __convert_name__(name) new_name = __convert_name__(name)
if callable(obj) and __need_to_wrap__(name): if callable(obj) and __need_to_wrap__(name):
globals()[new_name] = __convert_to_v2__(obj, new_name, __name__) globals()[new_name] = __convert_to_v2__(obj, new_name, __name__)
...@@ -107,7 +107,7 @@ __data_layer__.__doc__ = __map_data_docstr__(v1_layers.data_layer.__doc__) ...@@ -107,7 +107,7 @@ __data_layer__.__doc__ = __map_data_docstr__(v1_layers.data_layer.__doc__)
data = __convert_to_v2__(__data_layer__, 'name', __name__) data = __convert_to_v2__(__data_layer__, 'name', __name__)
def __get_used_layers__(output_layers, extra_layers=None): def __get_used_layers__(output_layers):
layer_names = set() layer_names = set()
parents = {} parents = {}
...@@ -132,6 +132,13 @@ def __get_used_layers__(output_layers, extra_layers=None): ...@@ -132,6 +132,13 @@ def __get_used_layers__(output_layers, extra_layers=None):
add_parent(mem.layer_name, mem.boot_layer_name) add_parent(mem.layer_name, mem.boot_layer_name)
add_parent(mem.link_name, mem.layer_name) add_parent(mem.link_name, mem.layer_name)
if sub_model.HasField('generator'):
# according to the implementation of text generation
# in recurrent layer group, the generated word must be
# the first out link
add_parent(sub_model.out_links[0].layer_name,
sub_model.generator.eos_layer_name)
def dfs_travel(layer_name): def dfs_travel(layer_name):
if layer_name in layer_names: if layer_name in layer_names:
return return
...@@ -149,6 +156,20 @@ def __get_used_layers__(output_layers, extra_layers=None): ...@@ -149,6 +156,20 @@ def __get_used_layers__(output_layers, extra_layers=None):
for layer in output_layers: for layer in output_layers:
dfs_travel(layer.full_name) dfs_travel(layer.full_name)
# print layer needs to be specially handled because no other
# layer depends on it. It is used to print the result of some
# layers when running the model for debug purpose. So we explicitly
# add a print layer to the topolty if its input is in the toplogy.
for layer in cp.g_config.model_config.layers:
if layer.type == 'print':
used = True
for inp in layer.inputs:
if inp.input_layer_name not in layer_names:
used = False
break
if used:
layer_names.add(layer.name)
return layer_names return layer_names
...@@ -233,8 +254,8 @@ def __trim_submodel__(old_submodel, layer_names, input_layer_names, ...@@ -233,8 +254,8 @@ def __trim_submodel__(old_submodel, layer_names, input_layer_names,
def parse_network(output_layers, extra_layers=None): def parse_network(output_layers, extra_layers=None):
if not isinstance(output_layers, collections.Sequence): if not isinstance(output_layers, collections.Sequence):
output_layers = [output_layers] output_layers = [output_layers]
if extra_layers is not None and not isinstance(extra_layers, if extra_layers is not None:
collections.Sequence): if not isinstance(extra_layers, collections.Sequence):
extra_layers = [extra_layers] extra_layers = [extra_layers]
else: else:
extra_layers = [] extra_layers = []
...@@ -248,18 +269,29 @@ def parse_network(output_layers, extra_layers=None): ...@@ -248,18 +269,29 @@ def parse_network(output_layers, extra_layers=None):
model_config = ModelConfig() model_config = ModelConfig()
model_config.type = cp.g_config.model_config.type model_config.type = cp.g_config.model_config.type
for layer in output_layers:
model_config.output_layer_names.append(layer.full_name)
output_layer_names.add(layer.full_name)
for l in cp.g_config.model_config.layers: for l in cp.g_config.model_config.layers:
if l.name not in layer_names: if l.name not in layer_names:
continue continue
model_config.layers.extend([l]) model_config.layers.extend([l])
if l.type == 'data': if l.type == 'data':
if l.name in model_config.output_layer_names:
"""
In text generation, the outlink to save the generated word
indices is a data_layer defined in recurrent_group. This
data_layer is sure to be the output of the network in text
generation task, so this statement excludes such a special
data_layer from being inputs of the network, otherwise an error
will occur during data feeding.
"""
continue
model_config.input_layer_names.append(l.name) model_config.input_layer_names.append(l.name)
input_layer_names.add(l.name) input_layer_names.add(l.name)
for layer in output_layers:
model_config.output_layer_names.append(layer.full_name)
output_layer_names.add(layer.full_name)
for e in cp.g_config.model_config.evaluators: for e in cp.g_config.model_config.evaluators:
if e.name in evaluator_names: if e.name in evaluator_names:
model_config.evaluators.extend([e]) model_config.evaluators.extend([e])
......
from client import *
__all__ = ['client']
import ctypes
import os
path = os.path.join(os.path.dirname(__file__), "libpaddle_master.so")
lib = ctypes.cdll.LoadLibrary(path)
class client(object):
"""
client is a client to the master server.
"""
def __init__(self, addr, buf_size):
self.c = lib.paddle_new_master_client(addr, buf_size)
def close(self):
lib.paddle_release_master_client(self.c)
self.c = None
def set_dataset(self, paths):
holder_type = ctypes.c_char_p * len(paths)
holder = holder_type()
print paths
for idx, path in enumerate(paths):
c_ptr = ctypes.c_char_p(path)
holder[idx] = c_ptr
lib.paddle_set_dataset(self.c, holder, len(paths))
def next_record(self):
p = ctypes.c_char_p()
ret = ctypes.pointer(p)
size = lib.paddle_next_record(self.c, ret)
if size == 0:
# Empty record
return ""
record = ret.contents.value[:size]
# Memory created from C should be freed.
lib.mem_free(ret.contents)
return record
...@@ -45,7 +45,12 @@ class Optimizer(object): ...@@ -45,7 +45,12 @@ class Optimizer(object):
return swig_api.ParameterUpdater.createRemoteUpdater( return swig_api.ParameterUpdater.createRemoteUpdater(
self.__opt_conf__, pass_num, use_sparse_updater) self.__opt_conf__, pass_num, use_sparse_updater)
def create_updater(self, is_local, num_passes, use_sparse_updater): def __create_new_remote_updater__(self, pserver_spec):
return swig_api.ParameterUpdater.createNewRemoteUpdater(
self.__opt_conf__, pserver_spec)
def create_updater(self, is_local, num_passes, use_sparse_updater,
pserver_spec):
""" """
create proper parameter_updater by configuration. create proper parameter_updater by configuration.
:param is_local: create local or remote parameter updater :param is_local: create local or remote parameter updater
...@@ -64,8 +69,12 @@ class Optimizer(object): ...@@ -64,8 +69,12 @@ class Optimizer(object):
if is_local: if is_local:
parameter_updater = self.__create_local_updater__() parameter_updater = self.__create_local_updater__()
else: else:
if pserver_spec is None:
parameter_updater = self.__create_remote_updater__( parameter_updater = self.__create_remote_updater__(
num_passes, use_sparse_updater) num_passes, use_sparse_updater)
else:
parameter_updater = self.__create_new_remote_updater__(
pserver_spec)
return parameter_updater return parameter_updater
......
import numpy as np import numpy as np
import py_paddle.swig_paddle as api import py_paddle.swig_paddle as api
from paddle.proto.ParameterConfig_pb2 import ParameterConfig from paddle.proto.ParameterConfig_pb2 import ParameterConfig
import paddle.trainer.config_parser as cp
import struct import struct
import tarfile import tarfile
import cStringIO import cStringIO
...@@ -18,8 +19,11 @@ def create(layers): ...@@ -18,8 +19,11 @@ def create(layers):
""" """
topology = Topology(layers) topology = Topology(layers)
pool = Parameters() pool = Parameters()
initializers = cp.g_parameter_initializer_map
for param in topology.proto().parameters: for param in topology.proto().parameters:
pool.__append_config__(param) pool.__append_config__(param)
if param.name in initializers:
pool[param.name] = initializers[param.name](param.name)
return pool return pool
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
__all__ = [ __all__ = [
'map_readers', 'buffered', 'compose', 'chain', 'shuffle', 'map_readers', 'buffered', 'compose', 'chain', 'shuffle',
'ComposeNotAligned', 'firstn' 'ComposeNotAligned', 'firstn', 'xmap_readers'
] ]
import itertools import itertools
...@@ -224,3 +224,74 @@ def firstn(reader, n): ...@@ -224,3 +224,74 @@ def firstn(reader, n):
yield item yield item
return firstn_reader return firstn_reader
class XmapEndSignal():
pass
def xmap_readers(mapper, reader, process_num, buffer_size):
"""
Use multiprocess to map samples from reader by a mapper defined by user.
And this function contains a buffered decorator.
:param mapper: a function to map sample.
:type mapper: callable
:param reader: the data reader to read from
:type reader: callable
:param process_num: process number to handle original sample
:type process_num: int
:param buffer_size: max buffer size
:type buffer_size: int
:return: the decarated reader
:rtype: callable
"""
end = XmapEndSignal()
in_queue = Queue(buffer_size)
out_queue = Queue(buffer_size)
# define a worker to read samples from reader to in_queue
def read_worker(reader, in_queue):
for i in reader():
in_queue.put(i)
in_queue.put(end)
# start a read worker in a thread
t = Thread(target=read_worker, args=(reader, in_queue))
t.daemon = True
t.start()
# define a worker to handle samples from in_queue by mapper
# and put mapped samples into out_queue
def handle_worker(in_queue, out_queue, mapper):
sample = in_queue.get()
while not isinstance(sample, XmapEndSignal):
r = mapper(sample)
out_queue.put(r)
sample = in_queue.get()
in_queue.put(end)
out_queue.put(end)
# start several handle_workers
workers = []
for i in xrange(process_num):
worker = Thread(
target=handle_worker, args=(in_queue, out_queue, mapper))
worker.daemon = True
workers.append(worker)
for w in workers:
w.start()
def xreader():
sample = out_queue.get()
while not isinstance(sample, XmapEndSignal):
yield sample
sample = out_queue.get()
finish = 1
while finish < process_num:
sample = out_queue.get()
if isinstance(sample, XmapEndSignal):
finish += 1
else:
yield sample
return xreader
...@@ -164,6 +164,7 @@ class OtherLayerTest(unittest.TestCase): ...@@ -164,6 +164,7 @@ class OtherLayerTest(unittest.TestCase):
maxid = layer.max_id(input=inference) maxid = layer.max_id(input=inference)
sampling_id = layer.sampling_id(input=inference) sampling_id = layer.sampling_id(input=inference)
eos = layer.eos(input=maxid, eos_id=5) eos = layer.eos(input=maxid, eos_id=5)
layer.printer(maxid)
print layer.parse_network([maxid, sampling_id, eos]) print layer.parse_network([maxid, sampling_id, eos])
def test_slicing_joining_layer(self): def test_slicing_joining_layer(self):
......
...@@ -11,6 +11,9 @@ except ImportError: ...@@ -11,6 +11,9 @@ except ImportError:
sys.exit(0) sys.exit(0)
import paddle.v2.parameters as parameters import paddle.v2.parameters as parameters
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
from paddle.v2.attr import ParamAttr
from paddle.proto.ParameterConfig_pb2 import ParameterConfig from paddle.proto.ParameterConfig_pb2 import ParameterConfig
import random import random
import cStringIO import cStringIO
...@@ -55,6 +58,25 @@ class TestParameters(unittest.TestCase): ...@@ -55,6 +58,25 @@ class TestParameters(unittest.TestCase):
p1 = params_dup.get(name) p1 = params_dup.get(name)
self.assertTrue(numpy.isclose(p0, p1).all()) self.assertTrue(numpy.isclose(p0, p1).all())
def test_initializer(self):
def initializer(name):
assert name == "fc.w"
mat = numpy.ones((3, 2), dtype=numpy.float32)
mat[1, 1] = 2
return mat
x = layer.data(name="x", type=data_type.dense_vector(3))
y = layer.fc(x,
size=2,
bias_attr=False,
param_attr=ParamAttr(
name="fc.w", initializer=initializer))
params = parameters.create(y)
val = params["fc.w"]
assert val.shape == (3, 2)
expected = numpy.array([[1, 1], [1, 2], [1, 1]], numpy.float32)
assert numpy.logical_and.reduce(numpy.reshape(val == expected, 6))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -31,7 +31,6 @@ class Topology(object): ...@@ -31,7 +31,6 @@ class Topology(object):
def __init__(self, layers, extra_layers=None): def __init__(self, layers, extra_layers=None):
def __check__(layers): def __check__(layers):
if not isinstance(layers, collections.Sequence): if not isinstance(layers, collections.Sequence):
__check_layer_type__(layers)
layers = [layers] layers = [layers]
for layer in layers: for layer in layers:
__check_layer_type__(layer) __check_layer_type__(layer)
...@@ -91,6 +90,7 @@ class Topology(object): ...@@ -91,6 +90,7 @@ class Topology(object):
[('image', dense_vector(768)), ('label', integer_value(10))] [('image', dense_vector(768)), ('label', integer_value(10))]
""" """
data_layers = self.data_layers() data_layers = self.data_layers()
return [(nm, data_layers[nm].data_type) return [(nm, data_layers[nm].data_type)
for nm in self.proto().input_layer_names] for nm in self.proto().input_layer_names]
......
...@@ -49,7 +49,8 @@ class SGD(object): ...@@ -49,7 +49,8 @@ class SGD(object):
parameters, parameters,
update_equation, update_equation,
extra_layers=None, extra_layers=None,
is_local=True): is_local=True,
pserver_spec=None):
if not isinstance(parameters, v2_parameters.Parameters): if not isinstance(parameters, v2_parameters.Parameters):
raise TypeError('parameters should be parameters') raise TypeError('parameters should be parameters')
...@@ -63,6 +64,7 @@ class SGD(object): ...@@ -63,6 +64,7 @@ class SGD(object):
self.__parameters__ = parameters self.__parameters__ = parameters
self.__topology_in_proto__ = topology.proto() self.__topology_in_proto__ = topology.proto()
self.__is_local__ = is_local self.__is_local__ = is_local
self.__pserver_spec__ = pserver_spec
self.__use_sparse_updater__ = self.__topology__.use_sparse_updater() self.__use_sparse_updater__ = self.__topology__.use_sparse_updater()
# # In local mode, disable sparse_remote_update. # # In local mode, disable sparse_remote_update.
...@@ -126,7 +128,8 @@ class SGD(object): ...@@ -126,7 +128,8 @@ class SGD(object):
__check_train_args__(**locals()) __check_train_args__(**locals())
self.__parameter_updater__ = self.__optimizer__.create_updater( self.__parameter_updater__ = self.__optimizer__.create_updater(
self.__is_local__, num_passes, self.__use_sparse_updater__) self.__is_local__, num_passes, self.__use_sparse_updater__,
self.__pserver_spec__)
self.__parameter_updater__.init(self.__gradient_machine__) self.__parameter_updater__.init(self.__gradient_machine__)
self.__gradient_machine__.start() self.__gradient_machine__.start()
......
from setuptools import setup from setuptools import setup
packages=['paddle', packages=['paddle',
'paddle.proto', 'paddle.proto',
'paddle.trainer', 'paddle.trainer',
...@@ -9,21 +8,25 @@ packages=['paddle', ...@@ -9,21 +8,25 @@ packages=['paddle',
'paddle.v2', 'paddle.v2',
'paddle.v2.dataset', 'paddle.v2.dataset',
'paddle.v2.reader', 'paddle.v2.reader',
'paddle.v2.plot'] 'paddle.v2.plot',
'paddle.v2.master']
setup_requires=["requests",
"numpy",
"protobuf==3.1",
"matplotlib",
"rarfile"]
if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']:
setup_requires+=["opencv-python"]
setup(name='paddle', setup(name='paddle',
version='${PADDLE_VERSION}', version='${PADDLE_VERSION}',
description='Parallel Distributed Deep Learning', description='Parallel Distributed Deep Learning',
install_requires=[ install_requires=setup_requires,
"requests",
"numpy",
"protobuf==${PROTOBUF_VERSION}",
"matplotlib",
"opencv-python",
"rarfile"
],
packages=packages, packages=packages,
package_data={'paddle.v2.master': ['libpaddle_master.so'], },
package_dir={ package_dir={
'': '${CMAKE_CURRENT_SOURCE_DIR}' '': '${CMAKE_CURRENT_SOURCE_DIR}'
} },
) )
The examples in v1_api_demo are using v1_api now, and will be upgraded into v2_api later.
Thus, v1_api_demo is a temporary directory. We decide not to maintain it and will delete it in future.
Please go to [PaddlePaddle/book](https://github.com/PaddlePaddle/book) and
[PaddlePaddle/models](https://github.com/PaddlePaddle/models) to learn PaddlePaddle.
#Variational Autoencoder (VAE)
This demo implements VAE training described in the original paper (https://arxiv.org/abs/1312.6114).
In order to run the model, first download the MNIST dataset by running the shell script in ./data.
Then you can run the command below. The flag --useGpu specifies whether to use gpu for training (0 is cpu, 1 is gpu).
$python vae_train.py [--use_gpu 1]
The generated images will be stored in ./samples/
The corresponding models will be stored in ./params/
#!/usr/bin/env sh
# This script downloads the mnist data and unzips it.
set -e
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
rm -rf "$DIR/mnist_data"
mkdir "$DIR/mnist_data"
cd "$DIR/mnist_data"
echo "Downloading..."
for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
do
if [ ! -e $fname ]; then
wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
gunzip ${fname}.gz
fi
done
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
...@@ -13,39 +12,49 @@ ...@@ -13,39 +12,49 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
set -e import numpy as np
set -x
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd $DIR
#download the dataset class MNISTloader():
echo "Downloading aclImdb..." def __init__(self,
#http://ai.stanford.edu/%7Eamaas/data/sentiment/ data_path="./data/mnist_data/",
wget http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz batch_size=60,
process='train'):
self.batch_size = batch_size
self.data_path = data_path
self._pointer = 0
self.image_batches = np.array([])
self.process = process
echo "Downloading mosesdecoder..." def _extract_images(self, filename, n):
#https://github.com/moses-smt/mosesdecoder f = open(filename, 'rb')
wget https://github.com/moses-smt/mosesdecoder/archive/master.zip f.read(16)
data = np.fromfile(f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28))
#Mapping data into [-1, 1]
data = data / 255. * 2. - 1
data_batches = np.split(data, 60000 / self.batch_size, 0)
#extract package f.close()
echo "Unzipping..."
tar -zxvf aclImdb_v1.tar.gz
unzip master.zip
#move train and test set to imdb_data directory return data_batches
#in order to process when traing
mkdir -p imdb/train
mkdir -p imdb/test
cp -r aclImdb/train/pos/ imdb/train/pos @property
cp -r aclImdb/train/neg/ imdb/train/neg def pointer(self):
return self._pointer
cp -r aclImdb/test/pos/ imdb/test/pos def load_data(self):
cp -r aclImdb/test/neg/ imdb/test/neg TRAIN_IMAGES = '%s/train-images-idx3-ubyte' % self.data_path
TEST_IMAGES = '%s/t10k-images-idx3-ubyte' % self.data_path
#remove compressed package if self.process == 'train':
rm aclImdb_v1.tar.gz self.image_batches = self._extract_images(TRAIN_IMAGES, 60000)
rm master.zip else:
self.image_batches = self._extract_images(TEST_IMAGES, 10000)
echo "Done." def next_batch(self):
batch = self.image_batches[self._pointer]
self._pointer = (self._pointer + 1) % (60000 / self.batch_size)
return np.array(batch)
def reset_pointer(self):
self._pointer = 0
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
import numpy as np
is_generating = get_config_arg("is_generating", bool, False)
settings(batch_size=32, learning_rate=1e-3, learning_method=AdamOptimizer())
X_dim = 28 * 28
h_dim = 128
z_dim = 100
def reparameterization(mu, logvar):
eps = ParamAttr(initial_mean=0., initial_std=1)
with mixed_layer() as sigma:
sigma += dotmul_projection(layer_math.exp(logvar) * 0.5, param_attr=eps)
return mu + sigma
def q_func(X):
"""
xavier initialization
"""
param_attr = ParamAttr(
name='share.w', initial_mean=0., initial_std=1. / np.sqrt(X_dim / 2.))
mu_param = ParamAttr(
name='mu.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
logvar_param = ParamAttr(
name='logvar.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
bias_attr = ParamAttr(name='share.bias', initial_mean=0., initial_std=0.)
mu_bias = ParamAttr(name='mu.bias', initial_mean=0., initial_std=0.)
logvar_bias = ParamAttr(name='logvar.bias', initial_mean=0., initial_std=0.)
share_layer = fc_layer(
X,
size=h_dim,
param_attr=param_attr,
bias_attr=bias_attr,
act=ReluActivation())
return (fc_layer(
share_layer,
size=z_dim,
param_attr=mu_param,
bias_attr=mu_bias,
act=LinearActivation()), fc_layer(
share_layer,
size=z_dim,
param_attr=logvar_param,
bias_attr=logvar_bias,
act=LinearActivation()))
def generator(z):
hidden_param = ParamAttr(
name='hidden.w', initial_mean=0., initial_std=1. / np.sqrt(z_dim / 2.))
hidden_bias = ParamAttr(name='hidden.bias', initial_mean=0., initial_std=0.)
prob_param = ParamAttr(
name='prob.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
prob_bias = ParamAttr(name='prob.bias', initial_mean=0., initial_std=0.)
hidden_layer = fc_layer(
z,
size=h_dim,
act=ReluActivation(),
param_attr=hidden_param,
bias_attr=hidden_bias)
prob = fc_layer(
hidden_layer,
size=X_dim,
act=SigmoidActivation(),
param_attr=prob_param,
bias_attr=prob_bias)
return prob
def reconstruct_error(prob, X):
cost = multi_binary_label_cross_entropy(input=prob, label=X)
return cost
def KL_loss(mu, logvar):
with mixed_layer() as mu_square:
mu_square += dotmul_operator(mu, mu, scale=1.)
cost = 0.5 * sum_cost(layer_math.exp(logvar) + mu_square - 1. - logvar)
return cost
if not is_generating:
x_batch = data_layer(name='x_batch', size=X_dim)
mu, logvar = q_func(x_batch)
z_samples = reparameterization(mu, logvar)
prob = generator(z_samples)
outputs(reconstruct_error(prob, x_batch) + KL_loss(mu, logvar))
else:
z_samples = data_layer(name='noise', size=z_dim)
outputs(generator(z_samples))
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import random
import numpy as np
import cPickle
import sys, os
from PIL import Image
from paddle.trainer.config_parser import parse_config
from paddle.trainer.config_parser import logger
import py_paddle.swig_paddle as api
import dataloader
import matplotlib.pyplot as plt
def plot_samples(samples):
fig = plt.figure(figsize=(4, 4))
gs = gridspec.GridSpec(4, 4)
gs.update(wspace=0.05, hspace=0.05)
for i, sample in enumerate(samples):
plt.subplot(gs[i])
plt.axis('off')
plt.imshow(sample.reshape(28, 28), cmap='Greys_r')
return fig
def CHECK_EQ(a, b):
assert a == b, "a=%s, b=%s" % (a, b)
def get_fake_samples(generator_machine, batch_size, noise):
gen_inputs = api.Arguments.createArguments(1)
gen_inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
gen_outputs = api.Arguments.createArguments(0)
generator_machine.forward(gen_inputs, gen_outputs, api.PASS_TEST)
fake_samples = gen_outputs.getSlotValue(0).copyToNumpyMat()
return fake_samples
def copy_shared_parameters(src, dst):
'''
copy the parameters from src to dst
:param src: the source of the parameters
:type src: GradientMachine
:param dst: the destination of the parameters
:type dst: GradientMachine
'''
src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
src_params = dict([(p.getName(), p) for p in src_params])
for i in xrange(dst.getParameterSize()):
dst_param = dst.getParameter(i)
src_param = src_params.get(dst_param.getName(), None)
if src_param is None:
continue
src_value = src_param.getBuf(api.PARAMETER_VALUE)
dst_value = dst_param.getBuf(api.PARAMETER_VALUE)
CHECK_EQ(len(src_value), len(dst_value))
dst_value.copyFrom(src_value)
dst_param.setValueUpdated()
def find(iterable, cond):
for item in iterable:
if cond(item):
return item
return None
def get_layer_size(model_conf, layer_name):
layer_conf = find(model_conf.layers, lambda x: x.name == layer_name)
assert layer_conf is not None, "Cannot find '%s' layer" % layer_name
return layer_conf.size
def main():
parser = argparse.ArgumentParser()
parser.add_argument(
"--use_gpu", default="1", help="1 means use gpu for training")
parser.add_argument("--gpu_id", default="0", help="the gpu_id parameter")
args = parser.parse_args()
use_gpu = args.use_gpu
assert use_gpu in ["0", "1"]
if not os.path.exists("./samples/"):
os.makedirs("./samples/")
if not os.path.exists("./params/"):
os.makedirs("./params/")
api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
'--log_period=1000', '--gpu_id=' + args.gpu_id,
'--save_dir=' + "./params/")
conf = "vae_conf.py"
trainer_conf = parse_config(conf, "is_generating=False")
gener_conf = parse_config(conf, "is_generating=True")
batch_size = trainer_conf.opt_config.batch_size
noise_dim = get_layer_size(gener_conf.model_config, "noise")
mnist = dataloader.MNISTloader(batch_size=batch_size)
mnist.load_data()
training_machine = api.GradientMachine.createFromConfigProto(
trainer_conf.model_config)
generator_machine = api.GradientMachine.createFromConfigProto(
gener_conf.model_config)
trainer = api.Trainer.create(trainer_conf, training_machine)
trainer.startTrain()
for train_pass in xrange(100):
trainer.startTrainPass()
mnist.reset_pointer()
i = 0
it = 0
while mnist.pointer != 0 or i == 0:
X = mnist.next_batch().astype('float32')
inputs = api.Arguments.createArguments(1)
inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(X))
trainer.trainOneDataBatch(batch_size, inputs)
if it % 1000 == 0:
outputs = api.Arguments.createArguments(0)
training_machine.forward(inputs, outputs, api.PASS_TEST)
loss = np.mean(outputs.getSlotValue(0).copyToNumpyMat())
print "\niter: {}".format(str(it).zfill(3))
print "VAE loss: {}".format(str(loss).zfill(3))
#Sync parameters between networks (GradientMachine) at the beginning
copy_shared_parameters(training_machine, generator_machine)
z_samples = np.random.randn(batch_size,
noise_dim).astype('float32')
samples = get_fake_samples(generator_machine, batch_size,
z_samples)
#Generating the first 16 images for a picture.
figure = plot_samples(samples[:16])
plt.savefig(
"./samples/{}_{}.png".format(
str(train_pass).zfill(3), str(i).zfill(3)),
bbox_inches='tight')
plt.close(figure)
i += 1
it += 1
trainer.finishTrainPass()
trainer.finishTrain()
if __name__ == '__main__':
main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册