diff --git a/tsm/README.md b/tsm/README.md
index 68e7c37b012ddb863119d52095390bc240bac3a1..bb5d148376bc0c1dd8c610d89e378b6b1fbcf1d1 100644
--- a/tsm/README.md
+++ b/tsm/README.md
@@ -14,7 +14,7 @@
Temporal Shift Module是由MIT和IBM Watson AI Lab的Ji Lin,Chuang Gan和Song Han等人提出的通过时间位移来提高网络视频理解能力的模块,其位移操作原理如下图所示。
-
+
Temporal shift module
@@ -44,7 +44,7 @@ TSM模型是将Temporal Shift Module插入到ResNet网络中构建的视频分
### 数据准备
-TSM的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。数据下载及准备请参考[数据说明](https://github.com/PaddlePaddle/models/blob/release/1.7/PaddleCV/video/data/dataset/README.md)
+TSM的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。数据下载及准备请参考[数据说明](./dataset/README.md)
#### 小数据集验证
diff --git a/tsm/dataset/README.md b/tsm/dataset/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..55613ef3cbaf9715ba89c231b85614a29d57a136
--- /dev/null
+++ b/tsm/dataset/README.md
@@ -0,0 +1,78 @@
+# 数据使用说明
+
+## Kinetics数据集
+
+Kinetics数据集是DeepMind公开的大规模视频动作识别数据集,有Kinetics400与Kinetics600两个版本。这里使用Kinetics400数据集,具体的数据预处理过程如下。
+
+### mp4视频下载
+在Code\_Root目录下创建文件夹
+
+ cd $Code_Root/data/dataset && mkdir kinetics
+
+ cd kinetics && mkdir data_k400 && cd data_k400
+
+ mkdir train_mp4 && mkdir val_mp4
+
+ActivityNet官方提供了Kinetics的下载工具,具体参考其[官方repo](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics)即可下载Kinetics400的mp4视频集合。将Kinetics400的训练与验证集合分别下载到data/dataset/kinetics/data\_k400/train\_mp4与data/dataset/kinetics/data\_k400/val\_mp4。
+
+### mp4文件预处理
+
+为提高数据读取速度,提前将mp4文件解帧并打pickle包,dataloader从视频的pkl文件中读取数据(该方法耗费更多存储空间)。pkl文件里打包的内容为(video-id, label, [frame1, frame2,...,frameN])。
+
+在 data/dataset/kinetics/data\_k400目录下创建目录train\_pkl和val\_pkl
+
+ cd $Code_Root/data/dataset/kinetics/data_k400
+
+ mkdir train_pkl && mkdir val_pkl
+
+进入$Code\_Root/data/dataset/kinetics目录,使用video2pkl.py脚本进行数据转化。首先需要下载[train](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics/data/kinetics-400_train.csv)和[validation](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics/data/kinetics-400_val.csv)数据集的文件列表。
+
+接着生成预处理需要的数据集标签文件
+
+ python generate_label.py kinetics-400_train.csv kinetics400_label.txt
+
+然后执行如下程序:
+
+ python video2pkl.py kinetics-400_train.csv $Source_dir $Target_dir 8 #以8个进程为例
+
+- 该脚本依赖`ffmpeg`库,请预先安装`ffmpeg`
+
+对于train数据,
+
+ Source_dir = $Code_Root/data/dataset/kinetics/data_k400/train_mp4
+
+ Target_dir = $Code_Root/data/dataset/kinetics/data_k400/train_pkl
+
+对于val数据,
+
+ Source_dir = $Code_Root/data/dataset/kinetics/data_k400/val_mp4
+
+ Target_dir = $Code_Root/data/dataset/kinetics/data_k400/val_pkl
+
+这样即可将mp4文件解码并保存为pkl文件。
+
+### 生成训练和验证集list
+
+ cd $Code_Root/data/dataset/kinetics
+
+ ls $Code_Root/data/dataset/kinetics/data_k400/train_pkl/* > train.list
+
+ ls $Code_Root/data/dataset/kinetics/data_k400/val_pkl/* > val.list
+
+ ls $Code_Root/data/dataset/kinetics/data_k400/val_pkl/* > test.list
+
+ ls $Code_Root/data/dataset/kinetics/data_k400/val_pkl/* > infer.list
+
+即可生成相应的文件列表,train.list和val.list的每一行表示一个pkl文件的绝对路径,示例如下:
+
+ /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/kinetics/data_k400/train_pkl/data_batch_100-097
+ /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/kinetics/data_k400/train_pkl/data_batch_100-114
+ /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/kinetics/data_k400/train_pkl/data_batch_100-118
+ ...
+
+或者
+
+ /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/kinetics/data_k400/val_pkl/data_batch_102-085
+ /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/kinetics/data_k400/val_pkl/data_batch_102-086
+ /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/kinetics/data_k400/val_pkl/data_batch_102-090
+ ...
diff --git a/tsm/dataset/kinetics/generate_label.py b/tsm/dataset/kinetics/generate_label.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7608e86244c305bc31aa341d34320b71034c2e2
--- /dev/null
+++ b/tsm/dataset/kinetics/generate_label.py
@@ -0,0 +1,50 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+
+# Usage:
+#     python generate_label.py kinetics-400_train.csv kinetics400_label.txt
+#
+# sys.argv[1]: the kinetics-400_train.csv file, which must be downloaded first
+# sys.argv[2]: the label file to write, e.g. kinetics400_label.txt
+#
+# Each output line is "<label> <index>", with indices assigned in sorted
+# label order.
+
+num_classes = 400
+
+fname = sys.argv[1]
+outname = sys.argv[2]
+
+# Collect the distinct labels from the first CSV column, skipping the header
+# row. A set keeps the duplicate check cheap for every row.
+labels = set()
+with open(fname) as csv_file:
+    next(csv_file, None)  # header row
+    for line in csv_file:
+        line = line.strip()
+        if not line:
+            # Tolerate blank lines (e.g. a trailing empty line).
+            continue
+        labels.add(line.split(',')[0].strip('"'))
+
+if len(labels) != num_classes:
+    raise ValueError("there should be {} labels in list, but got {}".format(
+        num_classes, len(labels)))
+
+# Sort so that the label -> index mapping is deterministic.
+label_list = sorted(labels)
+with open(outname, 'w') as outf:
+    for index, label in enumerate(label_list):
+        outf.write('{} {}\n'.format(label, index))
diff --git a/tsm/dataset/kinetics/video2pkl.py b/tsm/dataset/kinetics/video2pkl.py
new file mode 100644
index 0000000000000000000000000000000000000000..78d1b09b7bf6efb7f96535fa66bee2762bbccc5d
--- /dev/null
+++ b/tsm/dataset/kinetics/video2pkl.py
@@ -0,0 +1,106 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+import os
+import sys
+import glob
+import shutil
+import subprocess
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+from multiprocessing import Pool
+
+# example command line:
+#     python video2pkl.py kinetics-400_train.csv $Source_dir $Target_dir 8
+#
+# kinetics-400_train.csv is the training set file of K400 official release
+# each line contains label,youtube_id,time_start,time_end,split,is_cc
+# $Source_dir holds the downloaded mp4 files, one sub-directory per category
+# $Target_dir receives one .pkl file per video
+# the last argument is the number of worker processes
+#
+# kinetics400_label.txt (written by generate_label.py) is read from the
+# current working directory.
+
+if len(sys.argv) != 5:
+    sys.exit('usage: python video2pkl.py <csv_file> <source_dir> '
+             '<target_dir> <num_processes>')
+
+source_dir = sys.argv[2]
+target_dir = sys.argv[3]
+num_threads = int(sys.argv[4])
+
+# Skip the CSV header row and split every remaining row into its columns.
+with open(sys.argv[1]) as f:
+    all_video_entries = [x.strip().split(',') for x in f.readlines()][1:]
+
+# Each line of the label file is "<category> <index>"; the category itself may
+# contain spaces, so only the last token is the index.
+category_label_map = {}
+with open('kinetics400_label.txt') as f:
+    for line in f:
+        ens = line.strip().split(' ')
+        category_label_map[" ".join(ens[0:-1])] = int(ens[-1])
+
+
+def generate_pkl(entry):
+    """Extract the frames of one video and pickle them into target_dir.
+
+    entry is one row of the Kinetics CSV split on ',':
+    [label, youtube_id, time_start, time_end, ...]. The video is expected at
+    <source_dir>/<label>/<youtube_id>_<start>_<end>.mp4. The pickle holds the
+    tuple (vid, label_index, [jpeg_bytes, ...]) written with protocol 2.
+    Nothing is written when the mp4 file does not exist.
+    """
+    category = entry[0].strip('"')
+    clip_name = entry[1] + "_%06d" % int(entry[2]) + "_%06d" % int(entry[3])
+    video_path = os.path.join(source_dir, category, './', clip_name + ".mp4")
+    label = category_label_map[category]
+
+    # Temporary frame directory, created in the current working directory.
+    vid = './' + clip_name
+    if not os.path.exists(video_path):
+        print("File not exists {}".format(video_path))
+        return
+
+    if not os.path.exists(vid):
+        os.makedirs(vid)
+    # Pass the arguments as a list (no shell) so that paths containing
+    # spaces or shell metacharacters reach ffmpeg intact.
+    subprocess.call(
+        ['ffmpeg', '-i', video_path, '-q', '0', vid + '/%06d.jpg'])
+
+    # NOTE(review): if ffmpeg fails, an entry with an empty frame list is
+    # still written below -- confirm whether such videos should be skipped.
+    ims = []
+    for img in sorted(glob.glob(vid + '/*.jpg')):
+        with open(img, 'rb') as f:
+            ims.append(f.read())
+
+    output_pkl = os.path.join(target_dir, vid + ".pkl")
+    with open(output_pkl, 'wb') as f:
+        pickle.dump((vid, label, ims), f, protocol=2)
+
+    shutil.rmtree(vid, ignore_errors=True)
+
+
+# The guard keeps worker processes from starting pools of their own when the
+# module is re-imported under the "spawn" start method.
+if __name__ == '__main__':
+    pool = Pool(processes=num_threads)
+    pool.map(generate_pkl, all_video_entries)
+    pool.close()
+    pool.join()
diff --git a/tsm/images/temporal_shift.png b/tsm/images/temporal_shift.png
new file mode 100644
index 0000000000000000000000000000000000000000..7679c4459d2b0ee37134b99fe1e8177b1a69f8b0
Binary files /dev/null and b/tsm/images/temporal_shift.png differ