diff --git a/doc/imgs/w2v_train.png b/doc/imgs/w2v_train.png new file mode 100644 index 0000000000000000000000000000000000000000..b32aa14327003e138ec7ccbf035866c4bf73edf7 Binary files /dev/null and b/doc/imgs/w2v_train.png differ diff --git a/models/recall/word2vec/README.md b/models/recall/word2vec/README.md index b41c10c0102195dc50cbdd55eba85a1e0ee65644..241f236f932eed787bad2d617874531d8a6d563b 100644 --- a/models/recall/word2vec/README.md +++ b/models/recall/word2vec/README.md @@ -38,11 +38,15 @@ - [FAQ](#FAQ) ## 模型简介 -本例实现了skip-gram模式的word2vector模型,如下图中的skip-gram部分: -![](https://ai-studio-static-online.cdn.bcebos.com/bf0217bcb42e455284290a100670072989d432939b7e43e38b78eea6b60732c0) +本例实现了skip-gram模式的word2vector模型,如下图所示: +

+ +

以每一个词为中心词X,然后在窗口内和临近的词Y组成样本对(X,Y)用于网络训练。在实际训练过程中还会根据自定义的负采样率生成负样本来加强训练的效果。 具体的训练思路如下: -![](https://ai-studio-static-online.cdn.bcebos.com/ad45d6dfff2f4fa69c639a6b5d2bbe46c79ac67658464aa0a069a04eb2800cb6) +

+ +

推荐用户参考[ IPython Notebook demo](https://aistudio.baidu.com/aistudio/projectDetail/124377)教程获取更详细的信息。 diff --git a/models/recall/word2vec/config.yaml b/models/recall/word2vec/config.yaml index 34a25e59ecfa4ccd292a3b6e358c83ac827ed59f..3e8347a94748560e85734a4c5a68d4b529c29ba4 100755 --- a/models/recall/word2vec/config.yaml +++ b/models/recall/word2vec/config.yaml @@ -42,38 +42,40 @@ hyper_parameters: window_size: 5 # select runner by name -mode: train_runner +mode: [single_cpu_train, single_cpu_infer] # config of each runner. # runner is a kind of paddle training class, which wraps the train/infer process. runner: -- name: train_runner +- name: single_cpu_train class: train # num of epochs - epochs: 2 + epochs: 5 # device to run training or infer device: cpu save_checkpoint_interval: 1 # save model interval of epochs save_inference_interval: 1 # save inference - save_checkpoint_path: "increment" # save checkpoint path - save_inference_path: "inference" # save inference path + save_checkpoint_path: "increment_w2v" # save checkpoint path + save_inference_path: "inference_w2v" # save inference path save_inference_feed_varnames: [] # feed vars of save inference save_inference_fetch_varnames: [] # fetch vars of save inference init_model_path: "" # load model path print_interval: 1 -- name: infer_runner + phases: [phase1] +- name: single_cpu_infer class: infer # device to run training or infer device: cpu - init_model_path: "increment/0" # load model path + init_model_path: "increment_w2v" # load model path print_interval: 1 + phases: [phase2] # runner will run all the phase in each epoch phase: - name: phase1 model: "{workspace}/model.py" # user-defined model dataset_name: dataset_train # select dataset by name + thread_num: 5 +- name: phase2 + model: "{workspace}/model.py" # user-defined model + dataset_name: dataset_infer # select dataset by name thread_num: 1 -# - name: phase2 -# model: "{workspace}/model.py" # user-defined model -# dataset_name: dataset_infer # select 
dataset by name -# thread_num: 1