From 085981bf1caf5d7d0b26d05f7c7e9487e1b35190 Mon Sep 17 00:00:00 2001
From: Conglong Li
Date: Mon, 24 Apr 2023 15:58:02 -0700
Subject: [PATCH] add deepspeed chat blog links, add tags (#3369)

---
 docs/_posts/2020-02-13-release.md | 2 +-
 docs/_posts/2020-02-13-turing-nlg.md | 2 +-
 docs/_posts/2020-03-17-reduce-scatter.md | 1 +
 docs/_posts/2020-05-19-bert-record.md | 3 +--
 docs/_posts/2020-05-19-press-release.md | 2 +-
 docs/_posts/2020-05-19-zero-stage2.md | 2 +-
 docs/_posts/2020-05-28-fastest-bert-training.md | 2 +-
 docs/_posts/2020-07-24-deepspeed-webinar.md | 2 +-
 docs/_posts/2020-08-07-webinar-on-demand.md | 2 +-
 docs/_posts/2020-09-08-sparse-attention-news.md | 2 +-
 docs/_posts/2020-09-09-ZeRO-Offload.md | 2 +-
 docs/_posts/2020-09-09-onebit-adam-blog-post.md | 2 +-
 docs/_posts/2020-09-09-onebit-adam-news.md | 2 +-
 docs/_posts/2020-09-09-pipeline-parallelism.md | 2 +-
 docs/_posts/2020-09-09-sparse-attention.md | 2 +-
 docs/_posts/2020-10-28-progressive-layer-dropping-news.md | 2 +-
 docs/_posts/2021-03-08-zero3-offload.md | 2 +-
 docs/_posts/2021-05-05-MoQ.md | 2 +-
 docs/_posts/2021-05-05-inference-kernel-optimization.md | 2 +-
 docs/_posts/2021-05-14-inference-release.md | 2 +-
 docs/_posts/2021-08-18-deepspeed-moe.md | 2 +-
 docs/_posts/2021-11-15-autotuning.md | 2 +-
 docs/_posts/2021-12-09-deepspeed-moe-nlg.md | 2 +-
 docs/_posts/2022-01-19-moe-inference.md | 2 +-
 docs/_posts/2022-03-21-amd-support.md | 2 +-
 docs/_posts/2022-07-26-deepspeed-azure.md | 2 +-
 docs/_posts/2022-09-10-zero-inference.md | 2 +-
 docs/_posts/2022-10-11-mii.md | 2 +-
 docs/_posts/2022-12-12-data-efficiency.md | 2 +-
 docs/_posts/2023-03-31-multi-modal.md | 2 +-
 docs/_posts/2023-04-24-deepspeed-chat-chinese.md | 7 +++++++
 docs/_posts/2023-04-24-deepspeed-chat-japanese.md | 7 +++++++
 docs/_posts/2023-04-24-deepspeed-chat.md | 7 +++++++
 33 files changed, 51 insertions(+), 30 deletions(-)
 create mode 100644 docs/_posts/2023-04-24-deepspeed-chat-chinese.md
 create mode 100644 docs/_posts/2023-04-24-deepspeed-chat-japanese.md
 create mode 100644 docs/_posts/2023-04-24-deepspeed-chat.md

diff --git a/docs/_posts/2020-02-13-release.md b/docs/_posts/2020-02-13-release.md
index 792ff7bf..a97a4ba9 100644
--- a/docs/_posts/2020-02-13-release.md
+++ b/docs/_posts/2020-02-13-release.md
@@ -3,5 +3,5 @@ title: "ZeRO & DeepSpeed: New system optimizations enable training models with o
 date: 2020-02-13
 link: https://www.microsoft.com/en-us/research/blog/ZeRO-deepspeed-new-system-optimizations-enable-training-models-with-over-100-billion-parameters/
 excerpt: ""
-tags: training ZeRO
+tags: training ZeRO English
 ---
diff --git a/docs/_posts/2020-02-13-turing-nlg.md b/docs/_posts/2020-02-13-turing-nlg.md
index 0da59aa8..240f6d78 100644
--- a/docs/_posts/2020-02-13-turing-nlg.md
+++ b/docs/_posts/2020-02-13-turing-nlg.md
@@ -3,5 +3,5 @@ title: "Turing-NLG: A 17-billion-parameter language model by Microsoft"
 date: 2020-02-13
 link: https://www.microsoft.com/en-us/research/blog/turing-nlg-a-17-billion-parameter-language-model-by-microsoft/
 excerpt: "DeepSpeed was used to train the world's largest language model."
-tags: training
+tags: training English
 ---
diff --git a/docs/_posts/2020-03-17-reduce-scatter.md b/docs/_posts/2020-03-17-reduce-scatter.md
index 1753a22e..329409df 100644
--- a/docs/_posts/2020-03-17-reduce-scatter.md
+++ b/docs/_posts/2020-03-17-reduce-scatter.md
@@ -1,6 +1,7 @@
 ---
 title: "ZeRO stage 1 with reduced communication"
 sneak_preview: true
+tags: training ZeRO English
 excerpt: "Partition-aware ZeRO with up to 2x reduction in communication time!"
 ---

diff --git a/docs/_posts/2020-05-19-bert-record.md b/docs/_posts/2020-05-19-bert-record.md
index 4c2a93e5..b47ad0b0 100644
--- a/docs/_posts/2020-05-19-bert-record.md
+++ b/docs/_posts/2020-05-19-bert-record.md
@@ -1,10 +1,9 @@
 ---
 title: "The Fastest and Most Efficient BERT Training through Optimized Transformer Kernels"
 excerpt: ""
-tags: training
 date: 2020-05-19 00:00:00
 toc: false
-tags: training
+tags: training English
 ---

 We introduce new technology to accelerate single GPU performance via kernel
diff --git a/docs/_posts/2020-05-19-press-release.md b/docs/_posts/2020-05-19-press-release.md
index 9022a7db..a6611b11 100644
--- a/docs/_posts/2020-05-19-press-release.md
+++ b/docs/_posts/2020-05-19-press-release.md
@@ -2,6 +2,6 @@
 title: "ZeRO-2 & DeepSpeed: Shattering Barriers of Deep Learning Speed & Scale"
 excerpt: ""
 link: https://www.microsoft.com/en-us/research/blog/ZeRO-2-deepspeed-shattering-barriers-of-deep-learning-speed-scale/
-tags: training ZeRO
+tags: training ZeRO English
 date: 2020-05-19 02:00:00
 ---
diff --git a/docs/_posts/2020-05-19-zero-stage2.md b/docs/_posts/2020-05-19-zero-stage2.md
index 4f35012d..44f6cc19 100644
--- a/docs/_posts/2020-05-19-zero-stage2.md
+++ b/docs/_posts/2020-05-19-zero-stage2.md
@@ -1,7 +1,7 @@
 ---
 title: "An Order-of-Magnitude Larger and Faster Training with ZeRO-2"
 excerpt: ""
-tags: training ZeRO
+tags: training ZeRO English
 date: 2020-05-19 01:00:00
 toc: false
 ---
diff --git a/docs/_posts/2020-05-28-fastest-bert-training.md b/docs/_posts/2020-05-28-fastest-bert-training.md
index 99d132c1..62be6c1b 100644
--- a/docs/_posts/2020-05-28-fastest-bert-training.md
+++ b/docs/_posts/2020-05-28-fastest-bert-training.md
@@ -1,7 +1,7 @@
 ---
 title: "Microsoft DeepSpeed achieves the fastest BERT training time"
 excerpt: ""
-tags: training
+tags: training English
 date: 2020-05-28 00:00:00
 ---

diff --git a/docs/_posts/2020-07-24-deepspeed-webinar.md b/docs/_posts/2020-07-24-deepspeed-webinar.md
index be4ee777..a5b4aa15 100644
--- a/docs/_posts/2020-07-24-deepspeed-webinar.md
+++ b/docs/_posts/2020-07-24-deepspeed-webinar.md
@@ -1,7 +1,7 @@
 ---
 title: "DeepSpeed Microsoft Research Webinar on August 6th, 2020"
 excerpt: ""
-tags: presentations
+tags: presentations English
 link: https://note.microsoft.com/MSR-Webinar-DeepSpeed-Registration-On-Demand.html
 image: /assets/images/webinar-aug2020.png
 date: 2020-07-24 00:00:00
diff --git a/docs/_posts/2020-08-07-webinar-on-demand.md b/docs/_posts/2020-08-07-webinar-on-demand.md
index 983e17ec..8b258e88 100644
--- a/docs/_posts/2020-08-07-webinar-on-demand.md
+++ b/docs/_posts/2020-08-07-webinar-on-demand.md
@@ -1,7 +1,7 @@
 ---
 title: "DeepSpeed Microsoft Research Webinar is now on-demand"
 excerpt: ""
-tags: presentations
+tags: presentations English
 link: https://note.microsoft.com/MSR-Webinar-DeepSpeed-Registration-On-Demand.html
 date: 2020-08-07 00:00:00
 ---
diff --git a/docs/_posts/2020-09-08-sparse-attention-news.md b/docs/_posts/2020-09-08-sparse-attention-news.md
index c5f2a104..79de33a8 100644
--- a/docs/_posts/2020-09-08-sparse-attention-news.md
+++ b/docs/_posts/2020-09-08-sparse-attention-news.md
@@ -1,7 +1,7 @@
 ---
 title: "Powering 10x longer sequences and 6x faster execution through DeepSpeed Sparse Attention"
 excerpt: ""
-tags: training
+tags: training English
 date: 2020-09-09 00:00:00
 toc: false
 ---
diff --git a/docs/_posts/2020-09-09-ZeRO-Offload.md b/docs/_posts/2020-09-09-ZeRO-Offload.md
index c270cead..8e2e8423 100755
--- a/docs/_posts/2020-09-09-ZeRO-Offload.md
+++ b/docs/_posts/2020-09-09-ZeRO-Offload.md
@@ -2,7 +2,7 @@
 title: "10x bigger model training on a single GPU with ZeRO-Offload"
 excerpt: ""
 date: 2020-09-09 00:00:00
-tags: training ZeRO
+tags: training ZeRO English
 toc: false
 ---

diff --git a/docs/_posts/2020-09-09-onebit-adam-blog-post.md b/docs/_posts/2020-09-09-onebit-adam-blog-post.md
index 413a3d0c..8152190f 100644
--- a/docs/_posts/2020-09-09-onebit-adam-blog-post.md
+++ b/docs/_posts/2020-09-09-onebit-adam-blog-post.md
@@ -2,7 +2,7 @@
 title: "DeepSpeed with 1-bit Adam: 5x less communication and 3.4x faster training"
 excerpt: ""
 date: 2020-09-09 00:00:00
-tags: training
+tags: training English
 ---

 ## 1. Introduction
diff --git a/docs/_posts/2020-09-09-onebit-adam-news.md b/docs/_posts/2020-09-09-onebit-adam-news.md
index 4ec3c3c8..d0adcb09 100644
--- a/docs/_posts/2020-09-09-onebit-adam-news.md
+++ b/docs/_posts/2020-09-09-onebit-adam-news.md
@@ -2,7 +2,7 @@
 title: "Up to 5x less communication and 3.4x faster training through 1-bit Adam"
 excerpt: ""
 date: 2020-09-09 00:00:00
-tags: training
+tags: training English
 toc: false
 ---

diff --git a/docs/_posts/2020-09-09-pipeline-parallelism.md b/docs/_posts/2020-09-09-pipeline-parallelism.md
index 4f2e53ed..48343ebd 100644
--- a/docs/_posts/2020-09-09-pipeline-parallelism.md
+++ b/docs/_posts/2020-09-09-pipeline-parallelism.md
@@ -2,7 +2,7 @@
 title: "Training a Trillion Parameters with Pipeline Parallelism"
 excerpt: ""
 date: 2020-09-09 00:00:00
-tags: training
+tags: training English
 ---

 DeepSpeed includes new support for pipeline parallelism! DeepSpeed's training
diff --git a/docs/_posts/2020-09-09-sparse-attention.md b/docs/_posts/2020-09-09-sparse-attention.md
index aa0fa0bb..32f4cf2b 100644
--- a/docs/_posts/2020-09-09-sparse-attention.md
+++ b/docs/_posts/2020-09-09-sparse-attention.md
@@ -2,7 +2,7 @@
 title: "DeepSpeed Sparse Attention"
 excerpt: ""
 date: 2020-09-09 01:00:00
-tags: training inference
+tags: training inference English
 ---

 Attention-based deep learning models such as the transformers are highly effective in capturing relationship between tokens in an input sequence, even across long distances. As a result, they are used with text, image, and sound-based inputs, where the sequence length can be in thousands of tokens. However, despite the effectiveness of attention modules to capture long term dependencies, in practice, their application to long sequence input is limited by compute and memory requirements of the attention computation that grow quadratically, `O(n^2)`, with the sequence length `n`.
diff --git a/docs/_posts/2020-10-28-progressive-layer-dropping-news.md b/docs/_posts/2020-10-28-progressive-layer-dropping-news.md
index 9664e4de..ee518f53 100755
--- a/docs/_posts/2020-10-28-progressive-layer-dropping-news.md
+++ b/docs/_posts/2020-10-28-progressive-layer-dropping-news.md
@@ -2,7 +2,7 @@
 title: "Progressive Layer Dropping"
 excerpt: ""
 date: 2020-10-29 00:00:00
-tags: training
+tags: training English
 toc: false
 ---

diff --git a/docs/_posts/2021-03-08-zero3-offload.md b/docs/_posts/2021-03-08-zero3-offload.md
index 9008ebc9..2bca2bdd 100644
--- a/docs/_posts/2021-03-08-zero3-offload.md
+++ b/docs/_posts/2021-03-08-zero3-offload.md
@@ -2,7 +2,7 @@
 title: "DeepSpeed ZeRO-3 Offload"
 excerpt: ""
 date: 2021-03-08 00:00:00
-tags: training ZeRO
+tags: training ZeRO English
 ---

 Today we are announcing the release of ZeRO-3 Offload, a highly efficient and easy to use implementation of ZeRO Stage 3 and ZeRO Offload combined, geared towards our continued goal of democratizing AI by making efficient large-scale DL training available to everyone. The key benefits of ZeRO-3 Offload are:
diff --git a/docs/_posts/2021-05-05-MoQ.md b/docs/_posts/2021-05-05-MoQ.md
index e6f7872a..5dd5006e 100644
--- a/docs/_posts/2021-05-05-MoQ.md
+++ b/docs/_posts/2021-05-05-MoQ.md
@@ -2,7 +2,7 @@
 title: "Mixture-of-Quantization: A novel quantization approach for reducing model size with minimal accuracy impact"
 excerpt: ""
 date: 2021-05-05 00:00:00
-tags: inference
+tags: inference English
 ---

 ## A unified suite for quantization-aware training and inference
diff --git a/docs/_posts/2021-05-05-inference-kernel-optimization.md b/docs/_posts/2021-05-05-inference-kernel-optimization.md
index 63e3ac66..991295de 100644
--- a/docs/_posts/2021-05-05-inference-kernel-optimization.md
+++ b/docs/_posts/2021-05-05-inference-kernel-optimization.md
@@ -2,7 +2,7 @@
 title: "DeepSpeed Inference: Multi-GPU inference with customized inference kernels and quantization support"
 excerpt: ""
 date: 2021-03-16 00:00:00
-tags: inference
+tags: inference English
 ---

 While DeepSpeed supports training advanced large-scale models, using these trained models in the desired application scenarios is still challenging due to three major limitations in existing inference solutions: 1) lack of support for multi-GPU inference to fit large models and meet latency requirements, 2) limited GPU kernel performance when running inference with small batch sizes, and 3) difficulties in exploiting quantization, which includes both quantizing the model to reduce the model size and latency as well as supporting high-performance inference of quantized models without specialized hardware.
diff --git a/docs/_posts/2021-05-14-inference-release.md b/docs/_posts/2021-05-14-inference-release.md
index fd5cca2e..14c300d0 100644
--- a/docs/_posts/2021-05-14-inference-release.md
+++ b/docs/_posts/2021-05-14-inference-release.md
@@ -3,5 +3,5 @@ title: "DeepSpeed: Accelerating large-scale model inference and training via sys
 date: 2021-05-14
 link: https://www.microsoft.com/en-us/research/blog/deepspeed-accelerating-large-scale-model-inference-and-training-via-system-optimizations-and-compression/
 excerpt: ""
-tags: inference
+tags: inference English
 ---
diff --git a/docs/_posts/2021-08-18-deepspeed-moe.md b/docs/_posts/2021-08-18-deepspeed-moe.md
index 5bd9667f..665c0975 100644
--- a/docs/_posts/2021-08-18-deepspeed-moe.md
+++ b/docs/_posts/2021-08-18-deepspeed-moe.md
@@ -3,5 +3,5 @@ title: "DeepSpeed powers 8x larger MoE model training with high performance"
 excerpt: ""
 link: https://www.microsoft.com/en-us/research/blog/deepspeed-powers-8x-larger-moe-model-training-with-high-performance/
 date: 2021-08-18 00:00:00
-tags: training
+tags: training English
 ---
diff --git a/docs/_posts/2021-11-15-autotuning.md b/docs/_posts/2021-11-15-autotuning.md
index ee48d44c..71acf544 100644
--- a/docs/_posts/2021-11-15-autotuning.md
+++ b/docs/_posts/2021-11-15-autotuning.md
@@ -2,7 +2,7 @@
 title: "Autotuning: Automatically discover the optimal DeepSpeed configuration that delivers good training speed"
 excerpt: ""
 date: 2021-11-16 10:00:00
-tags: training
+tags: training English
 toc: false
 ---

diff --git a/docs/_posts/2021-12-09-deepspeed-moe-nlg.md b/docs/_posts/2021-12-09-deepspeed-moe-nlg.md
index 6402202c..40e549d8 100644
--- a/docs/_posts/2021-12-09-deepspeed-moe-nlg.md
+++ b/docs/_posts/2021-12-09-deepspeed-moe-nlg.md
@@ -2,7 +2,7 @@
 title: "DeepSpeed-MoE for NLG: Reducing the training cost of language models by 5 times"
 excerpt: ""
 date: 2021-12-09 22:00:00
-tags: training
+tags: training English
 ---

 Autoregressive transformer-based natural language generation (referred to as
diff --git a/docs/_posts/2022-01-19-moe-inference.md b/docs/_posts/2022-01-19-moe-inference.md
index f2ac1c6d..66ff5b51 100644
--- a/docs/_posts/2022-01-19-moe-inference.md
+++ b/docs/_posts/2022-01-19-moe-inference.md
@@ -3,5 +3,5 @@ title: "DeepSpeed: Advancing MoE inference and training to power next-generation
 excerpt: ""
 link: https://www.microsoft.com/en-us/research/blog/deepspeed-advancing-moe-inference-and-training-to-power-next-generation-ai-scale/
 date: 2022-01-19 00:00:00
-tags: inference
+tags: inference English
 ---
diff --git a/docs/_posts/2022-03-21-amd-support.md b/docs/_posts/2022-03-21-amd-support.md
index ba8917bc..01b2a52c 100644
--- a/docs/_posts/2022-03-21-amd-support.md
+++ b/docs/_posts/2022-03-21-amd-support.md
@@ -3,5 +3,5 @@ title: "Supporting efficient large model training on AMD Instinct GPUs with Deep
 excerpt: ""
 link: https://cloudblogs.microsoft.com/opensource/2022/03/21/supporting-efficient-large-model-training-on-amd-instinct-gpus-with-deepspeed/
 date: 2022-03-21 00:00:00
-tags: training ZeRO
+tags: training ZeRO English
 ---
diff --git a/docs/_posts/2022-07-26-deepspeed-azure.md b/docs/_posts/2022-07-26-deepspeed-azure.md
index 128cbf4a..51e30669 100644
--- a/docs/_posts/2022-07-26-deepspeed-azure.md
+++ b/docs/_posts/2022-07-26-deepspeed-azure.md
@@ -2,7 +2,7 @@
 title: "Azure empowers easy-to-use, high-performance, and hyperscale model training using DeepSpeed"
 excerpt: ""
 date: 2022-07-26 00:09:00
-tags: training azure
+tags: training azure English
 ---

 ## Introduction
diff --git a/docs/_posts/2022-09-10-zero-inference.md b/docs/_posts/2022-09-10-zero-inference.md
index dd718b9f..59a3e3bf 100644
--- a/docs/_posts/2022-09-10-zero-inference.md
+++ b/docs/_posts/2022-09-10-zero-inference.md
@@ -2,7 +2,7 @@
 title: "ZeRO-Inference: Democratizing massive model inference"
 excerpt: ""
 date: 2022-09-10 00:09:00
-tags: inference ZeRO
+tags: inference ZeRO English
 ---

 ## Introduction
diff --git a/docs/_posts/2022-10-11-mii.md b/docs/_posts/2022-10-11-mii.md
index 8a397317..e0b43f51 100644
--- a/docs/_posts/2022-10-11-mii.md
+++ b/docs/_posts/2022-10-11-mii.md
@@ -2,7 +2,7 @@
 title: "DeepSpeed-MII: instant speedup on 24,000+ open-source DL models with up to 40x cheaper inference"
 excerpt: ""
 date: 2022-10-11 00:09:00
-tags: inference
+tags: inference English
 ---

 [ ![Text Generation Models](/assets/images/mii/hero.png) ](/assets/images/mii/hero.png){: .align-center}
diff --git a/docs/_posts/2022-12-12-data-efficiency.md b/docs/_posts/2022-12-12-data-efficiency.md
index 3b6adb4d..52148707 100644
--- a/docs/_posts/2022-12-12-data-efficiency.md
+++ b/docs/_posts/2022-12-12-data-efficiency.md
@@ -2,7 +2,7 @@
 title: "DeepSpeed Data Efficiency: A composable library that makes better use of data, increases training efficiency, and improves model quality"
 excerpt: ""
 date: 2022-12-12 00:09:00
-tags: training
+tags: training English
 ---

 [ ![DeepSpeed Data Efficiency](/assets/images/data_efficiency/data_efficiecy_fig0.png) ](/assets/images/data_efficiency/data_efficiecy_fig0.png){: .align-center}
diff --git a/docs/_posts/2023-03-31-multi-modal.md b/docs/_posts/2023-03-31-multi-modal.md
index 045c9271..aaef9cfb 100644
--- a/docs/_posts/2023-03-31-multi-modal.md
+++ b/docs/_posts/2023-03-31-multi-modal.md
@@ -2,7 +2,7 @@
 title: "Scaling Large-Scale Generative Mixture-of-Expert Multimodal Model With VL-MoE "
 excerpt: ""
 date: 2023-03-31 00:09:00
-tags: training
+tags: training English
 ---

 The field of Artificial Intelligence-Generated Content (AIGC) is rapidly growing, with the goal of making content creation more efficient and accessible. One of the most exciting areas of AIGC is the development of large-scale multi-modal models like [Flamingo](https://arxiv.org/abs/2204.14198), [BLIP](https://arxiv.org/abs/2301.12597), and [GPT4](https://arxiv.org/abs/2303.08774), which can accept inputs from multiple resources, e.g., image, text, audio, etc., and generate a variety of formats as outputs. For example, image creation can be made through stable diffusion and DALLE using the prompt text, and the new feature in the coming Office can create slides with texts, images, animations, etc., by leveraging the power of the new Microsoft Office Copilot.
diff --git a/docs/_posts/2023-04-24-deepspeed-chat-chinese.md b/docs/_posts/2023-04-24-deepspeed-chat-chinese.md
new file mode 100644
index 00000000..2fd96232
--- /dev/null
+++ b/docs/_posts/2023-04-24-deepspeed-chat-chinese.md
@@ -0,0 +1,7 @@
+---
+title: "DeepSpeed Chat: 一键式RLHF训练,让你的类ChatGPT千亿大模型提速省钱15倍"
+excerpt: ""
+link: https://github.com/microsoft/DeepSpeed/blob/master/blogs/deepspeed-chat/chinese/README.md
+date: 2023-04-24 00:00:00
+tags: training ZeRO RLHF Chinese
+---
diff --git a/docs/_posts/2023-04-24-deepspeed-chat-japanese.md b/docs/_posts/2023-04-24-deepspeed-chat-japanese.md
new file mode 100644
index 00000000..63200846
--- /dev/null
+++ b/docs/_posts/2023-04-24-deepspeed-chat-japanese.md
@@ -0,0 +1,7 @@
+---
+title: "DeepSpeed Chat: ChatGPTライクなモデルを簡単・高速・低コストに、あらゆるスケールで学習"
+excerpt: ""
+link: https://github.com/microsoft/DeepSpeed/blob/master/blogs/deepspeed-chat/japanese/README.md
+date: 2023-04-24 00:00:00
+tags: training ZeRO RLHF Japanese
+---
diff --git a/docs/_posts/2023-04-24-deepspeed-chat.md b/docs/_posts/2023-04-24-deepspeed-chat.md
new file mode 100644
index 00000000..70b627b9
--- /dev/null
+++ b/docs/_posts/2023-04-24-deepspeed-chat.md
@@ -0,0 +1,7 @@
+---
+title: "DeepSpeed Chat: Easy, Fast and Affordable RLHF Training of ChatGPT-like Models at All Scales"
+excerpt: ""
+link: https://github.com/microsoft/DeepSpeed/blob/master/blogs/deepspeed-chat/README.md
+date: 2023-04-24 00:00:00
+tags: training ZeRO RLHF English
+---
--
GitLab