diff --git a/README.md b/README.md index f1410590dcff1efcb233299b74d3cd85aab1ea60..c4a78d14457309d5856ae61499f32f5cdb10b911 100644 --- a/README.md +++ b/README.md @@ -86,17 +86,17 @@ python main.py --task_type evaluate ## 测试结果 -受限于模型推理速度,目前只测试了pass@1指标。 +受限于模型推理速度,目前只测试了pass@1指标。括号内为官方给出的准确率。 | | python | java | cpp | js | go | |----------------------|--------|--------|--------|--------|--------| -| chatgpt | 64.02% | 42.68% | 26.22% | 47.00% | 31.70% | +| chatgpt | 64.02%(67.0%) | 42.68% | 26.22% | 47.00% | 31.70% | | bbt-7B | 0.61% | 1.83% | 1.22% | 1.83% | 0.00% | | bbt-13B | 2.49% | 0.00% | 1.90% | 1.83% | 0.61% | | chatglm2-6B | 7.93% | 5.45% | 0.61% | 6.70% | 1.83% | -| codegeex2-6B | 29.90% | 27.43% | 6.70% | 24.40% | 17.68% | -| llama2-7B | 5.49% | 8.54% | 1.22% | 3.66% | 6.10% | -| baichuan-7B | 7.93% | 1.83% | 0.00% | 6.71% | 6.71% | +| codegeex2-6B | 29.90%(35.9%) | 27.43% | 6.70% | 24.40% | 17.68% | +| llama2-7B | 5.49%(12.8%) | 8.54% | 1.22% | 3.66% | 6.10% | +| baichuan-7B | 7.93%(9.20%) | 1.83% | 0.00% | 6.71% | 6.71% | | chatgpt+codegeex2-6B | 64.02% | 43.90% | 30.49% | 50.66% | 44.51% | | iflycode | 46.95% | 29.87% | 32.91% | 43.29% | 21.34% |