提交 ab7aa43d 编写于 作者: H Hui Zhang

add pypinyin tools

上级 e94da615
[bumpversion]
commit = True
tag = True
current_version = 0.41.0
[bumpversion:file:pypinyin/__init__.py]
# Python CircleCI 2.0 configuration file
#
# Check https://circleci.com/docs/2.0/language-python/ for more details
#
version: 2
jobs:
python3.8: &DEFAULT
docker:
- image: circleci/python:3.8
environment:
TOX_ENV: py38
RUN_CHECK: 1
working_directory: ~/repo
steps:
- checkout
# Download and cache dependencies
# - restore_cache:
# keys:
# - v1-dependencies-{{ .Environment.TOX_ENV }}-{{ checksum "requirements_dev.txt" }}
- run:
name: install dependencies
command: |
# pip install -U pip virtualenv --user
if ! which virtualenv; then
pip install 'virtualenv<=20.0.21' --user
fi
export PATH="~/.local/bin:$PATH"
virtualenv venv
. venv/bin/activate
pip install codecov
pip install tox
if [[ $RUN_CHECK == 1 ]]; then
pip install -U -r requirements_dev.txt
fi
if [[ $(python -c "import sys; print(sys.stdin.encoding)" |grep None) ]]; then
export PYTHONIOENCODING=utf-8
fi
#
# - save_cache:
# paths:
# - ./venv
# key: v1-dependencies-{{ .Environment.TOX_ENV }}-{{ checksum "requirements_dev.txt" }}
- run:
name: run tests
command: |
. venv/bin/activate
if [[ $RUN_CHECK == 1 ]]; then
pre-commit run --all-files
mypy pypinyin
fi
tox -e $TOX_ENV
python setup.py install
pypinyin hello
echo hello | pypinyin
pypinyin < setup.cfg
codecov
- store_artifacts:
path: test-reports
destination: test-reports
python3.9:
<<: *DEFAULT
docker:
- image: circleci/python:3.9
environment:
TOX_ENV: py39
python3.7:
<<: *DEFAULT
docker:
- image: circleci/python:3.7
environment:
TOX_ENV: py37
python3.6:
<<: *DEFAULT
docker:
- image: circleci/python:3.6
environment:
TOX_ENV: py36
python3.5:
<<: *DEFAULT
docker:
- image: circleci/python:3.5
environment:
TOX_ENV: py35
python3.4:
<<: *DEFAULT
docker:
- image: circleci/python:3.4
environment:
TOX_ENV: py34
python2.7:
<<: *DEFAULT
docker:
- image: circleci/python:2.7
environment:
TOX_ENV: py27
# python2.6:
# <<: *DEFAULT
# docker:
# - image: python:2.6
# environment:
# TOX_ENV: py33
pypy2:
<<: *DEFAULT
docker:
- image: pypy:2
environment:
TOX_ENV: pypy
pypy3:
<<: *DEFAULT
docker:
- image: pypy:3
environment:
TOX_ENV: pypy3
workflows:
version: 2
testing:
jobs:
- python3.9
- python3.8
- python3.7
- python3.6
- python3.5
- python3.4
- python2.7
- pypy2
- pypy3
[run]
branch = True
omit =
# pypinyin/runner.py
pypinyin/__main__.py
[report]
exclude_lines =
pragma: no cover
except NameError
except ImportError
pass
def main
if py3:
if __name__ == .__main__.:
# EditorConfig is awesome: http://EditorConfig.org
# top-most EditorConfig file
root = true
# Unix-style newlines with a newline ending every file
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
# Indentation
[*.{py,rst}]
indent_style = space
indent_size = 4
[Makefile]
indent_style = tab
indent_size = 4
[*.{ini,yml}]
indent_style = space
indent_size = 2
[*.md]
trim_trailing_whitespace = false
[flake8]
########## OPTIONS ##########
# Set the maximum length that any line (with some exceptions) may be.
max-line-length = 120
################### FILE PATTERNS ##########################
# Provide a comma-separated list of glob patterns to exclude from checks.
exclude =
# git folder
.git,
# python cache
__pycache__,
third_party/,
# Provide a comma-separated list of glob patterns to include for checks.
filename =
*.py
########## RULES ##########
# ERROR CODES
#
# E/W - PEP8 errors/warnings (pycodestyle)
# F - linting errors (pyflakes)
# C - McCabe complexity error (mccabe)
#
# W503 - line break before binary operator
# Specify a list of codes to ignore.
ignore =
W503
E252,E262,E127,E265,E126,E266,E241,E261,E128,E125
W291,W293,W605
E203,E305,E402,E501,E721,E741,F403,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,
# shebang has extra meaning in fbcode lints, so I think it's not worth trying
# to line this up with executable bit
EXE001,
# these ignores are from flake8-bugbear; please fix!
B007,B008,
# these ignores are from flake8-comprehensions; please fix!
C400,C401,C402,C403,C404,C405,C407,C411,C413,C414,C415
# Specify the list of error codes you wish Flake8 to report.
select =
E,
W,
F,
C
# Contributing
* 如果是关于单个汉字的拼音有误的问题,麻烦前往 [pinyin-data][pinyin-data] 进行反馈。
* 如果是关于词组的拼音有误的问题,麻烦前往 [phrase-pinyin-data][phrase-pinyin-data] 进行反馈。
* 有任何疑问或建议欢迎创建 [issue][issue] 或提交 [PR][pr]
* 项目代码开发方面的问题可以看看 [开发文档][开发文档]
Thanks for contributing! :heart:
[pinyin-data]: https://github.com/mozillazg/pinyin-data/issues
[phrase-pinyin-data]: https://github.com/mozillazg/phrase-pinyin-data
[issue]: https://github.com/mozillazg/python-pinyin/issues
[pr]: https://github.com/mozillazg/python-pinyin/pulls
[开发文档]: https://pypinyin.readthedocs.io/zh_CN/develop/develop.html
## 运行环境
* 操作系统(Linux/macOS/Windows):
* Python 版本:
* pypinyin 版本:
<!--
P.S. 可以通过 `python -V` 获取 Python 版本。
P.S. 可以通过 `pypinyin -V` 或者 `pip freeze |grep pypinyin` 或者 `pypinyin.__version__` 获取 pypinyin 版本信息。
-->
## 问题描述
## 问题复现步骤
<!--
感谢反馈!❤️
-->
## PR 描述
## 待办事项
* [ ] 符合代码规范
* [ ] 单元测试
* [ ] 文档
<!--
感谢你的贡献!❤️
P.S. 麻烦选择 `develop` 分支作为 PR 的目标分支,谢谢~
-->
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: CI
on: [push, pull_request]
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest]
# python-version: [3.7, 3.8]
python-version: [3.9]
tox-env: [py37, py38, py39]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install tox
- name: Test with tox
run: tox -e ${{ matrix.tox-env}}
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
name: "CodeQL"
on:
push:
branches: [master, develop]
pull_request:
# The branches below must be a subset of the branches above
branches: [master, develop]
schedule:
- cron: '0 2 * * 6'
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
# Override automatic language detection by changing the below list
# Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', 'python']
language: ['python']
# Learn more...
# https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection
steps:
- name: Checkout repository
uses: actions/checkout@v2
with:
# We must fetch at least the immediate parents so that if this is
# a pull request then we can checkout the head.
fetch-depth: 2
# If this run was triggered by a pull request event, then checkout
# the head of the pull request instead of the merge commit.
- run: git checkout HEAD^2
if: ${{ github.event_name == 'pull_request' }}
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v1
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
# queries: ./path/to/local/query, your-org/your-repo/queries@main
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v1
# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
# and modify them (or add more) to build your code if your project
# uses a compiled language
#- run: |
# make bootstrap
# make release
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v1
*.py[cod]
*.sw[op]
# C extensions
*.so
# Packages
*.egg
*.egg-info
dist
build
eggs
parts
bin
var
sdist
develop-eggs
.installed.cfg
lib
lib64
_build
# Installer logs
pip-log.txt
# Unit test / coverage reports
.coverage
.tox
nosetests.xml
htmlcov
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
tools/words.txt
*~
tools/phrases_dict.txt
venv
.cache/
2.7/
.python-version
venv2.7/
venvPyInstaller/
output.dat
vocab.bin
vocab.large.bin
.mypy_cache/
.pytest_cache/
/pypinyin/phrases_dict_large.py
[submodule "pinyin-data"]
path = pinyin-data
url = https://github.com/mozillazg/pinyin-data.git
[submodule "phrase-pinyin-data"]
path = phrase-pinyin-data
url = https://github.com/mozillazg/phrase-pinyin-data.git
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks.git
rev: v3.4.0
hooks:
- id: check-merge-conflict
- id: debug-statements
exclude: 'tools/|(pypinyin/(phrases_dict.py|pinyin_dict.py|phonetic_symbol.py))'
- id: double-quote-string-fixer
exclude: 'pypinyin/(phrases_dict.py|pinyin_dict.py|phonetic_symbol.py)'
- id: end-of-file-fixer
exclude: '.bumpversion.cfg'
- id: requirements-txt-fixer
- id: trailing-whitespace
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.4
hooks:
- id: flake8
exclude: 'tools|pypinyin/(phrases_dict.py|pinyin_dict.py|phonetic_symbol.py)|(docs/conf.py)'
# - repo: https://github.com/pre-commit/mirrors-mypy
# rev: 'v0.812'
# hooks:
# - id: mypy
# files: 'pypinyin/'
- repo: https://github.com/pre-commit/mirrors-yapf.git
sha: v0.16.0
hooks:
- id: yapf
files: \.py$
exclude: (?=phrase-pinyin-data|pinyin-data).*(\.py)$
[style]
based_on_style = pep8
column_limit = 80
{
"scanSettings": {
"baseBranches": []
},
"checkRunSettings": {
"vulnerableCheckRunConclusionLevel": "failure",
"displayMode": "diff"
},
"issueSettings": {
"minSeverityLevel": "LOW"
}
}
此差异已折叠。
# Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at mozillazg101@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
[homepage]: http://contributor-covenant.org
[version]: http://contributor-covenant.org/version/1/4/
The MIT License (MIT)
Copyright (c) 2016 mozillazg, 闲耘 <hotoo.cn@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
include README.rst LICENSE.txt CHANGELOG.rst
recursive-include pypinyin *.pyi py.typed
help:
@echo "test run test"
@echo "publish publish to PyPI"
@echo "publish_test publish to TestPyPI"
@echo "docs_html make html docs"
@echo "docs_serve serve docs"
@echo "gen_data gen pinyin data"
@echo "gen_pinyin_dict gen single hanzi pinyin dict"
@echo "gen_phrases_dict gen phrase hanzi pinyin dict"
@echo "lint run lint"
@echo "clean - remove all build, test, coverage and Python artifacts"
@echo "clean-build - remove build artifacts"
@echo "clean-pyc - remove Python file artifacts"
@echo "clean-test - remove test and coverage artifacts"
.PHONY: test
test: lint
@echo "run test"
make testonly
.PHONY: testonly
testonly:
py.test --random-order --cov pypinyin tests/ pypinyin/
.PHONY: publish
publish: clean
@echo "publish to pypi"
python setup.py sdist
python setup.py bdist_wheel
twine upload dist/*
.PHONY: publish_test
publish_test: clean
@echo "publish to test pypi"
python setup.py sdist
python setup.py bdist_wheel
twine upload --repository test dist/*
.PHONY: docs_html
docs_html:
cd docs && make html
.PHONY: docs_serve
docs_serve: docs_html
cd docs/_build/html && python -m http.server
.PHONY: gen_data
gen_data: gen_pinyin_dict gen_phrases_dict
.PHONY: gen_pinyin_dict
gen_pinyin_dict:
python gen_pinyin_dict.py pinyin-data/pinyin.txt pypinyin/pinyin_dict.py
.PHONY: gen_phrases_dict
gen_phrases_dict:
python gen_phrases_dict.py phrase-pinyin-data/pinyin.txt pypinyin/phrases_dict_large.py
python tidy_phrases_dict.py
.PHONY: lint
lint:
pre-commit run --all-files
mypy --strict pypinyin
clean: clean-build clean-pyc clean-test
clean-build:
rm -fr build/
rm -fr dist/
rm -fr .eggs/
find . -name '*.egg-info' -exec rm -fr {} +
find . -name '*.egg' -exec rm -f {} +
clean-pyc:
find . -name '*.pyc' -exec rm -f {} +
find . -name '*.pyo' -exec rm -f {} +
find . -name '*~' -exec rm -f {} +
find . -name '__pycache__' -exec rm -fr {} +
clean-test:
rm -fr .tox/
rm -f .coverage
rm -fr htmlcov/
rebase_master:
git fetch origin && git rebase origin/master
merge_dev:
git merge --no-ff origin/develop
bump_patch:
bumpversion --verbose patch
bump_minor:
bumpversion --verbose minor
start_next:
git push && git push --tags && git checkout develop && git rebase master && git push
# Modified from
* [python-pinyin](https://github.com/mozillazg/python-pinyin.git)
commit: 55e524aa1b7b8eec3d15c5306043c6cdd5938b03
licence: MIT
## Features
* only supports py3
* pyi stub files are removed
汉字拼音转换工具(Python 版)
=============================
|Build| |GitHubAction| |Coverage| |PyPI version| |DOI|
将汉字转为拼音。可以用于汉字注音、排序、检索(`Russian translation`_) 。
基于 `hotoo/pinyin <https://github.com/hotoo/pinyin>`__ 开发。
* Documentation: http://pypinyin.rtfd.io/
* GitHub: https://github.com/mozillazg/python-pinyin
* License: MIT license
* PyPI: https://pypi.org/project/pypinyin
* Python version: 2.7, pypy, pypy3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9
.. contents::
特性
----
* 根据词组智能匹配最正确的拼音。
* 支持多音字。
* 简单的繁体支持, 注音支持。
* 支持多种不同拼音/注音风格。
安装
----
.. code-block:: bash
$ pip install pypinyin
使用示例
--------
Python 3(Python 2 下把 ``'中心'`` 替换为 ``u'中心'`` 即可):
.. code-block:: python
>>> from pypinyin import pinyin, lazy_pinyin, Style
>>> pinyin('中心')
[['zhōng'], ['xīn']]
>>> pinyin('中心', heteronym=True) # 启用多音字模式
[['zhōng', 'zhòng'], ['xīn']]
>>> pinyin('中心', style=Style.FIRST_LETTER) # 设置拼音风格
[['z'], ['x']]
>>> pinyin('中心', style=Style.TONE2, heteronym=True)
[['zho1ng', 'zho4ng'], ['xi1n']]
>>> pinyin('中心', style=Style.TONE3, heteronym=True)
[['zhong1', 'zhong4'], ['xin1']]
>>> pinyin('中心', style=Style.BOPOMOFO) # 注音风格
[['ㄓㄨㄥ'], ['ㄒㄧㄣ']]
>>> lazy_pinyin('中心') # 不考虑多音字的情况
['zhong', 'xin']
>>> lazy_pinyin('战略', v_to_u=True) # 不使用 v 表示 ü
['zhan', 'lüe']
# 使用 5 标识轻声
>>> lazy_pinyin('衣裳', style=Style.TONE3, neutral_tone_with_five=True)
['yi1', 'shang5']
**注意事项** :
* 默认情况下拼音结果不会标明哪个韵母是轻声,轻声的韵母没有声调或数字标识(可以通过参数 ``neutral_tone_with_five=True`` 开启使用 ``5`` 标识轻声 )。
* 默认情况下无声调相关拼音风格下的结果会使用 ``v`` 表示 ``ü`` (可以通过参数 ``v_to_u=True`` 开启使用 ``ü`` 代替 ``v`` )。
* 默认情况下会原样输出没有拼音的字符(自定义处理没有拼音的字符的方法见 `文档 <https://pypinyin.readthedocs.io/zh_CN/master/usage.html#handle-no-pinyin>`__ )。
命令行工具:
.. code-block:: console
$ pypinyin 音乐
yīn yuè
$ pypinyin -h
文档
--------
详细文档请访问:http://pypinyin.rtfd.io/ 。
项目代码开发方面的问题可以看看 `开发文档`_ 。
FAQ
---------
词语中的多音字拼音有误?
+++++++++++++++++++++++++++++
目前是通过词组拼音库的方式来解决多音字问题的。如果出现拼音有误的情况,
可以自定义词组拼音来调整词语中的拼音:
.. code-block:: python
>>> from pypinyin import Style, pinyin, load_phrases_dict
>>> pinyin('步履蹒跚')
[['bù'], ['lǚ'], ['mán'], ['shān']]
>>> load_phrases_dict({'步履蹒跚': [['bù'], ['lǚ'], ['pán'], ['shān']]})
>>> pinyin('步履蹒跚')
[['bù'], ['lǚ'], ['pán'], ['shān']]
详见 `文档 <https://pypinyin.readthedocs.io/zh_CN/master/usage.html#custom-dict>`__ 。
如果是分词导致的拼音有误的话,可以先使用其他的分词模块对数据进行分词处理,
然后将分词后的词组结果列表作为函数的参数即可:
.. code-block:: python
>>> # 使用其他分词模块分词,比如 jieba 之类,
>>> # 或者基于 phrases_dict.py 里的词语数据使用其他分词算法分词
>>> words = list(jieba.cut('每股24.67美元的确定性协议'))
>>> pinyin(words)
为什么没有 y, w, yu 几个声母?
++++++++++++++++++++++++++++++++++++++++++++
.. code-block:: python
>>> from pypinyin import Style, pinyin
>>> pinyin('下雨天', style=Style.INITIALS)
[['x'], [''], ['t']]
因为根据 `《汉语拼音方案》 <http://www.moe.gov.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html>`__ ,
y,w,ü (yu) 都不是声母。
声母风格(INITIALS)下,“雨”、“我”、“圆”等汉字返回空字符串,因为根据
`《汉语拼音方案》 <http://www.moe.edu.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html>`__ ,
y,w,ü (yu) 都不是声母,在某些特定韵母无声母时,才加上 y 或 w,而 ü 也有其特定规则。 —— @hotoo
**如果你觉得这个给你带来了麻烦,那么也请小心一些无声母的汉字(如“啊”、“饿”、“按”、“昂”等)。
这时候你也许需要的是首字母风格(FIRST_LETTER)**。 —— @hotoo
参考: `hotoo/pinyin#57 <https://github.com/hotoo/pinyin/issues/57>`__,
`#22 <https://github.com/mozillazg/python-pinyin/pull/22>`__,
`#27 <https://github.com/mozillazg/python-pinyin/issues/27>`__,
`#44 <https://github.com/mozillazg/python-pinyin/issues/44>`__
如果觉得这个行为不是你想要的,就是想把 y 当成声母的话,可以指定 ``strict=False`` ,
这个可能会符合你的预期:
.. code-block:: python
>>> from pypinyin import Style, pinyin
>>> pinyin('下雨天', style=Style.INITIALS)
[['x'], [''], ['t']]
>>> pinyin('下雨天', style=Style.INITIALS, strict=False)
[['x'], ['y'], ['t']]
详见 `strict 参数的影响`_ 。
如何减少内存占用
++++++++++++++++++++
如果对拼音的准确性不是特别在意的话,可以通过设置环境变量 ``PYPINYIN_NO_PHRASES``
和 ``PYPINYIN_NO_DICT_COPY`` 来节省内存。
详见 `文档 <https://pypinyin.readthedocs.io/zh_CN/master/faq.html#no-phrases>`__
更多 FAQ 详见文档中的
`FAQ <https://pypinyin.readthedocs.io/zh_CN/master/faq.html>`__ 部分。
.. _#13 : https://github.com/mozillazg/python-pinyin/issues/113
.. _strict 参数的影响: https://pypinyin.readthedocs.io/zh_CN/master/usage.html#strict
拼音数据
---------
* 单个汉字的拼音使用 `pinyin-data`_ 的数据
* 词组的拼音使用 `phrase-pinyin-data`_ 的数据
* 声母和韵母使用 `《汉语拼音方案》 <http://www.moe.gov.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html>`__ 的数据
Related Projects
-----------------
* `hotoo/pinyin`__: 汉字拼音转换工具 Node.js/JavaScript 版。
* `mozillazg/go-pinyin`__: 汉字拼音转换工具 Go 版。
* `mozillazg/rust-pinyin`__: 汉字拼音转换工具 Rust 版。
__ https://github.com/hotoo/pinyin
__ https://github.com/mozillazg/go-pinyin
__ https://github.com/mozillazg/rust-pinyin
.. |Build| image:: https://img.shields.io/circleci/project/github/mozillazg/python-pinyin/master.svg
:target: https://circleci.com/gh/mozillazg/python-pinyin
.. |GitHubAction| image:: https://github.com/mozillazg/python-pinyin/workflows/CI/badge.svg
:target: https://github.com/mozillazg/python-pinyin/actions
.. |Coverage| image:: https://img.shields.io/codecov/c/github/mozillazg/python-pinyin/master.svg
:target: https://codecov.io/gh/mozillazg/python-pinyin
.. |PyPI version| image:: https://img.shields.io/pypi/v/pypinyin.svg
:target: https://pypi.org/project/pypinyin/
.. |DOI| image:: https://zenodo.org/badge/12830126.svg
:target: https://zenodo.org/badge/latestdoi/12830126
.. _Russian translation: https://github.com/mozillazg/python-pinyin/blob/master/README_ru.rst
.. _pinyin-data: https://github.com/mozillazg/pinyin-data
.. _phrase-pinyin-data: https://github.com/mozillazg/phrase-pinyin-data
.. _开发文档: https://pypinyin.readthedocs.io/zh_CN/develop/develop.html
../CHANGELOG.rst
\ No newline at end of file
# Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = _build
# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " xml to make Docutils-native XML files"
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
clean:
rm -rf $(BUILDDIR)/*
html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."
json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."
htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."
qthelp:
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/BaiduPCS.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/BaiduPCS.qhc"
devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/BaiduPCS"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/BaiduPCS"
@echo "# devhelp"
epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
latex:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."
latexpdf:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
latexpdfja:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through platex and dvipdfmx..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."
man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
texinfo:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."
info:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo "Running Texinfo files through makeinfo..."
make -C $(BUILDDIR)/texinfo info
@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
gettext:
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."
linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."
doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."
xml:
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
@echo
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
pseudoxml:
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
@echo
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
API
====
.. _style:
拼音风格
-----------
.. autoclass:: pypinyin.Style
:members:
:undoc-members:
:member-order: bysource
.. _core_api:
核心 API
-------------
.. autofunction:: pypinyin.pinyin
.. autofunction:: pypinyin.lazy_pinyin
.. autofunction:: pypinyin.load_single_dict
.. autofunction:: pypinyin.load_phrases_dict
.. autofunction:: pypinyin.slug
.. _convert_style:
注册新的拼音风格
-----------------
.. autofunction:: pypinyin.style.register
.. _seg:
.. _#27: https://github.com/mozillazg/python-pinyin/issues/27
# -*- coding: utf-8 -*-
#
# pypinyin documentation build configuration file, created by
# sphinx-quickstart on Fri Sep 06 22:22:13 2013.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys, os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
sys.path.insert(0, os.path.abspath('.'))
sys.path.insert(0, os.path.abspath('..'))
# sys.path.insert(0, os.path.abspath('../pypinyin'))
# -- General configuration -----------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.viewcode',
'sphinx.ext.extlinks',
'sphinx.ext.todo',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix of source filenames.
source_suffix = '.rst'
# The encoding of source files.
#source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
import pypinyin
# General information about the project.
project = pypinyin.__title__
copyright = pypinyin.__copyright__
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = pypinyin.__version__
# The full version, including alpha/beta/rc tags.
release = pypinyin.__version__
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
language = 'zh_CN'
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']
# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False
# -- Options for HTML output ---------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'nature'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_domain_indices = True
# If false, no index is generated.
#html_use_index = True
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None
# Output file base name for HTML help builder.
htmlhelp_basename = 'pypinyindoc'
# -- Options for LaTeX output --------------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#'preamble': '',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('index', 'pypinyin.tex', 'pypinyin Documentation', 'mozillazg', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False
# If true, show page references after internal links.
#latex_show_pagerefs = False
# If true, show URL addresses after external links.
#latex_show_urls = False
# Documents to append as an appendix to all manuals.
#latex_appendices = []
# If false, no module index is generated.
#latex_domain_indices = True
# -- Options for manual page output --------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [('index', 'pypinyin', 'pypinyin Documentation', ['mozillazg'], 1)]
# If true, show URL addresses after external links.
#man_show_urls = False
# -- Options for Texinfo output ------------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
('index', 'pypinyin', 'pypinyin Documentation', 'mozillazg', 'pypinyin',
'One line description of project.', 'Miscellaneous'),
]
# Documents to append as an appendix to all manuals.
#texinfo_appendices = []
# If false, no module index is generated.
#texinfo_domain_indices = True
# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'
# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False
# -- Options for Epub output ---------------------------------------------------
# Bibliographic Dublin Core info.
epub_title = 'pypinyin'
epub_author = 'mozillazg'
epub_publisher = 'mozillazg'
epub_copyright = '2016 mozillazg'
# The language of the text. It defaults to the language option
# or en if the language is not set.
#epub_language = ''
# The scheme of the identifier. Typical schemes are ISBN or URL.
#epub_scheme = ''
# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#epub_identifier = ''
# A unique identification for the text.
#epub_uid = ''
# A tuple containing the cover image and cover page html template filenames.
#epub_cover = ()
# A sequence of (type, uri, title) tuples for the guide element of content.opf.
#epub_guide = ()
# HTML files that should be inserted before the pages created by sphinx.
# The format is a list of tuples containing the path and title.
#epub_pre_files = []
# HTML files that should be inserted after the pages created by sphinx.
# The format is a list of tuples containing the path and title.
#epub_post_files = []
# A list of files that should not be packed into the epub file.
#epub_exclude_files = []
# The depth of the table of contents in toc.ncx.
#epub_tocdepth = 3
# Allow duplicate toc entries.
#epub_tocdup = True
# Fix unsupported image types using the PIL.
#epub_fix_images = False
# Scale large images.
#epub_max_image_width = 0
# If 'no', URL addresses will not be shown.
#epub_show_urls = 'inline'
# If false, no index is generated.
#epub_use_index = True
# Example configuration for intersphinx: refer to the Python standard library.
# Uses the named ``{name: (target, inventory)}`` form over HTTPS; the older
# ``{url: None}`` form is deprecated by Sphinx.
# NOTE: this setting is only read when 'sphinx.ext.intersphinx' is listed in
# ``extensions``.
intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}
.. _contrib:
contrib
========
.. _tone_convert:
拼音转换
--------
.. autofunction:: pypinyin.contrib.tone_convert.to_normal
.. autofunction:: pypinyin.contrib.tone_convert.to_tone
.. autofunction:: pypinyin.contrib.tone_convert.to_tone2
.. autofunction:: pypinyin.contrib.tone_convert.to_tone3
.. autofunction:: pypinyin.contrib.tone_convert.tone_to_normal
.. autofunction:: pypinyin.contrib.tone_convert.tone_to_tone2
.. autofunction:: pypinyin.contrib.tone_convert.tone_to_tone3
.. autofunction:: pypinyin.contrib.tone_convert.tone2_to_normal
.. autofunction:: pypinyin.contrib.tone_convert.tone2_to_tone
.. autofunction:: pypinyin.contrib.tone_convert.tone2_to_tone3
.. autofunction:: pypinyin.contrib.tone_convert.tone3_to_normal
.. autofunction:: pypinyin.contrib.tone_convert.tone3_to_tone
.. autofunction:: pypinyin.contrib.tone_convert.tone3_to_tone2
V2UMixin
---------
.. autoclass:: pypinyin.contrib.uv.V2UMixin
NeutralToneWith5Mixin
-----------------------
.. autoclass:: pypinyin.contrib.neutral_tone.NeutralToneWith5Mixin
.. _develop:
开发文档
========
准备开发环境
-------------
::
$ virtualenv venv
$ . venv/bin/activate
(venv) $ pip install -U -r requirements_dev.txt
(venv) $ pip install -e .
(venv) $ pre-commit install
TODO: 把这个步骤放到一个 make 命令中。
.. note::
推荐在 Python 3.6+ 环境下进行开发。
测试
------
可以通过 ``make test`` 命令在当前 Python 版本下运行单元测试: ::
(venv) $ make test
可以通过 ``tox`` 测试程序在多个 Python 版本下的单元测试结果(这一步也可以在提 PR 的时候通过 CI 来运行): ::
(venv) $ tox
.. note::
如果对测试有疑问或者有些测试实在无法通过,可以先提交 PR 大家一起来看看。
目录结构
--------
关键文件和目录 ::
$ tree -L 2
.
├── CHANGELOG.rst # 更新日志
├── Makefile
├── README.rst
├── docs # 文档
├── gen_phrases_dict.py # 生成 phrases_dict.py 的脚本
├── gen_pinyin_dict.py # 生成 pinyin_dict.py 的脚本
├── phrase-pinyin-data # gen_phrases_dict.py 使用的数据源
├── pinyin-data # gen_pinyin_dict.py 使用的数据源
├── pypinyin # pypinyin 模块源代码
│   ├── __init__.py
│   ├── __main__.py # 命令行程序的入口
│   ├── compat.py
│   ├── constants.py
│   ├── contrib # 目前包含了一个分词模块
│   ├── core.py # pypinyin 模块的核心逻辑
│   ├── phonetic_symbol.py
│   ├── phrases_dict.py # 词组的拼音数据,由 gen_phrases_dict.py 生成
│   ├── pinyin_dict.py # 单个汉字的拼音数据,由 gen_pinyin_dict.py 生成
│   ├── runner.py # 命令行程序的主逻辑
│   ├── standard.py # strict=True 时的拼音转换逻辑
│   ├── style # 各种拼音风格在 style 目录下实现
│   ├── utils.py
├── pytest.ini
├── requirements_dev.txt
├── setup.cfg
├── setup.py
├── tests
├── tox.ini
实现思路/主逻辑
----------------
主逻辑:
1. 对输入的字符串按是否是汉字进行分词(``seg``)
2. 对分词结果的每个词条进行获取词条拼音的逻辑
1. 检查词条是否是汉字,不是汉字则走处理没有拼音数据的逻辑(``handle_nopinyin``)
2. 检查词条是否在 ``PHRASES_DICT`` 中,如果在直接取 ``PHRASES_DICT`` 中这个词条的拼音数据
3. 如果词条不在 ``PHRASES_DICT`` 中,遍历词条包含的字符,每个字符进行 ``single_pinyin`` 逻辑处理
3. ``single_pinyin`` 的逻辑:
1. 检查字符是否在 ``PINYIN_DICT`` 中,如果在的话,取 ``PINYIN_DICT`` 中这个字符的拼音数据
2. 如果不在的话,走 ``handle_nopinyin`` 逻辑
4. ``handle_nopinyin`` 逻辑: 根据 ``errors`` 参数的值返回不同的结果。
5. 对上面的步骤获得的拼音数据按指定的拼音风格进行转换。
* ``PHRASES_DICT``:词组拼音数据
* ``PINYIN_DICT``: 单个汉字的拼音数据
TODO: 画流程图
发布新版本
----------
1. 切换到 develop 分支
2. rebase master 分支的代码: ``make rebase_master``
3. 通过 ``make gen_data`` 生成最新的数据文件
4. 通过 ``make test`` 跑测试
5. 更新 CHANGELOG
6. 提交代码
7. 检查 develop 分支的 CI 结果
8. 切换到 master 分支
9. 合并 develop 分支代码: ``make merge_dev``
10. 更新版本号:
* 大改动(1.1.x -> 1.2.x):``make bump_minor``
* 小改动(1.1.1 -> 1.1.2):``make bump_patch``
11. 发布到 test pypi: ``make publish_test``
12. 安装和测试发布到 test pypi 上的版本
13. 发布到 pypi: ``make publish``
14. 安装和测试发布到 pypi 上的版本
15. 提交 master 分支代码,更新 develop 分支代码,进入下一个开发阶段:``make start_next``
FAQ
-----
.. _no_phrases:
如何禁用内置的“词组拼音库”
++++++++++++++++++++++++++++++++
设置环境变量 ``PYPINYIN_NO_PHRASES=true`` 即可
.. _no_dict_copy:
如何禁用默认的“拼音库”copy 操作
+++++++++++++++++++++++++++++++++++++++++++
设置环境变量 ``PYPINYIN_NO_DICT_COPY=true`` 即可.
副作用: 用户的自定义拼音库出现问题时, 无法回退到自带的拼音库.
.. _limit_memory:
如何减少内存占用
+++++++++++++++++++++
如果对拼音正确性不在意的话,可以按照上面所说的设置环境变量 ``PYPINYIN_NO_PHRASES``
和 ``PYPINYIN_NO_DICT_COPY`` 详见 `#13`_
.. _initials_problem:
``INITIALS`` 声母风格下,以 ``y``, ``w``, ``yu`` 开头的汉字返回空字符串
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
比如:
.. code:: python
pinyin('火影忍者', style=Style.INITIALS)
[['h'], [''], ['r'], ['zh']]
因为 ``y``, ``w``, ``yu`` 都不是声母。参考:
`hotoo/pinyin#57 <https://github.com/hotoo/pinyin/issues/57>`__,
`#22 <https://github.com/mozillazg/python-pinyin/pull/22>`__,
`#27 <https://github.com/mozillazg/python-pinyin/issues/27>`__,
`#44 <https://github.com/mozillazg/python-pinyin/issues/44>`__
声母风格(INITIALS)下,“雨”、“我”、“圆”等汉字返回空字符串,因为根据
`《汉语拼音方案》 <http://www.moe.gov.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html>`__ ,
y,w,ü (yu) 都不是声母,在某些特定韵母无声母时,才加上 y 或 w,而 ü 也有其特定规则。
如果你觉得这个给你带来了麻烦,那么也请小心一些无声母的汉字(如“啊”、“饿”、“按”、“昂”等)。
这时候你也许需要的是首字母风格(FIRST_LETTER)。 —— @hotoo
如果觉得这个行为不是你想要的,就是想把 y 当成声母的话,可以指定 ``strict=False`` , 这个可能会符合你的预期。详见 `strict 参数的影响`_
.. _#13: https://github.com/mozillazg/python-pinyin/issues/113
.. _strict 参数的影响: https://pypinyin.readthedocs.io/zh_CN/master/usage.html#strict
.. pypinyin documentation master file, created by
sphinx-quickstart on Fri Sep 06 22:22:13 2013.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
汉字拼音转换工具(Python 版)
=============================
|Build| |Coverage| |PyPI version|
将汉字转为拼音。可以用于汉字注音、排序、检索(`Russian translation`_) 。
基于 `hotoo/pinyin <https://github.com/hotoo/pinyin>`__ 开发。
* Documentation: http://pypinyin.rtfd.io
* GitHub: https://github.com/mozillazg/python-pinyin
* License: MIT license
* PyPI: https://pypi.org/project/pypinyin
* Python version: 2.7, pypy, pypy3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9
特性
----
* 根据词组智能匹配最正确的拼音。
* 支持多音字。
* 简单的繁体支持, 注音支持。
* 支持多种不同拼音风格。
.. |Build| image:: https://img.shields.io/circleci/project/github/mozillazg/python-pinyin/master.svg
:target: https://circleci.com/gh/mozillazg/python-pinyin
.. |Coverage| image:: https://img.shields.io/codecov/c/github/mozillazg/python-pinyin/master.svg
:target: https://codecov.io/gh/mozillazg/python-pinyin
.. |PyPI version| image:: https://img.shields.io/pypi/v/pypinyin.svg
:target: https://pypi.org/project/pypinyin/
.. |PyPI downloads| image:: https://img.shields.io/pypi/dm/pypinyin.svg
:target: https://pypi.org/project/pypinyin/
.. _Russian translation: https://github.com/mozillazg/python-pinyin/blob/master/README_ru.rst
Contents
--------
.. toctree::
:maxdepth: 4
installation
usage
api
contrib
develop
faq
related
CHANGELOG
Indices and tables
------------------
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
安装
======
可以使用 pip 进行安装:
.. code-block:: bash
$ pip install pypinyin
easy_install 安装:
.. code-block:: bash
$ easy_install pypinyin
源码安装:
.. code-block:: bash
$ python setup.py install
@ECHO OFF
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set BUILDDIR=_build
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
set I18NSPHINXOPTS=%SPHINXOPTS% .
if NOT "%PAPER%" == "" (
set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)
if "%1" == "" goto help
if "%1" == "help" (
:help
echo.Please use `make ^<target^>` where ^<target^> is one of
echo. html to make standalone HTML files
echo. dirhtml to make HTML files named index.html in directories
echo. singlehtml to make a single large HTML file
echo. pickle to make pickle files
echo. json to make JSON files
echo. htmlhelp to make HTML files and a HTML help project
echo. qthelp to make HTML files and a qthelp project
echo. devhelp to make HTML files and a Devhelp project
echo. epub to make an epub
echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
echo. text to make text files
echo. man to make manual pages
echo. texinfo to make Texinfo files
echo. gettext to make PO message catalogs
echo. changes to make an overview over all changed/added/deprecated items
echo. xml to make Docutils-native XML files
echo. pseudoxml to make pseudoxml-XML files for display purposes
echo. linkcheck to check all external links for integrity
echo. doctest to run all doctests embedded in the documentation if enabled
goto end
)
if "%1" == "clean" (
for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
del /q /s %BUILDDIR%\*
goto end
)
%SPHINXBUILD% 2> nul
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
if "%1" == "html" (
%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/html.
goto end
)
if "%1" == "dirhtml" (
%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
goto end
)
if "%1" == "singlehtml" (
%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
goto end
)
if "%1" == "pickle" (
%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the pickle files.
goto end
)
if "%1" == "json" (
%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the JSON files.
goto end
)
if "%1" == "htmlhelp" (
%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
goto end
)
if "%1" == "qthelp" (
	REM Build the Qt help project, then print the follow-up commands.
	REM The file names in the hints use this project's name; they previously
	REM pointed at files from a different project, with a mistyped extension.
	REM NOTE(review): assumes the qthelp base name is "pypinyin" - confirm
	REM against the files generated in %BUILDDIR%/qthelp.
	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\pypinyin.qhcp
	echo.To view the help file:
	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\pypinyin.qhc
	goto end
)
if "%1" == "devhelp" (
%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished.
goto end
)
if "%1" == "epub" (
%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The epub file is in %BUILDDIR%/epub.
goto end
)
if "%1" == "latex" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
if errorlevel 1 exit /b 1
echo.
echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
goto end
)
if "%1" == "latexpdf" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
cd %BUILDDIR%/latex
make all-pdf
cd %BUILDDIR%/..
echo.
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
goto end
)
if "%1" == "latexpdfja" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
cd %BUILDDIR%/latex
make all-pdf-ja
cd %BUILDDIR%/..
echo.
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
goto end
)
if "%1" == "text" (
%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The text files are in %BUILDDIR%/text.
goto end
)
if "%1" == "man" (
%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The manual pages are in %BUILDDIR%/man.
goto end
)
if "%1" == "texinfo" (
%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
goto end
)
if "%1" == "gettext" (
%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
goto end
)
if "%1" == "changes" (
%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
if errorlevel 1 exit /b 1
echo.
echo.The overview file is in %BUILDDIR%/changes.
goto end
)
if "%1" == "linkcheck" (
%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
if errorlevel 1 exit /b 1
echo.
echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
goto end
)
if "%1" == "doctest" (
%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
if errorlevel 1 exit /b 1
echo.
echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
goto end
)
if "%1" == "xml" (
%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The XML files are in %BUILDDIR%/xml.
goto end
)
if "%1" == "pseudoxml" (
%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
goto end
)
:end
Related Projects
===================
* `hotoo/pinyin`__: 汉字拼音转换工具 Node.js/JavaScript 版。
* `mozillazg/go-pinyin`__: 汉字拼音转换工具 Go 版。
* `mozillazg/rust-pinyin`__: 汉字拼音转换工具 Rust 版。
__ https://github.com/hotoo/pinyin
__ https://github.com/mozillazg/go-pinyin
__ https://github.com/mozillazg/rust-pinyin
使用
======
.. _example:
示例
-------
.. code-block:: python
>>> from pypinyin import pinyin, lazy_pinyin, Style
>>> pinyin('中心')
[['zhōng'], ['xīn']]
>>> pinyin('中心', heteronym=True) # 启用多音字模式
[['zhōng', 'zhòng'], ['xīn']]
>>> pinyin('中心', style=Style.FIRST_LETTER) # 设置拼音风格
[['z'], ['x']]
>>> pinyin('中心', style=Style.TONE2, heteronym=True)
[['zho1ng', 'zho4ng'], ['xi1n']]
>>> lazy_pinyin('中心') # 不考虑多音字的情况
['zhong', 'xin']
**注意事项** :
* 默认情况下拼音结果不会标明哪个韵母是轻声,轻声的韵母没有声调或数字标识(可以通过参数 ``neutral_tone_with_five=True`` 开启使用 ``5`` 标识轻声 )。
* 默认情况下无声调相关拼音风格下的结果会使用 ``v`` 表示 ``ü`` (可以通过参数 ``v_to_u=True`` 开启使用 ``ü`` 代替 ``v`` )。
* 默认情况下会原样输出没有拼音的字符(自定义处理没有拼音的字符的方法见 `文档 <https://pypinyin.readthedocs.io/zh_CN/master/usage.html#handle-no-pinyin>`__ )。
.. _handle_no_pinyin:
处理不包含拼音的字符
---------------------
当程序遇到不包含拼音的字符(串)时,会根据 ``errors`` 参数的值做相应的处理:
* ``default`` (默认行为): 不做任何处理,原样返回::
pinyin('你好☆☆')
[['nǐ'], ['hǎo'], ['☆☆']]
* ``ignore`` : 忽略该字符 ::
pinyin('你好☆☆', errors='ignore')
[['nǐ'], ['hǎo']]
* ``replace`` : 替换为去掉 ``\u`` 的 unicode 编码::
pinyin('你好☆☆', errors='replace')
[['nǐ'], ['hǎo'], ['26062606']]
* callable 对象 : 提供一个回调函数,接受无拼音字符(串)作为参数,
支持的返回值类型: ``unicode`` 或 ``list`` 或 ``None`` 。::
pinyin('你好☆☆', errors=lambda x: 'star')
[['nǐ'], ['hǎo'], ['star']]
pinyin('你好☆☆', errors=lambda x: None)
[['nǐ'], ['hǎo']]
返回值类型为 ``list`` 时,会自动 expand list ::
pinyin('你好☆☆', errors=lambda x: ['star' for _ in x])
[['nǐ'], ['hǎo'], ['star'], ['star']]
# 指定多音字
pinyin('你好☆☆', heteronym=True, errors=lambda x: [['star', '☆'] for _ in x])
[['nǐ'], ['hǎo'], ['star', '☆'], ['star', '☆']]
.. _custom_dict:
自定义拼音库
------------
如果对结果不满意,可以通过
:py:func:`~pypinyin.load_single_dict` 或
:py:func:`~pypinyin.load_phrases_dict`
以自定义拼音库的方式修正结果:
.. code-block:: python
>>> from pypinyin import lazy_pinyin, load_phrases_dict, Style, load_single_dict
>>> hans = '桔子'
>>> lazy_pinyin(hans, style=Style.TONE2)
['jie2', 'zi3']
>>> load_phrases_dict({'桔子': [['jú'], ['zǐ']]}) # 增加 "桔子" 词组
>>> lazy_pinyin(hans, style=Style.TONE2)
['ju2', 'zi3']
>>>
>>> hans = '还没'
>>> lazy_pinyin(hans, style=Style.TONE2)
['hua2n', 'me2i']
>>> load_single_dict({ord('还'): 'hái,huán'}) # 调整 "还" 字的拼音顺序
>>> lazy_pinyin('还没', style=Style.TONE2)
['ha2i', 'me2i']
.. _custom_style:
自定义拼音风格
----------------
可以通过 :py:func:`~pypinyin.style.register` 来实现自定义拼音风格的需求:
.. code-block:: python
In [1]: from pypinyin import lazy_pinyin
In [2]: from pypinyin.style import register
In [3]: @register('kiss')
...: def kiss(pinyin, **kwargs):
...: return '😘 {0}'.format(pinyin)
...:
In [4]: lazy_pinyin('么么', style='kiss')
Out[4]: ['😘 me', '😘 me']
.. _strict:
``strict`` 参数的影响
-------------------------------
``strict`` 参数用于控制处理声母和韵母时是否严格遵循 `《汉语拼音方案》`_ 标准:
.. code-block:: python
In [1]: from pypinyin import Style, lazy_pinyin
In [2]: lazy_pinyin('乌', style=Style.TONE)
Out[2]: ['wū']
In [3]: lazy_pinyin('乌', style=Style.INITIALS)
Out[3]: ['']
In [4]: lazy_pinyin('乌', style=Style.INITIALS, strict=False)
Out[4]: ['w']
In [5]: lazy_pinyin('迂', style=Style.TONE)
Out[5]: ['yū']
In [6]: lazy_pinyin('迂', style=Style.FINALS_TONE)
Out[6]: ['ǖ']
In [7]: lazy_pinyin('迂', style=Style.FINALS_TONE, strict=False)
Out[7]: ['ū']
当 ``strict=True`` 时根据 `《汉语拼音方案》`_ 的如下规则处理声母、在韵母相关风格下还原正确的韵母
(只对只获取声母或只获取韵母相关拼音风格有效,不影响其他获取完整拼音信息的拼音风格的结果):
* 21 个声母: ``b p m f d t n l g k h j q x zh ch sh r z c s`` (**y, w 不是声母**)
* i行的韵母,前面没有声母的时候,写成yi(衣),ya(呀),ye(耶),yao(腰),you(忧),yan(烟),
yin(因),yang(央),ying(英),yong(雍)。(**y 不是声母**)
* u行的韵母,前面没有声母的时候,写成wu(乌),wa(蛙),wo(窝),wai(歪),wei(威),wan(弯),
wen(温),wang(汪),weng(翁)。(**w 不是声母**)
* ü行的韵母,前面没有声母的时候,写成yu(迂),yue(约),yuan(冤),yun(晕);ü上两点省略。
(**韵母相关风格下还原正确的韵母 ü**)
* ü行的韵跟声母j,q,x拼的时候,写成ju(居),qu(区),xu(虚),ü上两点也省略;
但是跟声母n,l拼的时候,仍然写成nü(女),lü(吕)。(**韵母相关风格下还原正确的韵母 ü**)
* iou,uei,uen前面加声母的时候,写成iu,ui,un。例如niu(牛),gui(归),lun(论)。
(**韵母相关风格下还原正确的韵母 iou,uei,uen**)
当 ``strict=False`` 时就是不遵守上面的规则来处理声母和韵母,
比如:``y``, ``w`` 会被当做声母,yu(迂) 的韵母就是一般认为的 ``u`` 等。
具体差异可以查看 `tests/test_standard.py <https://github.com/mozillazg/python-pinyin/blob/master/tests/test_standard.py>`_ 中的对比结果测试用例
.. _cli:
命令行工具
------------
程序内置了一个命令行工具 ``pypinyin`` :
.. code-block:: console
$ pypinyin 音乐
yīn yuè
$ pypinyin -h
命令行工具支持如下参数:
.. code-block:: console
$ pypinyin -h
usage: pypinyin [-h] [-V] [-f {pinyin,slug}]
[-s {NORMAL,zhao,TONE,zh4ao,TONE2,zha4o,TONE3,zhao4,INITIALS,zh,FIRST_LETTER,z,FINALS,ao,FINALS_TONE,4ao,FINALS_TONE2,a4o,FINALS_TONE3,ao4,BOPOMOFO,BOPOMOFO_FIRST,CYRILLIC,CYRILLIC_FIRST}]
[-p SEPARATOR] [-e {default,ignore,replace}] [-m]
hans
convert chinese to pinyin.
positional arguments:
hans chinese string
optional arguments:
-h, --help show this help message and exit
-V, --version show program's version number and exit
-f {pinyin,slug}, --func {pinyin,slug}
function name (default: "pinyin")
-s {NORMAL,zhao,TONE,zh4ao,TONE2,zha4o,TONE3,zhao4,INITIALS,zh,FIRST_LETTER,z,FINALS,ao,FINALS_TONE,4ao,FINALS_TONE2,a4o,FINALS_TONE3,ao4,BOPOMOFO,BOPOMOFO_FIRST,CYRILLIC,CYRILLIC_FIRST}, --style {NORMAL,zhao,TONE,zh4ao,TONE2,zha4o,TONE3,zhao4,INITIALS,zh,FIRST_LETTER,z,FINALS,ao,FINALS_TONE,4ao,FINALS_TONE2,a4o,FINALS_TONE3,ao4,BOPOMOFO,BOPOMOFO_FIRST,CYRILLIC,CYRILLIC_FIRST}
pinyin style (default: "zh4ao")
-p SEPARATOR, --separator SEPARATOR
slug separator (default: "-")
-e {default,ignore,replace}, --errors {default,ignore,replace}
how to handle none-pinyin string (default: "default")
-m, --heteronym enable heteronym
``-s``, ``--style`` 参数可以选值的含义如下:
================== =========================================
-s 或 --style 的值 对应的拼音风格
================== =========================================
zhao :py:attr:`~pypinyin.Style.NORMAL`
zh4ao :py:attr:`~pypinyin.Style.TONE`
zha4o :py:attr:`~pypinyin.Style.TONE2`
zhao4 :py:attr:`~pypinyin.Style.TONE3`
zh :py:attr:`~pypinyin.Style.INITIALS`
z :py:attr:`~pypinyin.Style.FIRST_LETTER`
ao :py:attr:`~pypinyin.Style.FINALS`
4ao :py:attr:`~pypinyin.Style.FINALS_TONE`
a4o :py:attr:`~pypinyin.Style.FINALS_TONE2`
ao4 :py:attr:`~pypinyin.Style.FINALS_TONE3`
NORMAL :py:attr:`~pypinyin.Style.NORMAL`
TONE :py:attr:`~pypinyin.Style.TONE`
TONE2 :py:attr:`~pypinyin.Style.TONE2`
TONE3 :py:attr:`~pypinyin.Style.TONE3`
INITIALS :py:attr:`~pypinyin.Style.INITIALS`
FIRST_LETTER :py:attr:`~pypinyin.Style.FIRST_LETTER`
FINALS :py:attr:`~pypinyin.Style.FINALS`
FINALS_TONE :py:attr:`~pypinyin.Style.FINALS_TONE`
FINALS_TONE2 :py:attr:`~pypinyin.Style.FINALS_TONE2`
FINALS_TONE3 :py:attr:`~pypinyin.Style.FINALS_TONE3`
BOPOMOFO :py:attr:`~pypinyin.Style.BOPOMOFO`
BOPOMOFO_FIRST :py:attr:`~pypinyin.Style.BOPOMOFO_FIRST`
CYRILLIC :py:attr:`~pypinyin.Style.CYRILLIC`
CYRILLIC_FIRST :py:attr:`~pypinyin.Style.CYRILLIC_FIRST`
================== =========================================
.. _《汉语拼音方案》: http://www.moe.gov.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html
import sys
def remove_dup_items(lst):
    """Return a new list with duplicates dropped, keeping first occurrences.

    Membership is tested with ``in`` against the result list, so items only
    need to support equality (they do not have to be hashable). The input
    list is not modified.
    """
    unique = []
    for candidate in lst:
        if candidate in unique:
            continue
        unique.append(candidate)
    return unique
def parse(fp):
    """Parse phrase-pinyin data into a ``{phrase: pinyin_list}`` dict.

    Each data line has the form ``<phrase>: <pinyin> <pinyin> ...`` with an
    optional trailing ``# comment``. Blank lines and lines starting with
    ``#`` are skipped.

    :param fp: iterable of text lines (e.g. an open text file).
    :return: dict mapping each phrase to a list holding one list of readings
        per syllable, e.g. ``{'中国': [['zhōng'], ['guó']]}``. When a phrase
        appears more than once, the readings of later lines are merged into
        the matching syllable position with duplicates removed.
    :raises ValueError: if a data line does not contain exactly one ``:``.
    :raises IndexError: if a repeated phrase has fewer syllables than its
        first occurrence.
    """
    phrases_dict = {}
    # Read from the ``fp`` argument, not a module-level name, so the function
    # also works when imported and called with any file-like object.
    for line in fp:
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        # Example line: "中国: zhōng guó  # optional comment"
        data = line.split('#')[0]
        hanzi, pinyin = data.strip().split(':')
        hanzi = hanzi.strip()
        # "zhōng guó" -> [['zhōng'], ['guó']]
        pinyin_list = [[s] for s in pinyin.split()]
        if hanzi not in phrases_dict:
            phrases_dict[hanzi] = pinyin_list
            continue
        # Phrase seen before: merge the new readings syllable by syllable.
        for index, value in enumerate(phrases_dict[hanzi]):
            value.extend(pinyin_list[index])
            phrases_dict[hanzi][index] = remove_dup_items(value)
    return phrases_dict
def main(in_fp, out_fp):
    """Write a ``phrases_dict`` Python module generated from phrase data.

    The header is written first, then ``in_fp`` is parsed with ``parse`` and
    one dict entry per phrase is emitted in sorted phrase order, followed by
    the closing brace.

    :param in_fp: readable text stream with the phrase-pinyin data.
    :param out_fp: writable text stream receiving the generated source.
    """
    out_fp.write('''# Warning: Auto-generated file, don't edit.
phrases_dict = {
''')
    phrases = parse(in_fp)
    # Keys are unique, so sorting the keys gives the same order as sorting
    # the (key, value) pairs by key.
    for hanzi in sorted(phrases):
        # e.g. 中国 -> [['zhōng'], ['guó']]
        entry = " '{hanzi}': {pinyin_list},\n".format(
            hanzi=hanzi.strip(), pinyin_list=phrases[hanzi])
        out_fp.write(entry)
    out_fp.write('}\n')
if __name__ == '__main__':
    # Both INPUT and OUTPUT are required: print the usage line whenever
    # either is missing, so a single-argument call does not fail with an
    # IndexError on sys.argv[2].
    if len(sys.argv) < 3:
        print('python gen_phrases_dict.py INPUT OUTPUT')
        sys.exit(1)
    in_f = sys.argv[1]
    out_f = sys.argv[2]
    # Both files are closed by the context manager even if main() raises.
    with open(in_f) as in_fp, open(out_f, 'w') as out_fp:
        main(in_fp, out_fp)
import sys
def main(in_fp, out_fp):
    """Write a ``pinyin_dict`` Python module generated from pinyin data.

    Each data line looks like ``U+4E2D: zhōng,zhòng  # 中``. Blank lines and
    lines starting with ``#`` are skipped; for the rest, the trailing comment
    is dropped, ``U+`` becomes ``0x`` and the readings are wrapped in single
    quotes, giving an entry such as ``0x4E2D: 'zhōng,zhòng',``.

    :param in_fp: readable text stream with the pinyin data.
    :param out_fp: writable text stream receiving the generated source.
    """
    out_fp.write('''# Warning: Auto-generated file, don't edit.
pinyin_dict = {
''')
    for raw in in_fp.readlines():
        stripped = raw.strip()
        if not stripped or stripped.startswith('#'):
            continue
        # "U+4E2D: zhōng,zhòng  # 中" -> "U+4E2D: zhōng,zhòng"
        entry = stripped.split('#')[0].strip()
        # -> "0x4E2D: 'zhōng,zhòng" (the closing quote is added on write)
        entry = entry.replace('U+', '0x').replace(': ', ": '")
        out_fp.write(" {new_line}',\n".format(new_line=entry))
    out_fp.write('}\n')
if __name__ == '__main__':
    # Both INPUT and OUTPUT are required: print the usage line whenever
    # either is missing, so a single-argument call does not fail with an
    # IndexError on sys.argv[2].
    if len(sys.argv) < 3:
        print('python gen_pinyin_dict.py INPUT OUTPUT')
        sys.exit(1)
    in_f = sys.argv[1]
    out_f = sys.argv[2]
    # Both files are closed by the context manager even if main() raises.
    with open(in_f) as in_fp, open(out_f, 'w') as out_fp:
        main(in_fp, out_fp)
[bumpversion]
commit = True
tag = True
current_version = 0.10.5
[bumpversion:file:merge.py]
[bumpversion:file:pinyin.txt]
[bumpversion:file:large_pinyin.txt]
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
new.txt
cc-cedict.zip
cedict_ts.u8
language: python
python:
- 3.6
script:
- make merge
# ChangeLog
## [0.10.5] (2020-11-22)
* 增加 `还君明珠` 的拼音。
## [0.10.4] (2020-10-08)
* 纠正一些词语的拼音。
## [0.10.3] (2020-07-05)
* 增加 `还珠` 的拼音。
## [0.10.2] (2019-10-26)
* 纠正一些词语的拼音。
## [0.10.1] (2019-07-06)
* 修正部分拼音数据。
## [0.10.0] (2019-05-10)
* 新增 `cc_cedict.txt`: [cc-cedict.org](https://cc-cedict.org/) 拼音数据。Thanks [@hanabi1224]
* 纠正一些词语的拼音
## [0.9.2] (2019-04-06)
* 修复部分词语的拼音声调标错了位置的问题
## [0.9.1] (2019-03-31)
* 纠正一批词语的拼音:
* `鸟事`
* `虮虱相吊`
* `别鹤离鸾`
* `年华垂暮`
* `本枝百世`
* `操戈同室`
* 部分词语中 `丢` 的拼音
## [0.9.0] (2019-02-23)
* 新增 `腌臢: ā zā`
* `朝阳` 增加 `cháo yáng` 这个音
* 新增 `土地`、`领地`、`基地`
## [0.8.5] (2018-12-26)
* 纠正 `油炸`、`洗发` 的拼音
## [0.8.4] (2018-09-16)
* 纠正 `步履蹒跚` 的拼音
* 纠正部分词语中 `长` 的拼音
## [0.8.3] (2018-08-04)
* 纠正部分 `查`、`大` 的读音 (via [ee1ded4])
## [0.8.2] (2018-07-28)
* 纠正 `有一只` 的读音 (via [330b348])
## [0.8.1] (2018-07-28)
* 纠正几个 `一` 的读音 (via [6e3b9eb])
* 修复部分拼音包含 `xh` 的问题 (via [ae12df98])
## [0.8.0] (2018-07-08)
* 纠正 `称雨道晴` 的拼音 (via [67412ab])
* 纠正部分词语中 `干` 的拼音 (via [38474cb])
* 增加 `时长` 的拼音 (via [c40b965])
## [0.7.3] (2018-06-10)
* 纠正 `一语中的`, `一语中人` 的拼音 (via [3b62ed3])
## [0.7.2] (2018-06-10)
* 纠正部分拼音数据 (via [af5d783])
## [0.7.1] (2018-06-04)
* 纠正 `负债累累` `经纶济世` 的拼音 (via [#16])
## [0.7.0] (2018-05-27)
* 新增 zdic_cibs.txt 和 zdic_cybs.txt (via [#13])
* `zdic_cibs.txt`: [汉典网](http://www.zdic.net) 汉语词典拼音数据
* `zdic_cybs.txt`: [汉典网](http://www.zdic.net) 成语词典拼音数据
* 增加基于 zdic_cibs.txt 和 zdic_cybs.txt 的 large_pinyin.txt (via [#13])
* 纠正部分读音(via [#10],[#11], [#15])
## [0.6.0] (2018-03-11)
* Revert [#3](https://github.com/mozillazg/phrase-pinyin-data/pull/3) 增加的拼音数据(错误有点多)
## [0.5.1] (2017-10-25)
* 修正一批缺少 ā 和 dī 不对的词语(via [#7][#7])
## [0.5.0] (2017-07-09)
* 增加 `还贷` 的拼音(Thanks [@zhuangh](https://github.com/zhuangh))
## [0.4.1] (2017-04-10)
* 纠正 `朝阳`, `昂昂自若` 的拼音(via [e6d6d27][e6d6d27], [6e7ea16][6e7ea16])
## [0.4.0] (2017-03-22)
* 新增2万多个词组拼音数据(via [fc50fcd][fc50fcd], 感谢 [@onsunsl][@onsunsl] 分享他/她收集的43400个拼音数据: [#3][#3] ).
## [0.3.1] (2017-03-13)
* 纠正 `斯事体大` 的拼音
## [0.3.0] (2017-03-12)
* 增加 overwrite.txt 用于新增/纠正拼音数据
* 纠正 `便宜`, `所长`, `打开天窗说亮话` 的拼音数据
* 增加 `朝阳区`
## [0.2.0] (2017-03-04)
* 添加一批拼音(via [04de9f7][04de9f7])。
## 0.1.0 (2017-03-04)
* Initial Release
[0.10.4]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.10.3...v0.10.4
[0.10.3]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.10.2...v0.10.3
[0.10.2]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.10.1...v0.10.2
[0.10.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.10.0...v0.10.1
[0.10.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.9.2...v0.10.0
[0.9.2]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.9.1...v0.9.2
[0.9.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.9.0...v0.9.1
[0.9.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.5...v0.9.0
[0.8.5]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.4...v0.8.5
[0.8.4]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.3...v0.8.4
[0.8.3]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.2...v0.8.3
[0.8.2]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.1...v0.8.2
[0.8.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.0...v0.8.1
[0.8.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.7.3...v0.8.0
[0.7.3]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.7.2...v0.7.3
[0.7.2]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.7.1...v0.7.2
[0.7.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.7.0...v0.7.1
[0.7.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.6.0...v0.7.0
[0.6.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.5.0...v0.6.0
[0.5.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.5.0...v0.5.1
[0.5.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.4.1...v0.5.0
[0.4.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.4.0...v0.4.1
[0.4.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.3.1...v0.4.0
[0.3.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.3.0...v0.3.1
[0.3.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.2.0...v0.3.0
[0.2.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.1.0...v0.2.0
[04de9f7]: https://github.com/mozillazg/phrase-pinyin-data/commit/04de9f7f520e2f2188cb4c468c30d6fb811a20ba
[fc50fcd]: https://github.com/mozillazg/phrase-pinyin-data/commit/fc50fcd7faa94205096d582fc7a1b31265943a85
[@onsunsl]: https://github.com/onsunsl
[#3]: https://github.com/mozillazg/phrase-pinyin-data/pull/3
[e6d6d27]: https://github.com/mozillazg/phrase-pinyin-data/commit/e6d6d270900fdca32ccbe9a414ea4642e537e522
[6e7ea16]: https://github.com/mozillazg/phrase-pinyin-data/commit/6e7ea167dee0c812514f0bf9701ff5c103a566af
[#7]: https://github.com/mozillazg/phrase-pinyin-data/pull/7
[#10]: https://github.com/mozillazg/phrase-pinyin-data/pull/10
[#11]: https://github.com/mozillazg/phrase-pinyin-data/pull/11
[#13]: https://github.com/mozillazg/phrase-pinyin-data/pull/13
[#15]: https://github.com/mozillazg/phrase-pinyin-data/pull/15
[#16]: https://github.com/mozillazg/phrase-pinyin-data/pull/16
[af5d783]: https://github.com/mozillazg/phrase-pinyin-data/commit/af5d7831b0e84e4a5306e304b3b2da3268e35f17
[3b62ed3]: https://github.com/mozillazg/phrase-pinyin-data/commit/3b62ed303f129868c7ccee4f2d5e44dcea7d30d4
[67412ab]: https://github.com/mozillazg/phrase-pinyin-data/commit/67412abbf8570ac80a41dc012f228c0864823a62
[38474cb]: https://github.com/mozillazg/phrase-pinyin-data/commit/38474cb91dedd27b3d51b39811704f3d045837b1
[c40b965]: https://github.com/mozillazg/phrase-pinyin-data/commit/c40b9653ea2ab066d1c0606e9e07dd4225ff2485
[6e3b9eb]: https://github.com/mozillazg/phrase-pinyin-data/commit/6e3b9eb805ed3e3a5955c179e752ec5e1293216f
[ae12df98]: https://github.com/mozillazg/phrase-pinyin-data/commit/ae12df98438a508249bdf591334b6415bb5ccf8d
[330b348]: https://github.com/mozillazg/phrase-pinyin-data/commit/330b3481ba350de07b580991a5a8b7a83aaefde9
[ee1ded4]: https://github.com/mozillazg/phrase-pinyin-data/commit/ee1ded4938624ac4ce3dc7991ab370e09dbd745c
[@hanabi1224]: https://github.com/hanabi1224
[0.10.5]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.10.4...v0.10.5
MIT License
Copyright (c) 2017 mozillazg
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# Print a short description of the main targets.
.PHONY: help
help:
@echo "merge update pinyin.txt and large_pinyin.txt"
@echo "er find r"
@echo "check check unexpected char"
@echo "cedict_get get latest cedict data"
@echo "cedict parse latest cedict data"
# Regenerate pinyin.txt and large_pinyin.txt with merge.py.
# Each command writes to new.txt first and only moves it over the real
# file when merge.py succeeded (&&).
.PHONY: merge
merge:
python merge.py pinyin.txt overwrite.txt > new.txt && mv new.txt pinyin.txt
python merge.py zdic_cibs.txt zdic_cybs.txt cc_cedict.txt pinyin.txt overwrite.txt > new.txt && mv new.txt large_pinyin.txt
# List the lines of overwrite.txt that contain 儿 but neither "ér" nor "er".
.PHONY: er
er:
cat overwrite.txt|grep 儿|grep -v ér|grep -v er
# Rewrite every *.txt file in place with sed, moving a tone mark from one
# vowel of a pair to the other (for example "ùo" becomes "uò").
# NOTE(review): the 's/ùi/uì/g' substitution appears twice below; the second
# occurrence looks redundant.
.PHONY: tone_mark
tone_mark:
ls *.txt | xargs -L 1 sed -i 's/ùo/uò/g'
ls *.txt | xargs -L 1 sed -i 's/oǔ/ǒu/g'
ls *.txt | xargs -L 1 sed -i 's/ùi/uì/g'
ls *.txt | xargs -L 1 sed -i 's/íe/ié/g'
ls *.txt | xargs -L 1 sed -i 's/ùi/uì/g'
ls *.txt | xargs -L 1 sed -i 's/ǐe/iě/g'
ls *.txt | xargs -L 1 sed -i 's/aō/āo/g'
ls *.txt | xargs -L 1 sed -i 's/ìan/iàn/g'
ls *.txt | xargs -L 1 sed -i 's/īan/iān/g'
# Run tone_mark first, then search with ripgrep for the characters ɡ and ɑ
# (presumably look-alikes of the plain letters g and a - verify).
# The leading "-" makes make ignore the command's exit status.
.PHONY: check
check: tone_mark
-rg 'ɡ|ɑ'
# Install/upgrade the development requirements, then run
# get_latest_cc_cedict.py.
.PHONY: cedict_get
cedict_get:
python -m pip install -U -r requirements_dev.txt
python get_latest_cc_cedict.py
# Install/upgrade the development requirements, then run
# parse_latest_cc_cedict.py.
.PHONY: cedict
cedict:
python -m pip install -U -r requirements_dev.txt
python parse_latest_cc_cedict.py
# phrase-pinyin-data [![Build Status](https://travis-ci.org/mozillazg/phrase-pinyin-data.svg?branch=master)](https://travis-ci.org/mozillazg/phrase-pinyin-data)
词语拼音数据。
## 数据介绍
拼音数据的格式:
```
{phrase}: {pinyin}
```
* `#` 开头的行是注释
* 行尾的 `#` 也是注释
* `{phrase}` 汉字词语
* `{pinyin}` 词语的拼音,使用空格分隔每个汉字的拼音
* 一行一个词语的读音,有多个音的词语会出现在多行
* 示例:
```
# 注释
中国: zhōng guó
北京: běi jīng # 注释
```
文件说明:
* `overwrite.txt`: 手工纠正的拼音数据
* `pinyin.txt`: `pinyin.txt + overwrite.txt` 后的拼音数据
* `zdic_cibs.txt`: [汉典网](http://www.zdic.net/) 汉语词典拼音数据
* `zdic_cybs.txt`: [汉典网](http://www.zdic.net/) 成语词典拼音数据
* `cc_cedict.txt`: [cc-cedict.org](https://cc-cedict.org/) 拼音数据
* `large_pinyin.txt`: `zdic_cibs.txt + zdic_cybs.txt + cc_cedict.txt + pinyin.txt + overwrite.txt` 后的拼音数据
## 修改数据
* 修改 `pinyin.txt` 或 `overwrite.txt` 都可以
* 执行 `make merge` 命令可以按照合并规则生成最新的 `pinyin.txt`
## 参考资料
* 初始数据基于 [phrases-dict.js](https://github.com/hotoo/pinyin/blob/05f74496c34ccb32db1a0fd0b358a798a22a51e5/data/phrases-dict.js) 和 [phrases_dict.py](https://github.com/mozillazg/python-pinyin/blob/366de0363ff1fb9a718ce668448bea59de09a4bf/pypinyin/phrases_dict.py)
* [汉典 zdic.net](http://www.zdic.net/)
* [字海网,叶典网](http://zisea.com/)
* [国学大师_国学网](http://www.guoxuedashi.com/)
* [CC-CEDICT download - MDBG English to Chinese dictionary](http://www.mdbg.net/chindict/chindict.php?page=cc-cedict)
* [漢語大詞典](http://www.ivantsoi.com/hydcd/search.html)
## 相关项目
* [mozillazg/pinyin-data](https://github.com/mozillazg/pinyin-data): 汉字拼音数据
# -*- coding: utf-8 -*-
import os
import io
import shutil
import codecs
import zipfile
import requests
# Directory that contains this script; downloads are stored and unpacked here.
ROOT = os.path.dirname(os.path.realpath(__file__))


if __name__ == '__main__':
    # Download the latest CC-CEDICT export and unpack it next to this script.
    DOWNLOAD_URL = 'https://cc-cedict.org/editor/editor_export_cedict.php?c=zip'
    zip_file_path = os.path.join(ROOT, 'cc-cedict.zip')

    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(DOWNLOAD_URL, stream=True, timeout=60)
    try:
        # Fail loudly on an HTTP error instead of saving an error page
        # under a ``.zip`` name.
        response.raise_for_status()
        with open(zip_file_path, 'wb') as f:
            # iter_content() decodes gzip/deflate transfer encodings, which
            # the raw socket stream (response.raw) does not.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                f.write(chunk)
    finally:
        # Always release the streamed connection.
        response.close()

    with zipfile.ZipFile(zip_file_path) as z:
        z.extractall(ROOT)
# -*- coding: utf-8 -*-
import sys
import codecs
def parse(lines):
    """Iterate over the entries of a phrase-pinyin data file.

    Blank lines and lines whose first non-blank character is ``#`` are
    skipped.  Every other line is split at its first ``:``; whatever follows
    (including a trailing ``#`` comment) is kept as-is.

    :param lines: iterable of text lines
    :yield: hanzi, others (both stripped of surrounding whitespace)
    """
    for raw in lines:
        entry = raw.strip()
        if not entry or entry.startswith('#'):
            continue
        head, tail = entry.split(':', 1)
        yield head.strip(), tail.strip()
def merge(pinyin_d_list):
    """Fold several pinyin dicts into one.

    The dicts are applied in order, so an entry in a later dict replaces the
    entry with the same key from an earlier one.

    :param pinyin_d_list: iterable of dicts
    :rtype: dict
    """
    merged = {}
    apply_layer = merged.update
    for layer in pinyin_d_list:
        apply_layer(layer)
    return merged
def sort(pinyin_d):
    """Return the ``(key, value)`` pairs of *pinyin_d* ordered by key.

    :rtype: list
    """
    pairs = list(pinyin_d.items())
    pairs.sort(key=lambda pair: pair[0])
    return pairs
def output(pinyin_s):
    """Print the file header followed by one ``hanzi: pinyin`` line per entry.

    :param pinyin_s: iterable of ``(hanzi, pinyin)`` pairs
    """
    header = (
        '# version: 0.10.5',
        '# source: https://github.com/mozillazg/phrase-pinyin-data',
    )
    for banner in header:
        print(banner)
    for key, pinyin in pinyin_s:
        # Only the part of the key before the first underscore is printed,
        # so a key such as ``朝阳_2`` is written as ``朝阳``.
        phrase = key.partition('_')[0]
        print('{hanzi}: {pinyin}'.format(hanzi=phrase, pinyin=pinyin))
def main(files):
    """Merge the given data files and print the combined, sorted result.

    Each file is read as UTF-8 (a leading BOM is tolerated).  Within one
    file the first value seen for a key is kept; across files, later files
    take precedence over earlier ones.

    :param files: paths of the data files, lowest priority first
    """
    per_file = []
    for path in files:
        entries = {}
        with codecs.open(path, 'r', 'utf-8-sig') as handle:
            for hanzi, pinyin in parse(handle):
                # First occurrence inside a single file wins.
                if hanzi not in entries:
                    entries[hanzi] = pinyin
        per_file.append(entries)
    output(sort(merge(per_file)))


if __name__ == '__main__':
    main(sys.argv[1:])
# 新增或纠正的拼音数据
# 升级版本的时候会合并回 pinyin.txt
# 示例
斯事体大: sī shì tǐ dà
朝阳: zhāo yáng
朝阳_2: cháo yáng
还君明珠: huán jūn míng zhū
# -*- coding: utf-8 -*-
import os
import io
import re
import codecs
from pypinyin.phonetic_symbol import phonetic_symbol
from pypinyin.pinyin_dict import pinyin_dict
from pypinyin.style.tone import ToneConverter
# Directory that contains this script; input and output files live here.
ROOT = os.path.dirname(os.path.realpath(__file__))
tone_converter = ToneConverter()

# Lookup table keyed by the lower-cased result of ``to_tone3`` and mapping
# back to the original reading taken from pypinyin's ``pinyin_dict``.
# When two readings convert to the same key, the one seen last is kept.
tone3_2_tone_dict = {}
for readings in pinyin_dict.values():
    for reading in readings.split(','):
        reading = reading.strip()
        if not reading:
            continue
        numbered = tone_converter.to_tone3(reading).strip().lower()
        if numbered:
            tone3_2_tone_dict[numbered] = reading
def tone3_to_tone1(tone3):
    """Convert one numeric-tone syllable to its tone-marked spelling.

    :param tone3: a syllable with a trailing tone digit, e.g. ``zhong1``,
        ``r5`` or ``lu:4``; surrounding whitespace and case are ignored
    :return: ``'er'`` for the erhua marker ``r5``; the bare syllable for a
        neutral-tone (``5``) syllable; otherwise the value stored in
        ``tone3_2_tone_dict``
    :raises KeyError: if the syllable is not in ``tone3_2_tone_dict``
    """
    tone3 = tone3.strip().lower()
    # Erhua suffix.
    if tone3 == 'r5':
        return 'er'
    # Neutral tone: drop the digit, no table lookup needed.
    if '5' in tone3:
        new = tone3.replace('5', '')
        if new:
            # The ``u:`` notation must not leak into the output; spell it as
            # ``ü`` like the tone-marked readings do (e.g. ``nu:5`` -> ``nü``).
            return new.replace('u:', 'ü')
    # The lookup table spells ü as ``v`` (e.g. ``lu:4`` -> ``lv4``).
    if 'u:' in tone3:
        tone3 = tone3.replace('u:', 'v')
    return tone3_2_tone_dict[tone3]
if __name__ == '__main__':
    # Convert cedict_ts.u8 (CC-CEDICT) into cc_cedict.txt, one
    # ``{simplified}: {pinyin}`` line per usable entry.

    # Matches ``<traditional> <simplified> [<pinyin syllables>]``.
    LINE_PARTS_RE = re.compile(
        r'(?P<zht>\w+)\s+(?P<zhs>\w+)\s+\[(?P<py>.+?)\]')
    LETTER_DIGIT_RE = re.compile(r'[a-zA-Z0-9]')
    cnt = 0
    with codecs.open(os.path.join(ROOT, 'cc_cedict.txt'), 'w', 'utf-8-sig') as fpw:
        with codecs.open(os.path.join(ROOT, 'cedict_ts.u8'), 'r', 'utf-8-sig') as fpr:
            for line in fpr:
                line_stripped = line.strip()
                # Test the stripped text: a blank line is still "\n" before
                # stripping and would otherwise crash on ``line_stripped[0]``.
                if not line_stripped or line_stripped.startswith(('#', '%')):
                    continue
                parts = LINE_PARTS_RE.match(line_stripped)
                if not parts:
                    continue
                zhs = parts.group('zhs')
                py = parts.group('py').split()
                try:
                    tone1 = [tone3_to_tone1(i) for i in py]
                except KeyError as e:
                    # Unknown syllable: report it and skip the entry.
                    print(e)
                    continue
                # Skip entries whose headword contains ASCII letters/digits.
                if LETTER_DIGIT_RE.search(zhs):
                    continue
                # Single characters are not phrases.
                if len(zhs) < 2:
                    continue
                fpw.write(f'{zhs}: {" ".join(tone1)}\n')
                cnt += 1
                if cnt % 10000 == 0:
                    print(f'{cnt} lines processed...')
此差异已折叠。
[bumpversion]
commit = True
tag = True
current_version = 0.10.2
[bumpversion:file:merge_unihan.py]
[bumpversion:file:pinyin.txt]
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: Python application
on:
push:
branches: [ ]
pull_request:
branches: [ ]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: try merge_unihan
run: |
make merge_unihan
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
docs/_build/
# PyBuilder
target/
#Ipython Notebook
.ipynb_checkpoints
language: python
python:
- "3.5"
script:
- make merge_unihan
# ChangeLog
## [0.10.2] (2021-03-13)
* 修改 `帧` 的最常用读音为 `zhēn`
* 修复 `zdic.txt` 中两个拼音字母 `è í` 使用不当的问题. Thanks [@Ace-Who](https://github.com/Ace-Who)
## [0.10.1] (2020-11-22)
* 调整 `地`、`謦` 的拼音顺序
## [0.10.0] (2020-10-07)
* 新增 `kTGHZ2013.txt`: [Unihan Database][unihan][kTGHZ2013](http://www.unicode.org/reports/tr38/#kTGHZ2013) 部分的拼音数据(来源于《通用规范汉字字典》的拼音数据)
* 修正部分拼音的读音
* 生成 `pinyin.txt` 时合并来自 `kTGHZ2013.txt` 的拼音数据
## [0.9.0] (2020-06-06)
* 更新 Unihan 数据版本为 13.0.0
## [0.8.1] (2019-10-26)
* 修正 `迹`、`分` 的读音。
## [0.8.0] (2019-06-01)
* 增加 `kanji.txt` 日本自造汉字的拼音数据 via [#32]. Thanks [@LuoZijun](https://github.com/LuoZijun)
* 去掉几个有误的轻声数据
## [0.7.0] (2019-03-31)
* 更新 Unihan 数据版本为 12.0.0
## [0.6.2] (2018-09-16)
* 修改 `蹒` 的最常用读音为 `pán`
## [0.6.1] (2018-08-04)
* 修改 `著` 的默认读音为 `zhù` via [8802f31]
## [0.6.0] (2018-07-08)
* 更新 Unihan 数据版本为 11.0.0 via [68dc169]
## [0.5.1] (2018-04-19)
* 更正 `卓`、`啥` 的拼音数据 via [#26] 。Thanks [@shibingli](https://github.com/shibingli)
* 更新 `〇` 的拼音数据 via [#27]
## [0.5.0] (2018-03-18)
* 更新 Unihan 数据版本为 10.0.0 via [#19][#19]
* 新增 kMandarin_overwrite.txt 用于手工纠正 kMandarin.txt 中有误的拼音数据 via [#21][#21]
* 更正 `讽`、`识` 的最常用读音 via [#20][#20]
* 更正 埔,彷,珖,U+275C8 的常用发音 [635b238c4](https://github.com/mozillazg/pinyin-data/commit/635b238c4d21e55d8fd66299c8da3ae555253b3a)
## [0.4.1] (2017-02-12)
* `妳` 的最常用拼音调整为 `nǐ` via [eb08200](https://github.com/mozillazg/pinyin-data/commit/eb08200d0a203c57ecc62ec7a118765518430238)
* `钭` 的拼音更新为 `tǒu,dǒu` via [fb9e64e](https://github.com/mozillazg/pinyin-data/commit/fb9e64e6c0a20eb0e792e8a402dffbf8cc2dfa57)
## [0.4.0] (2016-10-17)
* Update PUA.txt 详见 [#7](https://github.com/mozillazg/pinyin-data/issues/7) thanks [@Artoria2e5][@Artoria2e5]
* Rename PUA.txt to GBK_PUA.txt 详见 [#7](https://github.com/mozillazg/pinyin-data/issues/7)
* Add kMandarin_8105.txt (《通用规范汉字表》里 8105 个汉字最常用的一个读音) [#9][#9] [#11][#11]
* Update pinyin.txt with latest data
## [0.3.0] (2016-08-19)
* Fixed format of zdic.txt via [b8e4394](https://github.com/mozillazg/pinyin-data/commit/b8e439490d2c6e8c711652983db52fb69136919b).
* Fixed some pinyin: 罗 via [468ffaa](https://github.com/mozillazg/pinyin-data/commit/468ffaa8eb678637c7565a02e6836255bd0df06c).
* Support Chinese characters in the PUA ([Private Use Area](https://en.wikipedia.org/wiki/Private_Use_Areas)) via [#2](https://github.com/mozillazg/pinyin-data/pull/2).
* pinyin.txt add line comments that startswith `#` via [9944f79](https://github.com/mozillazg/pinyin-data/commit/9944f795e191fb3606d65ada84b6fad5665f8776).
## [0.2.0] (2016-07-19)
* Update to the latest version of [Unihan Database](http://www.unicode.org/charts/unihan.html):
> Date: 2016-06-01 07:01:48 GMT [JHJ]
> Unicode version: 9.0.0
## 0.1.0 (2016-03-11)
* Initial Release
[@Artoria2e5]: https://github.com/Artoria2e5
[#9]: https://github.com/mozillazg/pinyin-data/pull/9
[#11]: https://github.com/mozillazg/pinyin-data/pull/11
[#19]: https://github.com/mozillazg/pinyin-data/pull/19
[#20]: https://github.com/mozillazg/pinyin-data/pull/20
[#21]: https://github.com/mozillazg/pinyin-data/pull/21
[#26]: https://github.com/mozillazg/pinyin-data/pull/26
[#27]: https://github.com/mozillazg/pinyin-data/pull/27
[68dc169]: https://github.com/mozillazg/pinyin-data/commit/68dc169c3f0f02cb9bf53290edab2d2d2463e0c5
[8802f31]: https://github.com/mozillazg/pinyin-data/commit/8802f31e0e65c6e34a497adb55993425741a9d41
[#32]: https://github.com/mozillazg/pinyin-data/pull/32
[unihan]: http://www.unicode.org/charts/unihan.html
[0.2.0]: https://github.com/mozillazg/pinyin-data/compare/v0.1.0...v0.2.0
[0.3.0]: https://github.com/mozillazg/pinyin-data/compare/v0.2.0...v0.3.0
[0.4.0]: https://github.com/mozillazg/pinyin-data/compare/v0.3.0...v0.4.0
[0.4.1]: https://github.com/mozillazg/pinyin-data/compare/v0.4.0...v0.4.1
[0.5.0]: https://github.com/mozillazg/pinyin-data/compare/v0.4.1...v0.5.0
[0.5.1]: https://github.com/mozillazg/pinyin-data/compare/v0.5.0...v0.5.1
[0.6.0]: https://github.com/mozillazg/pinyin-data/compare/v0.5.1...v0.6.0
[0.6.1]: https://github.com/mozillazg/pinyin-data/compare/v0.6.0...v0.6.1
[0.6.2]: https://github.com/mozillazg/pinyin-data/compare/v0.6.1...v0.6.2
[0.7.0]: https://github.com/mozillazg/pinyin-data/compare/v0.6.2...v0.7.0
[0.8.0]: https://github.com/mozillazg/pinyin-data/compare/v0.7.0...v0.8.0
[0.8.1]: https://github.com/mozillazg/pinyin-data/compare/v0.8.0...v0.8.1
[0.9.0]: https://github.com/mozillazg/pinyin-data/compare/v0.8.1...v0.9.0
[0.10.0]: https://github.com/mozillazg/pinyin-data/compare/v0.9.0...v0.10.0
[0.10.1]: https://github.com/mozillazg/pinyin-data/compare/v0.10.0...v0.10.1
[0.10.2]: https://github.com/mozillazg/pinyin-data/compare/v0.10.1...v0.10.2
# GBK/GB 18030 PUA 映射
# 详见:https://zh.wikipedia.org/wiki/GB_18030#PUA
# U+E815: #  Unihan: U+2E81 ⺁
U+E816: zuǒ #  Unihan: U+20087 𠂇
# U+E817: #  Unihan: U+20089 𠂉
U+E818: gǔn #  Unihan: U+200CC 𠃌
# U+E819: #  Unihan: U+2E84 ⺄
U+E81A: zhòu,zhū #  Unihan: U+3473 㑳
U+E81B: zhòu #  Unihan: U+3447 㑇
# U+E81C: #  Unihan: U+2E88 ⺈
# U+E81D: #  Unihan: U+2E8B ⺋
# U+E81E: #  Unihan: U+9FB4 龴
U+E81F: wāi #  Unihan: U+359E 㖞
U+E820: hǎn #  Unihan: U+361A 㘚
U+E821: hǎn #  Unihan: U+360E 㘎
# U+E822: #  Unihan: U+2E8C ⺌
# U+E823: #  Unihan: U+2E97 ⺗
U+E824: zhòu,chǎo #  Unihan: U+396E 㥮
U+E825: zhòu #  Unihan: U+3918 㤘
# U+E826: #  Unihan: U+9FB5 龵
U+E827: gāng #  Unihan: U+39CF 㧏
U+E828: kuǎi #  Unihan: U+39DF 㧟
U+E829: sǒng #  Unihan: U+3A73 㩳
U+E82A: sǒng #  Unihan: U+39D0 㧐
# U+E82B: #  Unihan: U+9FB6 龶
# U+E82C: #  Unihan: U+9FB7 龷
U+E82D: gāng #  Unihan: U+3B4E 㭎
U+E82E: kuài #  Unihan: U+3C6E 㱮
U+E82F: tà #  Unihan: U+3CE0 㳠
# U+E830: #  Unihan: U+2EA7 ⺧
U+E831: pěng #  Unihan: U+215D7 𡗗
# U+E832: #  Unihan: U+9FB8 龸
# U+E833: #  Unihan: U+2EAA ⺪
U+E834: lōu #  Unihan: U+4056 䁖
U+E835: cǎn #  Unihan: U+415F 䅟
# U+E836: #  Unihan: U+2EAE ⺮
U+E837: chōu,chóu #  Unihan: U+4337 䌷
# U+E838: #  Unihan: U+2EB3 ⺳
# U+E839: #  Unihan: U+2EB6 ⺶
# U+E83A: #  Unihan: U+2EB7 ⺷
U+E83B: zāi #  Unihan: U+2298F 𢦏
U+E83C: bà,bēi #  Unihan: U+43B1 䎱
U+E83D: bà #  Unihan: U+43AC 䎬
# U+E83E: #  Unihan: U+2EBB ⺻
U+E83F: zhuān #  Unihan: U+43DD 䏝
U+E840: qióng #  Unihan: U+44D6 䓖
U+E841: kuì,huì #  Unihan: U+4661 䙡
U+E842: kuì #  Unihan: U+464C 䙌
# U+E843: #  Unihan: U+9FB9 龹
U+E844: xīn #  Unihan: U+4723 䜣
U+E845: yàn #  Unihan: U+4729 䜩
U+E846: jìng,qíng #  Unihan: U+477C 䝼
U+E847: qíng #  Unihan: U+478D 䞍
# U+E848: #  Unihan: U+2ECA ⻊
U+E849: shàn #  Unihan: U+4947 䥇
U+E84A: yé #  Unihan: U+497A 䥺
U+E84B: pō #  Unihan: U+497D 䥽
U+E84C: shàn #  Unihan: U+4982 䦂
U+E84D: zhuō #  Unihan: U+4983 䦃
U+E84E: shàn #  Unihan: U+4985 䦅
U+E84F: jué #  Unihan: U+4986 䦆
U+E850: wěn,chuài #  Unihan: U+499F 䦟
U+E851: zhèng #  Unihan: U+499B 䦛
U+E852: chuài #  Unihan: U+49B7 䦷
U+E853: zhèng #  Unihan: U+49B6 䦶
# U+E854: #  Unihan: U+9FBA 龺
U+E855: yíng #  Unihan: U+241FE 𤇾
U+E856: yú #  Unihan: U+4CA3 䲣
U+E857: yìn #  Unihan: U+4C9F 䲟
U+E858: chūn #  Unihan: U+4CA0 䲠
U+E859: qiū #  Unihan: U+4CA1 䲡
U+E85A: yú #  Unihan: U+4C77 䱷
U+E85B: téng #  Unihan: U+4CA2 䲢
U+E85C: shī #  Unihan: U+4D13 䴓
U+E85D: jiāo #  Unihan: U+4D14 䴔
U+E85E: liè #  Unihan: U+4D15 䴕
U+E85F: jīng #  Unihan: U+4D16 䴖
U+E860: jú #  Unihan: U+4D17 䴗
U+E861: tī #  Unihan: U+4D18 䴘
U+E862: pì #  Unihan: U+4D19 䴙
U+E863: yǎn #  Unihan: U+4DAE 䶮
# U+E864: #  Unihan: U+9FBB 龻
The MIT License (MIT)
Copyright (c) 2016 mozillazg
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# Print a short description of the available targets.
.PHONY: help
help:
@echo "merge_unihan merge Unihan data"
@echo "pua generate PUA"
@echo "check check unexpected char"
# Run merge_unihan.py; the check target runs first.
.PHONY: merge_unihan
merge_unihan: check
python merge_unihan.py
# Run tools/gen_gb_pua.py and redirect its output into GBK_PUA.txt.
.PHONY: pua
pua:
python tools/gen_gb_pua.py > GBK_PUA.txt
# Search with ripgrep for the characters listed in the pattern (presumably
# look-alike or wrongly composed letters that should not appear in the data -
# verify).  The leading "-" makes make ignore the command's exit status.
.PHONY: check
check:
-rg 'ɡ|ɑ|í|è'
# pinyin-data [![Build Status](https://travis-ci.org/mozillazg/pinyin-data.svg?branch=master)](https://travis-ci.org/mozillazg/pinyin-data)
汉字拼音数据。
## 数据介绍
拼音数据的格式:
{code point}: {pinyins} # {hanzi} {comments}
* `#` 开头的行是注释,行内 `#` 后面的字符也是注释
* `{pinyins}` 中使用逗号分隔多个拼音
* 示例:
# 注释
U+4E2D: zhōng,zhòng # 中
[Unihan Database][unihan] 数据版本:
> Date: 2020-02-18 18:27:33 GMT [JHJ]
> Unicode version: 13.0.0
* `kTGHZ2013.txt`: [Unihan Database][unihan] 中 [kTGHZ2013](http://www.unicode.org/reports/tr38/#kTGHZ2013) 部分的拼音数据(来源于《通用规范汉字字典》的拼音数据)
* `kHanyuPinyin.txt`: [Unihan Database][unihan] 中 [kHanyuPinyin](http://www.unicode.org/reports/tr38/#kHanyuPinyin) 部分的拼音数据(来源于《漢語大字典》的拼音数据)
* `kXHC1983.txt`: [Unihan Database][unihan] 中 [kXHC1983](http://www.unicode.org/reports/tr38/#kXHC1983) 部分的拼音数据(来源于《现代汉语词典》的拼音数据)
* `kHanyuPinlu.txt`: [Unihan Database][unihan] 中 [kHanyuPinlu](http://www.unicode.org/reports/tr38/#kHanyuPinlu) 部分的拼音数据(来源于《現代漢語頻率詞典》的拼音数据)
* `kMandarin.txt`: [Unihan Database][unihan] 中 [kMandarin](http://www.unicode.org/reports/tr38/#kMandarin) 部分的拼音数据(普通话中最常用的一个读音。zh-CN 为主,如果 zh-CN 中没有则使用 zh-TW 中的拼音)
* `kMandarin_overwrite.txt`: 手工纠正 `kMandarin.txt` 中有误的拼音数据(**可以修改**)
* `GBK_PUA.txt`: [Private Use Area](https://en.wikipedia.org/wiki/Private_Use_Areas) 中有拼音的汉字,参考 [GB 18030 - 维基百科,自由的百科全书](https://zh.wikipedia.org/wiki/GB_18030#PUA)(**可以修改**)
* `nonCJKUI.txt`: 不属于 [CJK Unified Ideograph](https://en.wikipedia.org/wiki/CJK_Unified_Ideographs) 但是却有拼音的字符(**可以修改**)
* `kanji.txt`: [日本自造汉字](https://zh.wikipedia.org/wiki/%E6%97%A5%E6%9C%AC%E6%B1%89%E5%AD%97#7_%E6%97%A5%E6%9C%AC%E6%B1%89%E5%AD%97%E7%9A%84%E6%B1%89%E8%AF%AD%E6%99%AE%E9%80%9A%E8%AF%9D%E8%A7%84%E8%8C%83%E8%AF%BB%E9%9F%B3%E8%A1%A8) 的拼音数据(**可以修改**)
* `kMandarin_8105.txt`: [《通用规范汉字表》](https://zh.wikipedia.org/wiki/通用规范汉字表)(2013 年版)里 8105 个汉字最常用的一个读音(**可以修改**)
* `overwrite.txt`: 手工纠正的拼音数据(**可以修改**)
* `pinyin.txt`: 合并上述文件后的拼音数据
* `zdic.txt`: [汉典网](http://zdic.net) 的拼音数据(**可以修改**)
## 修改数据
* 上面标注了 **可以修改** 字样的文件都可以直接修改
* 如果汉字的拼音不需要修改,只是调整第一个读音的话,可以直接修改 `kMandarin_8105.txt` 这个文件
* 执行 `make merge_unihan` 命令可以按照合并规则生成最新的 `pinyin.txt` 文件
* 进入 unihan 目录,执行 `make update` 命令可以更新最新的 Unihan 数据
## 参考资料
* [汉语拼音方案](http://www.moe.edu.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html)
* [Unihan Database Lookup](http://www.unicode.org/charts/unihan.html)
* [汉典 zdic.net](http://www.zdic.net/)
* [字海网,叶典网](http://zisea.com/)
* [国学大师_国学网](http://www.guoxuedashi.com/)
* [Unicode、GB2312、GBK和GB18030中的汉字](http://www.fmddlmyy.cn/text24.html)
* [GB 18030 - 维基百科,自由的百科全书](https://zh.wikipedia.org/wiki/GB_18030#PUA)
* [通用规范汉字表 - 维基百科,自由的百科全书](https://zh.wikipedia.org/wiki/%E9%80%9A%E7%94%A8%E8%A7%84%E8%8C%83%E6%B1%89%E5%AD%97%E8%A1%A8)
* [China’s 通用规范汉字表 (Tōngyòng Guīfàn Hànzìbiǎo)](https://blogs.adobe.com/CCJKType/2014/03/china-8105.html)
* [日本汉字的汉语读音规范](http://www.moe.gov.cn/s78/A19/yxs_left/moe_810/s230/201001/t20100115_75698.html)
* [日本汉字的汉语普通话规范读音表- 维基百科](https://zh.wikipedia.org/wiki/%E6%97%A5%E6%9C%AC%E6%B1%89%E5%AD%97#7_%E6%97%A5%E6%9C%AC%E6%B1%89%E5%AD%97%E7%9A%84%E6%B1%89%E8%AF%AD%E6%99%AE%E9%80%9A%E8%AF%9D%E8%A7%84%E8%8C%83%E8%AF%BB%E9%9F%B3%E8%A1%A8)
* [漢語大字典(第二版)](http://www.ivantsoi.com/hydzd/index.html)
[unihan]: http://www.unicode.org/charts/unihan.html
## 相关项目
* [mozillazg/phrase-pinyin-data](https://github.com/mozillazg/phrase-pinyin-data): 词语拼音数据
此差异已折叠。
此差异已折叠。
此差异已折叠。
U+389C: kāng # 㢜
U+60B7: lì # 悷
U+417F: huá # 䅿
U+46BE: rén # 䚾
U+4B78: fù # 䭸
U+4B7B: fēn # 䭻
U+4CC9: dōng # 䳉
U+4D7B: huì # 䵻
U+57D4: pǔ # 埔
U+5A47: cǎi # 婇
U+5F6F: piāo # 彯
U+5F77: páng # 彷
U+60B7: lì # 悷
U+65FD: tūn # 旽
U+6A0B: tōng # 樋
U+6ADA: lǘ # 櫚
U+6E5E: zhēn # 湞
U+73D6: guāng # 珖
U+77A1: guī # 瞡
U+7BC9: zhù # 築
U+815C: méi # 腜
U+816C: róu # 腬
U+8192: ōu # 膒
U+8491: yīn # 蒑
U+8A09: fàn # 訉
U+90D8: lǚ # 郘
U+9D24: zhōng # 鴤
U+2031A: nòng # 𠌚
U+2141D: fú # 𡐝
U+21594: nuó # 𡖔
U+2199D: xiāo # 𡦝
U+21B0D: mí # 𡬍
U+21B10: yí # 𡬐
U+21B15: lóng # 𡬕
U+2243F: rǎng # 𢐿
U+2273D: kuí # 𢜽
U+22741: hōng # 𢝁
U+22892: sū # 𢢒
U+22A10: jí # 𢨐
U+245ED: xià # 𤗭
U+24704: huái # 𤜄
U+247AE: zhài # 𤞮
U+24856: yán # 𤡖
U+248B5: lài # 𤢵
U+249EB: jīn # 𤧫
U+2546B: kān # 𥑫
U+2588D: hù # 𥢍
U+2588F: diàn # 𥢏
U+25C1F: yuán # 𥰟
U+272D5: kùn # 𧋕
U+2757A: shuāng # 𧕺
U+275C8: nú # 𧗈
U+27956: lí # 𧥖
U+280A2: jí # 𨂢
U+2824B: tuō # 𨉋
U+284A8: hài # 𨒨
U+28ABF: liú # 𨪿
U+28DED: chán # 𨷭
U+28E30: jú # 𨸰
U+293CF: wéi # 𩏏
U+295F5: zhēng # 𩗵
U+29B5D: wǒ # 𩭝
U+2A048: zhuāng # 𪁈
U+2A2A2: shí # 𪊢
U+8B9D: zhán # 讝
U+3D14: jí # 㴔
U+8B26: qǐng # 謦
此差异已折叠。
此差异已折叠。
U+5302: yún # 匂 yún 为日本汉字读音; xiōng 为现代汉语读音;
U+4E3C: dǎn # 丼 dǎn 为日本汉字读音; jǐng 为现代汉语读音;
U+8FBB: shí # 辻
U+8FBC: rù # 込
U+51E7: jīn # 凧
U+6763: shān # 杣
U+67A0: zá # 枠
U+7551: tián # 畑
U+6803: lì # 栃
U+6802: méi # 栂
U+5CE0: kǎ # 峠
U+4FE3: yǔ # 俣
U+7C7E: rèn # 籾
U+7560: tián # 畠
U+96EB: xià # 雫
U+7B39: shì # 笹
U+5840: píng # 塀
U+6919: chāng # 椙
U+7872: yù # 硲
U+86EF: lǎo # 蛯
U+55B0: cān # 喰
U+643E: zhà # 搾
U+698A: shén # 榊
U+50CD: dòng # 働
U+7CC0: huā # 糀
U+9786: bǐng # 鞆
U+69C7: zhēn # 槇
U+6A2B: jiān # 樫
U+9D2B: tián # 鴫
U+567A: xīn # 噺
U+7C17: liáng # 簗
U+9EBF: mó # 麿
# -*- coding: utf-8 -*-
import collections
def code_to_hanzi(code):
    """Return the character for a code point written as ``U+XXXX``."""
    hex_literal = code.replace('U+', '0x')
    return chr(int(hex_literal, 16))
def sort_pinyin_dict(pinyin_dict):
    """Return an ``OrderedDict`` of *pinyin_dict* ordered by code point.

    Keys are ``U+XXXX`` strings and are compared by their numeric value, so
    ``U+2031A`` sorts after ``U+4E2D``.
    """
    def code_point(item):
        return int(item[0].replace('U+', '0x'), 16)

    ordered = collections.OrderedDict()
    for code, pinyins in sorted(pinyin_dict.items(), key=code_point):
        ordered[code] = pinyins
    return ordered
def remove_dup_items(lst):
    """Return a new list with duplicates removed, keeping first occurrences.

    Items are compared with ``==`` and do not need to be hashable.
    """
    unique = []
    for candidate in lst:
        if candidate in unique:
            continue
        unique.append(candidate)
    return unique
def parse_pinyins(fp):
    """Parse a ``{code point}: {pinyins}  # comment`` data file.

    Blank lines and lines starting with ``#`` are skipped, and everything
    after the first ``#`` on a data line is ignored.  Readings may be
    separated by commas, whitespace, or both (``a,b``, ``a b``, ``a, b``);
    empty fragments are dropped.

    :param fp: iterable of text lines
    :return: dict mapping each code (e.g. ``'U+4E2D'``) to its list of
        readings in file order; a later line for the same code replaces an
        earlier one
    :raises ValueError: if the part of a data line before the comment does
        not contain exactly one ``:``
    """
    pinyin_map = {}
    for line in fp:
        line = line.strip()
        if line.startswith('#') or not line:
            continue
        code, pinyin = line.split('#')[0].split(':')
        # Treat commas like whitespace so "a, b" and "a,,b" do not produce
        # empty-string readings.
        readings = pinyin.replace(',', ' ').split()
        # A line without any reading keeps the historical [''] value so that
        # callers indexing ``[0]`` keep working.
        pinyin_map[code.strip()] = readings or ['']
    return pinyin_map
def merge(raw_pinyin_map, adjust_pinyin_map, overwrite_pinyin_map):
    """Build the final reading list for every code in *raw_pinyin_map*.

    For each code, in order of precedence:

    * if it is in *overwrite_pinyin_map*, those readings replace the raw ones;
    * else if it is in *adjust_pinyin_map*, those readings are placed in
      front of the raw ones;
    * otherwise the raw readings are used unchanged.

    Duplicates are removed afterwards, keeping the first occurrence.
    Codes that only appear in the adjust/overwrite maps are not added.
    """
    merged = {}
    for code in raw_pinyin_map:
        if code in overwrite_pinyin_map:
            chosen = overwrite_pinyin_map[code]
        elif code in adjust_pinyin_map:
            chosen = adjust_pinyin_map[code] + raw_pinyin_map[code]
        else:
            chosen = raw_pinyin_map[code]
        merged[code] = remove_dup_items(chosen)
    return merged
def save_data(pinyin_map, writer):
    """Write one ``{code}: {pinyins} # {hanzi}`` line per entry to *writer*.

    :param pinyin_map: mapping of ``U+XXXX`` code to a list of readings
    :param writer: object with a ``write(str)`` method
    """
    template = '{code}: {pinyin} # {hanzi}\n'
    for code, pinyins in pinyin_map.items():
        character = code_to_hanzi(code)
        joined = ','.join(pinyins)
        writer.write(template.format(code=code, pinyin=joined, hanzi=character))
def extend_pinyins(old_map, new_map, only_no_exists=False):
    """Merge the readings of *new_map* into *old_map* in place.

    :param old_map: dict that is updated
    :param new_map: dict whose readings are added
    :param only_no_exists: when true, only codes missing from *old_map* are
        added (the list object from *new_map* is stored as-is); when false,
        the readings are appended to the existing list, which is created
        first if the code is new
    """
    for code, pinyins in new_map.items():
        if not only_no_exists:
            old_map.setdefault(code, []).extend(pinyins)
        elif code not in old_map:
            # Only fill the gap; existing codes are left untouched.
            old_map[code] = pinyins
if __name__ == '__main__':
    # Build pinyin.txt from the individual source files.
    #
    # * raw_pinyin_map collects every reading of every code.
    # * adjust_pinyin_map holds the preferred readings; merge() places them
    #   in front of the raw ones.
    # * overwrite_pinyin_map replaces the readings of a code entirely.
    #
    # Every file is opened as UTF-8 explicitly so the result does not depend
    # on the platform's default encoding.
    raw_pinyin_map = {}
    with open('kHanyuPinyin.txt', encoding='utf-8') as fp:
        khanyupinyin = parse_pinyins(fp)
        raw_pinyin_map.update(khanyupinyin)
    with open('kXHC1983.txt', encoding='utf-8') as fp:
        kxhc1983 = parse_pinyins(fp)
        extend_pinyins(raw_pinyin_map, kxhc1983)
    with open('nonCJKUI.txt', encoding='utf-8') as fp:
        noncjkui = parse_pinyins(fp)
        extend_pinyins(raw_pinyin_map, noncjkui)
    with open('kMandarin_8105.txt', encoding='utf-8') as fp:
        adjust_pinyin_map = parse_pinyins(fp)
        extend_pinyins(raw_pinyin_map, adjust_pinyin_map)
    with open('kMandarin_overwrite.txt', encoding='utf-8') as fp:
        _map = parse_pinyins(fp)
        extend_pinyins(adjust_pinyin_map, _map)
        extend_pinyins(raw_pinyin_map, adjust_pinyin_map)
    with open('kMandarin.txt', encoding='utf-8') as fp:
        _map = parse_pinyins(fp)
        extend_pinyins(adjust_pinyin_map, _map)
        extend_pinyins(raw_pinyin_map, adjust_pinyin_map)
    with open('kTGHZ2013.txt', encoding='utf-8') as fp:
        _map = parse_pinyins(fp)
        extend_pinyins(adjust_pinyin_map, _map)
        extend_pinyins(raw_pinyin_map, adjust_pinyin_map)
    with open('kHanyuPinlu.txt', encoding='utf-8') as fp:
        khanyupinyinlu = parse_pinyins(fp)
        # Merge the data just parsed; the previous code re-used the stale
        # ``_map`` from kTGHZ2013.txt, so kHanyuPinlu was never merged.
        extend_pinyins(adjust_pinyin_map, khanyupinyinlu)
        extend_pinyins(raw_pinyin_map, adjust_pinyin_map)
    with open('GBK_PUA.txt', encoding='utf-8') as fp:
        pua_pinyin_map = parse_pinyins(fp)
        extend_pinyins(raw_pinyin_map, pua_pinyin_map)
    with open('kanji.txt', encoding='utf-8') as fp:
        _map = parse_pinyins(fp)
        # kanji.txt only fills in codes that no other source provides.
        extend_pinyins(raw_pinyin_map, _map, only_no_exists=True)
    with open('overwrite.txt', encoding='utf-8') as fp:
        overwrite_pinyin_map = parse_pinyins(fp)
        extend_pinyins(raw_pinyin_map, overwrite_pinyin_map)

    new_pinyin_map = merge(raw_pinyin_map, adjust_pinyin_map,
                           overwrite_pinyin_map)
    new_pinyin_map = sort_pinyin_dict(new_pinyin_map)

    # Sanity checks: no code from any source may be lost by the merge.
    assert len(new_pinyin_map) == len(raw_pinyin_map)
    code_set = set(new_pinyin_map.keys())
    assert set(khanyupinyin.keys()) - code_set == set()
    assert set(khanyupinyinlu.keys()) - code_set == set()
    assert set(kxhc1983.keys()) - code_set == set()
    assert set(adjust_pinyin_map.keys()) - code_set == set()
    assert set(overwrite_pinyin_map.keys()) - code_set == set()
    assert set(pua_pinyin_map.keys()) - code_set == set()

    with open('pinyin.txt', 'w', encoding='utf-8') as fp:
        fp.write('# version: 0.10.2\n')
        fp.write('# source: https://github.com/mozillazg/pinyin-data\n')
        save_data(new_pinyin_map, fp)
# 手工纠正错误的拼音数据
# 井号开头的行将会被忽略,可以用作注释
# 数据格式:{code point}: {pinyins} # {hanzi}
# 示例:
# U+4E2D: zhōng,zhòng # 中
U+5353: zhuó,zhuō # 卓
U+5565: shá,shà # 啥
U+5666: yuě,huì # 噦
U+59B3: nǐ,nǎi # 妳
U+8BB8: xǔ,hǔ # 许
U+94AD: tǒu,dǒu # 钭
U+9E00: chǔ,zhú,chù # 鸀
U+E815: yè # 
U+E816: zuǒ,yǒu # 
U+E81B: zhòu,zhū # 
U+E81D: jié,jiē # 
U+E824: zhòu # 
U+E826: shǒu # 
U+E82B: fēng # 
U+E82C: gòng # 
U+E82E: huì,kuì # 
U+E830: jiān # 
U+E831: ēn # 
U+E832: xiǎo # 
U+E834: lóu,lǘ # 
U+E835: cǎn,shān,cēn # 
U+E836: zhú # 
U+E838: wǎng # 
U+E83A: yáng,xiáng # 
U+E83D: bà,bēi # 
U+E83F: zhuān,zhuán,chuǎn,chún # 
U+E842: kuì,huì # 
U+E843: juǎn # 
U+E846: qíng # 
U+E84A: yé,yá # 
U+E850: chuài # 
U+E854: zhuó # 
U+E864: luán # 
U+241FE: yíng # 𤇾
U+275C8: nú # 𧗈
U+47C1: xiāo,chāo # 䟁
U+9EBF: mí # 麿
U+7C17: zhù # 簗
U+8279: cǎo # 艹
U+88CF: lǐ # 裏
U+88E1: lǐ # 裡
U+5206: fēn,fèn,fén # 分
U+208E1: fèng # 𠣡
U+2589F: hù # 𥢟
U+258F9: ràn # 𥣹
U+287B3: qú # 𨞳
U+2A008: yuān # 𪀈
U+9EFE: mǐn,miǎn,měng # 黾
U+55A3: xǔ # 喣
U+529A: zhú # 劚
U+532E: kuì,guì # 匮
U+9400: kuì,guì # 鐀
U+87AB: shì,zhē # 螫
U+5C82: qǐ,kǎi # 岂
U+534E: huá,huà,huā # 华
U+5455: ǒu,ōu,òu # 呕
U+4ECE: cóng,zòng # 从
U+513F: ér,er,rén # 儿
此差异已折叠。
# -*- coding: utf-8 -*-
"""生成初始的 kMandarin_8105.txt"""
from merge_unihan import parse_pinyins, code_to_hanzi
def parse_china_x():
    """Yield the first field of every data line in the 8105-character list.

    Reads ``tools/china-8105-06062014.txt`` (relative to the current working
    directory) as UTF-8, so the result does not depend on the platform's
    default encoding.  Blank lines and lines starting with ``#`` are skipped;
    for every other line the first whitespace-separated field is yielded.
    """
    with open('tools/china-8105-06062014.txt', encoding='utf-8') as fp:
        for line in fp:
            line = line.strip()
            if line.startswith('#') or not line:
                continue
            yield line.split()[0]
def parse_zdic():
    """Return the result of ``parse_pinyins`` for ``zdic.txt``.

    The file is opened as UTF-8 explicitly so parsing does not depend on the
    platform's default encoding.
    """
    with open('zdic.txt', encoding='utf-8') as fp:
        return parse_pinyins(fp)
def parse_kmandain():
    """Return the result of ``parse_pinyins`` for ``pinyin.txt``.

    NOTE(review): despite the function name, the file read here is
    ``pinyin.txt`` - confirm this is intended.  The file is opened as UTF-8
    explicitly so parsing does not depend on the platform's default encoding.
    """
    with open('pinyin.txt', encoding='utf-8') as fp:
        return parse_pinyins(fp)
def diff(kmandarin, zdic, commons):
    """Yield one report line for every code point in *commons*.

    For each code point the first reading in *kmandarin* is preferred and
    the first reading in *zdic* is the fallback:

    * present in *kmandarin*: ``code: reading # hanzi``, with
      `` -> zdic_reading`` appended when *zdic* has a different first
      reading;
    * present only in *zdic*: ``code: reading # hanzi``;
    * present in neither: a commented-out placeholder
      ``# code: <- # hanzi``.

    *kmandarin* and *zdic* are mappings from code point to a sequence of
    readings; *commons* is an iterable of code points.
    """
    for code in commons:
        char = code_to_hanzi(code)

        if code not in kmandarin:
            if code in zdic:
                yield '{0}: {1} # {2}'.format(code, zdic[code][0], char)
            else:
                # No reading anywhere: leave '<-' where the reading belongs.
                yield '# {0}: {1} # {2}'.format(code, '<-', char)
            continue

        reading = kmandarin[code][0]
        disagrees = code in zdic and reading != zdic[code][0]
        if disagrees:
            yield '{0}: {1} # {2} -> {3}'.format(
                code, reading, char, zdic[code][0]
            )
        else:
            yield '{0}: {1} # {2}'.format(code, reading, char)
if __name__ == '__main__':
    # Load both reading sources (zdic first, then kMandarin), then print
    # one report line per code point of the 8105 list.
    zdic_readings = parse_zdic()
    kmandarin_readings = parse_kmandain()
    for report_line in diff(
            kmandarin_readings, zdic_readings, parse_china_x()):
        print(report_line)
# -*- coding: utf-8 -*-
import re
import sys
sys.path.append('.')
from merge_unihan import parse_pinyins
def get_pinyins(file_path):
    """Return the result of ``parse_pinyins`` applied to *file_path*.

    The file is closed before the parsed result is handed back.
    """
    with open(file_path) as data_file:
        parsed = parse_pinyins(data_file)
    return parsed
def get_pua_map():
text = '''
# A6D9 E78D () FE10 (︐)
# A6DA E78E () FE12 (︒)
# A6DB E78F () FE11 (︑)
# A6DC E790 () FE13 (︓)
# A6DD E791 () FE14 (︔)
# A6DE E792 () FE15 (︕)
# A6DF E793 () FE16 (︖)
# A6EC E794 () FE17 (︗)
# A6ED E795 () FE18 (︘)
# A8BC E7C7 () 1E3F (ḿ) 1E3F (ḿ)
# A8BF E7C8 () 01F9 (ǹ) 01F9 (ǹ)
# A989 E7E7 () 303E (〾) 303E (〾)
# A98A E7E8 () 2FF0 (⿰) 2FF0 (⿰)
# A98B E7E9 () 2FF1 (⿱) 2FF1 (⿱)
# A98C E7EA () 2FF2 (⿲) 2FF2 (⿲)
# A98D E7EB () 2FF3 (⿳) 2FF3 (⿳)
# A98E E7EC () 2FF4 (⿴) 2FF4 (⿴)
# A98F E7ED () 2FF5 (⿵) 2FF5 (⿵)
# A990 E7EE () 2FF6 (⿶) 2FF6 (⿶)
# A991 E7EF () 2FF7 (⿷) 2FF7 (⿷)
# A992 E7F0 () 2FF8 (⿸) 2FF8 (⿸)
# A993 E7F1 () 2FF9 (⿹) 2FF9 (⿹)
# A994 E7F2 () 2FFA (⿺) 2FFA (⿺)
# A995 E7F3 () 2FFB (⿻) 2FFB (⿻)
FE50 E815 () 2E81 (⺁) 2E81 (⺁)
FE51 E816 () E816 () 20087 (𠂇)
FE52 E817 () E817 () 20089 (𠂉)
FE53 E818 () E818 () 200CC (𠃌)
FE54 E819 () 2E84 (⺄) 2E84 (⺄)
FE55 E81A () 3473 (㑳) 3473 (㑳)
FE56 E81B () 3447 (㑇) 3447 (㑇)
FE57 E81C () 2E88 (⺈) 2E88 (⺈)
FE58 E81D () 2E8B (⺋) 2E8B (⺋)
FE59 E81E () E81E () 9FB4 (龴)
FE5A E81F () 359E (㖞) 359E (㖞)
FE5B E820 () 361A (㘚) 361A (㘚)
FE5C E821 () 360E (㘎) 360E (㘎)
FE5D E822 () 2E8C (⺌) 2E8C (⺌)
FE5E E823 () 2E97 (⺗) 2E97 (⺗)
FE5F E824 () 396E (㥮) 396E (㥮)
FE60 E825 () 3918 (㤘) 3918 (㤘)
FE61 E826 () E826 () 9FB5 (龵)
FE62 E827 () 39CF (㧏) 39CF (㧏)
FE63 E828 () 39DF (㧟) 39DF (㧟)
FE64 E829 () 3A73 (㩳) 3A73 (㩳)
FE65 E82A () 39D0 (㧐) 39D0 (㧐)
FE66 E82B () E82B () 9FB6 (龶)
FE67 E82C () E82C () 9FB7 (龷)
FE68 E82D () 3B4E (㭎) 3B4E (㭎)
FE69 E82E () 3C6E (㱮) 3C6E (㱮)
FE6A E82F () 3CE0 (㳠) 3CE0 (㳠)
FE6B E830 () 2EA7 (⺧) 2EA7 (⺧)
FE6C E831 () E831 () 215D7 (𡗗)
FE6D E832 () E832 () 9FB8 (龸)
FE6E E833 () 2EAA (⺪) 2EAA (⺪)
FE6F E834 () 4056 (䁖) 4056 (䁖)
FE70 E835 () 415F (䅟) 415F (䅟)
FE71 E836 () 2EAE (⺮) 2EAE (⺮)
FE72 E837 () 4337 (䌷) 4337 (䌷)
FE73 E838 () 2EB3 (⺳) 2EB3 (⺳)
FE74 E839 () 2EB6 (⺶) 2EB6 (⺶)
FE75 E83A () 2EB7 (⺷) 2EB7 (⺷)
FE76 E83B () E83B () 2298F (𢦏)
FE77 E83C () 43B1 (䎱) 43B1 (䎱)
FE78 E83D () 43AC (䎬) 43AC (䎬)
FE79 E83E () 2EBB (⺻) 2EBB (⺻)
FE7A E83F () 43DD (䏝) 43DD (䏝)
FE7B E840 () 44D6 (䓖) 44D6 (䓖)
FE7C E841 () 4661 (䙡) 4661 (䙡)
FE7D E842 () 464C (䙌) 464C (䙌)
FE7E E843 () E843 () 9FB9 (龹)
FE80 E844 () 4723 (䜣) 4723 (䜣)
FE81 E845 () 4729 (䜩) 4729 (䜩)
FE82 E846 () 477C (䝼) 477C (䝼)
FE83 E847 () 478D (䞍) 478D (䞍)
FE84 E848 () 2ECA (⻊) 2ECA (⻊)
FE85 E849 () 4947 (䥇) 4947 (䥇)
FE86 E84A () 497A (䥺) 497A (䥺)
FE87 E84B () 497D (䥽) 497D (䥽)
FE88 E84C () 4982 (䦂) 4982 (䦂)
FE89 E84D () 4983 (䦃) 4983 (䦃)
FE8A E84E () 4985 (䦅) 4985 (䦅)
FE8B E84F () 4986 (䦆) 4986 (䦆)
FE8C E850 () 499F (䦟) 499F (䦟)
FE8D E851 () 499B (䦛) 499B (䦛)
FE8E E852 () 49B7 (䦷) 49B7 (䦷)
FE8F E853 () 49B6 (䦶) 49B6 (䦶)
FE90 E854 () E854 () 9FBA (龺)
FE91 E855 () E855 () 241FE (𤇾)
FE92 E856 () 4CA3 (䲣) 4CA3 (䲣)
FE93 E857 () 4C9F (䲟) 4C9F (䲟)
FE94 E858 () 4CA0 (䲠) 4CA0 (䲠)
FE95 E859 () 4CA1 (䲡) 4CA1 (䲡)
FE96 E85A () 4C77 (䱷) 4C77 (䱷)
FE97 E85B () 4CA2 (䲢) 4CA2 (䲢)
FE98 E85C () 4D13 (䴓) 4D13 (䴓)
FE99 E85D () 4D14 (䴔) 4D14 (䴔)
FE9A E85E () 4D15 (䴕) 4D15 (䴕)
FE9B E85F () 4D16 (䴖) 4D16 (䴖)
FE9C E860 () 4D17 (䴗) 4D17 (䴗)
FE9D E861 () 4D18 (䴘) 4D18 (䴘)
FE9E E862 () 4D19 (䴙) 4D19 (䴙)
FE9F E863 () 4DAE (䶮) 4DAE (䶮)
FEA0 E864 () E864 () 9FBB (龻)
'''.strip()
for line in text.split('\n'):
if line.startswith('#'):
continue
gb, gbk, gb_18030, unicode_4_1 = line.split('\t')
# print(gb, gbk, gb_18030, unicode_4_1)
# print(get_han_point(gbk), get_han_point(unicode_4_1))
yield get_han_point(gbk), get_han_point(unicode_4_1)
def get_han_point(text):
    """Return the first ``(code_point, character)`` pair found in *text*.

    *text* is expected to look like ``"2E81 (x)"``: an upper-case
    alphanumeric code, a space, and a character in parentheses. Empty
    *text* gives ``('', '')``. Non-empty *text* without such a pair raises
    ``IndexError``.
    """
    if text:
        pairs = re.findall(
            r'(?P<point>[A-Z0-9]+) \((?P<han>[^\)]+)\)', text)
        return pairs[0]
    return '', ''
def point_to_u_point(point):
    """Return *point* upper-cased and prefixed with ``U+`` if it is not yet."""
    upper = point.upper()
    return upper if upper.startswith('U+') else 'U+' + upper
def gen_pua_data(gbk, unicode_4_1, pinyin_map):
    """Format one data line for a GBK code point and its Unicode 4.1 mapping.

    *gbk* and *unicode_4_1* are ``(code_point, character)`` pairs. The
    readings are looked up in *pinyin_map* under the ``U+``-prefixed
    Unicode 4.1 code point and joined with commas. When no reading is
    found the line is prefixed with ``# `` so that it reads as a comment.
    """
    gbk_code, gbk_char = gbk
    gbk_code = point_to_u_point(gbk_code)
    unihan_code, unihan_char = unicode_4_1
    unihan_code = point_to_u_point(unihan_code)

    readings = ','.join(pinyin_map.get(unihan_code, []))
    marker = '' if readings else '# '

    template = (
        '{prefix}{gbk_point}: {pinyins} # {gbk_han} '
        'Unihan: {unicode_4_1_point} {unicode_4_1_han}'
    )
    return template.format(
        prefix=marker,
        gbk_point=gbk_code,
        pinyins=readings,
        gbk_han=gbk_char,
        unicode_4_1_point=unihan_code,
        unicode_4_1_han=unihan_char,
    )
if __name__ == '__main__':
    # Print a two-line header followed by one data line per mapped
    # code point, using the readings found in pinyin.txt.
    known_pinyins = get_pinyins('pinyin.txt')
    header = ('# GBK/GB 18030 PUA 映射\n'
              '# 详见:https://zh.wikipedia.org/wiki/GB_18030#PUA')
    print(header)
    for gbk_pair, unihan_pair in get_pua_map():
        print(gen_pua_data(gbk_pair, unihan_pair, known_pinyins))
# -*- coding: utf-8 -*-
"""补充 8105 中汉字的拼音数据"""
from collections import namedtuple
import re
import sys
from pyquery import PyQuery
import requests
re_pinyin = re.compile(r'拼音:(?P<pinyin>\S+) ')
re_code = re.compile(r'统一码\w?:(?P<code>\S+) ')
re_alternate = re.compile(r'异体字:\s+?(?P<alternate>\S+)')
HanziInfo = namedtuple('HanziInfo', 'pinyin code alternate')
def fetch_html(url, params, timeout=30):
    """Fetch *url* with the query string *params* and return the raw body.

    Args:
        url: address to request with HTTP GET.
        params: mapping of query-string parameters.
        timeout: seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole run.

    Returns:
        The response body (``response.content``).

    Raises:
        requests.exceptions.Timeout: if the server does not answer within
            *timeout* seconds.
    """
    response = requests.get(url, params=params, timeout=timeout)
    return response.content
def fetch_info(hanzi):
    """Look up *hanzi* on guoxuedashi.com and return a ``HanziInfo``.

    The result page is fetched with ``fetch_html``. The pinyin and the
    code are extracted with ``re_pinyin`` and ``re_code`` from the text of
    the second element matching ``table.zui td``; the alternate character
    is extracted with ``re_alternate`` from the ``.info_txt2 em`` text.
    Pinyin and alternate default to ``''`` when their pattern does not
    match; a missing code raises ``AttributeError``.
    """
    html = fetch_html(
        'http://www.guoxuedashi.com/zidian/so.php',
        {
            'sokeyzi': hanzi,
            'kz': 1,
            'submit': '',
        },
    )

    pq = PyQuery(PyQuery(html)('table.zui td')[1])
    text = pq('tr').text()
    # NOTE(review): ``pq`` is the narrowed cell here, yet it is called with
    # the full page; presumably PyQuery parses it as a fresh document --
    # confirm against the PyQuery documentation before touching this.
    text_alternate = pq(html)('.info_txt2')('em').text()

    found_pinyin = re_pinyin.search(text)
    pinyin = '' if found_pinyin is None else found_pinyin.group('pinyin')

    code = re_code.search(text).group('code')

    found_alternate = re_alternate.search(text_alternate)
    if found_alternate is None:
        alternate = ''
    else:
        alternate = found_alternate.group('alternate')

    return HanziInfo(pinyin, code, alternate)
def parse_hanzi(hanzi):
    """Fetch the info for *hanzi*, resolving its alternate when needed.

    When the character has no pinyin of its own but names an alternate
    character, the alternate is fetched as well and stored as a nested
    ``HanziInfo`` in the ``alternate`` field; otherwise that field is
    ``''``.
    """
    info = fetch_info(hanzi)
    needs_alternate = (not info.pinyin) and info.alternate
    alternate = fetch_info(info.alternate) if needs_alternate else ''
    return HanziInfo(info.pinyin, info.code, alternate)
def main(lines):
    """Fill in the pinyin of placeholder lines and yield every line.

    A placeholder line starts with ``# U+`` and contains ``<-``. For such
    a line the code point is looked up with ``parse_hanzi``; when a pinyin
    is found the leading two characters (``# ``) are dropped and ``<-`` is
    replaced by the pinyin. Every line, changed or not, is yielded with
    surrounding whitespace stripped.

    *lines* is an iterable of text lines, e.g. an open file.
    """
    for line in lines:
        if line.startswith('# U+') and '<-' in line:
            # '# U+xxx: ...' -> 'U+xxx'
            code = line.split(':')[0].strip('# ')
            # 'U+xxx' -> 'xxx'
            code = code[2:]
            info = parse_hanzi(code)
            pinyin = info.pinyin
            extra = ''
            if (not pinyin) and info.alternate:
                # No pinyin of its own: borrow the alternate character's
                # pinyin and record which code point it came from.
                alternate = info.alternate
                pinyin = alternate.pinyin
                extra = ' => U+{0}'.format(alternate.code)
            if ',' in pinyin:
                # Several readings: keep the first one, append the rest as
                # a ' ?-> ' note at the end of the line.
                first_pinyin, extra_pinyin = pinyin.split(',', 1)
                pinyin = first_pinyin
                extra += ' ?-> ' + extra_pinyin
            if pinyin:
                line = line.strip()
                # '# U+xxx: ...' -> 'U+xxx: ...'
                line = line[2:]
                line = line.replace('<-', pinyin)
                if extra:
                    line += extra
        yield line.strip()
if __name__ == '__main__':
    # The first command-line argument is the path of the file to process;
    # the completed lines are written to standard output.
    with open(sys.argv[1]) as source:
        for completed_line in main(source):
            print(completed_line)
# Default target: list the available targets.
.PHONY: help
help:
	@echo "parse parse Unihan database "
	@echo "update update Unihan database"
	@echo "diff diff between Unihan data and parsed data"

# Run the parser script.
.PHONY:parse
parse:
	@python parse_pinyin.py

# Remove the local Unihan files, download and unpack a fresh Unihan.zip,
# then run the parser script. The leading '-' lets make continue when
# there is nothing to remove.
.PHONY:update
update:
	-rm Unihan*
	wget ftp://ftp.unicode.org/Public/UNIDATA/Unihan.zip -O Unihan.zip
	unzip Unihan.zip
	python parse_pinyin.py

# Run diff.sh.
.PHONY:diff
diff:
	@bash diff.sh
# Unihan Database
http://www.unicode.org/charts/unihan.html
Update Unihan database:
```
make update
```
#!/usr/bin/env bash
set -euo pipefail
IFS=$'\n\t'
# Print a table comparing, for each reading kind, the number of lines in
# the parsed "<kind>.txt" file with the number of non-comment lines in
# Unihan_Readings.txt that mention that kind.
function main() {
    local kind unihanCount parsedCount

    printf '%-14s %-8s %-8s\n' '' 'parsed' 'Unihan'
    for kind in 'kHanyuPinyin' 'kMandarin' 'kHanyuPinlu' 'kXHC1983'
    do
        # Read the files directly instead of piping them through the
        # pager `less`: no dependency on a pager being installed and no
        # LESSOPEN preprocessing of the input.
        unihanCount=$(grep -v '^#' Unihan_Readings.txt | grep -c "$kind")
        # grep -c "" counts every line of the parsed file.
        parsedCount=$(grep -c "" "$kind".txt)
        printf '%-14s %-8s %-8s\n' "$kind" "$parsedCount" "$unihanCount"
    done
}
main
../kHanyuPinlu.txt
\ No newline at end of file
../kHanyuPinyin.txt
\ No newline at end of file
../kMandarin.txt
\ No newline at end of file
../kTGHZ2013.txt
\ No newline at end of file
../kXHC1983.txt
\ No newline at end of file
此差异已折叠。
"""汉字拼音转换工具."""
from pypinyin.constants import BOPOMOFO
from pypinyin.constants import BOPOMOFO_FIRST
from pypinyin.constants import CYRILLIC
from pypinyin.constants import CYRILLIC_FIRST
from pypinyin.constants import FINALS
from pypinyin.constants import FINALS_TONE
from pypinyin.constants import FINALS_TONE2
from pypinyin.constants import FINALS_TONE3
from pypinyin.constants import FIRST_LETTER
from pypinyin.constants import INITIALS
from pypinyin.constants import NORMAL
from pypinyin.constants import Style
from pypinyin.constants import STYLE_BOPOMOFO
from pypinyin.constants import STYLE_BOPOMOFO_FIRST
from pypinyin.constants import STYLE_CYRILLIC
from pypinyin.constants import STYLE_CYRILLIC_FIRST
from pypinyin.constants import STYLE_FINALS
from pypinyin.constants import STYLE_FINALS_TONE
from pypinyin.constants import STYLE_FINALS_TONE2
from pypinyin.constants import STYLE_FINALS_TONE3
from pypinyin.constants import STYLE_FIRST_LETTER
from pypinyin.constants import STYLE_INITIALS
from pypinyin.constants import STYLE_NORMAL
from pypinyin.constants import STYLE_TONE
from pypinyin.constants import STYLE_TONE2
from pypinyin.constants import STYLE_TONE3
from pypinyin.constants import TONE
from pypinyin.constants import TONE2
from pypinyin.constants import TONE3
from pypinyin.core import lazy_pinyin
from pypinyin.core import load_phrases_dict
from pypinyin.core import load_single_dict
from pypinyin.core import pinyin
from pypinyin.core import slug
# Public API exposed by ``from pypinyin import *``: the conversion
# functions, the dictionary loaders, the ``Style`` object and the style
# constants imported above.
__all__ = [
    'pinyin', 'lazy_pinyin', 'slug', 'load_single_dict', 'load_phrases_dict',
    'Style', 'STYLE_NORMAL', 'NORMAL', 'STYLE_TONE', 'TONE', 'STYLE_TONE2',
    'TONE2', 'STYLE_TONE3', 'TONE3', 'STYLE_INITIALS', 'INITIALS',
    'STYLE_FINALS', 'FINALS', 'STYLE_FINALS_TONE', 'FINALS_TONE',
    'STYLE_FINALS_TONE2', 'FINALS_TONE2', 'STYLE_FINALS_TONE3', 'FINALS_TONE3',
    'STYLE_FIRST_LETTER', 'FIRST_LETTER', 'STYLE_BOPOMOFO', 'BOPOMOFO',
    'STYLE_BOPOMOFO_FIRST', 'BOPOMOFO_FIRST', 'STYLE_CYRILLIC', 'CYRILLIC',
    'STYLE_CYRILLIC_FIRST', 'CYRILLIC_FIRST'
]

# Package metadata.
__title__ = 'pypinyin'
# NOTE(review): presumably the version string the bumpversion
# configuration rewrites on release -- keep this line's format intact.
__version__ = '0.41.0'
__license__ = 'MIT'
__author__ = 'Hui Zhang'
__copyright__ = 'Copyright (c) 2021 Hui Zhang'
#!/usr/bin/env python3
from pypinyin.runner import main
# Run the runner's ``main`` only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    main()
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册