場景說明
把中文漢字轉成漢語拼音,包括:
- 純漢字轉拼音
- 漢字里面加有字母轉拼音
- 轉加聲調的拼音
- 轉用數字表示聲調的拼音
install
$ pip install pinyin
引用
代碼
pinyinutil.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pinyin
def to_pinyin(var_str):
    """Convert the Chinese characters in a string to tone-less pinyin.

    The conversion is delegated to ``pinyin.get`` with ``format='strip'``
    and an empty delimiter, so the syllables are concatenated without
    tone marks or separators.

    Examples (taken from the original documentation of this helper):
        釣魚島是中國的 -> diaoyudaoshizhongguode
        我是shui       -> woshishui
        AreYou好       -> AreYouhao
        None           -> '' (empty string)
        ''             -> '' (empty string)

    :param var_str: the text to convert; ``None`` is accepted and treated
        as "no text".
    :return: the converted string; ``''`` for ``None``, ``''`` or the
        literal string ``'None'``; the message ``'類型不對'`` ("wrong
        type") for any other non-``str`` argument.
    """
    # A real None is documented as producing an empty result; previously
    # callers had to pre-stringify it to 'None' to get that behaviour.
    if var_str is None:
        return ""
    if not isinstance(var_str, str):
        # Kept as a returned message (not an exception) so existing
        # callers that print or compare the result keep working.
        return '類型不對'
    # 'None' is what str(None) yields; existing callers rely on it being
    # treated as empty. Empty input needs no library call at all.
    if var_str in ("", "None"):
        return ""
    return pinyin.get(var_str, format='strip', delimiter="")
測試代碼:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import unittest
from util.pinyinutil import *
from pinyin._compat import u
class TestPinYin(unittest.TestCase):
    """Tests for ``to_pinyin`` and for the ``pinyin`` package helpers."""

    def test_to_pinyin(self):
        """Print each sample's conversion and check a string is returned."""
        # None is stringified before the call, so it reaches to_pinyin as
        # the literal 'None'.
        samples = ['釣魚島是中國的', '我是shui', 'AreYou好', None, '']
        for sample in samples:
            with self.subTest(sample=sample):
                converted = to_pinyin(str(sample))
                print('漢字[%s]=>拼音[%s]' % (sample, converted))
                self.assertIsInstance(converted, str)

    def test_get(self):
        """``pinyin.get`` in its three output formats, plus punctuation."""
        # The default format is expected to equal "diacritical".
        self.assertEqual(pinyin.get('你好'),
                         pinyin.get('你好', format="diacritical"))
        self.assertEqual(pinyin.get(u('你好'), format="strip"), u('nihao'))
        self.assertEqual(pinyin.get(u('你好'), format="numerical"), u('ni3hao3'))
        self.assertEqual(pinyin.get(u('你好'), format="diacritical"), u('nǐhǎo'))
        self.assertEqual(pinyin.get('你好嗎?'), u('nǐhǎoma?'))
        # Same sentence with a full-width question mark.
        self.assertEqual(pinyin.get('你好嗎?'), u('nǐhǎoma?'))
        self.assertEqual(pinyin.get('你好'), u('nǐhǎo'))
        self.assertEqual(pinyin.get('葉'), u('yè'))
        self.assertEqual(pinyin.get('少女'), u('shǎonǚ'))

    def test_get_with_delimiter(self):
        """``pinyin.get`` with a space passed as the delimiter."""
        self.assertEqual(pinyin.get('你好', " "), u('nǐ hǎo'))
        self.assertEqual(pinyin.get('你好嗎?', " "), u('nǐ hǎo ma ?'))
        # Full-width question mark variant.
        self.assertEqual(pinyin.get('你好嗎?', " "), u('nǐ hǎo ma ?'))

    def test_get_initial_with_delimiter(self):
        """``pinyin.get_initial`` with an explicit "-" delimiter."""
        self.assertEqual(pinyin.get_initial('你好', "-"), u('n-h'))
        self.assertEqual(pinyin.get_initial('你好嗎?', "-"), u('n-h-m-?'))
        # Full-width question mark variant.
        self.assertEqual(pinyin.get_initial('你好嗎?', "-"), u('n-h-m-?'))

    def test_get_initial(self):
        """``pinyin.get_initial`` with its default delimiter."""
        self.assertEqual(pinyin.get_initial('你好'), u('n h'))
        self.assertEqual(pinyin.get_initial('你好嗎?'), u('n h m ?'))
        # Full-width question mark variant.
        self.assertEqual(pinyin.get_initial('你好嗎?'), u('n h m ?'))
        self.assertEqual(pinyin.get_initial('你好'), 'n h')

    def test_mixed_chinese_english_input(self):
        """Latin letters mixed with Chinese are left in place."""
        self.assertEqual(pinyin.get('hi你好'), u('hinǐhǎo'))

    def test_correct_diacritical(self):
        """Tone marks are expected on the correct vowel of each syllable."""
        self.assertEqual(pinyin.get("操"), u("cāo"))
        self.assertEqual(pinyin.get("小"), u("xiǎo"))
        self.assertEqual(pinyin.get("絕"), u("jué"))
        self.assertEqual(pinyin.get("被"), u("bèi"))
        self.assertEqual(pinyin.get("略"), u("lüè"))
if __name__ == '__main__':
    # Run every test case in this module when the file is executed directly.
    unittest.main()