#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
from unicodedata import normalize
from pandas.compat import PY3, iteritems, u_safe
import pandas.core.strings as strings
# Kana that carry a sound mark need special handling: their half-width form
# is two characters (base kana + mark), so they are converted by
# ``_h2z_sm`` / ``_z2h_sm`` rather than by the one-to-one tables below.

try:
    _unichr = unichr  # noqa: F821 -- Python 2 spelling
except NameError:
    _unichr = chr  # Python 3: chr() already returns text


def _to_fullwidth(ascii_text):
    """Return *ascii_text* with each character replaced by its full-width form.

    Printable ASCII (U+0021-U+007E) and the full-width forms
    (U+FF01-U+FF5E) are laid out in the same order, 0xFEE0 apart, so the
    conversion is a fixed offset.  The space character is *not* covered by
    that offset (its full-width form is U+3000).
    """
    return ''.join(_unichr(ord(char) + 0xFEE0) for char in ascii_text)


# Half-width katakana and punctuation.  Written as escapes on purpose: a
# literal containing the real characters is easily width-normalised by an
# editor or text pipeline, which silently turns every table into a no-op.
# NOTE(review): the half-width semi-voiced mark U+FF9F is not listed here,
# although the voiced mark U+FF9E is -- confirm whether that is intentional.
_HKANA = (
    '\uff67\uff71\uff68\uff72\uff69\uff73\uff6a\uff74\uff6b\uff75'  # a i u e o (small, large)
    '\uff76\uff77\uff78\uff79\uff7a'  # ka ki ku ke ko
    '\uff7b\uff7c\uff7d\uff7e\uff7f'  # sa shi su se so
    '\uff80\uff81\uff6f\uff82\uff83\uff84'  # ta chi (small tsu) tsu te to
    '\uff85\uff86\uff87\uff88\uff89'  # na ni nu ne no
    '\uff8a\uff8b\uff8c\uff8d\uff8e'  # ha hi fu he ho
    '\uff8f\uff90\uff91\uff92\uff93'  # ma mi mu me mo
    '\uff6c\uff94\uff6d\uff95\uff6e\uff96'  # ya yu yo (small, large)
    '\uff97\uff98\uff99\uff9a\uff9b'  # ra ri ru re ro
    '\uff9c\uff66\uff9d'  # wa wo n
    '\uff9e'  # voiced sound mark
    '\uff70'  # prolonged sound mark
    '\uff65\uff62\uff63\uff61\uff64'  # middle dot, corner brackets, full stop, comma
)
# Full-width Latin letters, punctuation and digits.
_ZALPHA = _to_fullwidth('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                        'abcdefghijklmnopqrstuvwxyz')
# The trailing U+3000 is the ideographic (full-width) space.
_ZSYMBOL = _to_fullwidth('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~') + '\u3000'
_ZDIGIT = _to_fullwidth('0123456789')

# Mappings from a full-width character to its half-width counterpart.
# NFKC normalisation turns a half-width kana into its full-width form and a
# full-width Latin letter/digit/symbol into its ASCII form, hence the two
# different orientations of the comprehensions.
_KANA_MAPPER = {normalize('NFKC', c): c for c in _HKANA}
_ALPHA_MAPPER = {c: normalize('NFKC', c) for c in _ZALPHA}
_DIGIT_MAPPER = {c: normalize('NFKC', c) for c in _ZDIGIT}
_SYMBOL_MAPPER = {c: normalize('NFKC', c) for c in _ZSYMBOL}
def _reverse_dict(dict):
    """Return a new dict with the keys and values of *dict* swapped.

    The values of *dict* must be hashable.  When a value occurs more than
    once, the key met last during iteration is the one that is kept.

    The parameter keeps its original name (which shadows the builtin inside
    this function) so that existing callers are unaffected.
    """
    # Plain ``items()`` gives the same result on Python 2 and 3 for these
    # small tables and avoids relying on the ``pandas.compat.iteritems``
    # shim, which newer pandas releases no longer provide.
    return {value: key for key, value in dict.items()}
def _ord_dict(dict):
    """Return a copy of *dict* keyed by code point instead of by character.

    Every key must be a single character, because it is passed to ``ord``.
    The values are kept as they are.  The result has the shape that
    ``unicode.translate`` / ``str.translate`` expects.

    The parameter keeps its original name (which shadows the builtin inside
    this function) so that existing callers are unaffected.
    """
    # Plain ``items()`` gives the same result on Python 2 and 3 for these
    # small tables and avoids relying on the ``pandas.compat.iteritems``
    # shim, which newer pandas releases no longer provide.
    return {ord(char): value for char, value in dict.items()}
# Translation tables keyed by code point, as unicode.translate expects.
# Z2H: full-width -> half-width, taken directly from the mappers.
_Z2H_KANA, _Z2H_ALPHA, _Z2H_DIGIT, _Z2H_SYMBOL = (
    _ord_dict(table)
    for table in (_KANA_MAPPER, _ALPHA_MAPPER, _DIGIT_MAPPER, _SYMBOL_MAPPER)
)
# H2Z: half-width -> full-width, i.e. the same mappers inverted first.
_H2Z_KANA, _H2Z_ALPHA, _H2Z_DIGIT, _H2Z_SYMBOL = (
    _ord_dict(_reverse_dict(table))
    for table in (_KANA_MAPPER, _ALPHA_MAPPER, _DIGIT_MAPPER, _SYMBOL_MAPPER)
)
def _soundmark_pairs():
    """Return ``((half_width_pair, full_width_kana), ...)`` for marked kana.

    A voiced or semi-voiced kana is a single character in full-width form
    but two characters (base kana + sound mark) in half-width form.  The
    half-width pairs are spelled as escapes so the literals cannot be
    silently width-normalised; NFKC normalisation of a pair yields the
    single composed full-width character.
    """
    voiced_mark = '\uff9e'
    semi_voiced_mark = '\uff9f'
    voiced_bases = (
        '\uff76\uff77\uff78\uff79\uff7a'  # ka ki ku ke ko -> ga gi gu ge go
        '\uff7b\uff7c\uff7d\uff7e\uff7f'  # sa shi su se so -> za ji zu ze zo
        '\uff80\uff81\uff82\uff83\uff84'  # ta chi tsu te to -> da ji zu de do
        '\uff8a\uff8b\uff8c\uff8d\uff8e'  # ha hi fu he ho -> ba bi bu be bo
    )
    semi_voiced_bases = '\uff8a\uff8b\uff8c\uff8d\uff8e'  # -> pa pi pu pe po
    vu_base = '\uff73'  # u -> vu

    halves = [base + voiced_mark for base in voiced_bases]
    halves += [base + semi_voiced_mark for base in semi_voiced_bases]
    halves.append(vu_base + voiced_mark)
    return tuple((half, normalize('NFKC', half)) for half in halves)


# Shared by both directions so the two functions cannot drift apart.
_SOUNDMARK_PAIRS = _soundmark_pairs()


def _h2z_sm(text):
    """Replace half-width "kana + sound mark" pairs by full-width kana.

    Only the two-character sequences are touched; unmarked half-width kana
    are left for the one-to-one translation table applied by the caller.
    """
    for half, full in _SOUNDMARK_PAIRS:
        text = text.replace(half, full)
    return text


def _z2h_sm(text):
    """Replace full-width voiced/semi-voiced kana by half-width pairs.

    Each such kana becomes the half-width base kana followed by the
    half-width sound mark; every other character is left unchanged.
    """
    for half, full in _SOUNDMARK_PAIRS:
        text = text.replace(full, half)
    return text
def str_z2h(self, kana=True, alpha=True, digit=True, symbol=True):
    """Convert full-width characters to half-width.

    Parameters
    ----------
    kana, alpha, digit, symbol : bool, default True
        Select which of the tables ``_Z2H_KANA``, ``_Z2H_ALPHA``,
        ``_Z2H_DIGIT`` and ``_Z2H_SYMBOL`` take part in the conversion.
        When ``kana`` is set, each value is also passed through
        ``_z2h_sm`` before the table is applied.

    Returns
    -------
    The result of ``self._wrap_result`` applied to the converted values.
    """
    choices = (
        (kana, _Z2H_KANA),
        (alpha, _Z2H_ALPHA),
        (digit, _Z2H_DIGIT),
        (symbol, _Z2H_SYMBOL),
    )
    mapper = {}
    for wanted, table in choices:
        if wanted:
            mapper.update(table)

    def f(x):
        # On Python 2 the value goes through u_safe before anything else.
        if not PY3:
            x = u_safe(x)
        # Sound-marked kana are rewritten before the one-to-one table runs.
        if kana:
            x = _z2h_sm(x)
        return x.translate(mapper)

    # The accessor keeps its values under ``series`` or, failing that,
    # under ``_data``.
    try:
        target = self.series
    except AttributeError:
        target = self._data
    converted = strings._na_map(f, target)
    return self._wrap_result(converted)
def str_h2z(self, kana=True, alpha=True, digit=True, symbol=True):
    """Convert half-width characters to full-width.

    Parameters
    ----------
    kana, alpha, digit, symbol : bool, default True
        Select which of the tables ``_H2Z_KANA``, ``_H2Z_ALPHA``,
        ``_H2Z_DIGIT`` and ``_H2Z_SYMBOL`` take part in the conversion.
        When ``kana`` is set, each value is also passed through
        ``_h2z_sm`` before the table is applied.

    Returns
    -------
    The result of ``self._wrap_result`` applied to the converted values.
    """
    choices = (
        (kana, _H2Z_KANA),
        (alpha, _H2Z_ALPHA),
        (digit, _H2Z_DIGIT),
        (symbol, _H2Z_SYMBOL),
    )
    mapper = {}
    for wanted, table in choices:
        if wanted:
            mapper.update(table)

    def f(x):
        # On Python 2 the value goes through u_safe before anything else.
        if not PY3:
            x = u_safe(x)
        # "Kana + sound mark" pairs are merged before the one-to-one table
        # runs.
        if kana:
            x = _h2z_sm(x)
        return x.translate(mapper)

    # The accessor keeps its values under ``series`` or, failing that,
    # under ``_data``.
    try:
        target = self.series
    except AttributeError:
        target = self._data
    converted = strings._na_map(f, target)
    return self._wrap_result(converted)
# Attach the converters to strings.StringMethods, leaving untouched any
# attribute of the same name that already exists there.
for _attr, _func in (('z2h', str_z2h), ('h2z', str_h2z)):
    if not hasattr(strings.StringMethods, _attr):
        setattr(strings.StringMethods, _attr, _func)
del _attr, _func