# Source code for japandas.core.strings

#!/usr/bin/env python
# coding: utf-8

from __future__ import unicode_literals

from unicodedata import normalize

from pandas.compat import PY3, iteritems, u_safe
import pandas.core.strings as strings


# Sound marks require special handling (see _h2z_sm / _z2h_sm).
#
# The width-sensitive characters below are written as \u escapes on purpose:
# Unicode-normalizing tools (NFKC) silently fold half-width kana and
# full-width ASCII into their canonical counterparts, which would turn every
# mapper below into an identity mapping.

# Half-width katakana and punctuation (U+FF61..U+FF9E).
_HKANA = (
    '\uff67\uff71\uff68\uff72\uff69\uff73\uff6a\uff74\uff6b\uff75'  # small/large a i u e o
    '\uff76\uff77\uff78\uff79\uff7a'  # ka ki ku ke ko
    '\uff7b\uff7c\uff7d\uff7e\uff7f'  # sa shi su se so
    '\uff80\uff81\uff6f\uff82\uff83\uff84'  # ta chi (small tsu) tsu te to
    '\uff85\uff86\uff87\uff88\uff89'  # na ni nu ne no
    '\uff8a\uff8b\uff8c\uff8d\uff8e'  # ha hi fu he ho
    '\uff8f\uff90\uff91\uff92\uff93'  # ma mi mu me mo
    '\uff6c\uff94\uff6d\uff95\uff6e\uff96'  # small/large ya yu yo
    '\uff97\uff98\uff99\uff9a\uff9b'  # ra ri ru re ro
    '\uff9c\uff66\uff9d'  # wa wo n
    '\uff9e'  # voiced sound mark
    '\uff70\uff65\uff62\uff63\uff61\uff64'  # long-vowel mark, middle dot, corner brackets, full stop, comma
)
# Full-width Latin letters (U+FF21..U+FF3A, U+FF41..U+FF5A).
_ZALPHA = (
    '\uff21\uff22\uff23\uff24\uff25\uff26\uff27\uff28\uff29\uff2a\uff2b\uff2c\uff2d'  # A-M
    '\uff2e\uff2f\uff30\uff31\uff32\uff33\uff34\uff35\uff36\uff37\uff38\uff39\uff3a'  # N-Z
    '\uff41\uff42\uff43\uff44\uff45\uff46\uff47\uff48\uff49\uff4a\uff4b\uff4c\uff4d'  # a-m
    '\uff4e\uff4f\uff50\uff51\uff52\uff53\uff54\uff55\uff56\uff57\uff58\uff59\uff5a'  # n-z
)
# Full-width ASCII punctuation plus the ideographic space.
_ZSYMBOL = (
    '\uff01\uff02\uff03\uff04\uff05\uff06\uff07\uff08'  # ! " # $ % & ' (
    '\uff09\uff0a\uff0b\uff0c\uff0d\uff0e\uff0f'  # ) * + , - . /
    '\uff1a\uff1b\uff1c\uff1d\uff1e\uff1f\uff20'  # : ; < = > ? @
    '\uff3b\uff3c\uff3d\uff3e\uff3f\uff40'  # [ backslash ] ^ _ backtick
    '\uff5b\uff5c\uff5d\uff5e'  # { | } ~
    '\u3000'  # ideographic (full-width) space
)
# Full-width digits (U+FF10..U+FF19).
_ZDIGIT = '\uff10\uff11\uff12\uff13\uff14\uff15\uff16\uff17\uff18\uff19'

# Mapping from full-width to half-width.
# NFKC turns a half-width kana into its full-width form and a full-width
# ASCII character into its plain ASCII form, so the kana table is keyed on the
# normalized character while the other tables are keyed on the raw one.
_KANA_MAPPER = {normalize('NFKC', c): c for c in _HKANA}
_ALPHA_MAPPER = {c: normalize('NFKC', c) for c in _ZALPHA}
_DIGIT_MAPPER = {c: normalize('NFKC', c) for c in _ZDIGIT}
_SYMBOL_MAPPER = {c: normalize('NFKC', c) for c in _ZSYMBOL}


def _reverse_dict(dict):
    """Return a new dict with the keys and values of ``dict`` swapped.

    If several keys share one value, the pair visited last wins.

    The parameter name shadows the builtin ``dict``; it is kept unchanged so
    the signature stays the same for existing callers.
    """
    # ``.items()`` works on both Python 2 and 3 and avoids relying on the
    # Python 2 compatibility shim ``pandas.compat.iteritems``.
    return {value: key for key, value in dict.items()}


def _ord_dict(dict):
    """Return a copy of ``dict`` whose keys are replaced by ``ord(key)``.

    Every key must therefore be a single character. Values are left
    untouched.

    The parameter name shadows the builtin ``dict``; it is kept unchanged so
    the signature stays the same for existing callers.
    """
    # ``.items()`` works on both Python 2 and 3 and avoids relying on the
    # Python 2 compatibility shim ``pandas.compat.iteritems``.
    return {ord(char): value for char, value in dict.items()}


# Tables keyed by code point, for use with unicode.translate.
# Z2H tables come straight from the mappers; H2Z tables are built from the
# same mappers with keys and values swapped.
_Z2H_KANA, _Z2H_ALPHA, _Z2H_DIGIT, _Z2H_SYMBOL = (
    _ord_dict(table)
    for table in (_KANA_MAPPER, _ALPHA_MAPPER, _DIGIT_MAPPER, _SYMBOL_MAPPER)
)
_H2Z_KANA, _H2Z_ALPHA, _H2Z_DIGIT, _H2Z_SYMBOL = (
    _ord_dict(_reverse_dict(table))
    for table in (_KANA_MAPPER, _ALPHA_MAPPER, _DIGIT_MAPPER, _SYMBOL_MAPPER)
)


# Half-width kana that can carry a sound mark. They are written as \u escapes
# so that Unicode-normalizing tools cannot fold the two-character half-width
# form into the precomposed full-width character, which would turn the
# replacements below into no-ops.
_VOICED_BASES = (
    '\uff76\uff77\uff78\uff79\uff7a'  # ka ki ku ke ko
    '\uff7b\uff7c\uff7d\uff7e\uff7f'  # sa shi su se so
    '\uff80\uff81\uff82\uff83\uff84'  # ta chi tsu te to
    '\uff8a\uff8b\uff8c\uff8d\uff8e'  # ha hi fu he ho
)
_SEMI_VOICED_BASES = '\uff8a\uff8b\uff8c\uff8d\uff8e'  # ha hi fu he ho

# (half-width base + sound mark, precomposed full-width character) pairs.
# NFKC composes each two-character half-width sequence into the single
# full-width character, so the full-width side is derived, not hand-typed.
_SOUNDMARK_PAIRS = tuple(
    (half, normalize('NFKC', half))
    for half in (
        tuple(base + '\uff9e' for base in _VOICED_BASES)  # + voiced mark
        + tuple(base + '\uff9f' for base in _SEMI_VOICED_BASES)  # + semi-voiced mark
        + ('\uff73\uff9e',)  # u + voiced mark -> vu
    )
)


def _h2z_sm(text):
    """Replace half-width kana + sound mark pairs with one full-width kana.

    Only the two-character sequences listed in ``_SOUNDMARK_PAIRS`` are
    touched; every other character of ``text`` is returned unchanged.
    """
    for half, full in _SOUNDMARK_PAIRS:
        text = text.replace(half, full)
    return text


def _z2h_sm(text):
    """Replace voiced/semi-voiced full-width kana with half-width pairs.

    Each precomposed character listed in ``_SOUNDMARK_PAIRS`` becomes its
    half-width base character followed by the half-width sound mark; every
    other character of ``text`` is returned unchanged.
    """
    for half, full in _SOUNDMARK_PAIRS:
        text = text.replace(full, half)
    return text


def str_z2h(self, kana=True, alpha=True, digit=True, symbol=True):
    """Convert full-width (zenkaku) characters to half-width (hankaku).

    Parameters
    ----------
    kana : bool, default True
        Apply the kana table; voiced/semi-voiced kana are first rewritten by
        ``_z2h_sm`` before the per-character translation.
    alpha : bool, default True
        Apply the alphabet table.
    digit : bool, default True
        Apply the digit table.
    symbol : bool, default True
        Apply the symbol table.

    Returns
    -------
    The value of ``self._wrap_result`` applied to the converted values.
    """
    mapper = {}
    if kana:
        mapper.update(_Z2H_KANA)
    if alpha:
        mapper.update(_Z2H_ALPHA)
    if digit:
        mapper.update(_Z2H_DIGIT)
    if symbol:
        mapper.update(_Z2H_SYMBOL)

    def f(x):
        # On Python 2 the value goes through u_safe before any replacement.
        if not PY3:
            x = u_safe(x)
        # Sound-mark pairs cannot be expressed as a one-to-one translate
        # table, so they are handled up front.
        if kana:
            x = _z2h_sm(x)
        return x.translate(mapper)

    # The accessor exposes its data as ``series`` or ``_data`` depending on
    # the object it is attached to; try the former first.
    try:
        target = self.series
    except AttributeError:
        target = self._data
    # NOTE(review): _na_map presumably skips missing values - confirm
    # against the installed pandas version.
    return self._wrap_result(strings._na_map(f, target))


def str_h2z(self, kana=True, alpha=True, digit=True, symbol=True):
    """Convert half-width (hankaku) characters to full-width (zenkaku).

    Parameters
    ----------
    kana : bool, default True
        Apply the kana table; half-width kana followed by a sound mark are
        first combined by ``_h2z_sm`` before the per-character translation.
    alpha : bool, default True
        Apply the alphabet table.
    digit : bool, default True
        Apply the digit table.
    symbol : bool, default True
        Apply the symbol table.

    Returns
    -------
    The value of ``self._wrap_result`` applied to the converted values.
    """
    mapper = {}
    if kana:
        mapper.update(_H2Z_KANA)
    if alpha:
        mapper.update(_H2Z_ALPHA)
    if digit:
        mapper.update(_H2Z_DIGIT)
    if symbol:
        mapper.update(_H2Z_SYMBOL)

    def f(x):
        # On Python 2 the value goes through u_safe before any replacement.
        if not PY3:
            x = u_safe(x)
        # Sound-mark pairs span two characters, so they must be combined
        # before the one-to-one translate table is applied.
        if kana:
            x = _h2z_sm(x)
        return x.translate(mapper)

    # The accessor exposes its data as ``series`` or ``_data`` depending on
    # the object it is attached to; try the former first.
    try:
        target = self.series
    except AttributeError:
        target = self._data
    # NOTE(review): _na_map presumably skips missing values - confirm
    # against the installed pandas version.
    return self._wrap_result(strings._na_map(f, target))


# Register the converters on pandas' string accessor.
# Do not overwrite an attribute of the same name if one already exists.
if not hasattr(strings.StringMethods, 'z2h'):
    strings.StringMethods.z2h = str_z2h
if not hasattr(strings.StringMethods, 'h2z'):
    strings.StringMethods.h2z = str_h2z