我刚刚从Format numbers as currency in Python了解到,Python模块babel提供了babel.numbers.format_currency
来将数字格式化为货币。例如,
from babel.numbers import format_currency
# US dollars rendered for the en_US locale.
s = format_currency(123456.789, 'USD', locale='en_US') # u'$123,456.79'
# Euros rendered for the fr_FR locale: per the result shown, the group
# separator and the gap before the symbol are \xa0 (no-break space), the
# decimal mark is ',' and the currency symbol (\u20ac) comes last.
s = format_currency(123456.789, 'EUR', locale='fr_FR') # u'123\xa0456,79\xa0\u20ac'
反过来,如何从货币转换回数字,例如 $123,456,789.00 -> 123456789?
babel 提供了 babel.numbers.parse_number 来解析本地化格式的数字,但我找不到类似 parse_currency 的函数。那么,将本地货币解析为数字的理想方法是什么?
我查阅了 Python: removing characters except digits from string 这个问题。
import re
import string

s = '$123,456.79'

# Way 1: build a translation table that deletes every character of `s`
# that is not an ASCII digit. (`string.maketrans` and the two-argument
# `str.translate` only exist on Python 2; `str.maketrans` is the Python 3
# equivalent.)
nodigs = ''.join(set(s) - set(string.digits))
n = s.translate(str.maketrans('', '', nodigs))  # '12345679', lost `.`

# Way 2: strip every non-digit character with a regular expression.
n = re.sub(r"\D", "", s)  # '12345679'
上面两种方法都没有保留小数点分隔符 `.`。
下面的做法从字符串中删除除 `.` 之外的所有非数字字符(请参阅 here):
import re
# Way 1: drop everything that is not an ASCII digit or a dot.
# Note: inside a character class `|` is a literal pipe, not alternation,
# so it must not be listed or pipes would be kept in the result.
s = '$123,456.79'
n = re.sub(r"[^0-9.]", "", s)  # 123456.79

# Way 2: same idea with a pre-compiled pattern.
non_decimal = re.compile(r'[^\d.]+')
s = '$123,456.79'
n = non_decimal.sub('', s)  # 123456.79
这样就保留了小数点分隔符 `.`。
但是,上述解决方案在遇到下面这样的格式时就不起作用了,例如:
from babel.numbers import format_currency
# Euros in the fr_FR locale: groups are separated by \xa0 and the decimal
# mark is ',', so keeping only digits and '.' gives the wrong number.
s = format_currency(123456.789, 'EUR', locale='fr_FR') # u'123\xa0456,79\xa0\u20ac'
# The trailing comment shows how the encoded text is displayed.
# NOTE(review): on Python 3 `encode` returns a bytes object — confirm which
# interpreter the question targeted.
new_s = s.encode('utf-8') # 123 456,79 €
如您所见,货币格式各不相同。 以一般方式将货币解析为数字的理想方式是什么?
答案 0 :(得分:4)
babel文档指出the number parsing is not fully implemented yet,但是他们已经做了很多工作来将货币信息输入库中。您可以使用get_currency_name()和get_currency_symbol()获取货币详细信息,还可以使用所有其他get_...函数来获取常规的数字格式详细信息(小数点,减号等)。
使用这些信息,您可以从货币字符串中去掉货币详细信息(名称,符号)和分组符号(例如美国的 `,`)。然后,再把小数点和减号换成C区域设置所使用的符号(`-`表示减号,`.`表示小数点)。
这导致了这段代码(我添加了一个对象来保存一些数据,这可能会在进一步处理中变得很方便):
import re, os
from babel import numbers as n
from babel.core import default_locale
class AmountInfo(object):
    """Plain holder for the pieces of a parsed currency amount.

    Attributes are stored exactly as passed in:

    * ``name``   -- the currency's display name
    * ``symbol`` -- the currency's symbol
    * ``value``  -- the amount itself
    """

    def __init__(self, name, symbol, value):
        self.name = name
        self.symbol = symbol
        self.value = value

    def __repr__(self):
        # A readable repr makes the demo output and debugging sessions useful.
        return '{}(name={!r}, symbol={!r}, value={!r})'.format(
            type(self).__name__, self.name, self.symbol, self.value)
def parse_currency(value, cur, locale=None):
    """Strip locale-specific currency formatting from ``value``.

    The currency name, currency symbol, plus sign and group separator are
    removed; the locale's minus sign and decimal mark are replaced by ``-``
    and ``.``; finally all whitespace is dropped.

    :param value: the formatted currency string to clean up.
    :param cur: the currency code handed to babel's ``get_currency_name``
        and ``get_currency_symbol`` (e.g. ``'USD'``).
    :param locale: optional locale forwarded to the babel lookups. When
        omitted, babel's own default locale is used, as before.
    :return: an ``AmountInfo`` whose ``value`` is the cleaned-up *string*
        (it is not converted to a number here).
    """
    # Only forward ``locale`` when the caller supplied one, so the default
    # behaviour stays whatever babel itself falls back to.
    loc = {} if locale is None else {'locale': locale}

    decp = n.get_decimal_symbol(**loc)
    plus = n.get_plus_sign_symbol(**loc)
    minus = n.get_minus_sign_symbol(**loc)
    group = n.get_group_symbol(**loc)
    name = n.get_currency_name(cur, **loc)
    symbol = n.get_currency_symbol(cur, **loc)

    # Remove the pieces of information that shall be obvious. This must
    # happen before the decimal mark is rewritten: a symbol or separator may
    # itself contain the character used as decimal mark.
    for token in (plus, name, symbol, group):
        if token:
            value = value.replace(token, '')

    # Change the minus sign to a LOCALE=C minus ...
    if minus:
        value = value.replace(minus, '-')
    # ... and the decimal mark to a LOCALE=C decimal point.
    if decp:
        value = value.replace(decp, '.')

    # Just in case, remove extraneous whitespace.
    value = re.sub(r'\s+', '', value)
    return AmountInfo(name, symbol, value)
# Demo: format a handful of amounts for the current default locale, then
# parse them back and print what was recovered.
cur_loc = default_locale()
print('locale:', cur_loc)

# (amount, currency code) pairs to round-trip.
samples = [
    (123456.789, 'USD'),
    (-123456.78, 'PLN'),
    (123456.789, 'PLN'),
    (123456.789, 'IDR'),
    (123456.789, 'JPY'),
    (-123456.78, 'JPY'),
    (123456.789, 'CNY'),
    (-123456.78, 'CNY'),
]
test = [
    (n.format_currency(amount, code, locale=cur_loc), code)
    for amount, code in samples
]

for v, c in test:
    print('As currency :', c, ':', v.encode('utf-8'))
    info = parse_currency(v, c)
    print('As value :', c, ':', info.value)
    print('Extra info :', info.name.encode('utf-8'),
          info.symbol.encode('utf-8'))
输出看起来很有希望(在美国语言环境中):
$ export LC_ALL=en_US
$ ./cur.py
locale: en_US
As currency : USD : b'$123,456.79'
As value : USD : 123456.79
Extra info : b'US Dollar' b'$'
As currency : PLN : b'-z\xc5\x82123,456.78'
As value : PLN : -123456.78
Extra info : b'Polish Zloty' b'z\xc5\x82'
As currency : PLN : b'z\xc5\x82123,456.79'
As value : PLN : 123456.79
Extra info : b'Polish Zloty' b'z\xc5\x82'
As currency : IDR : b'Rp123,457'
As value : IDR : 123457
Extra info : b'Indonesian Rupiah' b'Rp'
As currency : JPY : b'\xc2\xa5123,457'
As value : JPY : 123457
Extra info : b'Japanese Yen' b'\xc2\xa5'
As currency : JPY : b'-\xc2\xa5123,457'
As value : JPY : -123457
Extra info : b'Japanese Yen' b'\xc2\xa5'
As currency : CNY : b'CN\xc2\xa5123,456.79'
As value : CNY : 123456.79
Extra info : b'Chinese Yuan' b'CN\xc2\xa5'
As currency : CNY : b'-CN\xc2\xa5123,456.78'
As value : CNY : -123456.78
Extra info : b'Chinese Yuan' b'CN\xc2\xa5'
它同样适用于不同的语言环境(巴西以使用逗号作为小数点而著名):
$ export LC_ALL=pt_BR
$ ./cur.py
locale: pt_BR
As currency : USD : b'US$123.456,79'
As value : USD : 123456.79
Extra info : b'D\xc3\xb3lar americano' b'US$'
As currency : PLN : b'-PLN123.456,78'
As value : PLN : -123456.78
Extra info : b'Zloti polon\xc3\xaas' b'PLN'
As currency : PLN : b'PLN123.456,79'
As value : PLN : 123456.79
Extra info : b'Zloti polon\xc3\xaas' b'PLN'
As currency : IDR : b'IDR123.457'
As value : IDR : 123457
Extra info : b'Rupia indon\xc3\xa9sia' b'IDR'
As currency : JPY : b'JP\xc2\xa5123.457'
As value : JPY : 123457
Extra info : b'Iene japon\xc3\xaas' b'JP\xc2\xa5'
As currency : JPY : b'-JP\xc2\xa5123.457'
As value : JPY : -123457
Extra info : b'Iene japon\xc3\xaas' b'JP\xc2\xa5'
As currency : CNY : b'CN\xc2\xa5123.456,79'
As value : CNY : 123456.79
Extra info : b'Yuan chin\xc3\xaas' b'CN\xc2\xa5'
As currency : CNY : b'-CN\xc2\xa5123.456,78'
As value : CNY : -123456.78
Extra info : b'Yuan chin\xc3\xaas' b'CN\xc2\xa5'
值得指出的是,babel存在一些编码问题。这是因为语言环境文件(在locale-data中)本身使用不同的编码。如果您处理的是自己熟悉的货币,这应该不是问题。但是如果您尝试处理不熟悉的货币,就可能会遇到问题(我刚刚得知波兰使用的是iso-8859-2,而不是iso-8859-1)。
答案 1 :(得分:4)
以下是不依赖babel库的通用货币解析器。
import numpy as np
import re
def currency_parser(cur_str):
    """Parse a formatted currency string into a number rounded to 2 places.

    Everything except digits, ``-``, ``.`` and ``,`` is discarded. Any ``.``
    or ``,`` outside the last three characters is treated as a thousands
    separator and removed; a ``,`` remaining in the last three characters is
    treated as the decimal mark.

    :param cur_str: the text to parse, e.g. ``'£ 2,237.85'`` or
        ``'25.675,26 EUR'``.
    :return: the amount, as returned by ``np.round(..., 2)``.
    :raises ValueError: if what is left after filtering is not a valid
        float literal (for example when the input has no digits).
    """
    # U+2212 (MINUS SIGN) would be stripped by the filter below, silently
    # turning a negative amount into a positive one; map it to '-' first.
    cur_str = cur_str.replace('\u2212', '-')

    # Remove any non-numerical characters except for ',' '.' or '-'
    # (e.g. the "EUR" in "25.675,26 EUR").
    cur_str = re.sub(r"[^-0-9.,]", '', cur_str)

    # Remove any thousands separators (either , or .): only the last three
    # characters can hold the decimal mark of a 2-decimal amount.
    head, tail = cur_str[:-3], cur_str[-3:]
    cur_str = re.sub(r"[.,]", '', head) + tail

    # A ',' left in the tail is the decimal mark (unless a '.' is there too,
    # in which case the string is handed to float() unchanged).
    if '.' not in tail and ',' in tail:
        cur_str = cur_str.replace(',', '.')

    return np.round(float(cur_str), 2)
下面是用于测试该函数的pytest脚本:
import numpy as np
import pytest
import re
def currency_parser(cur_str):
    """Parse a formatted currency string into a number rounded to 2 places.

    Everything except digits, ``-``, ``.`` and ``,`` is discarded. Any ``.``
    or ``,`` outside the last three characters is treated as a thousands
    separator and removed; a ``,`` remaining in the last three characters is
    treated as the decimal mark.

    :param cur_str: the text to parse, e.g. ``'£ 2,237.85'`` or
        ``'25.675,26 EUR'``.
    :return: the amount, as returned by ``np.round(..., 2)``.
    :raises ValueError: if what is left after filtering is not a valid
        float literal (for example when the input has no digits).
    """
    # U+2212 (MINUS SIGN) would be stripped by the filter below, silently
    # turning a negative amount into a positive one; map it to '-' first.
    cur_str = cur_str.replace('\u2212', '-')

    # Remove any non-numerical characters except for ',' '.' or '-'
    # (e.g. the "EUR" in "25.675,26 EUR").
    cur_str = re.sub(r"[^-0-9.,]", '', cur_str)

    # Remove any thousands separators (either , or .): only the last three
    # characters can hold the decimal mark of a 2-decimal amount.
    head, tail = cur_str[:-3], cur_str[-3:]
    cur_str = re.sub(r"[.,]", '', head) + tail

    # A ',' left in the tail is the decimal mark (unless a '.' is there too,
    # in which case the string is handed to float() unchanged).
    if '.' not in tail and ',' in tail:
        cur_str = cur_str.replace(',', '.')

    return np.round(float(cur_str), 2)
@pytest.mark.parametrize('currency_str, expected', [
    # Bare numbers.
    ('.3', 0.30),
    ('1', 1.00),
    ('1.3', 1.30),
    ('43,324', 43324.00),
    ('3,424', 3424.00),
    ('-0.00', 0.00),
    ('59294325.3', 59294325.30),
    # Currency code or symbol before/after, '.' or ',' as decimal mark.
    ('EUR433,432.53', 433432.53),
    ('25.675,26 EUR', 25675.26),
    ('2.447,93 EUR', 2447.93),
    ('-540,89EUR', -540.89),
    ('67.6 EUR', 67.60),
    ('30.998,63 CHF', 30998.63),
    ('0,00 CHF', 0.00),
    ('159.750,00 DKK', 159750.00),
    ('£ 2.237,85', 2237.85),
    ('£ 2,237.85', 2237.85),
    ('-1.876,85 SEK', -1876.85),
    ('8,53 NOK', 8.53),
    ('0,09 NOK', 0.09),
    ('-.9 CZK', -0.9),
    ('35.255,40 PLN', 35255.40),
    # Strings in the shape produced for the pt_BR locale. The exact
    # duplicate of the '-PLN123.456,78' case has been dropped.
    ('-PLN123.456,78', -123456.78),
    ('US$123.456,79', 123456.79),
    ('PLN123.456,79', 123456.79),
    ('IDR123.457', 123457),
    # The yen sign is written literally: '\xc2\xa5' in a str literal is the
    # two characters 'Â¥', not the UTF-8 encoding of '¥'.
    ('JP¥123.457', 123457),
    ('-JP¥123.457', -123457),
    ('CN¥123.456,79', 123456.79),
    ('-CN¥123.456,78', -123456.78),
])
def test_currency_parse(currency_str, expected):
    """currency_parser recovers the numeric amount from each sample string."""
    # approx() keeps the check robust against float representation noise.
    assert currency_parser(currency_str) == pytest.approx(expected)