首先,我已经看过很多与此相关的问题(将字符串转换为浮点数等等),但我需要更通用的方案,却一直没有找到(所以我希望这个问题也能帮助其他遇到类似情况的人)。我已经有了一个解决方案,但想知道它在 1)性能和 2)Pythonic 的优雅程度这两方面是否是最佳方案。
问题简短:
所以我做了什么:
import ast
import types
try:
    # Python 2: the same classes that types.IntType, types.LongType,
    # types.FloatType and types.ComplexType are aliases for.
    NumberTypes = (int, long, float, complex)
except NameError:
    # Python 3: ``long`` was merged into ``int`` and the ``types.*Type``
    # aliases no longer exist.
    NumberTypes = (int, float, complex)


def mk_value(s, currency_sign='', thousand_sep='', decimal_sep='.'):
    """Convert a bool, a number or a formatted numeric string to a number.

    Args:
        s: The value to convert: a bool, a number, or a string.
        currency_sign: Substring removed from the string before parsing;
            '' removes nothing.
        thousand_sep: Grouping separator removed from the string before
            parsing; '' removes nothing.
        decimal_sep: Decimal separator used in the string; it is replaced
            by '.' before parsing.

    Returns:
        1 or 0 for a bool, the input itself for a number, the evaluated
        number for a string, or None when the cleaned string is empty or
        does not evaluate to a number.
    """
    # bool is a subclass of int, so it has to be tested before NumberTypes.
    if isinstance(s, bool):
        return 1 if s else 0
    if isinstance(s, NumberTypes):
        return s

    # Normalise the string so that it looks like a plain Python literal.
    if currency_sign != '':
        s = s.replace(currency_sign, '')
    if thousand_sep != '':
        s = s.replace(thousand_sep, '')
    if decimal_sep != '.':
        s = s.replace(decimal_sep, '.')
    s = s.strip()
    if s == '':
        return None

    try:
        # literal_eval only evaluates literals, never arbitrary code.
        result = ast.literal_eval(s)
    except (ValueError, SyntaxError):
        # ValueError: a well-formed expression that is not a literal ('abc').
        # SyntaxError: text that is not a valid expression ('12 EUR', '1.2.3').
        return None
    # Literals such as tuples or strings evaluate fine but are not numbers.
    return result if isinstance(result, NumberTypes) else None
您可以通过以下方式进行测试:
mk_value(True)  # bool -> 1
mk_value(1234)  # numbers are returned unchanged -> 1234
mk_value(1234.56)  # -> 1234.56
mk_value('1234')  # -> 1234
mk_value('1234.56')  # -> 1234.56
mk_value('1,234.56') # no thousand_sep given, so ',' is left in the string; it evaluates to a tuple, not a number -> None
mk_value('1.234.567,89 EUR', currency_sign='EUR', thousand_sep='.', decimal_sep=',') # all options together -> 1234567.89
所以这是有效的(据我所知);但这是最好/最pythonic的方式吗?有更快的方法吗?我应该看看Cython吗?任何有关改进这一点的想法都会非常有用!
BR
岩溶
编辑:我已根据Andrew和WoLpH的建议更新了我的代码。它现在看起来像这样:
import types
NumberTypes = (types.IntType, types.LongType, types.FloatType, types.ComplexType)
def mk_value(s, currency_sign='', thousand_sep='', decimal_sep='.'):
    """Turn a bool, a number or a formatted numeric string into a number.

    Bools become 1/0 and numbers are returned untouched.  For a string the
    currency sign and thousand separator are removed, the decimal separator
    is replaced by '.', surrounding whitespace is stripped, and the rest is
    parsed as an int or, failing that, a float.  Returns None when the
    cleaned string is empty or cannot be parsed.
    """
    # bool first: it is a subclass of int and would match NumberTypes.
    if isinstance(s, bool):
        return 1 if s else 0
    if isinstance(s, NumberTypes):
        return s

    text = s
    if currency_sign:
        text = text.replace(currency_sign, '')
    if thousand_sep:
        text = text.replace(thousand_sep, '')
    if decimal_sep != '.':
        text = text.replace(decimal_sep, '.')
    text = text.strip()
    if not text:
        return None

    # int is tried first so whole numbers stay ints; float is the fallback.
    for convert in (int, float):
        try:
            return convert(text)
        except ValueError:
            continue
    return None
之前的代码表现如下:
>>> timeit.timeit("mk_value(1234)", 'from __main__ import mk_value', number=100000)
0.050575971603393555
>>> timeit.timeit("mk_value(1234.56)", 'from __main__ import mk_value', number=100000)
0.07073187828063965
>>> timeit.timeit("mk_value('1234')", 'from __main__ import mk_value', number=100000)
0.8333430290222168
>>> timeit.timeit("mk_value('1234.56')", 'from __main__ import mk_value', number=100000)
0.8230760097503662
>>> timeit.timeit("mk_value('1,234.56', thousand_sep=',')", 'from __main__ import mk_value', number=100000)
0.9358179569244385
新代码的表现:
>>> timeit.timeit("mk_value(1234)", 'from __main__ import mk_value', number=100000)
0.04723405838012695
>>> timeit.timeit("mk_value(1234.56)", 'from __main__ import mk_value', number=100000)
0.06952905654907227
>>> timeit.timeit("mk_value('1234')", 'from __main__ import mk_value', number=100000)
0.1798090934753418
>>> timeit.timeit("mk_value('1234.56')", 'from __main__ import mk_value', number=100000)
0.45616698265075684
>>> timeit.timeit("mk_value('1,234.56', thousand_sep=',')", 'from __main__ import mk_value', number=100000)
0.5290899276733398
所以速度快了很多:最复杂的那个用例快了将近一倍,而 int 的情况提升更大(我猜是因为它是 try/except 逻辑中最先尝试的分支)!非常好,谢谢你们的意见。
我暂时让这个问题保持开放,看看是否有人对进一步提速有好的想法 :) 至少我希望这能帮助将来遇到同样问题的人(这一定是一个非常普遍的问题)。
答案 0 :(得分:2)
依我看,代码也许还可以写得更 Pythonic 一些,但我还不确定最佳方案是什么。
benchmark.py
# vim: set fileencoding=utf-8 :
import timeit
import pyximport
pyximport.install()
def timer(func, mod):
    """Time 100000 runs of a statement and print one report line.

    Args:
        func: The statement to time, e.g. "mk_value('1234')".
        mod: Name of the module ``mk_value`` is imported from in the
            timeit setup code.

    The line shows the module, the statement (shortened to 37 characters
    plus '...' when it is longer than 40 characters) and the total time in
    milliseconds and microseconds.
    """
    setup = 'from %s import mk_value' % mod
    seconds = timeit.timeit(func, setup, number=100000)
    ms = 1000 * seconds
    us = 1000 * ms
    func_short = func[:37] + '...' if len(func) > 40 else func
    # A single parenthesised argument is valid both as the Python 2 print
    # statement and as the Python 3 print function.  The values are passed
    # explicitly so the format keys do not depend on local variable names.
    print('%(mod)s.%(func_short)-40s %(ms)6dms %(us)12dμs' % {
        'mod': mod,
        'func_short': func_short,
        'ms': ms,
        'us': us,
    })
# Run the same six statements against each implementation module: a, b, c, d.
for mod in 'abcd':
    for statement in (
        "mk_value(1234)",
        "mk_value(1234.56)",
        "mk_value('1234')",
        "mk_value('1234.56')",
        "mk_value('1,234.56', thousand_sep=',')",
        "mk_value('1.234.567,89 EUR', currency_sign='EUR', "
        "thousand_sep='.', decimal_sep=',')",
    ):
        timer(statement, mod)
a.py
import ast
import types
try:
    # Python 2: the classes behind types.IntType / LongType / FloatType /
    # ComplexType.
    NumberTypes = (int, long, float, complex)
except NameError:
    # Python 3 has no ``long`` and no ``types.*Type`` aliases.
    NumberTypes = (int, float, complex)


def mk_value(s, currency_sign='', thousand_sep='', decimal_sep='.'):
    """Return the numeric value of a bool, a number or a numeric string.

    Args:
        s: A bool, a number, or a string to convert.
        currency_sign: Text stripped out of the string before evaluation
            ('' means nothing is stripped).
        thousand_sep: Grouping separator stripped out of the string
            ('' means nothing is stripped).
        decimal_sep: Decimal separator of the string; converted to '.'.

    Returns:
        1/0 for True/False, ``s`` itself when it is already a number, the
        number obtained by safely evaluating the cleaned string, or None
        if that string is empty or is not a numeric literal.
    """
    if isinstance(s, bool):  # must precede NumberTypes: bool subclasses int
        return 1 if s else 0
    if isinstance(s, NumberTypes):
        return s

    if currency_sign != '':
        s = s.replace(currency_sign, '')
    if thousand_sep != '':
        s = s.replace(thousand_sep, '')
    if decimal_sep != '.':
        s = s.replace(decimal_sep, '.')
    s = s.strip()
    if s == '':
        return None

    try:
        result = ast.literal_eval(s)
    except (ValueError, SyntaxError):
        # literal_eval raises SyntaxError, not ValueError, when the text is
        # not a valid expression at all (e.g. '12 EUR'); both mean the
        # string is not a number.
        return None
    # A valid literal of another type (tuple, str, ...) is not a number.
    if not isinstance(result, NumberTypes):
        return None
    return result
b.py
import types
NumberTypes = (types.IntType, types.LongType, types.FloatType, types.ComplexType)
def mk_value(s, currency_sign='', thousand_sep='', decimal_sep='.'):
    """Convert a bool, a number or a formatted numeric string to a number.

    True/False map to 1/0 and numbers pass through unchanged.  A string has
    its currency sign and thousand separator removed and its decimal
    separator replaced by '.', is stripped of surrounding whitespace, and is
    then parsed with int() and, if that fails, float().  None is returned
    for an empty or unparsable string.
    """
    # Check bool before NumberTypes because bool is a subclass of int.
    if isinstance(s, bool):
        return 1 if s else 0
    if isinstance(s, NumberTypes):
        return s

    cleaned = s
    for unwanted in (currency_sign, thousand_sep):
        if unwanted:
            cleaned = cleaned.replace(unwanted, '')
    if decimal_sep != '.':
        cleaned = cleaned.replace(decimal_sep, '.')
    cleaned = cleaned.strip()
    if not cleaned:
        return None

    try:
        number = int(cleaned)
    except ValueError:
        try:
            number = float(cleaned)
        except ValueError:
            # Neither an int nor a float: not a number.
            number = None
    return number
c.pyx
import types
NumberTypes = (types.IntType, types.LongType, types.FloatType, types.ComplexType)
def mk_value(s, currency_sign='', thousand_sep='', decimal_sep='.'):
    """Convert a bool, a number or a formatted numeric string to a number.

    Bools give 1/0, numbers are returned as they are.  Strings are cleaned
    (currency sign and thousand separator removed, decimal separator turned
    into '.', whitespace stripped) and parsed as int, then as float.
    Returns None if the cleaned string is empty or cannot be parsed.
    """
    # bool is tested first since it is a subclass of int.
    if isinstance(s, bool):
        return 1 if s else 0
    if isinstance(s, NumberTypes):
        return s

    if currency_sign:
        s = s.replace(currency_sign, '')
    if thousand_sep:
        s = s.replace(thousand_sep, '')
    if decimal_sep != '.':
        s = s.replace(decimal_sep, '.')
    stripped = s.strip()
    if not stripped:
        return None

    try:
        return int(stripped)
    except ValueError:
        pass
    try:
        return float(stripped)
    except ValueError:
        # Not parsable as int or float: not a number.
        return None
d.pyx
import types
NumberTypes = (types.IntType, types.LongType, types.FloatType, types.ComplexType)
def mk_value(s, currency_sign='', thousand_sep='', decimal_sep='.'):
    """Convert a bool, a number or a formatted numeric string to a number.

    Bools give 1/0 and numbers are returned unchanged.  Any other falsy
    value (such as the empty string) gives None.  Otherwise the currency
    sign is removed here and the remaining clean-up and parsing is
    delegated to _mk_value.
    """
    # bool is tested first since it is a subclass of int.
    if isinstance(s, bool):
        return 1 if s else 0
    if isinstance(s, NumberTypes):
        return s
    if not s:
        return None
    text = s.replace(currency_sign, '') if currency_sign else s
    return _mk_value(text, currency_sign, thousand_sep, decimal_sep)
cdef object _mk_value(char *s, char *currency_sign, char *thousand_sep, char *decimal_sep):
    # Parse the C string ``s`` as an int or, failing that, a float.
    #
    # The string is compacted in place first: the first byte of
    # ``decimal_sep`` is rewritten to '.', the first byte of ``thousand_sep``
    # and every space are dropped.  Only the first byte of each separator is
    # looked at, and ``currency_sign`` is not used here.
    # Returns the parsed number, or None if nothing is left or it does not
    # parse.
    #
    # NOTE(review): the compaction writes into the buffer that ``s`` points
    # to.  When that buffer belongs to a Python string object the caller's
    # string is modified as well -- confirm this is acceptable or compact
    # into a separate buffer instead.
    cdef int i = 0, j = 0
    while s[i]:
        if s[i] == decimal_sep[0]:
            s[j] = '.'
            j += 1
        elif s[i] == thousand_sep[0]:
            pass
        elif s[i] == ' ':
            pass
        else:
            s[j] = s[i]
            j += 1
        i += 1
    if j == 0:
        # Nothing left after removing separators and spaces: not a number.
        return None
    # Only the first j bytes are the compacted text; the bytes after them
    # are leftovers of the original string and must not be parsed
    # (without this, '1,234.56' was read as '1234.566').
    cleaned = s[:j]
    try:  # try int
        return int(cleaned)
    except ValueError:
        try:  # if there's an error, try float
            return float(cleaned)
        except ValueError:
            # if the conversion gave an error, the string is not a number
            return None
a.mk_value(1234) 27ms 27526μs
a.mk_value(1234.56) 42ms 42097μs
a.mk_value('1234') 502ms 502109μs
a.mk_value('1234.56') 520ms 520395μs
a.mk_value('1,234.56', thousand_sep=',') 570ms 570749μs
a.mk_value('1.234.567,89 EUR', currency... 627ms 627456μs
b.mk_value(1234) 27ms 27082μs
b.mk_value(1234.56) 40ms 40014μs
b.mk_value('1234') 94ms 94444μs
b.mk_value('1234.56') 276ms 276519μs
b.mk_value('1,234.56', thousand_sep=',') 315ms 315310μs
b.mk_value('1.234.567,89 EUR', currency... 374ms 374861μs
c.mk_value(1234) 11ms 11482μs
c.mk_value(1234.56) 22ms 22765μs
c.mk_value('1234') 69ms 69251μs
c.mk_value('1234.56') 176ms 176908μs
c.mk_value('1,234.56', thousand_sep=',') 226ms 226709μs
c.mk_value('1.234.567,89 EUR', currency... 285ms 285431μs
d.mk_value(1234) 11ms 11483μs
d.mk_value(1234.56) 22ms 22355μs
d.mk_value('1234') 69ms 69151μs
d.mk_value('1234.56') 169ms 169364μs
d.mk_value('1,234.56', thousand_sep=',') 187ms 187460μs
d.mk_value('1.234.567,89 EUR', currency... 233ms 233935μs
答案 1 :(得分:2)
我会用提前返回(early return)的逻辑来编写它,并通过抛出异常来表示失败:
import types
try:
    # Python 2: the classes behind types.IntType / LongType / FloatType /
    # ComplexType.
    NumberTypes = (int, long, float, complex)
except NameError:
    # Python 3 has no ``long`` and no ``types.*Type`` aliases.
    NumberTypes = (int, float, complex)


def mk_value(s, currency_sign='', thousand_sep='', decimal_sep='.'):
    """Convert a bool, a number or a formatted numeric string to a number.

    Args:
        s: A bool, a number, or a string to convert.
        currency_sign: Substring removed from a string before parsing.
        thousand_sep: Grouping separator removed from a string before
            parsing.
        decimal_sep: Decimal separator of the string; replaced by '.'.

    Returns:
        ``s`` itself if it is already a (non-bool) number, otherwise the
        result of int(s) or, if that fails, float(s).  Bools therefore
        come back as 1/0.

    Raises:
        ValueError: If the value can be parsed neither as int nor as float
            (this includes the empty string).
    """
    # bool is a subclass of int, so it would match NumberTypes and be
    # returned as True/False.  Excluding it lets it fall through to int()
    # below and come back as 1/0.
    if isinstance(s, NumberTypes) and not isinstance(s, bool):
        # Already in the desired form.
        return s
    if isinstance(s, str):
        # Prepare the string for conversion.
        if currency_sign:
            s = s.replace(currency_sign, '')
        if thousand_sep:
            s = s.replace(thousand_sep, '')
        if decimal_sep != '.':
            s = s.replace(decimal_sep, '.')
        # No strip() needed: int() and float() ignore surrounding whitespace.
    # No special case for the empty string: int('') and float('') raise
    # ValueError just like any other non-numeric text.
    try:
        return int(s)
    except ValueError:
        # If float() fails as well, its exception propagates to the caller.
        return float(s)