import mysql.connector

# Ask the user which database (and same-named table) to create.
dbname = input('Please enter the name of database : ')

# Database and table names are identifiers, and identifiers cannot be passed
# as bound query parameters. Quote the user-supplied name the MySQL way
# instead: wrap it in backticks and double any backtick it contains, so the
# input can never be interpreted as SQL.
quoted_name = "`" + dbname.replace("`", "``") + "`"

# Step 1: connect without selecting a database and create it if missing.
db = mysql.connector.connect(
    host='localhost',
    user='root',
    password='',
)
try:
    cursor = db.cursor()
    try:
        cursor.execute("CREATE DATABASE IF NOT EXISTS %s" % quoted_name)
    finally:
        cursor.close()
finally:
    # Always release the connection, even if the statement failed.
    db.close()

# Step 2: reconnect with the new database selected and create a table that
# carries the same name as the database.
db = mysql.connector.connect(
    host='localhost',
    user='root',
    password='',
    database=dbname,
)
try:
    cursor = db.cursor()
    try:
        cursor.execute(
            "CREATE TABLE IF NOT EXISTS %s (email VARCHAR(30),pwd VARCHAR(20))"
            % quoted_name
        )
    finally:
        cursor.close()
finally:
    db.close()
pd.DataFrame
我想对行中的连续零进行计数
# Example input: one column per year ('2010'..'2014') and three rows of integers.
pd.DataFrame({'2010':[0, 45, 5], '2011': [12, 56, 0], '2012': [11, 22, 0], '2013': [0, 5, 0], '2014': [0, 0, 0]})
2010 2011 2012 2013 2014
1 0 12 11 0 0
2 45 56 22 5 0
3 5 0 0 0 0
寻找不同的有效方式
答案 0 :(得分:2)
为了提高效率,我建议您采用纯NumPy方式-
<!-- Binding redirect for System.Web.Http: versions 0.0.0.0 through 5.2.3.0 are redirected to 5.2.3.0. -->
<dependentAssembly>
<assemblyIdentity name="System.Web.Http" publicKeyToken="31bf3856ad364e35" xmlns="urn:schemas-microsoft-com:asm.v1" />
<bindingRedirect oldVersion="0.0.0.0-5.2.3.0" newVersion="5.2.3.0" xmlns="urn:schemas-microsoft-com:asm.v1" />
</dependentAssembly>
样品运行-
def islandlen_perrow(df, trigger_val=0):
    """Return, for every row, the lengths of its runs of ``trigger_val``.

    Args:
        df: A 2-D table of values: a pandas DataFrame or anything
            ``np.asarray`` turns into a 2-D array.
        trigger_val: The value whose consecutive occurrences ("islands")
            are measured. Defaults to 0.

    Returns:
        A list with one integer array per row, holding the lengths of that
        row's islands from left to right. A row without any island yields
        an empty array; an input with no rows yields an empty list.
    """
    # Local import so the function works even when the surrounding module
    # does not import NumPy itself.
    import numpy as np

    a = np.asarray(df) == trigger_val
    n_rows = a.shape[0]
    if n_rows == 0:
        # np.split with no split points would return one (empty) section,
        # which does not correspond to any row.
        return []

    # Pad each row with False on both sides so every island has a start and
    # an end transition inside its own row.
    pad = np.zeros((n_rows, 1), dtype=bool)
    mask = np.hstack((pad, a, pad))

    # True wherever the mask flips between neighbours; flips come in
    # (start, end) pairs, so each row contains an even number of them.
    mask_step = mask[:, 1:] != mask[:, :-1]
    idx = np.flatnonzero(mask_step)

    # Flattened positions alternate start, end, start, end, ...; because the
    # pairs never straddle rows, end - start is the island length.
    island_lens = idx[1::2] - idx[::2]

    # Two flips per island; cut the flat list of lengths at the row borders.
    n_islands_perrow = mask_step.sum(1) // 2
    return np.split(island_lens, n_islands_perrow[:-1].cumsum())
大型数组上的计时-
In [69]: df
Out[69]:
2010 2011 2012 2013 2014
0 0 12 11 0 0
1 45 56 22 5 0
2 5 0 0 0 0
In [70]: islandlen_perrow(df, trigger_val=0)
Out[70]: [array([1, 2], dtype=int64), array([1], dtype=int64), array([4], dtype=int64)]
In [76]: pd.Series(islandlen_perrow(df, trigger_val=0))
Out[76]:
0 [1, 2]
1 [1]
2 [4]
dtype: object
答案 1 :(得分:1)
您可以使用itertools.groupby:
import pandas as pd
from itertools import groupby
def count_zeros(x):
    """Return the lengths of the runs of consecutive zeros in *x*.

    Args:
        x: An iterable of values (for example one DataFrame row).

    Returns:
        A list of run lengths in order of appearance; empty when *x*
        contains no zero.
    """
    # groupby splits x into runs of equal key; keep only the runs whose key
    # is True (value == 0) and count their members.
    return [
        sum(1 for _ in run)
        for is_zero, run in groupby(x, key=lambda value: value == 0)
        if is_zero
    ]
# Sample frame: one column per year, three rows.
df = pd.DataFrame(
    {
        '2010': [0, 45, 5],
        '2011': [12, 56, 0],
        '2012': [11, 22, 0],
        '2013': [0, 5, 0],
        '2014': [0, 0, 0],
    }
)
# axis=1 hands each row to count_zeros, giving one list of run lengths per row.
result = df.apply(count_zeros, axis=1)
print(result)
输出
0 [1, 2]
1 [1]
2 [4]
dtype: object
答案 2 :(得分:1)
将 itertools.groupby 与列表推导式结合使用:
from itertools import groupby
# For every row, group neighbouring values by truthiness and keep the length
# of each falsy (zero) run; the brackets already allow the line break, so no
# backslash continuation is needed.
df['counts'] = [
    [
        sum(1 for _ in run)
        for is_nonzero, run in groupby(values, key=bool)
        if not is_nonzero
    ]
    for values in df.values
]
print(df)
2010 2011 2012 2013 2014 counts
0 0 12 11 0 0 [1, 2]
1 45 56 22 5 0 [1]
2 5 0 0 0 0 [4]
答案 3 :(得分:1)
如果您对纯 pandas / NumPy 解决方案感兴趣……可以使用 groupby 和 value_counts:
# Flatten the frame into one Series indexed by (row, column).
v = df.stack()
# Boolean marker: True where the value is zero.
m = v == 0
(
    (m != m.shift())      # True wherever the marker changes, i.e. a new run starts
    .cumsum()             # running total gives each run its own id
    .where(m)             # keep ids of zero runs only, others become NaN
    .dropna()
    .groupby(level=0)     # regroup by original row
    .apply(lambda run_ids: run_ids.value_counts(sort=False).tolist())
)
0 [1, 2]
1 [1]
2 [4]
dtype: object
或者,避免使用 lambda:
# Lambda-free variant: count the run ids per row with the group-wise
# value_counts, then collect each row's counts into a list.
(
    (m != m.shift())      # True wherever the zero marker changes
    .cumsum()             # one id per run
    .where(m)             # keep zero runs only
    .dropna()
    .groupby(level=0)
    .value_counts(sort=False)
    .groupby(level=0)
    .apply(list)
)
0 [1, 2]
1 [1]
2 [4]
dtype: object
答案 4 :(得分:0)
一种方法是将值转换为布尔值,然后按 False 值分割字符串。输出:
1 [1, 2]
2 [1]
3 [4]