使用以下数据集:
collect()
我想计算每个
@pyimport scipy.signal as ss
x=[1,2,3,4,5,6,7,8,9,10]
savitzkyGolay(x,5,1)
10-element Array{Float64,1}:
1.6000000000000003
2.200000000000001
3.0
4.0
5.000000000000001
6.000000000000001
7.0
8.0
8.8
9.400000000000002
#Python's scipy implementation
ss.savgol_filter(x,5,1)
10-element Array{Float64,1}:
1.0000000000000007
2.0000000000000004
2.9999999999999996
3.999999999999999
4.999999999999999
5.999999999999999
6.999999999999998
7.999999999999998
8.999999999999996
9.999999999999995
的国家(
import pandas as pd
import pymysql
import os
from FunctionsOilegal import connection, reportSittel, reportSittelByStatus
def callReportAll(Status):
    """Return the per-status report as a pandas DataFrame.

    Calls ``reportSittelByStatus`` with the module-level ``conn`` and the
    given ``Status``, then loads every fetched row into a DataFrame whose
    column labels are the first item of each ``cursor.description`` entry.
    """
    result = reportSittelByStatus(conn, Status)
    # Read the labels before fetching, as the rows are consumed by fetchall().
    headers = [field[0] for field in result.description]
    rows = result.fetchall()
    return pd.DataFrame(list(rows), columns=headers)
def styleSheets(nome, defaux):
    """Rewrite the first row of worksheet ``nome`` with a styled header.

    Registers a bold, bordered format with fill colour ``#9ABACC`` on the
    module-level ``workbook``, looks the worksheet up in ``writer.sheets``
    and writes each column label of ``defaux`` into row 0 using that format.
    """
    header_style = workbook.add_format(
        {'bold': True, 'fg_color': '#9ABACC', 'border': 1}
    )
    sheet = writer.sheets[nome]
    for position, label in enumerate(defaux.columns.values):
        sheet.write(0, position, label, header_style)
# Status filters to report on; each one is also used as its worksheet name.
STATUSES = ('Error', 'Running', 'Processed', 'Protocoled', 'Received', 'Refused')

# Run every query first, and release the connection even if one of them fails.
conn = connection()
try:
    cursor = reportSittel(conn)
    columns = [desc[0] for desc in cursor.description]
    data = cursor.fetchall()
    df = pd.DataFrame(list(data), columns=columns)
    # Keep the historical df1..df6 names, in the same order as STATUSES.
    df1, df2, df3, df4, df5, df6 = (callReportAll(status) for status in STATUSES)
finally:
    conn.close()

# Worksheet name -> frame, in the order the sheets are written.
sheets = dict(zip(('BASE',) + STATUSES, (df, df1, df2, df3, df4, df5, df6)))

# The context manager saves and closes the file on exit; ExcelWriter.save()
# was removed in pandas 2.0, so it must not be called explicitly.
with pd.ExcelWriter('pandas_multiple.xlsx', engine='xlsxwriter') as writer:
    # styleSheets() reads the module-level ``workbook`` and ``writer``.
    workbook = writer.book
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)
        # The sheet exists only after to_excel(); restyle its header row now.
        styleSheets(sheet_name, frame)
)中有
set.seed(2)
# Build the example data frame `test.df` with columns origin, year, type, value.
# origin: the 20 country codes (10 distinct, listed twice) repeated 4 times -> 80 entries.
origin <- rep(c("DEU", "GBR", "ITA", "NLD", "CAN", "MEX", "USA", "CHN", "JPN", "KOR","DEU", "GBR", "ITA", "NLD", "CAN", "MEX", "USA", "CHN", "JPN", "KOR"), 4)
# year: ten 1998s then ten 2000s, repeated twice -> 40 entries (recycled by cbind() below).
year <- rep(c(rep(1998, 10), rep(2000, 10)), 2)
# type and value are drawn at random, one per element of origin.
type <- sample(1:10, size=length(origin), replace=TRUE)
value <- sample(100:10000, size=length(origin), replace=TRUE)
# NOTE: cbind() of character and numeric vectors produces a character matrix,
# so no column of test.df is numeric after as.data.frame().
test.df <- as.data.frame(cbind(origin, year, type, value))
# Remove the standalone vectors; only test.df is needed from here on.
rm(origin, year, type, value)
### add some (6) missing values
# Six distinct row positions are picked at random and their value set to NA.
test.df$value[sample(1:length(test.df$value), 6, replace = FALSE)] <- NA
个
我尝试过:
type
和
origin
但是我不确定如何解释这些结果。
当然,如果 `value` 为 `NA`,R不应将其计算在内...
答案 0 :(得分:2)
要删除 `value` 为 `NA` 的行,请使用 `filter`:
# Group by (origin, year), drop rows whose value is NA, then count the
# remaining rows in each group.
test.df %>% group_by(origin,year) %>%
filter(!is.na(value)) %>% count()
# A tibble: 20 x 3
# Groups: origin, year [20]
origin year n
<fct> <fct> <int>
1 CAN 1998 4
2 CAN 2000 3
3 CHN 1998 3
4 CHN 2000 4
5 DEU 1998 4
6 DEU 2000 4
7 GBR 1998 4
8 GBR 2000 4
9 ITA 1998 3
10 ITA 2000 4
11 JPN 1998 3
12 JPN 2000 3
13 KOR 1998 4
14 KOR 2000 4
15 MEX 1998 4
16 MEX 2000 4
17 NLD 1998 3
18 NLD 2000 4
19 USA 1998 4
20 USA 2000 4
但是请注意,这并不计算每个组中有多少 `type`,而是有多少行。如果要计算唯一的 `type` 的数量,可以执行以下操作:
# Group by (origin, year), drop rows whose value is NA, then count the
# distinct type values within each group.
test.df %>% group_by(origin,year) %>%
filter(!is.na(value)) %>%
summarize(n_distinct(type)) # Thanks, @Frank!
# A tibble: 20 x 3
# Groups: origin [?]
origin year `n_distinct(type)`
<fct> <fct> <int>
1 CAN 1998 3
2 CAN 2000 3
3 CHN 1998 2
4 CHN 2000 3
5 DEU 1998 4
6 DEU 2000 3
7 GBR 1998 4
8 GBR 2000 4
9 ITA 1998 3
10 ITA 2000 4
11 JPN 1998 3
12 JPN 2000 2
13 KOR 1998 4
14 KOR 2000 4
15 MEX 1998 3
16 MEX 2000 3
17 NLD 1998 2
18 NLD 2000 3
19 USA 1998 3
20 USA 2000 4