我有一个数据框(df),其中包含 ['toaddress', 'ccaddress', 'body'] 三列。
我想遍历数据框的各行,通过统计每个字段中 '@' 出现的次数,得到 toaddress 和 ccaddress 两列中电子邮件地址数量的最小值、最大值和平均值。
如果其他方法都行不通,我想可以用 df.toaddress.str.contains(r'@').sum() 再除以数据框的行数来得到平均值,但我认为这只会统计至少含有一个 @ 符号的行数。
答案 0 :(得分:1)
您可以使用
# Count the '@' characters in every cell of the two address columns.
# str.count is called unbound, so each cell must already be a str.
df[['toaddress', 'ccaddress']].applymap(lambda x: str.count(x, '@'))
获取每个单元格中 '@' 的计数。
然后,您只需沿结果的行轴调用 pandas 的 max、min 和 mean 即可。
正如我在原问题下的评论中所说,您已经提到可以使用 df.toaddress.str.contains(r'@').sum()
——既然如此,为什么不直接使用 df.toaddress.str.count(r'@')
来代替我上面展示的方法呢?
答案 1 :(得分:0)
# NOTE(review): `rows` is not defined in this snippet — presumably an iterable of
# row-like objects; confirm. On Python 3 filter() returns an iterator, so len()
# raises TypeError unless the result is wrapped in list().
len(filter(lambda df: df.toaddress.str.contains(r'@'),rows))
甚至
# Keeps the elements of `rows` whose toaddress, converted to str, contains '@'.
# NOTE(review): `rows` is not defined in this snippet, and on Python 3 len() of a
# filter object raises TypeError — wrap the filter in list() first.
len(filter(lambda df: r'@' in str(df.toaddress), rows))
答案 2 :(得分:0)
也许是这样的
Object.assign()
返回:
#include <iostream>
#include <fstream>
#include <string>
#include <iomanip>
#include <cmath>
#include <cstdlib>
using namespace std;
/// Reads employee records from "employee.txt" and prints a payroll report
/// (one row per employee followed by the average net pay) to standard output.
///
/// Each record is a whitespace-separated line in this order:
///   employee-id  status  SSN  hours-worked  hourly-rate  first-name  last-name
class payroll {
    std::ifstream fin;

    // std::string rather than fixed-size char arrays: extracting an over-long
    // token into a char[] with operator>> can overrun the buffer.
    std::string employeeid;
    std::string firstname;
    std::string lastname;

    char SMH = ' ';  // status code; 'S'/'s' adds 5 points to the tax rate
    int SSN = 0;
    int hoursworked = 0;
    int overtimehours = 0;
    double hourlyrate = 0.0;
    double regularpay = 0.0;
    double overtimepay = 0.0;
    double grosspay = 0.0;
    double taxrate = 0.0;
    double taxamount = 0.0;
    double netpay = 0.0;
    double sum = 0.0;      // running total of net pay across all records
    double average = 0.0;  // mean net pay, set by findavg()

public:
    /// Opens "employee.txt" in the current working directory.
    payroll() {
        fin.open("employee.txt");
    }

    // No user-written destructor: std::ifstream closes the file on destruction.

private:
    /// Splits the hours into regular and overtime pay (overtime is every hour
    /// above 40, paid at 1.5x), then derives the gross pay from both parts.
    void calcgrosspay() {
        if (hoursworked > 40) {
            overtimehours = hoursworked - 40;
            overtimepay = overtimehours * hourlyrate * 1.5;
            regularpay = 40 * hourlyrate;
        } else {
            overtimehours = 0;
            overtimepay = 0;
            regularpay = hoursworked * hourlyrate;
        }
        // Must come after the branch above; otherwise it would add up the
        // previous employee's (or uninitialised) components.
        grosspay = regularpay + overtimepay;
    }

    /// Picks the tax rate from the gross-pay bracket, adds the status
    /// surcharge, and computes the tax amount.
    void calctax() {
        if (grosspay >= 500) taxrate = .30;
        else if (grosspay > 200.00) taxrate = .20;
        else taxrate = .10;
        if (SMH == 'S' || SMH == 's')
            taxrate = taxrate + .05;
        taxamount = grosspay * taxrate;
    }

    /// Net pay is gross pay minus the tax amount.
    void calcNetpay() {
        netpay = grosspay - taxamount;
    }

    /// Prints the report banner and the column headings.
    void printheadings() {
        std::cout << std::setw(49) << "-Payroll Report-" << '\n';
        std::cout << "------------------------------------------------------------------------------" << '\n';
        std::cout << "ID First Name Last Name Stat SSN HW HR OT OP GP Tax Net" << '\n';
        std::cout << "==============================================================================" << '\n';
        std::cout << "------------------------------------------------------------------------------" << '\n';
    }

    /// Prints one row for the record currently held in the members.
    void printdata() {
        // Manipulators only take effect when inserted into the stream; a bare
        // setprecision(2) expression statement changes nothing.
        std::cout << std::fixed << std::setprecision(2);
        // NOTE(review): the columns printed here do not follow the order of the
        // heading line (gross pay and tax rate come before the overtime
        // columns, and regular pay has no heading) — confirm the intended layout.
        std::cout << std::setw(14) << employeeid
                  << std::setw(16) << firstname
                  << std::setw(15) << lastname
                  << std::setw(6) << SMH
                  << std::setw(5) << SSN
                  << std::setw(6) << hoursworked
                  << std::setw(6) << hourlyrate
                  << std::setw(8) << grosspay
                  << std::setw(6) << taxrate
                  << std::setw(9) << regularpay
                  << std::setw(6) << overtimehours
                  << std::setw(6) << overtimepay
                  << std::setw(9) << netpay << '\n';
    }

    /// Adds the current record's net pay to the running total.
    /// The record-count argument is accepted for call compatibility but unused.
    void findsum(int /*count*/) {
        sum += netpay;
    }

    /// Computes, prints, and returns the average net pay over `i` records.
    /// The first argument is unused; the running total in `sum` is read instead.
    /// With no records the average is reported as 0 instead of dividing by zero.
    double findavg(double /*total*/, int i) {
        average = (i > 0) ? sum / i : 0.0;
        std::cout << '\n' << "The netpay average is " << average << '\n';
        return average;
    }

public:
    /// Prints the headings once, then one row per record read from the file,
    /// and finally the average net pay. Reports an error on standard error and
    /// returns without printing a report if the input file is not open.
    void printreport() {
        if (!fin.is_open()) {
            std::cerr << "Unable to open employee.txt" << '\n';
            return;
        }
        int i = 0;
        sum = 0.0;
        printheadings();
        while (fin >> employeeid >> SMH >> SSN >> hoursworked >> hourlyrate >> firstname >> lastname) {
            calcgrosspay();
            calctax();
            calcNetpay();
            printdata();
            ++i;
            findsum(i);
        }
        findavg(sum, i);
    }
};  // end of payroll class
int main() {
payroll employee;
employee.printreport();
system("PAUSE");
}//main
答案 3 :(得分:0)
此答案使用https://pypi.python.org/pypi/fake-factory生成测试数据
import pandas as pd
from random import randint
from faker import Factory
fake = Factory.create()
def emails():
    """Return a list of one to four fake e-mail addresses.

    One address is always generated; randint(0, 3) decides how many
    additional addresses follow it.
    """
    first = fake.email()
    extra_count = randint(0, 3)
    return [first] + [fake.email() for _ in range(extra_count)]
# Build ten synthetic rows; each address column holds a list of addresses
# produced by emails(), and body holds generated text.
# The rows are collected first and the frame is constructed once:
# DataFrame.append was removed in pandas 2.0, and appending row by row
# copies the whole frame on every iteration.
rows = [
    {'toaddress': emails(), 'ccaddress': emails(), 'body': fake.text()}
    for _ in range(10)
]
df1 = pd.DataFrame(rows, columns=['toaddress', 'ccaddress', 'body'])
# The length of each cell's list is the number of addresses in that cell.
print('toaddress length is {}'.format([len(x) for x in df1.toaddress.values]))
print('ccaddress length is {}'.format([len(x) for x in df1.ccaddress.values]))
最后两行是统计电子邮件地址数量的部分。我不确定您是否需要专门检查 '@';也许您可以用 fake-factory 生成一些测试数据作为示例?