我有这样的数据:
emailaddress customer_acquisation_date customer_order_date payment_amount
xy@gmail.com 01/05/2013 6:24 AM 01/05/2013 5:10 AM $ 20.67
xy@gmail.com 01/05/2013 6:24 AM 02/07/2013 7:21 PM $ 25.56
xy@gmail.com 01/05/2013 6:24 AM 07/10/2013 8:00 AM $100.00
xy@gmail.com 01/05/2013 6:24 AM 08/12/2013 9:35 AM $30.00
我想按电子邮件地址(emailaddress)对付款金额求和,即 sum(payment_amount),希望最终输出为:
emailaddress customer_acquisation_date customer_order_date payment_amount
xy@gmail.com 01/05/2013 6:24 AM 01/05/2013 $ 177
02/07/2013
07/10/2013
08/12/2013
我写的代码
z <- aggregate(x$emailaddress~x$paymentamount,data=x,FUN=sum)
我收到错误
Error in Summary.factor(c(211594L, 291939L, 79240L, 208971L, 369325L, :
‘sum’ not meaningful for factors
正确的做法是什么?感谢任何帮助。
答案 0 :(得分:6)
aggregate 的公式接口要求把待聚合的值写在 ~ 的左边,把分组变量写在右边。另外,如上所述,您还需要先删除美元符号,才能将该列转换为数值格式。
# Strip the currency symbol, thousands separators and stray whitespace so the
# column converts cleanly to numeric (e.g. "$ 20.67" -> 20.67,
# "$1,000.00" -> 1000). gsub() coerces a factor column to character first.
x$payment_amount <- as.numeric(gsub("[$,[:space:]]", "", x$payment_amount))
# In the formula interface the value to aggregate goes on the left of `~`
# and the grouping variable on the right.
z <- aggregate(payment_amount ~ emailaddress, data = x, FUN = sum)
编辑:添加data.table解决方案,同时保留其他列。
library(data.table)
# Adds a per-e-mail total as a new column while keeping every original column
# (one row per order, each carrying the total for its e-mail address).
setDT(x) # Convert the data.frame to data.table
# `:=` adds payment_total in place, computed within each emailaddress group.
# NOTE(review): z appears to be the same object as x here (no copy is made),
# so x also gains payment_total and is affected by setDF(z) below - confirm.
z = x[, payment_total := sum(payment_amount), by = emailaddress]
setDF(z) # Convert the result to data.frame
答案 1 :(得分:2)
以下做法不是删除 $ 符号,而是直接提取数值。这样做的好处是不需要指定实际要删除的内容(例如,不同的货币符号,或“10.00美元”这样的写法)。我还添加了 merge 来获得接近 OP 预期的输出(并不完全一样,但恕我直言这样更好):
library(magrittr)
library(dplyr)

# Extract the numeric part (digits, a dot, digits) from each payment string
# and write the result back into the column via the compound pipe `%<>%`.
# The braces are needed so that `.` is passed to both regmatches() and the
# nested gregexpr() call.
x$payment_amount %<>% {regmatches(., gregexpr("[[:digit:]]+[.][[:digit:]]+", .))} %>%
  as.numeric()

# Total payment per e-mail address. The data frame is `x`; passing `df` here
# would pick up the stats::df function and fail.
aggre <- aggregate(payment_amount ~ emailaddress, data = x, FUN = sum)

# Drop the per-order amount, attach the per-address total to every order row,
# and give the total a descriptive column name.
select(x, -payment_amount) %>%
  merge(aggre, by = "emailaddress") %>%
  rename(tot_payment_amount = payment_amount)
结果:
emailaddress customer_acquisation_date customer_order_date tot_payment_amount
1 xy@gmail.com 01/05/2013 6:24 AM 01/05/2013 5:10 AM 176.23
2 xy@gmail.com 01/05/2013 6:24 AM 02/07/2013 7:21 PM 176.23
3 xy@gmail.com 01/05/2013 6:24 AM 07/10/2013 8:00 AM 176.23
4 xy@gmail.com 01/05/2013 6:24 AM 08/12/2013 9:35 AM 176.23
注意:
我使用 magrittr 包主要是为了它方便的双向管道操作符 %<>% 。它把 LHS( x$payment_amount )传给 RHS 上的 . ,并把 RHS 的输出赋回给 LHS。 regmatches 外面的 {} 是必需的,这样才能把 x$payment_amount 同时传给嵌套在函数内部的函数(即 regmatches 内 gregexpr 的 text= 参数)。对于那些觉得 %<>% 容易混淆的人,以下是在不使用 %<>% 的情况下做同样事情的写法:
x$payment_amount =
with(x, regmatches(payment_amount, gregexpr("[[:digit:]]+[.][[:digit:]]+", payment_amount))) %>%
as.numeric()
package com.ticketapp.emailaddresss.ticketapp;
import android.content.Intent;
import android.net.Uri;
import android.support.v7.app.AppCompatActivity;
import android.os.Bundle;
import android.util.Log;
import android.view.View;
import android.widget.Button;
import android.widget.TextView;
import com.android.volley.Request;
import com.android.volley.RequestQueue;
import com.android.volley.Response;
import com.android.volley.VolleyError;
import com.android.volley.toolbox.StringRequest;
import com.facebook.AccessToken;
import com.facebook.GraphRequest;
import com.facebook.GraphResponse;
import com.facebook.HttpMethod;
import com.facebook.login.LoginManager;
import com.facebook.share.model.ShareHashtag;
import com.facebook.share.model.ShareLinkContent;
import com.facebook.share.widget.ShareDialog;
import java.util.ArrayList;
import java.util.List;
/**
 * Activity that shows the name received through the launching intent's
 * extras, sends a POST request to {@link #server_url} when the button is
 * pressed, and offers Facebook share / get-posts / logout actions.
 */
public class MainActivity extends AppCompatActivity {
    Button button;
    TextView textView;
    String server_url = "https://www.idg.se/";
    // Never assigned in this class; requests are queued through
    // SingletonRequestQueue instead.
    RequestQueue requestQueue;
    private ShareDialog shareDialog;
    private String name, surname, imageUrl;
    private String TAG = "MainActivity";

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        button = (Button) findViewById(R.id.bm);
        textView = (TextView) findViewById(R.id.txt);

        // getExtras() returns null when the activity is started without
        // extras; guard against it instead of crashing.
        Bundle inBundle = getIntent().getExtras();
        if (inBundle != null) {
            name = inBundle.getString("name");
            surname = inBundle.getString("surname");
            imageUrl = inBundle.getString("imageUrl");
        }

        TextView nameView = (TextView) findViewById(R.id.nameAndSurname);
        // Missing values are shown as empty text rather than the literal "null".
        nameView.setText(nullToEmpty(name) + " " + nullToEmpty(surname));

        button.setOnClickListener(new View.OnClickListener() {
            @Override
            public void onClick(View v) {
                // Show the raw response body on success, a fixed message on failure.
                StringRequest stringRequest = new StringRequest(
                        Request.Method.POST,
                        server_url,
                        new Response.Listener<String>() {
                            @Override
                            public void onResponse(String response) {
                                textView.setText(response);
                            }
                        },
                        new Response.ErrorListener() {
                            @Override
                            public void onErrorResponse(VolleyError error) {
                                textView.setText("Something went wrong");
                                error.printStackTrace();
                            }
                        });
                SingletonRequestQueue.getmInstance(getApplicationContext()).addRequestQueue(stringRequest);
            }
        });
    }

    /** Returns {@code value}, or an empty string when it is {@code null}. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /** Opens the Facebook share dialog with a link, hashtag, tagged users and place. */
    private void share() {
        shareDialog = new ShareDialog(this);

        // The "{USER_ID}" / "{PLACE_ID}" strings are template placeholders.
        List<String> taggedUserIds = new ArrayList<String>();
        taggedUserIds.add("{USER_ID}");
        taggedUserIds.add("{USER_ID}");
        taggedUserIds.add("{USER_ID}");

        ShareLinkContent content = new ShareLinkContent.Builder()
                .setContentUrl(Uri.parse("http://www.sitepoint.com"))
                .setContentTitle("This is a content title")
                .setContentDescription("This is a description")
                .setShareHashtag(new ShareHashtag.Builder().setHashtag("#sitepoint").build())
                .setPeopleIds(taggedUserIds)
                .setPlaceId("{PLACE_ID}")
                .build();
        shareDialog.show(content);
    }

    /** Requests "/me/posts" from the Graph API asynchronously and logs the response. */
    private void getPosts() {
        new GraphRequest(
                AccessToken.getCurrentAccessToken(),
                "/me/posts",
                null,
                HttpMethod.GET,
                new GraphRequest.Callback() {
                    public void onCompleted(GraphResponse response) {
                        Log.e(TAG, response.toString());
                    }
                }).executeAsync();
    }

    /** Logs out of Facebook, opens LoginActivity and finishes this activity. */
    private void logout() {
        LoginManager.getInstance().logOut();
        Intent login = new Intent(MainActivity.this, LoginActivity.class);
        startActivity(login);
        finish();
    }

    /**
     * Dispatches a click to the matching action by view id.
     * NOTE(review): this class does not implement View.OnClickListener, so
     * this is not an override; presumably it is wired via android:onClick in
     * the layout XML - confirm.
     */
    public void onClick(View view) {
        switch (view.getId()) {
            case R.id.share:
                share();
                break;
            case R.id.getPosts:
                getPosts();
                break;
            case R.id.logout:
                logout();
                break;
        }
    }
}
答案 2 :(得分:0)
字符(character)或因子(factor)不能直接相加。我们需要先将因子转换为字符,删除 $ ,然后再转换为数值。
library(dplyr)
library(stringr)

# Clean the payment column in a single step (factor -> character, drop the
# literal "$", -> numeric), then total it per e-mail address.
x2 <- x %>%
  mutate(
    payment_amount = as.numeric(
      str_replace(as.character(payment_amount), fixed("$"), "")
    )
  ) %>%
  group_by(emailaddress) %>%
  summarise(payment_amount = sum(payment_amount))
x2
# A tibble: 1 x 2
emailaddress payment_amount
<fctr> <dbl>
1 xy@gmail.com 176.23
数据:
# Sample data for the examples: four orders from one customer.
# Fields that contain spaces (the two date-times and the amount) are wrapped
# in single quotes so read.table() reads each of them as a single column.
# The first line of the text supplies the column names (header = TRUE).
x <- read.table(text = "emailaddress customer_acquisation_date customer_order_date payment_amount
xy@gmail.com '01/05/2013 6:24 AM' '01/05/2013 5:10 AM' '$ 20.67'
xy@gmail.com '01/05/2013 6:24 AM' '02/07/2013 7:21 PM' '$ 25.56'
xy@gmail.com '01/05/2013 6:24 AM' '07/10/2013 8:00 AM' '$100.00'
xy@gmail.com '01/05/2013 6:24 AM' '08/12/2013 9:35 AM' '$30.00'",
header = TRUE)
答案 3 :(得分:0)
我建议使用 readr、dplyr 和 lubridate:
library(tidyverse)
library(lubridate)
# Inline CSV sample: one row per order (e-mail, order date-time, payment).
data_string <- trimws('
email , datetime , payment
xy@gmail.com , 01/05/2013 5:10 AM , $20.67
xy@gmail.com , 02/07/2013 7:21 PM , $25.56
xy@gmail.com , 07/10/2013 8:00 AM , $100.00
xy@gmail.com , 08/12/2013 9:35 AM , $30.00
')

# Parse the sample with explicit column types: month/day/year 12-hour
# date-times, and payments read as plain numbers.
orders <- data_string %>%
  read_csv(
    col_types = cols(
      email = col_character(),
      datetime = col_datetime(format = "%m/%d/%Y %I:%M %p"),
      payment = col_number()
    )
  )
orders
## # A tibble: 4 x 3
## email datetime payment
## <chr> <dttm> <dbl>
## 1 xy@gmail.com 2013-01-05 05:10:00 20.67
## 2 xy@gmail.com 2013-02-07 19:21:00 25.56
## 3 xy@gmail.com 2013-07-10 08:00:00 100.00
## 4 xy@gmail.com 2013-08-12 09:35:00 30.00
# One row per e-mail address: the payment total, the earliest order
# date-time (used as the acquisition date), and a list-column holding the
# calendar dates of all of that customer's orders.
customers <- summarise(
  group_by(orders, email),
  total_payment = sum(payment),
  acquisition_date = min(datetime),
  order_dates = list(date(datetime))
)
customers
## # A tibble: 1 x 4
## email total_payment acquisition_date order_dates
## <chr> <dbl> <dttm> <list>
## 1 xy@gmail.com 176.23 2013-01-05 05:10:00 <date [4]>
customers[["order_dates"]]
## [[1]]
## [1] "2013-01-05" "2013-02-07" "2013-07-10" "2013-08-12"