Question

我有这样的数据：

emailaddress    customer_acquisation_date  customer_order_date  payment_amount
xy@gmail.com     01/05/2013 6:24 AM         01/05/2013 5:10 AM           $ 20.67
xy@gmail.com     01/05/2013 6:24 AM         02/07/2013 7:21 PM           $ 25.56
xy@gmail.com     01/05/2013 6:24 AM         07/10/2013 8:00 AM           $100.00
xy@gmail.com     01/05/2013 6:24 AM         08/12/2013 9:35 AM           $30.00

我想通过电子邮件地址汇总（付款金额），我希望最终输出为：

emailaddress    customer_acquisation_date  customer_order_date  payment_amount
xy@gmail.com     01/05/2013 6:24 AM         01/05/2013            $ 177
                                            02/07/2013                 
                                            07/10/2013                 
                                            08/12/2013

我写的代码

z <- aggregate(x$emailaddress~x$paymentamount,data=x,FUN=sum)

我收到错误

Error in Summary.factor(c(211594L, 291939L, 79240L, 208971L, 369325L,  : 
  ‘sum’ not meaningful for factors

这样做的正确方法是什么。任何帮助表示赞赏

Answer 1

aggregate函数的公式应先写要聚合的值，再写分组变量。如上所述，您还需要删除美元符号，才能将该列转换为数字格式。

# Strip the literal "$" so the amounts can be parsed as numbers.
x$payment_amount <- as.numeric(
  gsub("$", "", x$payment_amount, fixed = TRUE)
)

# Formula interface: the value to aggregate goes on the left of `~`,
# the grouping column on the right.
z <- aggregate(payment_amount ~ emailaddress, data = x, FUN = sum)

编辑：添加data.table解决方案，同时保留其他列。

 library(data.table)

 # Turn the data.frame into a data.table so `:=` can be used.
 setDT(x)

 # Add the per-email total as a new column while keeping every other column.
 z <- x[, payment_total := sum(payment_amount), by = emailaddress]

 # Hand the result back as a plain data.frame.
 setDF(z)

Answer 2

以下方法不是删除$符号，而是直接提取数值。这样做的好处是不需要指定具体要删除的内容（例如不同的货币符号，或“10.00美元”这样的写法）。我还添加了merge来得到接近OP预期的输出（并不完全相同，但恕我直言这样更好）：

library(magrittr)
library(dplyr)

# Extract the numeric part of each amount (e.g. "$ 20.67" -> 20.67) rather than
# stripping one specific currency symbol. The braces let `.` be passed to both
# regmatches() and the nested gregexpr() call.
# NOTE: the pattern requires a decimal point, so a value such as "$30" would
# not match.
x$payment_amount %<>% {regmatches(., gregexpr("[[:digit:]]+[.][[:digit:]]+", .))} %>%
  as.numeric()

# Total payment per email address. The data frame in this example is `x`;
# `data = df` referred to an object that is never defined here.
aggre <- aggregate(payment_amount ~ emailaddress, data = x, FUN = sum)

# Replace the per-order amount with the per-customer total on every row.
select(x, -payment_amount) %>%
  merge(aggre, by = "emailaddress") %>%
  rename(tot_payment_amount = payment_amount)

结果：

  emailaddress customer_acquisation_date customer_order_date tot_payment_amount
1 xy@gmail.com        01/05/2013 6:24 AM  01/05/2013 5:10 AM             176.23
2 xy@gmail.com        01/05/2013 6:24 AM  02/07/2013 7:21 PM             176.23
3 xy@gmail.com        01/05/2013 6:24 AM  07/10/2013 8:00 AM             176.23
4 xy@gmail.com        01/05/2013 6:24 AM  08/12/2013 9:35 AM             176.23

注意：

我使用magrittr包主要是为了它方便的双向管道操作符%<>%。它将LHS（x$payment_amount）提供给RHS上的.，并把RHS的输出赋值回LHS。regmatches周围的{}是必需的，这样才能把x$payment_amount同时传给嵌套在函数内部的函数（即regmatches内gregexpr的text=参数）。对于觉得%<>%容易混淆的人，以下是不使用%<>%的等价写法：

x$payment_amount = with(x, regmatches(payment_amount, gregexpr("[[:digit:]]+[.][[:digit:]]+", payment_amount))) %>% as.numeric()

package com.ticketapp.emailaddresss.ticketapp;

import android.content.Intent;
import android.net.Uri;
import android.support.v7.app.AppCompatActivity;
import android.os.Bundle;
import android.util.Log;
import android.view.View;
import android.widget.Button;
import android.widget.TextView;

import com.android.volley.Request;
import com.android.volley.RequestQueue;
import com.android.volley.Response;
import com.android.volley.VolleyError;
import com.android.volley.toolbox.StringRequest;
import com.facebook.AccessToken;

import com.facebook.GraphRequest;
import com.facebook.GraphResponse;
import com.facebook.HttpMethod;
import com.facebook.login.LoginManager;
import com.facebook.share.model.ShareHashtag;
import com.facebook.share.model.ShareLinkContent;
import com.facebook.share.widget.ShareDialog;

import java.util.ArrayList;
import java.util.List;

/**
 * Main screen shown after login.
 *
 * Displays the user's name and surname taken from the launching intent's
 * extras, and sends a POST request to {@link #server_url} when the button is
 * pressed, showing the response text (or an error message) in a TextView.
 * Also contains helpers for sharing a link, fetching the user's posts and
 * logging out.
 */
public class MainActivity extends AppCompatActivity {

    Button button;
    TextView textView;
    String server_url = "https://www.idg.se/";
    // Not assigned anywhere in this class; requests go through
    // SingletonRequestQueue instead.
    RequestQueue requestQueue;

    private ShareDialog shareDialog;
    private String name, surname, imageUrl;
    private static final String TAG = "MainActivity";

    /**
     * Inflates the layout, fills in the user's name from the intent extras and
     * wires the button to issue the POST request.
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        button = (Button) findViewById(R.id.bm);
        textView = (TextView) findViewById(R.id.txt);

        // getExtras() returns null when the activity is started without
        // extras; guard so that case does not crash with a NullPointerException.
        Bundle inBundle = getIntent().getExtras();
        if (inBundle != null) {
            name = inBundle.getString("name");
            surname = inBundle.getString("surname");
            imageUrl = inBundle.getString("imageUrl");
        }

        TextView nameView = (TextView) findViewById(R.id.nameAndSurname);
        nameView.setText("" + name + " " + surname);
        button.setOnClickListener(new View.OnClickListener() {

            @Override
            public void onClick(View v) {
                StringRequest stringRequest = new StringRequest(Request.Method.POST, server_url, new Response.Listener<String>() {
                    @Override
                    public void onResponse(String response) {
                        // Show the raw response body.
                        textView.setText(response);
                        //requestQueue.stop();
                    }
                }, new Response.ErrorListener() {
                    @Override
                    public void onErrorResponse(VolleyError error) {
                        textView.setText("Something went wrong");
                        error.printStackTrace();
                        //requestQueue.stop();
                    }
                });
                //requestQueue.add(stringRequest);
                SingletonRequestQueue.getmInstance(getApplicationContext()).addRequestQueue(stringRequest);
            }
        });
    }

    /**
     * Opens the share dialog with a fixed link, hashtag, tagged user ids and
     * place id. The "{USER_ID}" / "{PLACE_ID}" values are placeholders.
     */
    private void share() {
        shareDialog = new ShareDialog(this);
        List<String> taggedUserIds = new ArrayList<String>();
        taggedUserIds.add("{USER_ID}");
        taggedUserIds.add("{USER_ID}");
        taggedUserIds.add("{USER_ID}");

        ShareLinkContent content = new ShareLinkContent.Builder()
                .setContentUrl(Uri.parse("http://www.sitepoint.com"))
                .setContentTitle("This is a content title")
                .setContentDescription("This is a description")
                .setShareHashtag(new ShareHashtag.Builder().setHashtag("#sitepoint").build())
                .setPeopleIds(taggedUserIds)
                .setPlaceId("{PLACE_ID}")
                .build();

        shareDialog.show(content);
    }

    /** Requests "/me/posts" asynchronously and logs the response. */
    private void getPosts() {
        new GraphRequest(AccessToken.getCurrentAccessToken(), "/me/posts", null, HttpMethod.GET, new GraphRequest.Callback() {
            public void onCompleted(GraphResponse response) {
                Log.e(TAG, response.toString());
            }
        }).executeAsync();
    }

    /** Logs out, starts LoginActivity and finishes this activity. */
    private void logout() {
        LoginManager.getInstance().logOut();
        Intent login = new Intent(MainActivity.this, LoginActivity.class);
        startActivity(login);
        finish();
    }

    /**
     * Dispatches a click to share(), getPosts() or logout() based on the
     * clicked view's id.
     *
     * NOTE(review): this class does not implement View.OnClickListener and no
     * code visible here registers this method as a listener; presumably it is
     * referenced from the layout XML - confirm.
     */
    //@Override
    public void onClick(View view) {
        switch (view.getId()) {
            case R.id.share:
                share();
                break;

            case R.id.getPosts:
                getPosts();
                break;

            case R.id.logout:
                logout();
                break;
        }
    }
}

Answer 3

我们无法对字符或因子求和。我们需要先将因子转换为字符，删除$，然后再转换为数字。

library(dplyr)
library(stringr)

# Sum payments per customer. payment_amount holds text such as "$ 20.67"
# (possibly as a factor), so convert it to character, drop the literal "$"
# and parse it as a number before aggregating.
x2 <- x %>%
  mutate(
    payment_amount = as.numeric(
      str_replace(as.character(payment_amount), fixed("$"), "")
    )
  ) %>%
  group_by(emailaddress) %>%
  summarise(payment_amount = sum(payment_amount))

x2
# A tibble: 1 x 2
  emailaddress payment_amount
        <fctr>          <dbl>
1 xy@gmail.com         176.23

数据

# Example data: one header row followed by four order rows. Each record must be
# on its own line so read.table() can split the rows; the single-quoted fields
# keep their embedded spaces.
x <- read.table(text = "emailaddress customer_acquisation_date customer_order_date payment_amount
xy@gmail.com '01/05/2013 6:24 AM' '01/05/2013 5:10 AM' '$ 20.67'
xy@gmail.com '01/05/2013 6:24 AM' '02/07/2013 7:21 PM' '$ 25.56'
xy@gmail.com '01/05/2013 6:24 AM' '07/10/2013 8:00 AM' '$100.00'
xy@gmail.com '01/05/2013 6:24 AM' '08/12/2013 9:35 AM' '$30.00'", header = TRUE)

Answer 4

我建议使用readr，dplyr和lubridate：

library(tidyverse)
library(lubridate)

# Inline CSV used as sample input; trimws() removes the leading and trailing
# newline introduced by the multi-line literal.
data_string <- trimws("
email        , datetime           , payment
xy@gmail.com , 01/05/2013 5:10 AM , $20.67
xy@gmail.com , 02/07/2013 7:21 PM , $25.56
xy@gmail.com , 07/10/2013 8:00 AM , $100.00
xy@gmail.com , 08/12/2013 9:35 AM , $30.00
")

# Parse with explicit column types: datetime uses month/day/year with a
# 12-hour clock, and payment is read as a number.
orders <- read_csv(
  data_string,
  col_types = cols(
    email = col_character(),
    datetime = col_datetime(format = "%m/%d/%Y %I:%M %p"),
    payment = col_number()
  )
)
orders

## # A tibble: 4 x 3
##          email            datetime payment
##          <chr>              <dttm>   <dbl>
## 1 xy@gmail.com 2013-01-05 05:10:00   20.67
## 2 xy@gmail.com 2013-02-07 19:21:00   25.56
## 3 xy@gmail.com 2013-07-10 08:00:00  100.00
## 4 xy@gmail.com 2013-08-12 09:35:00   30.00

# One row per email: the payment total, the earliest order timestamp, and a
# list-column holding the calendar dates of all that customer's orders.
customers <- summarise(
  group_by(orders, email),
  total_payment = sum(payment),
  acquisition_date = min(datetime),
  order_dates = list(date(datetime))
)
customers

## # A tibble: 1 x 4
##          email total_payment    acquisition_date order_dates
##          <chr>         <dbl>              <dttm>      <list>
## 1 xy@gmail.com        176.23 2013-01-05 05:10:00  <date [4]>

# Inspect the list-column: one vector of order dates per customer.
customers$order_dates

## [[1]]
## [1] "2013-01-05" "2013-02-07" "2013-07-10" "2013-08-12"

按付款金额汇总

4 个答案: