从两个分组列中获取最大值

时间:2017-09-17 07:11:23

标签: r dplyr

我用 dplyr 统计了 city 和 name 两列中每种组合出现的次数。

yelp_tbl %>% group_by(city, name) %>% count(city, name)

# A tibble: 126,968 x 3
# Groups:   city, name [126,968]
    city        name     n
   <chr>       <chr> <int>
 1   Toronto   Starbucks   132
 2 Las Vegas   Starbucks   129
 3 Las Vegas      Subway   107
 4 Las Vegas  McDonald's    79
 5   Phoenix   Starbucks    75

现在我想筛选出每个城市中 n 最大的那一行。也就是说,对每个城市只保留 n 最大的名称/城市组合,使每个城市只出现一次。

我试过了:

yelp_tbl %>% group_by(city, name) %>% count(city, name) %>% top_n(1,n)

但它不起作用。谢谢。

数据框:

> dput(yelp_tbl[ 1:20, ])
structure(list(business_id = c("0DI8Dt2PJp07XkVvIElIcQ", "LTlCaCGZE14GuaUXUGbamg", 
"EDqCEAGXVGCH4FJXgqtjqg", "cnGIivYRLxpF7tBVR_JwWA", "cdk-qqJ71q6P7TJTww_DSA", 
"Q9rsaUiQ-A3NdEAloy0aJA", "Cu4_Fheh7IrzGiK-Pc79ig", "GDnbt3isfhd57T1QqU6flg", 
"qwAHit4Tuj1zpO7CxVwOMA", "Nbr0kbtIrVlEcKIZoXWbSw", "MFneYHieJ_lnjMeFUGKVbw", 
"42romV8altAeuZuP2OC1gw", "iaunX_af5M5lfT2eEm9FMQ", "Tc24GX9-ZPr4_SHU0nJZZA", 
"6EvETd9FVPJfhT_6AW9iEw", "DSWsjtAfLYw9a4MTz0kKLw", "SbfEPi-iR4ntf3wRQfxYyw", 
"YCsLfBVdLFeN2Necw1HPSA", "O_4OTnw48ULP5uZh6MqiOQ", "CE0dABv9sfrXjDIJugpU8A"
), name = c("Innovative Vapors", "Cut and Taste", "Pizza Pizza", 
"Plush Salon and Spa", "Comfort Inn", "A Plus Nail", "Boomerang Baby", 
"Taco Bell", "CubeSmart Self Storage", "Sehne Backwaren", "Revv Illusions", 
"Ohana Hawaiian BBQ", "Encore Books and Records", "Canyon Ridge Endodontics", 
"Fossil Store", "Best Buy", "Woofs, Wiggles N Wags All Breed Rescue", 
"Stussy", "Holiday Inn Express & Suites Madison Central", "Graceful Cake Creations"
), neighborhood = c("", "", "Dufferin Grove", "", "Downtown Core", 
"", "", "", "", "", "", "", "Notre-Dame-de-Grâce", "", "", "", 
"", "Ossington Strip", "", ""), address = c("227 E Baseline Rd, Ste J2", 
"495 S Grand Central Pkwy", "979 Bloor Street W", "7014 Steubenville Pike", 
"321 Jarvis Street", "30 Gibson Drive, Suite122", "10875 N Frankloyd Wright Blvd", 
"11072 No Frank Lloyd Wright", "11000 North 115th Street", "Hauptstr. 1", 
"2553 Wigwam Pkwy", "1500 N Green Valley Pkwy, Ste 230", "5670 Sherbrooke Rue W", 
"2525 W Carefree Hwy", "6801 North Lake Mall Drive, 173", "2500 Winston Park Drive, Unit A", 
"", "1000 Queen Street W", "610 John Nolen Dr", "330 S Gilbert Rd, Ste 15"
), city = c("Tempe", "Las Vegas", "Toronto", "Oakdale", "Toronto", 
"Markham", "Scottsdale", "Scottsdale", "Scottsdale", "Stuttgart", 
"Henderson", "Henderson", "Montréal", "Phoenix", "Charlotte", 
"Oakville", "Scottsdale", "Toronto", "Madison", "Mesa"), state = c("AZ", 
"NV", "ON", "PA", "ON", "ON", "AZ", "AZ", "AZ", "BW", "NV", "NV", 
"QC", "AZ", "NC", "ON", "AZ", "ON", "WI", "AZ"), postal_code = c("85283", 
"89106", "M6H 1L5", "15071", "M5B 2C2", "L3R 2S3", "85259", "85259", 
"85259", "70563", "89074", "89074", "H4A 1W7", "85085", "28216", 
"L6H 7E5", "", "M6J 1H1", "53713", "85204"), latitude = c(33.3782141, 
36.1922841, 43.661054, 40.4445439533, 43.6598286, 43.82764, 33.5852707, 
33.58671, 33.5864403, 48.7299, 36.0345795, 36.029596485, 45.4707675, 
33.7967745, 35.3521284, 43.5162547833, 33.6031328399, 43.644258153, 
43.048623, 33.4091719), longitude = c(-111.936102, -115.1592718, 
-79.429089, -80.1745398943, -79.3754006, -79.342722, -111.8349538, 
-111.83541, -111.8325788, 9.11242, -115.0977701, -115.085821152, 
-73.6150299, -112.1154511, -80.8512352, -79.6859058738, -111.971311071, 
-79.4188293813, -89.376341, -111.7884179), stars = c(4.5, 5, 
2.5, 4, 3, 2.5, 3.5, 2.5, 4.5, 3.5, 5, 4, 5, 4, 3.5, 3, 4.5, 
2, 4.5, 5), review_count = c(17L, 9L, 7L, 4L, 8L, 3L, 8L, 9L, 
11L, 3L, 7L, 38L, 4L, 4L, 3L, 3L, 17L, 5L, 14L, 7L), is_open = c(0L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 
0L, 1L, 1L), attributes = list(c("BikeParking: True", "BusinessAcceptsBitcoin: False", 
"BusinessAcceptsCreditCards: True", "BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': True, 'valet': False}", 
"DogsAllowed: False", "RestaurantsPriceRange2: 2", "WheelchairAccessible: True"
), c("BusinessAcceptsBitcoin: False", "BusinessAcceptsCreditCards: True"
), c("Alcohol: none", "Ambience: {'romantic': False, 'intimate': False, 'classy': False, 'hipster': False, 'touristy': False, 'trendy': False, 'upscale': False, 'casual': False}", 
"BikeParking: True", "BusinessAcceptsCreditCards: True", "BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': False, 'valet': False}", 
"Caters: True", "GoodForKids: True", "GoodForMeal: {'dessert': False, 'latenight': False, 'lunch': False, 'dinner': False, 'breakfast': False, 'brunch': False}", 
"HasTV: True", "NoiseLevel: quiet", "OutdoorSeating: False", 
"RestaurantsAttire: casual", "RestaurantsDelivery: True", "RestaurantsGoodForGroups: True", 
"RestaurantsPriceRange2: 1", "RestaurantsReservations: False", 
"RestaurantsTableService: False", "RestaurantsTakeOut: True", 
"WiFi: free"), c("AcceptsInsurance: False", "BusinessAcceptsCreditCards: True", 
"BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': True, 'valet': False}", 
"ByAppointmentOnly: True", "HairSpecializesIn: {'coloring': False, 'africanamerican': False, 'curly': False, 'perms': False, 'kids': False, 'extensions': False, 'asian': False, 'straightperms': False}", 
"RestaurantsPriceRange2: 3"), c("BusinessAcceptsCreditCards: True", 
"RestaurantsPriceRange2: 2", "WiFi: free"), c("BusinessAcceptsCreditCards: True", 
"ByAppointmentOnly: True", "RestaurantsPriceRange2: 3"), c("BikeParking: True", 
"BusinessAcceptsCreditCards: True", "BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': True, 'valet': False}", 
"RestaurantsPriceRange2: 2", "WheelchairAccessible: True"), c("Alcohol: none", 
"Ambience: {'romantic': False, 'intimate': False, 'classy': False, 'hipster': False, 'divey': False, 'touristy': False, 'trendy': False, 'upscale': False, 'casual': False}", 
"BikeParking: True", "BusinessAcceptsCreditCards: True", "BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': False, 'valet': False}", 
"DriveThru: True", "GoodForKids: True", "GoodForMeal: {'dessert': False, 'latenight': False, 'lunch': False, 'dinner': False, 'breakfast': False, 'brunch': False}", 
"HasTV: False", "NoiseLevel: quiet", "OutdoorSeating: False", 
"RestaurantsAttire: casual", "RestaurantsDelivery: False", "RestaurantsGoodForGroups: True", 
"RestaurantsPriceRange2: 1", "RestaurantsReservations: False", 
"RestaurantsTableService: False", "RestaurantsTakeOut: True", 
"WiFi: no"), "BusinessAcceptsCreditCards: True", c("BikeParking: True", 
"BusinessAcceptsCreditCards: False", "BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': False, 'valet': False}", 
"RestaurantsDelivery: False", "RestaurantsPriceRange2: 3", "RestaurantsTakeOut: True"
), NULL, c("Alcohol: none", "Ambience: {'romantic': False, 'intimate': False, 'classy': False, 'hipster': False, 'divey': False, 'touristy': False, 'trendy': False, 'upscale': False, 'casual': True}", 
"BusinessAcceptsCreditCards: True", "BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': True, 'valet': False}", 
"Caters: True", "GoodForKids: True", "GoodForMeal: {'dessert': False, 'latenight': False, 'lunch': True, 'dinner': True, 'breakfast': False, 'brunch': False}", 
"HasTV: True", "NoiseLevel: quiet", "OutdoorSeating: False", 
"RestaurantsAttire: casual", "RestaurantsDelivery: False", "RestaurantsGoodForGroups: True", 
"RestaurantsPriceRange2: 1", "RestaurantsReservations: False", 
"RestaurantsTableService: False", "RestaurantsTakeOut: True", 
"WiFi: no"), c("BikeParking: True", "BusinessParking: {'garage': False, 'street': True, 'validated': False, 'lot': False, 'valet': False}", 
"RestaurantsPriceRange2: 1"), "ByAppointmentOnly: True", NULL, 
    c("BikeParking: True", "BusinessAcceptsCreditCards: True", 
    "BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': True, 'valet': False}", 
    "RestaurantsPriceRange2: 2"), NULL, c("BusinessAcceptsCreditCards: True", 
    "BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': False, 'valet': False}", 
    "RestaurantsPriceRange2: 3"), c("BusinessAcceptsCreditCards: True", 
    "RestaurantsPriceRange2: 2", "WiFi: free"), c("BusinessAcceptsCreditCards: True", 
    "BusinessParking: {'garage': False, 'street': True, 'validated': False, 'lot': True, 'valet': False}", 
    "RestaurantsDelivery: True", "RestaurantsPriceRange2: 2", 
    "RestaurantsTakeOut: True", "WheelchairAccessible: True")), 
    categories = list(c("Tobacco Shops", "Nightlife", "Vape Shops", 
    "Shopping"), c("Caterers", "Grocery", "Food", "Event Planning & Services", 
    "Party & Event Planning", "Specialty Food"), c("Restaurants", 
    "Pizza", "Chicken Wings", "Italian"), c("Hair Removal", "Beauty & Spas", 
    "Blow Dry/Out Services", "Hair Stylists", "Hair Extensions", 
    "Massage", "Permanent Makeup", "Waxing", "Hair Salons"), 
        c("Hotels & Travel", "Event Planning & Services", "Hotels"
        ), c("Nail Salons", "Beauty & Spas"), c("Baby Gear & Furniture", 
        "Shopping"), c("Tex-Mex", "Mexican", "Fast Food", "Restaurants"
        ), c("Local Services", "Self Storage"), c("Food", "Bakeries"
        ), c("Nail Salons", "Hair Salons", "Beauty & Spas"), 
        c("Hawaiian", "Restaurants", "Barbeque"), c("Shopping", 
        "Bookstores", "Books", "Mags", "Music & Video"), c("General Dentistry", 
        "Health & Medical", "Dentists"), c("Leather Goods", "Fashion", 
        "Watches", "Accessories", "Shopping"), c("Mobile Phones", 
        "Computers", "Local Services", "Electronics", "IT Services & Computer Repair", 
        "Shopping"), c("Animal Shelters", "Pets"), c("Shoe Stores", 
        "Fashion", "Men's Clothing", "Shopping"), c("Hotels & Travel", 
        "Venues & Event Spaces", "Event Planning & Services", 
        "Hotels"), c("Food", "Bakeries")), hours = list(c("Monday 11:0-21:0", 
    "Tuesday 11:0-21:0", "Wednesday 11:0-21:0", "Thursday 11:0-21:0", 
    "Friday 11:0-22:0", "Saturday 10:0-22:0", "Sunday 11:0-18:0"
    ), c("Monday 0:0-0:0", "Tuesday 0:0-0:0", "Wednesday 0:0-0:0", 
    "Thursday 0:0-0:0", "Friday 0:0-0:0", "Saturday 0:0-0:0", 
    "Sunday 0:0-0:0"), c("Monday 11:0-2:0", "Tuesday 11:0-2:0", 
    "Wednesday 11:0-2:0", "Thursday 11:0-3:0", "Friday 11:0-3:0", 
    "Saturday 11:0-3:0", "Sunday 11:0-2:0"), c("Tuesday 10:0-21:0", 
    "Wednesday 10:0-21:0", "Thursday 10:0-21:0", "Friday 10:0-18:0", 
    "Saturday 9:0-16:0"), NULL, c("Monday 10:30-20:0", "Tuesday 10:30-20:0", 
    "Thursday 10:30-20:0", "Friday 10:30-20:0", "Saturday 10:30-19:0", 
    "Sunday 10:30-19:0"), c("Monday 10:0-18:0", "Tuesday 10:0-18:0", 
    "Wednesday 10:0-18:0", "Thursday 10:0-18:0", "Friday 10:0-18:0", 
    "Saturday 10:0-18:0", "Sunday 11:0-16:0"), c("Monday 10:0-22:0", 
    "Tuesday 10:0-22:0", "Wednesday 10:0-22:0", "Thursday 10:0-22:0", 
    "Friday 10:0-22:0", "Saturday 10:0-22:0", "Sunday 10:0-22:0"
    ), c("Monday 9:30-18:0", "Tuesday 9:30-18:0", "Wednesday 9:30-18:0", 
    "Thursday 9:30-18:0", "Friday 9:30-18:0", "Saturday 8:30-17:0", 
    "Sunday 11:0-15:0"), NULL, c("Monday 8:0-19:0", "Tuesday 8:0-19:0", 
    "Wednesday 8:0-19:0", "Thursday 8:0-19:0", "Friday 8:0-19:0", 
    "Saturday 8:0-17:0", "Sunday 10:0-13:0"), c("Monday 11:0-21:30", 
    "Tuesday 11:0-21:30", "Wednesday 11:0-21:30", "Thursday 11:0-21:30", 
    "Friday 11:0-21:30", "Saturday 11:0-21:30", "Sunday 11:30-21:0"
    ), c("Monday 11:0-19:0", "Tuesday 11:0-19:0", "Wednesday 11:0-19:0", 
    "Thursday 11:0-21:0", "Friday 11:0-21:0", "Saturday 11:0-19:0", 
    "Sunday 11:0-19:0"), NULL, c("Monday 10:0-21:0", "Tuesday 10:0-21:0", 
    "Wednesday 10:0-21:0", "Thursday 10:0-21:0", "Friday 10:0-22:0", 
    "Saturday 10:0-22:0", "Sunday 11:0-19:0"), c("Monday 10:0-21:0", 
    "Tuesday 10:0-21:0", "Wednesday 10:0-21:0", "Thursday 10:0-21:0", 
    "Friday 10:0-21:0", "Saturday 10:0-21:0", "Sunday 11:0-18:0"
    ), NULL, NULL, c("Monday 0:0-0:0", "Tuesday 0:0-0:0", "Wednesday 0:0-0:0", 
    "Thursday 0:0-0:0", "Friday 0:0-0:0", "Saturday 0:0-0:0", 
    "Sunday 0:0-0:0"), c("Monday 9:0-16:0", "Tuesday 9:0-16:0", 
    "Wednesday 9:0-16:0", "Thursday 9:0-16:0", "Friday 9:0-16:0", 
    "Saturday 10:0-15:0", "Sunday 10:0-15:0")), type = c("business", 
    "business", "business", "business", "business", "business", 
    "business", "business", "business", "business", "business", 
    "business", "business", "business", "business", "business", 
    "business", "business", "business", "business")), .Names = c("business_id", 
"name", "neighborhood", "address", "city", "state", "postal_code", 
"latitude", "longitude", "stars", "review_count", "is_open", 
"attributes", "categories", "hours", "type"), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame"))

0 个答案:

没有答案