我用 dplyr 统计了 city 和 name 两列中每种组合出现的次数。
yelp_tbl %>% group_by(city, name) %>% count(city, name)
# A tibble: 126,968 x 3
# Groups: city, name [126,968]
city name n
<chr> <chr> <int>
1 Toronto Starbucks 132
2 Las Vegas Starbucks 129
3 Las Vegas Subway 107
4 Las Vegas McDonald's 79
5 Phoenix Starbucks 75
现在我想筛选出每个城市中 n 最大的那一行。也就是说,对每个城市只保留 n 最大的 name/city 组合,使每个城市在结果中只出现一次。
我试过了:
yelp_tbl %>% group_by(city, name) %>% count(city, name) %>% top_n(1,n)
但它不起作用。谢谢。
数据框:
> dput(yelp_tbl[ 1:20, ])
structure(list(business_id = c("0DI8Dt2PJp07XkVvIElIcQ", "LTlCaCGZE14GuaUXUGbamg",
"EDqCEAGXVGCH4FJXgqtjqg", "cnGIivYRLxpF7tBVR_JwWA", "cdk-qqJ71q6P7TJTww_DSA",
"Q9rsaUiQ-A3NdEAloy0aJA", "Cu4_Fheh7IrzGiK-Pc79ig", "GDnbt3isfhd57T1QqU6flg",
"qwAHit4Tuj1zpO7CxVwOMA", "Nbr0kbtIrVlEcKIZoXWbSw", "MFneYHieJ_lnjMeFUGKVbw",
"42romV8altAeuZuP2OC1gw", "iaunX_af5M5lfT2eEm9FMQ", "Tc24GX9-ZPr4_SHU0nJZZA",
"6EvETd9FVPJfhT_6AW9iEw", "DSWsjtAfLYw9a4MTz0kKLw", "SbfEPi-iR4ntf3wRQfxYyw",
"YCsLfBVdLFeN2Necw1HPSA", "O_4OTnw48ULP5uZh6MqiOQ", "CE0dABv9sfrXjDIJugpU8A"
), name = c("Innovative Vapors", "Cut and Taste", "Pizza Pizza",
"Plush Salon and Spa", "Comfort Inn", "A Plus Nail", "Boomerang Baby",
"Taco Bell", "CubeSmart Self Storage", "Sehne Backwaren", "Revv Illusions",
"Ohana Hawaiian BBQ", "Encore Books and Records", "Canyon Ridge Endodontics",
"Fossil Store", "Best Buy", "Woofs, Wiggles N Wags All Breed Rescue",
"Stussy", "Holiday Inn Express & Suites Madison Central", "Graceful Cake Creations"
), neighborhood = c("", "", "Dufferin Grove", "", "Downtown Core",
"", "", "", "", "", "", "", "Notre-Dame-de-Grâce", "", "", "",
"", "Ossington Strip", "", ""), address = c("227 E Baseline Rd, Ste J2",
"495 S Grand Central Pkwy", "979 Bloor Street W", "7014 Steubenville Pike",
"321 Jarvis Street", "30 Gibson Drive, Suite122", "10875 N Frankloyd Wright Blvd",
"11072 No Frank Lloyd Wright", "11000 North 115th Street", "Hauptstr. 1",
"2553 Wigwam Pkwy", "1500 N Green Valley Pkwy, Ste 230", "5670 Sherbrooke Rue W",
"2525 W Carefree Hwy", "6801 North Lake Mall Drive, 173", "2500 Winston Park Drive, Unit A",
"", "1000 Queen Street W", "610 John Nolen Dr", "330 S Gilbert Rd, Ste 15"
), city = c("Tempe", "Las Vegas", "Toronto", "Oakdale", "Toronto",
"Markham", "Scottsdale", "Scottsdale", "Scottsdale", "Stuttgart",
"Henderson", "Henderson", "Montréal", "Phoenix", "Charlotte",
"Oakville", "Scottsdale", "Toronto", "Madison", "Mesa"), state = c("AZ",
"NV", "ON", "PA", "ON", "ON", "AZ", "AZ", "AZ", "BW", "NV", "NV",
"QC", "AZ", "NC", "ON", "AZ", "ON", "WI", "AZ"), postal_code = c("85283",
"89106", "M6H 1L5", "15071", "M5B 2C2", "L3R 2S3", "85259", "85259",
"85259", "70563", "89074", "89074", "H4A 1W7", "85085", "28216",
"L6H 7E5", "", "M6J 1H1", "53713", "85204"), latitude = c(33.3782141,
36.1922841, 43.661054, 40.4445439533, 43.6598286, 43.82764, 33.5852707,
33.58671, 33.5864403, 48.7299, 36.0345795, 36.029596485, 45.4707675,
33.7967745, 35.3521284, 43.5162547833, 33.6031328399, 43.644258153,
43.048623, 33.4091719), longitude = c(-111.936102, -115.1592718,
-79.429089, -80.1745398943, -79.3754006, -79.342722, -111.8349538,
-111.83541, -111.8325788, 9.11242, -115.0977701, -115.085821152,
-73.6150299, -112.1154511, -80.8512352, -79.6859058738, -111.971311071,
-79.4188293813, -89.376341, -111.7884179), stars = c(4.5, 5,
2.5, 4, 3, 2.5, 3.5, 2.5, 4.5, 3.5, 5, 4, 5, 4, 3.5, 3, 4.5,
2, 4.5, 5), review_count = c(17L, 9L, 7L, 4L, 8L, 3L, 8L, 9L,
11L, 3L, 7L, 38L, 4L, 4L, 3L, 3L, 17L, 5L, 14L, 7L), is_open = c(0L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
0L, 1L, 1L), attributes = list(c("BikeParking: True", "BusinessAcceptsBitcoin: False",
"BusinessAcceptsCreditCards: True", "BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': True, 'valet': False}",
"DogsAllowed: False", "RestaurantsPriceRange2: 2", "WheelchairAccessible: True"
), c("BusinessAcceptsBitcoin: False", "BusinessAcceptsCreditCards: True"
), c("Alcohol: none", "Ambience: {'romantic': False, 'intimate': False, 'classy': False, 'hipster': False, 'touristy': False, 'trendy': False, 'upscale': False, 'casual': False}",
"BikeParking: True", "BusinessAcceptsCreditCards: True", "BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': False, 'valet': False}",
"Caters: True", "GoodForKids: True", "GoodForMeal: {'dessert': False, 'latenight': False, 'lunch': False, 'dinner': False, 'breakfast': False, 'brunch': False}",
"HasTV: True", "NoiseLevel: quiet", "OutdoorSeating: False",
"RestaurantsAttire: casual", "RestaurantsDelivery: True", "RestaurantsGoodForGroups: True",
"RestaurantsPriceRange2: 1", "RestaurantsReservations: False",
"RestaurantsTableService: False", "RestaurantsTakeOut: True",
"WiFi: free"), c("AcceptsInsurance: False", "BusinessAcceptsCreditCards: True",
"BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': True, 'valet': False}",
"ByAppointmentOnly: True", "HairSpecializesIn: {'coloring': False, 'africanamerican': False, 'curly': False, 'perms': False, 'kids': False, 'extensions': False, 'asian': False, 'straightperms': False}",
"RestaurantsPriceRange2: 3"), c("BusinessAcceptsCreditCards: True",
"RestaurantsPriceRange2: 2", "WiFi: free"), c("BusinessAcceptsCreditCards: True",
"ByAppointmentOnly: True", "RestaurantsPriceRange2: 3"), c("BikeParking: True",
"BusinessAcceptsCreditCards: True", "BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': True, 'valet': False}",
"RestaurantsPriceRange2: 2", "WheelchairAccessible: True"), c("Alcohol: none",
"Ambience: {'romantic': False, 'intimate': False, 'classy': False, 'hipster': False, 'divey': False, 'touristy': False, 'trendy': False, 'upscale': False, 'casual': False}",
"BikeParking: True", "BusinessAcceptsCreditCards: True", "BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': False, 'valet': False}",
"DriveThru: True", "GoodForKids: True", "GoodForMeal: {'dessert': False, 'latenight': False, 'lunch': False, 'dinner': False, 'breakfast': False, 'brunch': False}",
"HasTV: False", "NoiseLevel: quiet", "OutdoorSeating: False",
"RestaurantsAttire: casual", "RestaurantsDelivery: False", "RestaurantsGoodForGroups: True",
"RestaurantsPriceRange2: 1", "RestaurantsReservations: False",
"RestaurantsTableService: False", "RestaurantsTakeOut: True",
"WiFi: no"), "BusinessAcceptsCreditCards: True", c("BikeParking: True",
"BusinessAcceptsCreditCards: False", "BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': False, 'valet': False}",
"RestaurantsDelivery: False", "RestaurantsPriceRange2: 3", "RestaurantsTakeOut: True"
), NULL, c("Alcohol: none", "Ambience: {'romantic': False, 'intimate': False, 'classy': False, 'hipster': False, 'divey': False, 'touristy': False, 'trendy': False, 'upscale': False, 'casual': True}",
"BusinessAcceptsCreditCards: True", "BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': True, 'valet': False}",
"Caters: True", "GoodForKids: True", "GoodForMeal: {'dessert': False, 'latenight': False, 'lunch': True, 'dinner': True, 'breakfast': False, 'brunch': False}",
"HasTV: True", "NoiseLevel: quiet", "OutdoorSeating: False",
"RestaurantsAttire: casual", "RestaurantsDelivery: False", "RestaurantsGoodForGroups: True",
"RestaurantsPriceRange2: 1", "RestaurantsReservations: False",
"RestaurantsTableService: False", "RestaurantsTakeOut: True",
"WiFi: no"), c("BikeParking: True", "BusinessParking: {'garage': False, 'street': True, 'validated': False, 'lot': False, 'valet': False}",
"RestaurantsPriceRange2: 1"), "ByAppointmentOnly: True", NULL,
c("BikeParking: True", "BusinessAcceptsCreditCards: True",
"BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': True, 'valet': False}",
"RestaurantsPriceRange2: 2"), NULL, c("BusinessAcceptsCreditCards: True",
"BusinessParking: {'garage': False, 'street': False, 'validated': False, 'lot': False, 'valet': False}",
"RestaurantsPriceRange2: 3"), c("BusinessAcceptsCreditCards: True",
"RestaurantsPriceRange2: 2", "WiFi: free"), c("BusinessAcceptsCreditCards: True",
"BusinessParking: {'garage': False, 'street': True, 'validated': False, 'lot': True, 'valet': False}",
"RestaurantsDelivery: True", "RestaurantsPriceRange2: 2",
"RestaurantsTakeOut: True", "WheelchairAccessible: True")),
categories = list(c("Tobacco Shops", "Nightlife", "Vape Shops",
"Shopping"), c("Caterers", "Grocery", "Food", "Event Planning & Services",
"Party & Event Planning", "Specialty Food"), c("Restaurants",
"Pizza", "Chicken Wings", "Italian"), c("Hair Removal", "Beauty & Spas",
"Blow Dry/Out Services", "Hair Stylists", "Hair Extensions",
"Massage", "Permanent Makeup", "Waxing", "Hair Salons"),
c("Hotels & Travel", "Event Planning & Services", "Hotels"
), c("Nail Salons", "Beauty & Spas"), c("Baby Gear & Furniture",
"Shopping"), c("Tex-Mex", "Mexican", "Fast Food", "Restaurants"
), c("Local Services", "Self Storage"), c("Food", "Bakeries"
), c("Nail Salons", "Hair Salons", "Beauty & Spas"),
c("Hawaiian", "Restaurants", "Barbeque"), c("Shopping",
"Bookstores", "Books", "Mags", "Music & Video"), c("General Dentistry",
"Health & Medical", "Dentists"), c("Leather Goods", "Fashion",
"Watches", "Accessories", "Shopping"), c("Mobile Phones",
"Computers", "Local Services", "Electronics", "IT Services & Computer Repair",
"Shopping"), c("Animal Shelters", "Pets"), c("Shoe Stores",
"Fashion", "Men's Clothing", "Shopping"), c("Hotels & Travel",
"Venues & Event Spaces", "Event Planning & Services",
"Hotels"), c("Food", "Bakeries")), hours = list(c("Monday 11:0-21:0",
"Tuesday 11:0-21:0", "Wednesday 11:0-21:0", "Thursday 11:0-21:0",
"Friday 11:0-22:0", "Saturday 10:0-22:0", "Sunday 11:0-18:0"
), c("Monday 0:0-0:0", "Tuesday 0:0-0:0", "Wednesday 0:0-0:0",
"Thursday 0:0-0:0", "Friday 0:0-0:0", "Saturday 0:0-0:0",
"Sunday 0:0-0:0"), c("Monday 11:0-2:0", "Tuesday 11:0-2:0",
"Wednesday 11:0-2:0", "Thursday 11:0-3:0", "Friday 11:0-3:0",
"Saturday 11:0-3:0", "Sunday 11:0-2:0"), c("Tuesday 10:0-21:0",
"Wednesday 10:0-21:0", "Thursday 10:0-21:0", "Friday 10:0-18:0",
"Saturday 9:0-16:0"), NULL, c("Monday 10:30-20:0", "Tuesday 10:30-20:0",
"Thursday 10:30-20:0", "Friday 10:30-20:0", "Saturday 10:30-19:0",
"Sunday 10:30-19:0"), c("Monday 10:0-18:0", "Tuesday 10:0-18:0",
"Wednesday 10:0-18:0", "Thursday 10:0-18:0", "Friday 10:0-18:0",
"Saturday 10:0-18:0", "Sunday 11:0-16:0"), c("Monday 10:0-22:0",
"Tuesday 10:0-22:0", "Wednesday 10:0-22:0", "Thursday 10:0-22:0",
"Friday 10:0-22:0", "Saturday 10:0-22:0", "Sunday 10:0-22:0"
), c("Monday 9:30-18:0", "Tuesday 9:30-18:0", "Wednesday 9:30-18:0",
"Thursday 9:30-18:0", "Friday 9:30-18:0", "Saturday 8:30-17:0",
"Sunday 11:0-15:0"), NULL, c("Monday 8:0-19:0", "Tuesday 8:0-19:0",
"Wednesday 8:0-19:0", "Thursday 8:0-19:0", "Friday 8:0-19:0",
"Saturday 8:0-17:0", "Sunday 10:0-13:0"), c("Monday 11:0-21:30",
"Tuesday 11:0-21:30", "Wednesday 11:0-21:30", "Thursday 11:0-21:30",
"Friday 11:0-21:30", "Saturday 11:0-21:30", "Sunday 11:30-21:0"
), c("Monday 11:0-19:0", "Tuesday 11:0-19:0", "Wednesday 11:0-19:0",
"Thursday 11:0-21:0", "Friday 11:0-21:0", "Saturday 11:0-19:0",
"Sunday 11:0-19:0"), NULL, c("Monday 10:0-21:0", "Tuesday 10:0-21:0",
"Wednesday 10:0-21:0", "Thursday 10:0-21:0", "Friday 10:0-22:0",
"Saturday 10:0-22:0", "Sunday 11:0-19:0"), c("Monday 10:0-21:0",
"Tuesday 10:0-21:0", "Wednesday 10:0-21:0", "Thursday 10:0-21:0",
"Friday 10:0-21:0", "Saturday 10:0-21:0", "Sunday 11:0-18:0"
), NULL, NULL, c("Monday 0:0-0:0", "Tuesday 0:0-0:0", "Wednesday 0:0-0:0",
"Thursday 0:0-0:0", "Friday 0:0-0:0", "Saturday 0:0-0:0",
"Sunday 0:0-0:0"), c("Monday 9:0-16:0", "Tuesday 9:0-16:0",
"Wednesday 9:0-16:0", "Thursday 9:0-16:0", "Friday 9:0-16:0",
"Saturday 10:0-15:0", "Sunday 10:0-15:0")), type = c("business",
"business", "business", "business", "business", "business",
"business", "business", "business", "business", "business",
"business", "business", "business", "business", "business",
"business", "business", "business", "business")), .Names = c("business_id",
"name", "neighborhood", "address", "city", "state", "postal_code",
"latitude", "longitude", "stars", "review_count", "is_open",
"attributes", "categories", "hours", "type"), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))