我在使用 dplyr 的 filter() 动词时遇到了问题。以下是我的数据集的一小部分样本。
structure(list(employer = c("MICROSOFT CORPORATION", "GOOGLE INC",
"GOOGLE INC", "AMAZON CORPORATE LLC", "MICROSOFT CORPORATION",
"GOOGLE INC", "AMAZON CORPORATE LLC", "MICROSOFT CORPORATION",
"MICROSOFT CORPORATION", "MICROSOFT CORPORATION", "AMAZON CORPORATE LLC",
"APPLE INC", "AMAZON CORPORATE LLC", "AMAZON CORPORATE LLC",
"YAHOO HOLDINGS INC", "APPLE INC", "AMAZON CORPORATE LLC", "GOOGLE INC",
"AMAZON WEB SERVICES INC", "GOOGLE INC", "AMAZONCOMKYDC LLC",
"LINKEDIN CORPORATION", "FACEBOOK INC", "GOOGLE INC", "GOOGLE INC",
"GOOGLE INC", "AMAZON CORPORATE LLC", "AMAZON CORPORATE LLC",
"MICROSOFT CORPORATION", "GOOGLE INC", "GOOGLE INC", "AMAZON CORPORATE LLC",
"AIRBNB INC", "MICROSOFT CORPORATION", "GOOGLE INC", "GOOGLE INC",
"GOOGLE INC", "AMAZON CORPORATE LLC", "GOOGLE INC", "YAHOO! INC",
"AMAZON CORPORATE LLC", "MICROSOFT CORPORATION", "MICROSOFT CORPORATION",
"GOOGLE INC", "FACEBOOK INC", "AIRBNB INC", "MICROSOFT CORPORATION",
"APPLE INC", "UBER TECHNOLOGIES INC", "MICROSOFT CORPORATION"
), job.title = c("SOFTWARE ENGINEER", "STRATEGIST", "TEST ENGINEER",
"TECHNICAL PROGRAM MANAGER", "PROGRAM MANAGER", "SOFTWARE ENGINEER, SITE RELIABILITY ENGINEERING",
"SOFTWARE DEVELOPMENT ENGINEER II", "SENIOR SOFTWARE ENGINEER",
"SOFTWARE ENGINEER 2", "SENIOR SOFTWARE ENGINEER", "SENIOR PRODUCT MANAGER",
"ENGINEERING PROJECT MGR 4", "PROGRAM MANAGER", "BUSINESS INTELLIGENCE ENGINEER I",
"TECH YAHOO, SOFTWARE DEV ENGINEER", "SOFTWARE ENGINEER APPS",
"SOFTWARE DEVELOPMENT ENGINEER I", "SOFTWARE ENGINEER", "SECURITY ENGINEER II",
"SOFTWARE ENGINEER", "OPERATIONS MANAGER", "SOFTWARE ENGINEER",
"SOFTWARE ENGINEER", "SOFTWARE ENGINEER", "TECHNICAL ACCOUNT MANAGER",
"ANALYTICAL LEAD", "PRODUCT MANAGER II", "SOFTWARE DEVELOPMENT ENGINEER II",
"SENIOR PROGRAM MANAGER", "SOFTWARE ENGINEER", "SOFTWARE ENGINEER",
"SOFTWARE DEVELOPMENT ENGINEER III", "SOFTWARE ENGINEER", "PROGRAM MANAGER",
"SALES STRATEGY ASSOCIATE", "SOFTWARE ENGINEER", "SOFTWARE ENGINEER 1615.20269",
"SOFTWARE DEVELOPMENT ENGINEER II", "SOFTWARE ENGINEER", "TECH YAHOO, SOFTWARE DEVELOPMENT ENGINEER, ASSOCIATE",
"NETWORK DEVELOPMENT ENGINEER I", "SOFTWARE DEVELOPMENT ENGINEER IN TEST",
"SENIOR SOFTWARE ENGINEERING MANAGER", "SOLUTIONS CONSULTANT",
"DATA SCIENTIST", "SOFTWARE ENGINEER", "SUPPORT ENGINEER", "SYSTEMS DESIGN ENGINEER 3",
"SOFTWARE ENGINEER", "PREMIER FIELD ENGINEER"), base.salary = c("125,003",
"110,000", "125,100", "155,000", "117,218", "104,000", "120,700",
"145,301", "140,000", "141,123", "115,000", "137,571", "105,500",
"93,000", "123,628", "150,000", "99,200", "108,000", "135,000",
"110,000", "90,000", "131,997", "110,000", "115,000", "108,000",
"91,000", "110,000", "144,000", "160,250", "127,000", "132,000",
"153,900", "125,000", "124,989", "110,200", "150,000", "132,000",
"112,000", "120,000", "96,866", "105,000", "94,139", "156,123",
"97,500", "117,453", "120,000", "92,500", "97,386", "111,405",
"109,811"), location = c("BELLEVUE, WA", "MOUNTAIN VIEW, CA",
"MOUNTAIN VIEW, CA", "SEATTLE, WA", "REDMOND, WA", "VENICE, CA",
"SEATTLE, WA", "REDMOND, WA", "SAN FRANCISCO, CA", "REDMOND, WA",
"SEATTLE, WA", "CUPERTINO, CA", "SEATTLE, WA", "SEATTLE, WA",
"SUNNYVALE, CA", "CUPERTINO, CA", "SEATTLE, WA", "MOUNTAIN VIEW, CA",
"SEATTLE, WA", "MOUNTAIN VIEW, CA", "ORLANDO, FL", "NEW YORK, NY",
"MENLO PARK, CA", "PITTSBURGH, PA", "MOUNTAIN VIEW, CA", "NEW YORK, NY",
"SEATTLE, WA", "SEATTLE, WA", "REDMOND, WA", "MOUNTAIN VIEW, CA",
"MOUNTAIN VIEW, CA", "SEATTLE, WA", "SAN FRANCISCO, CA", "REDMOND, WA",
"MOUNTAIN VIEW, CA", "PALO ALTO, CA", "MOUNTAIN VIEW, CA", "SEATTLE, WA",
"KIRKLAND, WA", "SAN FRANCISCO, CA", "SEATTLE, WA", "ISSAQUAH, WA",
"REDMOND, WA", "NEW YORK, NY", "MENLO PARK, CA", "SAN FRANCISCO, CA",
"SEATTLE, WA", "CUPERTINO, CA", "NEW YORK, NY", "BENTONVILLE, AR"
), submit.date = c("12/27/2016", "06/08/2016", "06/02/2016",
"05/22/2017", "11/04/2014", "02/25/2016", "02/27/2014", "11/13/2014",
"06/15/2017", "11/20/2014", "02/04/2017", "06/15/2017", "02/24/2017",
"06/19/2015", "02/17/2017", "11/04/2016", "01/13/2017", "05/15/2015",
"02/04/2014", "11/08/2013", "03/16/2017", "11/18/2016", "01/08/2014",
"05/07/2014", "10/22/2013", "02/16/2017", "08/21/2015", "04/29/2016",
"08/25/2016", "02/18/2015", "03/17/2016", "06/14/2017", "02/12/2015",
"10/01/2015", "02/27/2015", "12/14/2015", "02/09/2017", "03/09/2015",
"05/12/2016", "03/03/2016", "06/11/2014", "12/06/2013", "01/19/2015",
"02/22/2016", "02/10/2015", "02/18/2017", "03/17/2017", "06/18/2014",
"07/25/2016", "11/16/2015"), start.date = c("06/26/2017", "10/01/2016",
"10/22/2016", "06/05/2017", "11/17/2014", "08/23/2016", "08/25/2014",
"05/11/2015", "06/28/2017", "05/16/2015", "07/30/2017", "10/28/2017",
"08/04/2017", "07/20/2015", "03/01/2017", "11/21/2016", "07/14/2017",
"09/08/2015", "02/07/2014", "01/06/2014", "03/27/2017", "12/05/2016",
"07/04/2014", "10/03/2014", "11/04/2013", "08/18/2017", "09/14/2015",
"10/23/2016", "10/01/2016", "08/17/2015", "03/24/2016", "11/14/2017",
"08/01/2015", "04/01/2016", "08/21/2015", "01/25/2016", "07/21/2017",
"08/30/2015", "08/12/2016", "09/01/2016", "06/18/2014", "06/04/2014",
"06/11/2015", "08/20/2016", "08/07/2015", "08/01/2017", "09/15/2017",
"09/02/2014", "07/28/2016", "11/23/2015"), case.status = c("CERTIFIED",
"CERTIFIED", "CERTIFIED", "CERTIFIED", "CERTIFIED", "CERTIFIED",
"WITHDRAWN", "CERTIFIED", "CERTIFIED", "CERTIFIED", "DENIED",
"CERTIFIED", "CERTIFIED", "CERTIFIED", "CERTIFIED", "CERTIFIED",
"CERTIFIED", "CERTIFIED", "WITHDRAWN", "CERTIFIED", "CERTIFIED",
"CERTIFIED", "CERTIFIED", "CERTIFIED", "CERTIFIED", "CERTIFIED",
"CERTIFIED", "CERTIFIED", "CERTIFIED", "CERTIFIED", "CERTIFIED",
"CERTIFIED", "CERTIFIED", "CERTIFIED", "CERTIFIED", "CERTIFIED",
"CERTIFIED", "CERTIFIED", "CERTIFIED", "CERTIFIED", "CERTIFIED",
"CERTIFIED", "CERTIFIED", "CERTIFIED", "CERTIFIED", "CERTIFIED",
"CERTIFIED", "CERTIFIED", "CERTIFIED", "CERTIFIED")), .Names = c("employer",
"job.title", "base.salary", "location", "submit.date", "start.date",
"case.status"), row.names = c(49523L, 34286L, 34811L, 11521L,
39957L, 33899L, 8005L, 43122L, 51506L, 42828L, 4681L, 13148L,
3377L, 904L, 56070L, 15872L, 6070L, 25408L, 4268L, 25972L, 2556L,
36551L, 19938L, 26637L, 34433L, 21937L, 3178L, 9001L, 41880L,
27560L, 28258L, 9576L, 227L, 40098L, 24335L, 29791L, 31987L,
7452L, 26970L, 56520L, 2391L, 45909L, 44112L, 34167L, 18377L,
171L, 51780L, 17635L, 54413L, 39161L), class = "data.frame")
我不得不清理原始数据集,因为当我查看原始数据集中所有不重复的雇主名称时,发现其中有很多多余的条目:
unique(cleanH1B$employer)
[1] "AIRBNB INC"
[2] "AMAZON CORPORATE LLC"
[3] "AMAZON MEDIA GROUP LLC"
[4] "AMAZON MEDIA GROUP"
[5] "AMAZON WEB SERVICES INC"
[6] "AMAZON SERVICES LLC"
[7] "AMAZONCOMKYDC LLC"
[8] "AMAZON FULFILLMENT SERVICES INC"
[9] "AMAZONCOMAZDC LLC"
[10] "AMAZON PRODUCE NETWORK LLP"
[11] "AMAZONCOMDEDC LLC"
[12] "AMAZONCOMKSDC LLC"
[13] "AMAZON CAPITAL SERVICES INC"
[14] "AMAZON DIGITAL SERVICES INC"
[15] "AMAZON DIGITAL SERVICES LLC"
[16] "AMAZON WEB SERVICES LLC"
[17] "AMAZON COPORATE LLC"
[18] "AMAZON ROBOTICS LLC"
[19] "AMAZON WEB SERVICES"
[20] "AMAZON STUDIOS INC"
[21] "AMAZON MASONRY"
[22] "AMAZON PAYMENTS INC"
[23] "AMAZONCOM DEDC LLC"
[24] "AMAZONCOMINDC LLC"
[25] "AMAZONCOMKYDC"
[26] "AMAZON CORPORATE"
[27] "AMAZON STUDIOS LLC"
[28] "AMAZONCOMNVDC INC"
[29] "AMAZONCOMKYDC INC"
[30] "AMAZON FRESH LLC"
[31] "AMAZONCOMDEDCLLC"
[32] "AMAZON PHARMACY INC"
[33] "AMAZON WEB SEERVICES INC"
[34] "AMAZONCOM AZDZ LLC"
[35] "AMAZON FULFILLMENT SERVICE INC"
[36] "AMAZON FUFILLMENT SERVICES INC"
[37] "AMAZON REGISTRY SERVICES INC"
[38] "AMAZON TECHNOLOGIES INC"
[39] "AMAZON DEVELOPMENT CENTER INC"
[40] "AMAZON RESTAURANT & BAR INC"
[41] "AMAZON CORP LLC"
[42] "AMAZON FULFILLMENT SVCS INC"
[43] "AMAZON MECHANICAL TURK INC"
[44] "AMAZON CORPORTATE LLC"
[45] "AMAZON CAPTAL SERVICES INC"
[46] "AMAZON ROBOTICS LLC (KIVA)"
[47] "AMAZON CORPORTE LLC"
[48] "APPLET SYSTEMS LLC"
[49] "APPLE INC"
[50] "APPLE ALUM USA CORP"
[51] "APPLE FEDERAL CREDIT UNION"
[52] "APPLE DENTAL & ASSOCIATES LLC"
[53] "APPLE AMERICAN GROUP"
[54] "APPLETON GRP LLC"
[55] "APPLEXUS TECHNOLOGIES LLC"
[56] "APPLE AMERICAN GROUP LLC"
[57] "APPLE TREE DENTAL"
[58] "APPLE T USA INC"
[59] "APPLE AIR COMPRESSOR CORP"
[60] "APPLE BEAUTY INC"
[61] "APPLEGATE TRAN INTERIORS INC"
[62] "APPLE MEDICAL CENTER AND URGENT CARE"
[63] "APPLE MEDICAL CENTER AND URGENT CARE INC"
[64] "APPLESEED MONTESSORI SCHOOL"
[65] "APPLEPEA MONTESSORI ACADEMY OF ONTARIO"
[66] "APPLE SEEDS LLC"
[67] "APPLETREE INSTITUTE FOR EDUCATION INNOVATION INC"
[68] "APPLETREE DAY CARE CENTER INC"
[69] "APPLETREE EARLY LEARNING PUBLIC CHARTER SCHOOL"
[70] "APPLECHEM INC"
[71] "APPLEECON LLC"
[72] "APPLECON LLC"
[73] "APPLECRATE INC"
[74] "APPLEBY CAPITAL INC"
[75] "APPLE VACATIONS LLC"
[76] "FACEBOOK INC"
[77] "FACEBOOK MIAMI INC"
[78] "FACEBOOK SERVICES INC"
[79] "GOOGLE INC"
[80] "GOOGLE LIFE SCIENCES LLC"
[81] "GOOGLE CAPITAL MANAGEMENT COMPANY LLC"
[82] "GOOGLE VENTURES MANAGEMENT COMPANY LLC"
[83] "LINKEDIN CORPORATION"
[84] "MICROSOFT CORPORATION"
[85] "MICROSOFT OPERATIONS PUERTO RICO LLC"
[86] "TWITTER INC"
[87] "UBER TECHNOLOGIES INC"
[88] "UBERION INC"
[89] "UBERTAL INC"
[90] "UBERWURX LLC"
[91] "UBERTEJAS LLC"
[92] "UBERMEDIA INC"
[93] "UBER OPERATIONS LLC"
[94] "UBERLEGEN TECHNOLOGY GROUP LLC"
[95] "UBER BRAIN LLC"
[96] "UBERTO CONSTRUCTION"
[97] "UBER GROUP PLLC"
[98] "YAHOO! INC"
[99] "YAHOO INC"
[100] "YAHOO HOLDINGS INC"
[101] "YAHOO HOLDINGS"
我的方法是找出所有不需要的雇主名称,并使用 filter() 将它们删除。下面是我的尝试:删除与以下三个雇主相关的行,从而创建一个新的数据框。
# NOTE(review): this condition is TRUE for every non-missing employer, because
# a single value can never equal all three names at once. Joining the `!=`
# tests with `|` therefore removes no row that has an employer value; the
# tests must be joined with `&` to exclude the three names.
cleanH1B <- filter(df_h1b, employer != "AMAZON MASONRY" |
employer != "AMAZON AERO SERVICES LLC" |
employer != "APPLET SYSTEMS LLC")
然而,我的方法不起作用,我最终得到了相同的原始数据集。我也尝试过:
# Comma-separated conditions passed to filter() are combined with AND, so a
# row is kept only when its employer differs from all three names.
# NOTE(review): this form should already drop the three employers; if the
# result still looks unchanged, confirm that the names match the data exactly.
cleanH1B <- filter(df_h1b, employer != "AMAZON MASONRY",
employer != "AMAZON AERO SERVICES LLC",
employer != "APPLET SYSTEMS LLC")
有人能帮忙指出我对 filter() 的用法错在哪里,并分享更高效的做法吗?
谢谢!
答案 0 :(得分:1)
将“或”(`|`)改为“与”(`&`)。使用 `|` 时,任何一个雇主名称都不可能同时等于这三个字符串,因此条件对每一行都为 TRUE,不会过滤掉任何行:
# Keep only the rows whose employer differs from every excluded name.
# With `&` a row must pass all three tests, so matching any one name drops it.
df %>%
  filter(
    employer != "AMAZON MASONRY" &
      employer != "AMAZON AERO SERVICES LLC" &
      employer != "APPLET SYSTEMS LLC"
  )
# # A tibble: 50 x 7
# employer job.title base.salary
# <chr> <chr> <chr>
# 1 MICROSOFT CORPORATION SOFTWARE ENGINEER 125,003
# 2 GOOGLE INC STRATEGIST 110,000
# 3 GOOGLE INC TEST ENGINEER 125,100
# 4 AMAZON CORPORATE LLC TECHNICAL PROGRAM MANAGER 155,000
# 5 MICROSOFT CORPORATION PROGRAM MANAGER 117,218
# 6 GOOGLE INC SOFTWARE ENGINEER, SITE RELIABILITY ENGINEERING 104,000
# 7 AMAZON CORPORATE LLC SOFTWARE DEVELOPMENT ENGINEER II 120,700
# 8 MICROSOFT CORPORATION SENIOR SOFTWARE ENGINEER 145,301
# 9 MICROSOFT CORPORATION SOFTWARE ENGINEER 2 140,000
# 10 MICROSOFT CORPORATION SENIOR SOFTWARE ENGINEER 141,123
...如果您想进一步简化,可以先将要排除的雇主名称赋值给一个向量。
# Employer names to exclude, collected in one character vector so that the
# filter condition stays short however many names are added.
emp_list <- c(
  "AMAZON MASONRY",
  "AMAZON AERO SERVICES LLC",
  "APPLET SYSTEMS LLC"
)

# `%in%` binds tighter than `!`, so the parentheses only make the negation
# explicit. `%in%` never yields NA, so rows with a missing employer are kept
# here, whereas chained `!=` tests would drop them.
df %>%
  filter(!(employer %in% emp_list))
这样你的过滤条件会更简洁直观。