我正在尝试把数据整理成最整洁(tidy)的格式。
最初看起来像这样:
# Wide-format applicant counts: one row per job posting, with one count
# column per gender category and one per ethnicity category.
# Built column-by-column; every vector has six entries, one per posting.
applicants_wide <- tibble::tibble(
  fiscal_year = rep("2013-2014", 6),
  job_number = c(
    "9206 OP 2014/04/18", "1223 P 2013/08/09", "7260 OP 2014/02/14",
    "3227 P 2013/11/15", "2400 O 2014/05/02", "1191"
  ),
  job_description = c(
    "311 DIRECTOR 9206", "ACCOUNTING CLERK 1223", "AIRPORT MANAGER 7260",
    "AIRPORT POLICE LIEUTENANT 2013", "AQUARIST 2400", "ARCHIVIST1191"
  ),
  total_applications = c(54, 648, 51, 48, 40, 161),
  # Gender counts
  Female = c(20, 488, 13, 9, 15, 89),
  Male = c(31, 152, 37, 38, 24, 66),
  Unknown_Gender = c(3, 8, 1, 1, 1, 6),
  # Ethnicity counts
  Black = c(25, 151, 8, 21, 3, 12),
  Hispanic = c(18, 204, 12, 14, 7, 36),
  Asian = c(1, 123, 9, 3, 7, 20),
  Caucasian = c(6, 62, 20, 7, 19, 73),
  American.Indian..Alaskan.Native = c(0, 3, 0, 0, 1, 0),
  Filipino = c(0, 79, 0, 1, 1, 6),
  Unknown_Ethnicity = c(4, 26, 2, 2, 2, 14)
)
然后,我使用 tidyr 的 gather() 创建了 gender 和 ethnicity 两个键列(以及各自对应的计数列 gender_count 和 ethnicity_count)。
# Reshape the wide table into long form in two passes.
# Pass 1 stacks the three gender count columns into gender / gender_count.
# Pass 2 stacks the seven ethnicity count columns into
# ethnicity / ethnicity_count.
# Columns are selected by name rather than by position; the identifier
# columns (fiscal_year .. total_applications) are carried along unchanged.
applicants_long <- applicants_wide %>%
  gather(
    key = "gender",
    value = "gender_count",
    Female:Unknown_Gender
  ) %>%
  gather(
    key = "ethnicity",
    value = "ethnicity_count",
    Black:Unknown_Ethnicity
  )
得到的结果如下:
applicants_long
# A tibble: 126 x 8
tibble::tribble(
~fiscal_year, ~job_number, ~job_description, ~total_applications, ~gender, ~gender_count, ~ethnicity, ~ethnicity_count,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Female", 20, "Black", 25,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Female", 488, "Black", 151,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Female", 13, "Black", 8,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Female", 9, "Black", 21,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Female", 15, "Black", 3,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Female", 89, "Black", 12,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Male", 31, "Black", 25,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Male", 152, "Black", 151,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Male", 37, "Black", 8,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Male", 38, "Black", 21,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Male", 24, "Black", 3,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Male", 66, "Black", 12,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Unknown_Gender", 3, "Black", 25,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Unknown_Gender", 8, "Black", 151,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Unknown_Gender", 1, "Black", 8,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Unknown_Gender", 1, "Black", 21,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Unknown_Gender", 1, "Black", 3,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Unknown_Gender", 6, "Black", 12,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Female", 20, "Hispanic", 18,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Female", 488, "Hispanic", 204,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Female", 13, "Hispanic", 12,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Female", 9, "Hispanic", 14,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Female", 15, "Hispanic", 7,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Female", 89, "Hispanic", 36,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Male", 31, "Hispanic", 18,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Male", 152, "Hispanic", 204,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Male", 37, "Hispanic", 12,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Male", 38, "Hispanic", 14,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Male", 24, "Hispanic", 7,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Male", 66, "Hispanic", 36,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Unknown_Gender", 3, "Hispanic", 18,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Unknown_Gender", 8, "Hispanic", 204,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Unknown_Gender", 1, "Hispanic", 12,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Unknown_Gender", 1, "Hispanic", 14,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Unknown_Gender", 1, "Hispanic", 7,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Unknown_Gender", 6, "Hispanic", 36,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Female", 20, "Asian", 1,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Female", 488, "Asian", 123,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Female", 13, "Asian", 9,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Female", 9, "Asian", 3,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Female", 15, "Asian", 7,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Female", 89, "Asian", 20,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Male", 31, "Asian", 1,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Male", 152, "Asian", 123,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Male", 37, "Asian", 9,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Male", 38, "Asian", 3,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Male", 24, "Asian", 7,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Male", 66, "Asian", 20,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Unknown_Gender", 3, "Asian", 1,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Unknown_Gender", 8, "Asian", 123,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Unknown_Gender", 1, "Asian", 9,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Unknown_Gender", 1, "Asian", 3,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Unknown_Gender", 1, "Asian", 7,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Unknown_Gender", 6, "Asian", 20,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Female", 20, "Caucasian", 6,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Female", 488, "Caucasian", 62,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Female", 13, "Caucasian", 20,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Female", 9, "Caucasian", 7,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Female", 15, "Caucasian", 19,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Female", 89, "Caucasian", 73,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Male", 31, "Caucasian", 6,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Male", 152, "Caucasian", 62,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Male", 37, "Caucasian", 20,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Male", 38, "Caucasian", 7,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Male", 24, "Caucasian", 19,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Male", 66, "Caucasian", 73,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Unknown_Gender", 3, "Caucasian", 6,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Unknown_Gender", 8, "Caucasian", 62,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Unknown_Gender", 1, "Caucasian", 20,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Unknown_Gender", 1, "Caucasian", 7,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Unknown_Gender", 1, "Caucasian", 19,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Unknown_Gender", 6, "Caucasian", 73,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Female", 20, "American.Indian..Alaskan.Native", 0,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Female", 488, "American.Indian..Alaskan.Native", 3,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Female", 13, "American.Indian..Alaskan.Native", 0,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Female", 9, "American.Indian..Alaskan.Native", 0,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Female", 15, "American.Indian..Alaskan.Native", 1,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Female", 89, "American.Indian..Alaskan.Native", 0,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Male", 31, "American.Indian..Alaskan.Native", 0,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Male", 152, "American.Indian..Alaskan.Native", 3,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Male", 37, "American.Indian..Alaskan.Native", 0,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Male", 38, "American.Indian..Alaskan.Native", 0,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Male", 24, "American.Indian..Alaskan.Native", 1,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Male", 66, "American.Indian..Alaskan.Native", 0,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Unknown_Gender", 3, "American.Indian..Alaskan.Native", 0,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Unknown_Gender", 8, "American.Indian..Alaskan.Native", 3,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Unknown_Gender", 1, "American.Indian..Alaskan.Native", 0,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Unknown_Gender", 1, "American.Indian..Alaskan.Native", 0,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Unknown_Gender", 1, "American.Indian..Alaskan.Native", 1,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Unknown_Gender", 6, "American.Indian..Alaskan.Native", 0,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Female", 20, "Filipino", 0,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Female", 488, "Filipino", 79,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Female", 13, "Filipino", 0,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Female", 9, "Filipino", 1,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Female", 15, "Filipino", 1,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Female", 89, "Filipino", 6,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Male", 31, "Filipino", 0,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Male", 152, "Filipino", 79,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Male", 37, "Filipino", 0,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Male", 38, "Filipino", 1,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Male", 24, "Filipino", 1,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Male", 66, "Filipino", 6,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Unknown_Gender", 3, "Filipino", 0,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Unknown_Gender", 8, "Filipino", 79,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Unknown_Gender", 1, "Filipino", 0,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Unknown_Gender", 1, "Filipino", 1,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Unknown_Gender", 1, "Filipino", 1,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Unknown_Gender", 6, "Filipino", 6,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Female", 20, "Unknown_Ethnicity", 4,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Female", 488, "Unknown_Ethnicity", 26,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Female", 13, "Unknown_Ethnicity", 2,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Female", 9, "Unknown_Ethnicity", 2,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Female", 15, "Unknown_Ethnicity", 2,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Female", 89, "Unknown_Ethnicity", 14,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Male", 31, "Unknown_Ethnicity", 4,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Male", 152, "Unknown_Ethnicity", 26,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Male", 37, "Unknown_Ethnicity", 2,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Male", 38, "Unknown_Ethnicity", 2,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Male", 24, "Unknown_Ethnicity", 2,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Male", 66, "Unknown_Ethnicity", 14,
"2013-2014", "9206 OP 2014/04/18", "311 DIRECTOR 9206", 54, "Unknown_Gender", 3, "Unknown_Ethnicity", 4,
"2013-2014", "1223 P 2013/08/09", "ACCOUNTING CLERK 1223", 648, "Unknown_Gender", 8, "Unknown_Ethnicity", 26,
"2013-2014", "7260 OP 2014/02/14", "AIRPORT MANAGER 7260", 51, "Unknown_Gender", 1, "Unknown_Ethnicity", 2,
"2013-2014", "3227 P 2013/11/15", "AIRPORT POLICE LIEUTENANT 2013", 48, "Unknown_Gender", 1, "Unknown_Ethnicity", 2,
"2013-2014", "2400 O 2014/05/02", "AQUARIST 2400", 40, "Unknown_Gender", 1, "Unknown_Ethnicity", 2,
"2013-2014", "1191", "ARCHIVIST1191", 161, "Unknown_Gender", 6, "Unknown_Ethnicity", 14
)
但是总觉得哪里不对劲……整洁数据要求每一行只对应一个观察值,而这里的 gender_count 和 ethnicity_count 两列却在同一行里汇总了多个观察值。
如果我想把数据整理成最整洁的格式,我是已经做到头了,还是漏掉了某一步?
答案 0 :(得分:1)
从形式上看这是整洁的,因为每个观察值(即每一行的取值组合)都各不相同,但这并不意味着同一行里各个值之间的关系是有意义的。问题在于,原始数据只告诉我们例如有多少女性申请、有多少黑人申请,却没有告诉我们有多少黑人女性申请。这意味着在同一个职位下,每个性别对应的种族细分都完全相同:例如对于职位描述“311 DIRECTOR 9206”,“Female”、“Male”和“Unknown_Gender”三行中“Black”的人数都是 25。
您或许仍然可以在这种整洁格式下处理数据,但是我认为分组、汇总等操作会变得不必要地迂回。把数据拆分成两个整洁的数据框可能更有意义:一个用于性别,一个用于种族。