
时间:2019-08-23 16:25:14

标签: csv split cross-apply ssms-2017

我如何重新加入从
### First, create data.frame from "complete" csv files ###
folder_complete <- "insert path here"

# list.files()/dir() take a regular expression, not a shell glob, so the
# extension is anchored explicitly instead of using "*.csv".
df_list_complete <- list.files(
  path = folder_complete,
  pattern = "\\.csv$",
  full.names = TRUE
)
df_complete <- plyr::ldply(df_list_complete, readr::read_csv)

### Then, read in and edit "incomplete" files one at a time using for loop ###
### Note "incomplete" files are in a different director - this was set
### during the session ###
filenames <- dir(pattern = "\\.csv$")

# seq_along() yields an empty sequence when no files match, whereas
# 1:length(filenames) would iterate over c(1, 0).
for (i in seq_along(filenames)) {
  tmp <- read.csv(filenames[[i]], stringsAsFactors = FALSE)

  ### Merge / Identify matches between "complete" data.frame and
  ### "incomplete" file, using "Programme Synopsis" as the unique column ###
  tmp_new <- merge(tmp, df_complete, by = "Programme_Synopsis")

  ### Delete any rows with NAs in specific columns - ###
  ### I did this because the previous step matched empty rows for these
  ### columns, and I didn't want these ###
  # NOTE(review): distinct() keeps the first row per unique value; it does not
  # drop NA rows. Confirm this de-duplication is what is intended.
  tmp_new <- dplyr::distinct(tmp_new, Programme_Synopsis_url.x,
                             .keep_all = TRUE)
  tmp_new <- dplyr::distinct(tmp_new, Programme_Duration.y, .keep_all = TRUE)

  ### Delete Duplicate columns - merging created several duplicate
  ### columns (.y, .x) ###
  ### I only wanted to add the matching "Programme Duration" column from the
  ### "complete" data.frame to the "incomplete" file but wasn't sure how to
  ### do this. Instead, I had to retrospectively remove the duplicate
  ### columns ###
  # NOTE(review): positional removal depends on the column order produced by
  # merge(); verify columns 2-7 are the ".x" duplicates.
  tmp_new <- tmp_new[-c(2:7)]

  ### Rename columns ###
  # plyr::rename() takes c(old = "new"); it is namespaced so that a later
  # library(dplyr) cannot mask it with the incompatible dplyr::rename().
  tmp_new2 <- plyr::rename(
    tmp_new,
    c(
      "Programme_Synopsis_url.y" = "Programme_Synopsis_url",
      "Programme_Duration.y" = "Programme_Duration",
      "Programme_Category.y" = "Programme_Category",
      "Programme_Availability.y" = "Programme_Availability",
      "Programme_Genre.y" = "Programme_Genre",
      "Programme_Title.y" = "Programme_Title"
    )
  )

  ### Merge (again!) using plyr Join function ###
  df <- plyr::join(tmp_new2, tmp, by = "Programme_Synopsis_url", type = "full")

  ### Delete any without an index ###
  ### (i.e. those that don't belong in this dataframe) ###
  df <- df[!is.na(df$index), ]

  ### Re-order by original index ###
  df <- df[order(df$index), ]

  ### Remove duplicated index columns ###
  df$index.x <- NULL
  df$index.y <- NULL

  ### Write out the new file ###
  # Overwrites the "incomplete" input file in place.
  write.csv(df, filenames[[i]], row.names = FALSE)
}
作为value返回的值?




显示问题的Gif enter image description here

1 个答案:

答案 0 :(得分:0)



为 CROSS APPLY STRING_SPLIT(fd.multi_select, ',') 指定一个别名(例如 csv),然后就可以通过 csv.value 引用拆分出来的值,并用它与查找表进行联接:


-- Split a comma-separated code list into rows and look up each code's label.
-- Each CTE body must be wrapped in parentheses: WITH name AS ( ... ).
WITH fake_data AS
(
    SELECT 1 AS pkey, 'Billy' AS name, 'FE,BF,AF,JF,AA' AS multi_select
)
, lookupTable AS
(
    SELECT 'Forever'   AS lookupValue, 'FE' AS lookupItem UNION ALL
    SELECT 'BoyFriend' AS lookupValue, 'BF' AS lookupItem UNION ALL
    SELECT 'AsFriend'  AS lookupValue, 'AF' AS lookupItem
)
-- STRING_SPLIT returns a single column named "value"; the alias csv lets the
-- join reference it as csv.value. LEFT JOIN keeps codes that have no lookup
-- row (here JF and AA), with lookupValue returned as NULL.
SELECT fd.pkey, fd.name, csv.value, lt.lookupValue
FROM fake_data fd
    CROSS APPLY STRING_SPLIT(fd.multi_select, ',') csv
    LEFT JOIN lookupTable lt ON lt.lookupItem = csv.value;