我曾经使用Stata进行编码,现在我必须学习R,这让我有些头痛。如果您告诉我如何改进它们,我会尝试问一些好问题,也不会介意。
在图像/表格中,您可以看到数据集和计算结果(无功能)。结果是to_p_scale_s1_na
这是我计算分数(包括描述和创建数据集)的步骤:
# Create Data
t0_item1 <- c(0,1,0,1,NA,NA)
t0_item1 <- factor(t0_item1, labels = c("yes", "no"))
t0_item2 <- c(1,1,0,1,0,1)
t0_item2 <- factor(t0_item2, labels = c("yes", "no"))
t0_item3 <- c(1,0,1,1,0,NA)
t0_item3 <- factor(t0_item3, labels = c("yes", "no"))
t0_item4 <- c(0,0,1,1,1,1)
t0_item4 <- factor(t0_item4, labels = c("yes", "no"))
t1_item1 <- c(1,1,0,1,NA,0)
t1_item1 <- factor(t1_item1, labels = c("yes", "no"))
t1_item2 <- c(1,0,0,1,NA,1)
t1_item2 <- factor(t1_item2, labels = c("yes", "no"))
t1_item3 <- c(1,0,0,1,NA,1)
t1_item3 <- factor(t1_item3, labels = c("yes", "no"))
t1_item4 <- c(0,0,0,1,NA,1)
t1_item4 <- factor(t1_item4, labels = c("yes", "no"))
data_t0 <- data.frame(t0_item1, t0_item2, t0_item3, t0_item4)
data_t1 <- data.frame(t1_item1, t1_item2, t1_item3, t1_item4)
rm(t0_item1, t0_item2, t0_item3, t0_item4, t1_item1, t1_item2, t1_item3, t1_item4)
head(data_t0)
#Code without function
#Data / different datasets can be easily aranged in the same order.
#But what comes now needs to be done many times for different points in time
#different scales with different possibilities to answere...
#In the excample:
#item 1 and 2 add 1 to total points of indicator, if answere is yes
#item 3 and 4 add 1 to total points of indicator, if answere is no
#I had trouble using the data as they are coded as factors and with handling the NAs
# Probably there are better ways to this.
#Data will be ordered in dataframes accordingly to use rowSums command.
attach(data_t0)
# Skala 1 (all Variables)
list_scale_1_t0 <- data.frame(t0_item1, t0_item2, t0_item3, t0_item4)
# scale a adds 1 to total score if yes
list_scale_1a_t0 <- data.frame(t0_item1, t0_item2)
# scale b adds 1 if no
list_scale_1b_t0 <- data.frame(t0_item3, t0_item4)
detach(data_t0)
# Count NAs and available data
# NAs
list_scale_1_t0$na <- rowSums(is.na(list_scale_1_t0))
# available data
list_scale_1_t0$all <- 4 - list_scale_1_t0$na
# 3. Now rowSums is used to count for yes and now respectively
# Scale
# 1 = yes (2 Items)
list_scale_1_t0$t0_p_scale_s1a <- rowSums(list_scale_1a_t0 == "yes", na.rm=T)
summary(list_scale_1_t0$t0_p_scale_s1a)
# 1 = no (2 Items)
list_scale_1_t0$t0_p_scale_s1b <- rowSums(list_scale_1b_t0 == "no", na.rm=T)
summary(list_scale_1_t0$t0_p_scale_s1b)
# sum both parts of the total indicator
list_scale_1_t0$t0_p_scale_s1 <- list_scale_1_t0$t0_p_scale_s1a + list_scale_1_t0$t0_p_scale_s1b
summary(list_scale_1_t0$t0_p_scale_s1)
# In order to not count an NA as 0 points:
list_scale_1_t0$t0_p_scale_s1_na <- list_scale_1_t0$t0_p_scale_s1 / list_scale_1_t0$all * 4
# Exclude data with more then one NA
# Skala 1: 4 Items -> min. 3 available / max. 1 NAs (25%)
list_scale_1_t0$t0_p_scale_s1_na[list_scale_1_t0$na>1] <- NA
summary(list_scale_1_t0$t0_p_scale_s1_na)
#The result is as I want it to be!
我尝试了各种带有功能的事情,有些事情确实起作用了。我希望能帮助您优化上述内容?
#Now this needs to be done multiple times and I am supposed to build a function to do it:
#Function is supposed to first adress data_t0 and then address data_t1 and
#do the calculations and add the result to data_t0 and data_t1
fkt.scale1 <- function(df_source, df_s1, df_s1a, df_s1b) {
# Take data for total indicator from source
df_s1 <- data.frame(df_source$item_1, df_source$item_2, df_source$item_3, df_source$item_4)
# count NAs and available data
df_s1$na <- rowSums(is.na(df_s1))
df_s1$all <- 4 - df_s1$na
# df_s1a create scale a (adds 1 if yes)
df_s1a <- data.frame(df_s1$item_1, df_s1$item_2)
# df_s1b create scale b (adds 1 if no)
df_s1b <- data.frame(df_s1$item_3, df_s1$item_4)
# count yes / no and add to index as seperate variables
df_s1$t0_p_scale_s1a <- rowSums(df_s1a == "yes", na.rm=T)
df_s1$t0_p_scale_s1b <- rowSums(df_s1b == "no", na.rm=T)
# Sum
df_s1$t0_p_scale_s1 <- df_s1$t0_p_scale_s1a + df_s1$t0_p_scale_s1b
df_source
#I kept trying different things and it would be too confusing to confusing to add more.
}
data_t0 <- fkt.scale1(data_t0, df_s1, df_s1a, df_s1b)
data_t1 <- fkt.scale1(data_t1, df_s1, df_s1a, df_s1b)
#Advice how to handle this better highly appreciated :)
谢谢!