Question

我以前一直使用 Stata 编写代码，现在必须学习 R，这让我有些头疼。我会尽量把问题问清楚；如果您能指出我的提问该如何改进，我也不会介意。

在下面的图片/表格中，您可以看到数据集以及（不使用函数时）计算得到的结果。最终结果是 t0_p_scale_s1_na 这一列。图片说明：Table with data and calculated score

这是我计算分数（包括描述和创建数据集）的步骤：

# Create data
# Raw answers are coded 0/1 (NA = missing). factor() applies the labels in the
# sorted order of the observed values, so 0 is labelled "yes" and 1 is
# labelled "no".
# NOTE(review): confirm that 0 = "yes" / 1 = "no" is the intended coding.
raw_t0 <- list(
  t0_item1 = c(0, 1, 0, 1, NA, NA),
  t0_item2 = c(1, 1, 0, 1, 0, 1),
  t0_item3 = c(1, 0, 1, 1, 0, NA),
  t0_item4 = c(0, 0, 1, 1, 1, 1)
)
raw_t1 <- list(
  t1_item1 = c(1, 1, 0, 1, NA, 0),
  t1_item2 = c(1, 0, 0, 1, NA, 1),
  t1_item3 = c(1, 0, 0, 1, NA, 1),
  t1_item4 = c(0, 0, 0, 1, NA, 1)
)

# Turn every item into a yes/no factor and collect one data frame per
# point in time.
data_t0 <- data.frame(lapply(raw_t0, factor, labels = c("yes", "no")))
data_t1 <- data.frame(lapply(raw_t1, factor, labels = c("yes", "no")))

# Remove the temporary raw vectors so that only the data frames remain.
rm(raw_t0, raw_t1)

head(data_t0)

# Code without function

# Data / different datasets can easily be arranged in the same order.
# But what comes next needs to be done many times: for different points in
# time and for different scales with different answer options.
# In this example:
#   items 1 and 2 add 1 to the total points of the indicator if the answer
#   is "yes"
#   items 3 and 4 add 1 to the total points of the indicator if the answer
#   is "no"
# The items are coded as factors and contain NAs, so both need care below.

# The items are arranged in data frames so that rowSums() can be used.
# Columns are selected by name instead of via attach()/detach(): attached
# columns are silently masked by same-named objects in the global environment.

# Scale 1 (all variables)
list_scale_1_t0 <- data_t0[c("t0_item1", "t0_item2", "t0_item3", "t0_item4")]
# Scale a adds 1 to the total score if "yes"
list_scale_1a_t0 <- data_t0[c("t0_item1", "t0_item2")]
# Scale b adds 1 if "no"
list_scale_1b_t0 <- data_t0[c("t0_item3", "t0_item4")]

# Number of items in the scale; taken before any helper columns are added.
n_items_scale_1 <- ncol(list_scale_1_t0)

# Count NAs and available data
# NAs
list_scale_1_t0$na <- rowSums(is.na(list_scale_1_t0))
# available data
list_scale_1_t0$all <- n_items_scale_1 - list_scale_1_t0$na

# Now rowSums() is used to count "yes" and "no" respectively.
# Scale a: count "yes" (2 items)
list_scale_1_t0$t0_p_scale_s1a <- rowSums(
  list_scale_1a_t0 == "yes",
  na.rm = TRUE
)
summary(list_scale_1_t0$t0_p_scale_s1a)
# Scale b: count "no" (2 items)
list_scale_1_t0$t0_p_scale_s1b <- rowSums(
  list_scale_1b_t0 == "no",
  na.rm = TRUE
)
summary(list_scale_1_t0$t0_p_scale_s1b)
# Sum both parts of the total indicator
list_scale_1_t0$t0_p_scale_s1 <-
  list_scale_1_t0$t0_p_scale_s1a + list_scale_1_t0$t0_p_scale_s1b
summary(list_scale_1_t0$t0_p_scale_s1)

# In order not to count an NA as 0 points, rescale the points obtained on the
# answered items to the full number of items.
list_scale_1_t0$t0_p_scale_s1_na <-
  list_scale_1_t0$t0_p_scale_s1 / list_scale_1_t0$all * n_items_scale_1

# Exclude rows with more than one NA
# Scale 1: 4 items -> min. 3 available / max. 1 NA (25%)
list_scale_1_t0$t0_p_scale_s1_na[list_scale_1_t0$na > 1] <- NA
summary(list_scale_1_t0$t0_p_scale_s1_na)

# The result is as I want it to be!

我尝试过用函数来实现上述步骤的各种写法，其中有些部分确实能运行。希望您能帮我优化上面的代码。

# This needs to be done multiple times, so it is wrapped in a function.
# The function is called first with data_t0 and then with data_t1; it does the
# calculations and returns the data frame with the results added as columns.

# Compute the scale-1 score for one data frame of yes/no items.
#
# Arguments:
#   df_source  Data frame holding the four items of the scale. The item
#              columns are found by name: exactly one column name must end in
#              "item1" (or "item_1"), one in "item2", and so on up to item 4.
#              Whatever precedes "item1" in that column name (e.g. "t0_") is
#              reused as the prefix of the result columns.
#   df_s1, df_s1a, df_s1b
#              Unused. Kept only so existing calls keep working; they are
#              never evaluated, so callers may omit them.
#
# Returns df_source with four numeric columns added (or overwritten):
#   <prefix>p_scale_s1a    number of "yes" answers on items 1 and 2
#   <prefix>p_scale_s1b    number of "no" answers on items 3 and 4
#   <prefix>p_scale_s1     sum of the two counts (an NA contributes 0)
#   <prefix>p_scale_s1_na  sum rescaled to 4 items using only the answered
#                          items; NA when more than one item is missing
#
# Stops with an error if an item column is missing or ambiguous.
fkt.scale1 <- function(df_source, df_s1, df_s1a, df_s1b) {
  n_items <- 4L
  # Scale 1: 4 items -> at most 1 NA (25%) is tolerated.
  max_na <- 1L

  col_names <- names(df_source)
  item_cols <- vapply(
    seq_len(n_items),
    function(i) {
      hit <- grep(paste0("item_?", i, "$"), col_names, value = TRUE)
      if (length(hit) != 1L) {
        stop(
          "expected exactly one column for item ", i,
          ", found ", length(hit),
          call. = FALSE
        )
      }
      hit
    },
    character(1)
  )
  # e.g. "t0_item1" -> "t0_", so results for t1 data are named "t1_...".
  prefix <- sub("item_?1$", "", item_cols[[1L]])

  items <- df_source[item_cols]
  n_missing <- unname(rowSums(is.na(items)))
  n_answered <- n_items - n_missing

  # Items 1 and 2 score on "yes", items 3 and 4 score on "no".
  score_a <- unname(rowSums(items[1:2] == "yes", na.rm = TRUE))
  score_b <- unname(rowSums(items[3:4] == "no", na.rm = TRUE))
  score <- score_a + score_b

  # Rescale to the full item count so that an NA is not counted as 0 points,
  # then drop rows with too many missing answers.
  score_na <- score / n_answered * n_items
  score_na[n_missing > max_na] <- NA

  df_source[[paste0(prefix, "p_scale_s1a")]] <- score_a
  df_source[[paste0(prefix, "p_scale_s1b")]] <- score_b
  df_source[[paste0(prefix, "p_scale_s1")]] <- score
  df_source[[paste0(prefix, "p_scale_s1_na")]] <- score_na
  df_source
}

# Apply the scale calculation to each point in time. Only the data frame is
# passed: df_s1, df_s1a and df_s1b are never defined as objects, and the
# function does not read them.
data_t0 <- fkt.scale1(data_t0)
data_t1 <- fkt.scale1(data_t1)

# Advice on how to handle this better is highly appreciated :)

谢谢！

计算问卷得分的函数（不同时间点，因子变量）

0 个答案: