R:LS意味着分析会产生NA?

时间:2018-11-20 00:33:32

标签: r linear-regression lsmeans

我正在运行线性模型回归分析脚本,并且正在我的模型上运行emmeans(ls意思是),但是我不确定整个NA的原因...这是我运行的内容:

   setwd("C:/Users/wkmus/Desktop/R-Stuff")
    ### yeild-twt
    ASM_Data<-read.csv("ASM_FIELD_18_SUMM_wm.csv",header=TRUE, na.strings=".")
    head(ASM_Data)
    str(ASM_Data)
    ####"NA" values in table are labeled as "." colored orange
    ASM_Data$REP <- as.factor(ASM_Data$REP)
    head(ASM_Data$REP)
    ASM_Data$ENTRY_NO <-as.factor(ASM_Data$ENTRY_NO)
    head(ASM_Data$ENTRY_NO)
    ASM_Data$RANGE<-as.factor(ASM_Data$RANGE)
    head(ASM_Data$RANGE)
    ASM_Data$PLOT_ID<-as.factor(ASM_Data$PLOT_ID)
    head(ASM_Data$PLOT_ID)
    ASM_Data$PLOT<-as.factor(ASM_Data$PLOT)
    head(ASM_Data$PLOT)
    ASM_Data$ROW<-as.factor(ASM_Data$ROW)
    head(ASM_Data$ROW)
    ASM_Data$REP <- as.numeric(as.character(ASM_Data$REP))
    head(ASM_Data$REP)
    ASM_Data$TWT_g.li <- as.numeric(as.character(ASM_Data$TWT_g.li))
    ASM_Data$Yield_kg.ha <- as.numeric(as.character(ASM_Data$Yield_kg.ha))
    ASM_Data$PhysMat_Julian <- as.numeric(as.character(ASM_Data$PhysMat_Julian))
    ASM_Data$flowering <- as.numeric(as.character(ASM_Data$flowering))
    ASM_Data$height <- as.numeric(as.character(ASM_Data$height))
    ASM_Data$CLEAN.WT <- as.numeric(as.character(ASM_Data$CLEAN.WT))
    ASM_Data$GRAV.TEST.WEIGHT <-as.numeric(as.character(ASM_Data$GRAV.TEST.WEIGHT))
    str(ASM_Data)

    library(lme4)
    #library(lsmeans)
    library(emmeans)

以下是数据框:

  > str(ASM_Data)
'data.frame':   270 obs. of  20 variables:
 $ TRIAL_ID         : Factor w/ 1 level "18ASM_OvOv": 1 1 1 1 1 1 1 1 1 1 ...
 $ PLOT_ID          : Factor w/ 270 levels "18ASM_OvOv_002",..: 1 2 3 4 5 6 7 8 9 10 ...
 $ PLOT             : Factor w/ 270 levels "2","3","4","5",..: 1 2 3 4 5 6 7 8 9 10 ...
 $ ROW              : Factor w/ 20 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ RANGE            : Factor w/ 15 levels "1","2","3","4",..: 2 3 4 5 6 7 8 9 10 12 ...
 $ REP              : num  1 1 1 1 1 1 1 1 1 1 ...
 $ MP               : int  1 1 1 1 1 1 1 1 1 1 ...
 $ SUB.PLOT         : Factor w/ 6 levels "A","B","C","D",..: 1 1 1 1 2 2 2 2 2 3 ...
 $ ENTRY_NO         : Factor w/ 139 levels "840","850","851",..: 116 82 87 134 77 120 34 62 48 136 ...
 $ height           : num  74 70 73 80 70 73 75 68 65 68 ...
 $ flowering        : num  133 133 134 134 133 131 133 137 134 132 ...
 $ CLEAN.WT         : num  1072 929 952 1149 1014 ...
 $ GRAV.TEST.WEIGHT : num  349 309 332 340 325 ...
 $ TWT_g.li         : num  699 618 663 681 650 684 673 641 585 646 ...
 $ Yield_kg.ha      : num  2073 1797 1841 2222 1961 ...
 $ Chaff.Color      : Factor w/ 3 levels "Bronze","Mixed",..: 1 3 3 1 1 1 1 3 1 3 ...
 $ CHAFF_COLOR_SCALE: int  2 1 1 2 2 2 2 1 2 1 ...
 $ PhysMat          : Factor w/ 3 levels "6/12/2018","6/13/2018",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ PhysMat_Julian   : num  163 163 163 163 163 163 163 163 163 163 ...
 $ PEDIGREE         : Factor w/ 1 level "OVERLEY/OVERLAND": 1 1 1 1 1 1 1 1 1 1 ...

这是ASM数据的标题:

 head(ASM_Data)
    `TRIAL_ID        PLOT_ID PLOT ROW RANGE REP MP SUB.PLOT ENTRY_NO height flowering CLEAN.WT GRAV.TEST.WEIGHT TWT_g.li`
    1 18ASM_OvOv 18ASM_OvOv_002    2   1     2   1  1        A      965     74       133   1071.5           349.37      699
    2 18ASM_OvOv 18ASM_OvOv_003    3   1     3   1  1        A      931     70       133    928.8           309.13      618
    3 18ASM_OvOv 18ASM_OvOv_004    4   1     4   1  1        A      936     73       134    951.8           331.70      663
    4 18ASM_OvOv 18ASM_OvOv_005    5   1     5   1  1        A      983     80       134   1148.6           340.47      681
    5 18ASM_OvOv 18ASM_OvOv_006    6   1     6   1  1        B      926     70       133   1014.0           324.95      650
    6 18ASM_OvOv 18ASM_OvOv_007    7   1     7   1  1        B      969     73       131   1076.6           342.09      684
      Yield_kg.ha Chaff.Color CHAFF_COLOR_SCALE   PhysMat PhysMat_Julian         PEDIGREE
    1        2073      Bronze                 2 6/12/2018            163 OVERLEY/OVERLAND
    2        1797       White                 1 6/12/2018            163 OVERLEY/OVERLAND
    3        1841       White                 1 6/12/2018            163 OVERLEY/OVERLAND
    4        2222      Bronze                 2 6/12/2018            163 OVERLEY/OVERLAND
    5        1961      Bronze                 2 6/12/2018            163 OVERLEY/OVERLAND
    6        2082      Bronze                 2 6/12/2018            163 OVERLEY/OVERLAND

我正在研究一个处理体重的线性模型。

这是我跑的:

ASM_Data$TWT_g.li <- as.numeric(as.character((ASM_Data$TWT_g.li))) head(ASM_Data$TWT_g.li)

ASM_YIELD_1 <- lm(TWT_g.li~ENTRY_NO + REP + SUB.BLOCK, data=ASM_Data)
anova(ASM_YIELD_1)
summary(ASM_YIELD_1)
emmeans(ASM_YIELD_1, "ENTRY_NO") ###########ADJ. MEANS

我得到方差分析的输出

anova(ASM_YIELD_1)
Analysis of Variance Table

Response: TWT_g.li
           Df Sum Sq Mean Sq  F value  Pr(>F)    
ENTRY_NO  138 217949    1579   7.0339 < 2e-16 ***
REP         1  66410   66410 295.7683 < 2e-16 ***
SUB.BLOCK   4   1917     479   2.1348 0.08035 .  
Residuals 125  28067     225                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

但是对于em,我得到的是这样的:

ENTRY_NO emmean SE df asymp.LCL asymp.UCL
 840      nonEst NA NA        NA        NA
 850      nonEst NA NA        NA        NA
 851      nonEst NA NA        NA        NA
 852      nonEst NA NA        NA        NA
 853      nonEst NA NA        NA        NA
 854      nonEst NA NA        NA        NA
 855      nonEst NA NA        NA        NA
 857      nonEst NA NA        NA        NA
 858      nonEst NA NA        NA        NA
 859      nonEst NA NA        NA        NA

我的数据中确实有离群值,以“。”表示。在我的数据中,但这是我唯一想到的是关闭状态。

当我运行with(ASM_Data, table(ENTRY_NO, REP, SUB.BLOCK))

这就是我所拥有的:

 with(ASM_Data, table(ENTRY_NO,REP,SUB.BLOCK))
, , SUB.BLOCK = A

        REP
ENTRY_NO 1 2
     840 0 0
     850 0 0
     851 0 0
     852 0 0
     853 0 0
     854 0 0
     855 0 0
     857 0 0
     858 0 0
     859 0 0
     860 0 0
     861 0 0
     862 0 0
     863 1 0
     864 0 0
     865 1 0
     866 1 0
     867 0 0
     868 0 0
     869 1 0
     870 1 0
     871 0 0
     872 0 0
     873 0 0
     874 0 0
     875 0 0
     876 0 0
     877 0 0
     878 0 0
     879 1 0
     880 0 0
     881 0 0
     882 0 0
     883 0 0
     884 0 0
     885 1 0
     886 0 0
     887 1 0
     888 1 0
     889 1 0
     890 0 0
     891 1 0
     892 0 0
     893 0 0
     894 0 0
     895 0 0
     896 1 0
     897 0 0
     898 0 0
     899 0 0
     900 1 0
     901 1 0
     902 0 0
     903 0 0
     904 1 0
     905 1 0
     906 0 0
     907 1 0
     908 1 0
     909 0 0
     910 0 0
     911 0 0
     912 0 0
     913 0 0
     914 0 0
     915 0 0
     916 1 0
     917 0 0
     918 0 0
     919 1 0
     920 0 0
     921 0 0
     922 0 0
     923 1 0
     924 0 0
     925 0 0
     926 0 0
     927 1 0
     928 0 0
     929 0 0
     930 0 0
     931 1 0
     932 0 0
     933 0 0
     934 0 0
     935 0 0
     936 1 0
     937 0 0
     938 1 0
     939 1 0
     940 0 0
     941 1 0
     942 0 0
     943 1 0
     944 0 0
     945 0 0
     946 0 0
     947 0 0
     948 1 0
     949 0 0
     950 1 0
     951 0 0
     952 0 0
     953 0 0
     954 0 0
     955 1 0
     956 1 0
     957 1 0
     958 1 0
     959 0 0
     960 0 0
     961 0 0
     962 0 0
     963 0 0
     964 0 0
     965 1 0
     966 0 0
     967 1 0
     968 0 0
     969 0 0
     970 1 0
     971 0 0
     972 0 0
     973 0 0
     974 1 0
     975 0 0
     976 0 0
     977 0 0
     978 1 0
     979 0 0
     980 0 0
     981 0 0
     982 0 0
     983 1 0
     984 1 0
     985 0 0
     986 1 0
     987 3 0
     988 0 0

, , SUB.BLOCK = B

        REP
ENTRY_NO 1 2
     840 0 0
     850 0 0
     851 0 0
     852 0 0
     853 1 0
     854 0 0
     855 0 0
     857 0 0
     858 0 0
     859 0 0
     860 0 0
     861 1 0
     862 0 0
     863 0 0
     864 0 0
     865 0 0
     866 0 0
     867 0 0
     868 0 0
     869 0 0
     870 0 0
     871 1 0
     872 0 0
     873 0 0
     874 0 0
     875 0 0
     876 1 0
     877 1 0
     878 1 0
     879 0 0
     880 1 0
     881 0 0
     882 1 0
     883 1 0
     884 1 0
     885 0 0
     886 0 0
     887 0 0
     888 0 0
     889 0 0
     890 1 0
     891 0 0
     892 1 0
     893 1 0
     894 1 0
     895 1 0
     896 0 0
     897 1 0
     898 0 0
     899 0 0
     900 0 0
     901 0 0
     902 1 0
     903 0 0
     904 0 0
     905 0 0
     906 0 0
     907 0 0
     908 0 0
     909 1 0
     910 0 0
     911 1 0
     912 0 0
     913 1 0
     914 0 0
     915 0 0
     916 0 0
     917 0 0
     918 0 0
     919 0 0
     920 1 0
     921 1 0
     922 0 0
     923 0 0
     924 0 0
     925 1 0
     926 1 0
     927 0 0
     928 0 0
     929 0 0
     930 1 0
     931 0 0
     932 1 0
     933 0 0
     934 1 0
     935 0 0
     936 0 0
     937 1 0
     938 0 0
     939 0 0
     940 1 0
     941 0 0
     942 0 0
     943 0 0
     944 0 0
     945 1 0
     946 0 0
     947 1 0
     948 0 0
     949 0 0
     950 0 0
     951 1 0
     952 0 0
     953 0 0
     954 1 0
     955 0 0
     956 0 0
     957 0 0
     958 0 0
     959 1 0
     960 0 0
     961 0 0
     962 1 0
     963 0 0
     964 0 0
     965 0 0
     966 0 0
     967 0 0
     968 0 0
     969 1 0
     970 0 0
     971 0 0
     972 0 0
     973 0 0
     974 0 0
     975 0 0
     976 1 0
     977 1 0
     978 0 0
     979 0 0
     980 0 0
     981 1 0
     982 1 0
     983 0 0
     984 0 0
     985 3 0
     986 0 0
     987 1 0
     988 1 0

, , SUB.BLOCK = C

        REP
ENTRY_NO 1 2
     840 0 0
     850 0 0
     851 0 0
     852 0 0
     853 0 0
     854 0 0
     855 0 0
     857 1 0
     858 0 0
     859 1 0
     860 0 0
     861 0 0
     862 1 0
     863 0 0
     864 0 0
     865 0 0
     866 0 0
     867 0 0
     868 0 0
     869 0 0
     870 0 0
     871 0 0
     872 1 0
     873 0 0
     874 0 0
     875 0 0
     876 0 0
     877 0 0
     878 0 0
     879 0 0
     880 0 0
     881 1 0
     882 0 0
     883 0 0
     884 0 0
     885 0 0
     886 1 0
     887 0 0
     888 0 0
     889 0 0
     890 0 0
     891 0 0
     892 0 0
     893 0 0
     894 0 0
     895 0 0
     896 0 0
     897 0 0
     898 1 0
     899 1 0
     900 0 0
     901 0 0
     902 0 0
     903 1 0
     904 0 0
     905 0 0
     906 1 0
     907 0 0
     908 0 0
     909 0 0
     910 1 0
     911 0 0
     912 1 0
     913 0 0
     914 1 0
     915 1 0
     916 0 0
     917 1 0
     918 1 0
     919 0 0
     920 0 0
     921 0 0
     922 1 0
     923 0 0
     924 1 0
     925 0 0
     926 0 0
     927 0 0
     928 1 0
     929 1 0
     930 0 0
     931 0 0
     932 0 0
     933 1 0
     934 0 0
     935 1 0
     936 0 0
     937 0 0
     938 0 0
     939 0 0
     940 0 0
     941 0 0
     942 1 0
     943 0 0
     944 1 0
     945 0 0
     946 1 0
     947 0 0
     948 0 0
     949 1 0
     950 0 0
     951 0 0
     952 1 0
     953 1 0
     954 0 0
     955 0 0
     956 0 0
     957 0 0
     958 0 0
     959 0 0
     960 1 0
     961 1 0
     962 0 0
     963 1 0
     964 1 0
     965 0 0
     966 1 0
     967 0 0
     968 1 0
     969 0 0
     970 0 0
     971 1 0
     972 1 0
     973 1 0
     974 0 0
     975 1 0
     976 0 0
     977 0 0
     978 1 0
     979 2 0
     980 0 0
     981 0 0
     982 0 0
     983 0 0
     984 0 0
     985 1 0
     986 3 0
     987 0 0
     988 0 0

, , SUB.BLOCK = D

        REP
ENTRY_NO 1 2
     840 0 0
     850 0 0
     851 0 0
     852 0 1
     853 0 0
     854 0 0
     855 0 0
     857 0 0
     858 0 1
     859 0 0
     860 0 1
     861 0 0
     862 0 0
     863 0 0
     864 0 1
     865 0 0
     866 0 0
     867 0 0
     868 0 0
     869 0 0
     870 0 0
     871 0 0
     872 0 0
     873 0 0
     874 0 0
     875 0 1
     876 0 0
     877 0 0
     878 0 1
     879 0 0
     880 0 1
     881 0 1
     882 0 1
     883 0 1
     884 0 1
     885 0 0
     886 0 0
     887 0 0
     888 0 0
     889 0 0
     890 0 0
     891 0 0
     892 0 1
     893 0 0
     894 0 0
     895 0 0
     896 0 0
     897 0 1
     898 0 0
     899 0 1
     900 0 0
     901 0 0
     902 0 1
     903 0 0
     904 0 0
     905 0 0
     906 0 0
     907 0 0
     908 0 0
     909 0 0
     910 0 0
     911 0 0
     912 0 0
     913 0 1
     914 0 1
     915 0 1
     916 0 0
     917 0 1
     918 0 1
     919 0 0
     920 0 0
     921 0 1
     922 0 1
     923 0 0
     924 0 0
     925 0 0
     926 0 0
     927 0 0
     928 0 0
     929 0 1
     930 0 1
     931 0 0
     932 0 0

有人可以告诉我发生了什么事吗?

谢谢!

2 个答案:

答案 0 :(得分:0)

通过对2次重复和5个区块(或更多)的预测结果求平均来获得EMM。看看

coef(ASM_YIELD_1)

如果任何rep或block效果都不可用,则您无法估算所有的rep或block效果,这使得它们的平均值不可估计。

您可以通过执行以下操作确切地查看哪些因子组合是不可估计的:

summary(ref_grid(ASM_YIELD_1))

附录

这是我在评论中要求的表格的重新格式化:

ENTRY   ---------- BLOCK -------------
 NO      A        B        C        D

 840    0 0      0 0      0 0      0 0
 850    0 0      0 0      0 0      0 0
 851    0 0      0 0      0 0      0 0
 852    0 0      0 0      0 0      0 1
 853    0 0      1 0      0 0      0 0
 854    0 0      0 0      0 0      0 0
 855    0 0      0 0      0 0      0 0
 857    0 0      0 0      1 0      0 0
 858    0 0      0 0      0 0      0 1
 859    0 0      0 0      1 0      0 0
    ... etc ...

这是极为稀疏的数据。我认为还有两个未显示的块。但是我看到很少有实例在多个rep或block中观察到给定的ENTRY_NO。因此,我认为尝试考虑此模型中的代表效应或阻滞效应严重不合适。

MAYBE从模型中省略REP将使其起作用。可以用factor(REP)代替REP重新拟合模型,这将使​​emmeans能够检测嵌套结构。否则,阻滞结构和治疗方法确实存在一些微妙的依赖性,我不知道该怎么建议。

答案 1 :(得分:0)

我已经能够创造这样的情况。考虑以下数据集:

> junk
   trt rep blk           y
1    A   1   1 -1.17415687
2    B   1   1 -0.20084854
3    C   1   1  0.64797806
4    A   1   2 -1.69371434
5    B   1   2 -0.35835442
6    C   1   2  1.35718782
7    A   1   3  0.20510482
8    B   1   3  1.00857651
9    C   1   3 -0.20553167
10   A   2   4  0.31261523
11   B   2   4  0.47989115
12   C   2   4  1.27574085
13   A   2   5 -0.79209520
14   B   2   5  1.07151315
15   C   2   5 -0.04222769
16   A   2   6 -0.80571767
17   B   2   6  0.80442988
18   C   2   6  1.73526561

这有6个完整的块,每个代表分别标记为3个块。 rep是具有值12的数字变量,而blk是具有6个级别1的因数,这不是很明显,但确实如此。 6

> sapply(junk, class)
      trt       rep       blk         y 
 "factor" "numeric"  "factor" "numeric"

有了这个完整的数据集,我可以毫不费力地获得用于建模情况的EMM,以与原始发布中使用的情况类似。但是,如果仅使用这些数据的一个子集,则情况有所不同。考虑:

> samp
[1]  1  2  3  5  8 11 13 15 16

> junk.lm = lm(y ~ trt + rep + blk, data = junk, subset = samp)
> emmeans(junk.lm, "trt")
 trt emmean SE df asymp.LCL asymp.UCL
 A   nonEst NA NA        NA        NA
 B   nonEst NA NA        NA        NA
 C   nonEst NA NA        NA        NA

Results are averaged over the levels of: blk 
Confidence level used: 0.95

同样,请记住,在此模型中,rep数字。如果相反,我将rep作为一个因素:

> junk.lmf = lm(y ~ trt + factor(rep) + blk, data = junk, subset = samp)
> emmeans(junk.lmf, "trt")
NOTE: A nesting structure was detected in the fitted model:
    blk %in% rep
If this is incorrect, re-run or update with `nesting` specified
 trt     emmean        SE df  lower.CL upper.CL
 A   -0.6262635 0.4707099  1 -6.607200 5.354673
 B    0.0789780 0.3546191  1 -4.426885 4.584841
 C    0.6597377 0.5191092  1 -5.936170 7.255646

Results are averaged over the levels of: blk, rep 
Confidence level used: 0.95

我们得到非NA的估计值,部分原因是它能够检测到blk嵌套在rep中的事实,从而在每个rep中分别执行EMM计算。请注意,在最后一个输出的注释中,平均是在2个重复和6个块上完成的;而在fiber.lm中,平均仅对块进行,而rep(一个数字变量)被设置为其平均值。比较两个模型的参考网格:

> ref_grid(junk.lm)
'emmGrid' object with variables:
    trt = A, B, C
    rep = 1.4444
    blk = 1, 2, 3, 4, 5, 6

> ref_grid(junk.lmf)
'emmGrid' object with variables:
    trt = A, B, C
    rep = 1, 2
    blk = 1, 2, 3, 4, 5, 6
Nesting structure:  blk %in% rep

另一种选择是通过简单地从模型中省略rep来避免嵌套问题:

> junk.lm.norep = lm(y ~ trt + blk, data = junk, subset = samp)
> emmeans(junk.lm.norep, "trt")
 trt     emmean        SE df  lower.CL upper.CL
 A   -0.6262635 0.4707099  1 -6.607200 5.354673
 B    0.0789780 0.3546191  1 -4.426885 4.584841
 C    0.6597377 0.5191092  1 -5.936170 7.255646

Results are averaged over the levels of: blk 
Confidence level used: 0.95

请注意,产生的结果完全相同。原因是blk的水平已经可以预测rep的水平,因此模型中不需要它。

总结:

  • 这种情况部分归因于缺少数据的事实
  • 部分是因为rep在模型中是数字预测变量,而不是因素。
  • 根据您的情况,我建议使用factor(REP)(而不是REP)重新拟合模型作为数值预测变量。这可能足以产生估计值。
  • 如果实际上,例如在我的示例中,SUB.BLOCK级别预测了REP级别,则将REP完全排除在模型之外。