Docx4j - 如何获取docx复选框状态

时间:2016-04-22 16:31:04

标签: java checkbox docx docx4j

我正在尝试读取一堆格式相同的docx文件,并将数据提取到数据库中。读取文本没有任何问题,但在读取复选框时遇到了困难。需要说明的是,我是docx4j的新手,已经为此苦苦挣扎了四天。非常感谢任何帮助或建议。

我附上了一份我正在尝试读取的文件(test.docx)。我自己用Word插入的第一个复选框能被我的代码检测到,并在遍历中以CTSdtCell的形式出现,但其他复选框则检测不到。它们在文件中似乎以不同的方式表示,即CTObject、CTShape、CTImageData和CTControl,我找不到从这些对象中的任何一个获取复选框状态的方法。

/**
 * Loads {@code test.docx} from the working directory and walks the content of its main
 * document part with a {@link Finder}, which prints what it visits to standard output.
 *
 * @param args unused
 * @throws Exception if the package cannot be loaded or traversed
 */
public static void main(String[] args) throws Exception {
    java.io.File input = new java.io.File("test.docx");
    MainDocumentPart mainPart = WordprocessingMLPackage.load(input).getMainDocumentPart();

    // The walk is triggered by constructing the TraversalUtil; the instance itself is not kept.
    new TraversalUtil(mainPart.getContent(), new Finder(FldChar.class));
}

/**
 * Traversal callback that prints the class name of every visited node, reports the state of
 * each checkbox content control it finds, echoes the value of text nodes, and collects every
 * node whose class is exactly {@code typeToFind} into {@link #results}.
 *
 * <p>Only checkbox content controls (a {@code CTSdtCheckbox} inside the properties of an
 * {@code CTSdtCell}, {@code SdtRun} or {@code SdtBlock}) are reported. Legacy ActiveX
 * checkboxes, which show up in the traversal as {@code CTObject}/{@code CTControl}, are not
 * content controls and are not detected here.
 */
public static class Finder extends CallbackImpl {
    /** Exact class of the nodes to collect into {@link #results}. */
    protected Class<?> typeToFind;

    /**
     * @param typeToFind exact class (no subclasses) of the nodes to collect
     */
    protected Finder(Class<?> typeToFind) {
        this.typeToFind = typeToFind;
    }

    /** Nodes visited so far whose class equals {@code typeToFind}. */
    public List<Object> results = new ArrayList<Object>();

    /**
     * Inspects one visited node.
     *
     * @param o the node currently being visited
     * @return always {@code null}
     */
    @Override
    public List<Object> apply(Object o) {
        System.out.println(o.getClass().getName());

        // A content control may carry no properties element, so guard before dereferencing.
        if (o instanceof org.docx4j.wml.CTSdtCell) {
            org.docx4j.wml.CTSdtCell cell = (org.docx4j.wml.CTSdtCell) o;
            if (cell.getSdtPr() != null) {
                findCheckbox(cell.getSdtPr().getRPrOrAliasOrLock());
            }
        }

        if (o instanceof org.docx4j.wml.SdtRun) {
            org.docx4j.wml.SdtRun run = (org.docx4j.wml.SdtRun) o;
            if (run.getSdtPr() != null) {
                findCheckbox(run.getSdtPr().getRPrOrAliasOrLock());
            }
        }

        if (o instanceof org.docx4j.wml.SdtBlock) {
            org.docx4j.wml.SdtBlock block = (org.docx4j.wml.SdtBlock) o;
            if (block.getSdtPr() != null) {
                findCheckbox(block.getSdtPr().getRPrOrAliasOrLock());
            }
        }

        if (o instanceof org.docx4j.wml.Text) {
            System.out.println("      Text Value : " + ((org.docx4j.wml.Text) o).getValue());
        }

        // Adapt as required
        if (o.getClass().equals(typeToFind)) {
            results.add(o);
        }
        return null;
    }

    /**
     * Prints the checked state of every checkbox found among the given content-control
     * properties.
     *
     * @param objs property children of a content control; {@code null} is treated as empty
     */
    private static void findCheckbox(List<Object> objs) {
        if (objs == null) {
            return;
        }
        for (Object obj : objs) {
            if (!(obj instanceof javax.xml.bind.JAXBElement)) {
                continue;
            }
            // Test the wrapped value's type directly instead of comparing class-name strings;
            // this also avoids the unchecked raw-type assignment.
            Object value = ((javax.xml.bind.JAXBElement<?>) obj).getValue();
            if (value instanceof org.docx4j.w14.CTSdtCheckbox) {
                org.docx4j.w14.CTSdtCheckbox cb = (org.docx4j.w14.CTSdtCheckbox) value;
                org.docx4j.w14.CTOnOff onOff = cb.getChecked();
                // The checked element may be absent; print "null" in that case rather than throwing.
                System.out.println("      CheckBox found with value="
                        + (onOff == null ? null : onOff.getVal()));
            }
        }
    }
}

结果是:

org.docx4j.wml.Tbl
org.docx4j.wml.Tr
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : WORK INSTRUCTION #
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Drawing
org.docx4j.dml.wordprocessingDrawing.Inline
org.docx4j.dml.CTBlip
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value :  
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : A
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value :  
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value :  
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : STEP BY STEP
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value :  
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : - 
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : WORK INSTRUCTION
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Drawing
org.docx4j.dml.wordprocessingDrawing.Inline
org.docx4j.dml.CTBlip
org.docx4j.wml.Tr
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : 1234567
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.Tr
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : TASK
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : Chlorine drum change
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : DATE
org.docx4j.wml.CTSdtCell
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : 12/07/2015
org.docx4j.wml.Tr
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : MACHINE
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : ORIGINATOR
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : D.GROVE
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : CLOCK NUMBER
org.docx4j.wml.CTSdtCell
      CheckBox found with value=1
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : ?
org.docx4j.wml.Tr
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : AREA
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : CHLORINE HOUSE
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : CHECKED
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value :  
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : (EXPERT)
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : J Clarke
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : CLOCK NUMBER
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : 4985
org.docx4j.wml.Tr
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : PPE 
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Drawing
org.docx4j.dml.wordprocessingDrawing.Anchor
org.docx4j.dml.CTBlip
org.docx4j.dml.CTColorChangeEffect
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : EYE
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Drawing
org.docx4j.dml.wordprocessingDrawing.Anchor
org.docx4j.dml.CTBlip
org.docx4j.dml.CTColorChangeEffect
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : EAR
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Drawing
org.docx4j.dml.wordprocessingDrawing.Anchor
org.docx4j.dml.CTBlip
org.docx4j.dml.CTColorChangeEffect
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : FOOT
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Drawing
org.docx4j.dml.wordprocessingDrawing.Anchor
org.docx4j.dml.CTBlip
org.docx4j.dml.CTColorChangeEffect
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : HEAD
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Drawing
org.docx4j.dml.wordprocessingDrawing.Anchor
org.docx4j.dml.CTBlip
org.docx4j.dml.CTColorChangeEffect
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : HAND
org.docx4j.wml.Tr
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.CTObject
org.docx4j.vml.CTShapetype
org.docx4j.vml.CTStroke
org.docx4j.vml.CTFormulas
org.docx4j.vml.CTF
org.docx4j.vml.CTF
org.docx4j.vml.CTF
org.docx4j.vml.CTF
org.docx4j.vml.CTF
org.docx4j.vml.CTF
org.docx4j.vml.CTF
org.docx4j.vml.CTF
org.docx4j.vml.CTF
org.docx4j.vml.CTF
org.docx4j.vml.CTF
org.docx4j.vml.CTF
org.docx4j.vml.CTPath
org.docx4j.vml.officedrawing.CTLock
org.docx4j.vml.CTShape
org.docx4j.vml.CTImageData
org.docx4j.wml.CTControl
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.CTObject
org.docx4j.vml.CTShape
org.docx4j.vml.CTImageData
org.docx4j.wml.CTControl
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.CTObject
org.docx4j.vml.CTShape
org.docx4j.vml.CTImageData
org.docx4j.wml.CTControl
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.CTObject
org.docx4j.vml.CTShape
org.docx4j.vml.CTImageData
org.docx4j.wml.CTControl
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.CTObject
org.docx4j.vml.CTShape
org.docx4j.vml.CTImageData
org.docx4j.wml.CTControl
org.docx4j.wml.Tr
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : COSHH
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : SPECIAL PPE REQUIREMENTS
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : *SITE 
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : R/A NUMBER
org.docx4j.wml.Tr
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : CONSIDERATION
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : PRODUCTS
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : B.A. EQUIPMENT
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : 12668
org.docx4j.wml.Tr
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.CTObject
org.docx4j.vml.CTShape
org.docx4j.vml.CTImageData
org.docx4j.wml.CTControl
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value : CHLORINE
org.docx4j.wml.R
org.docx4j.wml.Text
      Text Value :  GAS
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.Tr
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.Tc
org.docx4j.wml.P
org.docx4j.wml.P
org.docx4j.wml.CTBookmark
org.docx4j.wml.CTMarkupRange

我现在补充了MainDocumentPart.getXML()的输出中,包含其中一个难以捉摸的复选框的那个单元格。我在其中看不到任何能告诉我复选框取值的内容。谁能告诉我应该去哪里找我需要的信息?

<w:tc>
        <w:tcPr>
            <w:tcW w:w="1015" w:type="dxa"/>
            <w:tcBorders>
                <w:left w:val="single" w:color="auto" w:sz="24" w:space="0"/>
                <w:bottom w:val="single" w:color="auto" w:sz="24" w:space="0"/>
                <w:right w:val="single" w:color="auto" w:sz="24" w:space="0"/>
            </w:tcBorders>
            <w:vAlign w:val="center"/>
        </w:tcPr>
        <w:p w:rsidRPr="00A7008C" w:rsidR="00F909A4" w:rsidP="00017AE9" w:rsidRDefault="000F5760">
            <w:pPr>
                <w:jc w:val="center"/>
                <w:rPr>
                    <w:b/>
                    <w:color w:val="FFFFFF" w:themeColor="background1"/>
                </w:rPr>
            </w:pPr>
            <w:r>
                <w:rPr>
                    <w:b/>
                    <w:color w:val="FFFFFF" w:themeColor="background1"/>
                    <w:sz w:val="36"/>
                </w:rPr>
                <w:object w:dxaOrig="225" w:dyaOrig="225">
                    <v:shape type="#_x0000_t75" style="width:12pt;height:29.25pt" id="_x0000_i1063" o:ole="">
                        <v:imagedata o:title="" r:id="rId17"/>
                    </v:shape>
                    <w:control w:name="CheckBox11" w:shapeid="_x0000_i1063" r:id="rId18"/>
                </w:object>
            </w:r>
            <w:bookmarkEnd w:id="0"/>
        </w:p>
    </w:tc>

2 个答案:

答案 0 :(得分:0)

我破解了!! CTImageData指向可以通过文档关系访问的图像。这些图像包含勾选或未勾选的框。通过检查图像的大小,我可以判断它是什么。

我对Word的了解仅限于表面使用,并不知道这些“复选框”是如何创建的,但似乎它们的创建方式与我的测试方式不同。因此,如果/当组织升级其MS Office软件,再次编辑和保存文档文件时,我不知道这些图像是否会发生变化。但是,我的软件需要在初始加载后迅速改变,因此这种风险对我来说意义不大。

答案 1 :(得分:0)

现有的复选框是旧版ActiveX控件:

          <w:object w:dxaOrig="225" w:dyaOrig="225">
            <v:shapetype id="_x0000_t75" coordsize="21600,21600" o:spt="75" o:preferrelative="t" path="m@4@5l@4@11@9@11@9@5xe" filled="f" stroked="f">
              <v:stroke joinstyle="miter"/>
              <v:formulas>
                :
              </v:formulas>
              <v:path o:extrusionok="f" gradientshapeok="t" o:connecttype="rect"/>
              <o:lock v:ext="edit" aspectratio="t"/>
            </v:shapetype>
            <v:shape id="_x0000_i1025" type="#_x0000_t75" style="width:12pt;height:29.25pt" o:ole="">
              <v:imagedata r:id="rId15" o:title=""/>
            </v:shape>
            <w:control r:id="rId16" w:name="CheckBox" w:shapeid="_x0000_i1025"/>
          </w:object>

您正在创建的是现代XML友好的复选框内容控件。

还有复选框字符和复选框表单字段......