Android + Apache POI:我需要读取 .docx 并将其转换为文本

时间:2019-09-10 12:43:57

标签: java android apache-poi

我需要读取 .docx 文件并将其转换为文本。我正在尝试使用 Apache POI 库,但遇到了一些错误。

build.gradle

// Module-level build script: an Android application module with Kotlin support.
apply plugin: 'com.android.application'

apply plugin: 'kotlin-android'

apply plugin: 'kotlin-android-extensions'

android {
    compileSdkVersion 29
    defaultConfig {
        applicationId "by.roman.irtov.apachepoi"
        minSdkVersion 23
        targetSdkVersion 29
        versionCode 1
        versionName "1.0"
        // Allow the app's classes to be split across more than one DEX file.
        multiDexEnabled true
    }
    buildTypes {
        release {
            // Code shrinking/obfuscation is disabled for release builds.
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }

    // Compile against Java 8 language level.
    compileOptions {
        sourceCompatibility = 1.8
        targetCompatibility = 1.8
    }

}

dependencies {
    // Any jars dropped into the module's libs/ directory.
    implementation fileTree(dir: 'libs', include: ['*.jar'])
    implementation "org.jetbrains.kotlin:kotlin-stdlib-jdk7:$kotlin_version"
    implementation 'androidx.appcompat:appcompat:1.0.2'
    implementation 'androidx.core:core-ktx:1.0.2'
    implementation 'androidx.constraintlayout:constraintlayout:1.1.3'
    // NOTE(review): this is the support-library multidex artifact while the other
    // dependencies are androidx — presumably should be androidx.multidex; confirm.
    implementation 'com.android.support:multidex:1.0.3'
    // Apache POI core plus the OOXML module (the XWPF classes used for .docx come from poi-ooxml).
    implementation 'org.apache.poi:poi:4.1.0'
    implementation 'org.apache.poi:poi-ooxml:4.1.0'
}

Application 类

/**
 * Application class that enables multidex support for the app.
 *
 * [MultiDexApplication] already installs multidex support from its own
 * `attachBaseContext`, so an additional `MultiDex.install(this)` call in
 * `onCreate` is redundant and has been removed.
 */
class App : MultiDexApplication()

读取 docx 的代码块

/**
 * Reads `test.docx` from the external storage directory and extracts its plain
 * text with Apache POI.
 *
 * Any [Exception] raised while opening or parsing the document is caught and its
 * stack trace printed; nothing is returned to the caller.
 */
private fun readDocx() {
    try {
        val dir = Environment.getExternalStorageDirectory()
        val yourFile = File(dir, "test.docx")
        val opcPackage = OPCPackage.open(yourFile, PackageAccess.READ)
        try {
            val docx = XWPFDocument(opcPackage)
            val wx = XWPFWordExtractor(docx)
            val text = wx.text
        } finally {
            // Always release the underlying zip file, even when parsing fails with an
            // Error that the outer catch does not handle. revert() closes a read-only
            // package without trying to save it.
            opcPackage.revert()
        }
    } catch (ex: Exception) {
        ex.printStackTrace()
    }
}

在 val docx = XWPFDocument(opcPackage) 这一行,我遇到了如下错误:

FATAL EXCEPTION: main
    Process: by.roman.irtov.apachepoi, PID: 8158
    java.lang.NoClassDefFoundError: Failed resolution of: Ljavax/xml/stream/XMLStreamReader;
        at org.apache.xmlbeans.XmlBeans.buildStreamToNodeMethod(XmlBeans.java:251)
        at org.apache.xmlbeans.XmlBeans.<clinit>(XmlBeans.java:138)
        at org.apache.xmlbeans.XmlBeans.typeLoaderForClassLoader(XmlBeans.java:719)
        at org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument$Factory.getTypeLoader(Unknown Source:25)
        at org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument$Factory.parse(Unknown Source:0)
        at org.apache.poi.xwpf.usermodel.XWPFDocument.onDocumentRead(XWPFDocument.java:178)
        at org.apache.poi.ooxml.POIXMLDocument.load(POIXMLDocument.java:184)
        at org.apache.poi.xwpf.usermodel.XWPFDocument.<init>(XWPFDocument.java:138)
        at by.roman.irtov.apachepoi.MainActivity.openOpenPDF(MainActivity.kt:50)
        at by.roman.irtov.apachepoi.MainActivity.access$openOpenPDF(MainActivity.kt:15)
        at by.roman.irtov.apachepoi.MainActivity$onCreate$1.onClick(MainActivity.kt:22)
        at android.view.View.performClick(View.java:6608)
        at android.view.View.performClickInternal(View.java:6585)
        at android.view.View.access$3100(View.java:785)
        at android.view.View$PerformClick.run(View.java:25921)
        at android.os.Handler.handleCallback(Handler.java:873)
        at android.os.Handler.dispatchMessage(Handler.java:99)
        at android.os.Looper.loop(Looper.java:201)
        at android.app.ActivityThread.main(ActivityThread.java:6810)
        at java.lang.reflect.Method.invoke(Native Method)
        at com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run(RuntimeInit.java:547)
        at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:873)
     Caused by: java.lang.ClassNotFoundException: Didn't find class "javax.xml.stream.XMLStreamReader" on path: DexPathList[[zip file "/data/app/by.roman.irtov.apachepoi-dz-oAIxqfj8GBAesyNnFKA==/base.apk"],nativeLibraryDirectories=[/data/app/by.roman.irtov.apachepoi-dz-oAIxqfj8GBAesyNnFKA==/lib/arm64, /system/lib64]]
        at dalvik.system.BaseDexClassLoader.findClass(BaseDexClassLoader.java:134)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:379)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:312)
        at org.apache.xmlbeans.XmlBeans.buildStreamToNodeMethod(XmlBeans.java:251) 
        at org.apache.xmlbeans.XmlBeans.<clinit>(XmlBeans.java:138) 
        at org.apache.xmlbeans.XmlBeans.typeLoaderForClassLoader(XmlBeans.java:719) 
        at org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument$Factory.getTypeLoader(Unknown Source:25) 
        at org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument$Factory.parse(Unknown Source:0) 
        at org.apache.poi.xwpf.usermodel.XWPFDocument.onDocumentRead(XWPFDocument.java:178) 
        at org.apache.poi.ooxml.POIXMLDocument.load(POIXMLDocument.java:184) 
        at org.apache.poi.xwpf.usermodel.XWPFDocument.<init>(XWPFDocument.java:138) 
        at by.roman.irtov.apachepoi.MainActivity.openOpenPDF(MainActivity.kt:50) 
        at by.roman.irtov.apachepoi.MainActivity.access$openOpenPDF(MainActivity.kt:15) 
        at by.roman.irtov.apachepoi.MainActivity$onCreate$1.onClick(MainActivity.kt:22) 
        at android.view.View.performClick(View.java:6608) 
        at android.view.View.performClickInternal(View.java:6585) 
        at android.view.View.access$3100(View.java:785) 
        at android.view.View$PerformClick.run(View.java:25921) 
        at android.os.Handler.handleCallback(Handler.java:873) 
        at android.os.Handler.dispatchMessage(Handler.java:99) 
        at android.os.Looper.loop(Looper.java:201) 
        at android.app.ActivityThread.main(ActivityThread.java:6810) 
        at java.lang.reflect.Method.invoke(Native Method) 
        at com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run(RuntimeInit.java:547) 
        at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:873) 

然后我尝试添加依赖库 implementation 'javax.xml.stream:stax-api:1.0',现在出现了如下错误:

 java.lang.RuntimeException: http://xml.org/sax/properties/declaration-handler
        at org.apache.xmlbeans.impl.store.Locale$SaxLoader.<init>(Locale.java:3397)
        at org.apache.xmlbeans.impl.store.Locale$XmlReaderSaxLoader.<init>(Locale.java:3087)
        at org.apache.xmlbeans.impl.store.Locale.getSaxLoader(Locale.java:3072)
        at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1272)
        at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1259)
        at org.apache.xmlbeans.impl.schema.SchemaTypeLoaderBase.parse(SchemaTypeLoaderBase.java:345)
        at org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument$Factory.parse(Unknown Source:6)
        at org.apache.poi.xwpf.usermodel.XWPFDocument.onDocumentRead(XWPFDocument.java:178)
        at org.apache.poi.ooxml.POIXMLDocument.load(POIXMLDocument.java:184)
        at org.apache.poi.xwpf.usermodel.XWPFDocument.<init>(XWPFDocument.java:138)
        at by.roman.irtov.apachepoi.MainActivity.openOpenPDF(MainActivity.kt:50)
        at by.roman.irtov.apachepoi.MainActivity.access$openOpenPDF(MainActivity.kt:15)
        at by.roman.irtov.apachepoi.MainActivity$onCreate$1.onClick(MainActivity.kt:22)
        at android.view.View.performClick(View.java:6608)
        at android.view.View.performClickInternal(View.java:6585)
        at android.view.View.access$3100(View.java:785)
        at android.view.View$PerformClick.run(View.java:25921)
        at android.os.Handler.handleCallback(Handler.java:873)
        at android.os.Handler.dispatchMessage(Handler.java:99)
        at android.os.Looper.loop(Looper.java:201)
        at android.app.ActivityThread.main(ActivityThread.java:6810)
        at java.lang.reflect.Method.invoke(Native Method)
        at com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run(RuntimeInit.java:547)
        at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:873)
     Caused by: org.xml.sax.SAXNotRecognizedException: http://xml.org/sax/properties/declaration-handler
        at org.apache.harmony.xml.ExpatReader.setProperty(ExpatReader.java:162)
        at org.apache.xmlbeans.impl.store.Locale$SaxLoader.<init>(Locale.java:3391)
        at org.apache.xmlbeans.impl.store.Locale$XmlReaderSaxLoader.<init>(Locale.java:3087) 
        at org.apache.xmlbeans.impl.store.Locale.getSaxLoader(Locale.java:3072) 
        at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1272) 
        at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1259) 
        at org.apache.xmlbeans.impl.schema.SchemaTypeLoaderBase.parse(SchemaTypeLoaderBase.java:345) 
        at org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument$Factory.parse(Unknown Source:6) 
        at org.apache.poi.xwpf.usermodel.XWPFDocument.onDocumentRead(XWPFDocument.java:178) 
        at org.apache.poi.ooxml.POIXMLDocument.load(POIXMLDocument.java:184) 
        at org.apache.poi.xwpf.usermodel.XWPFDocument.<init>(XWPFDocument.java:138) 
        at by.roman.irtov.apachepoi.MainActivity.openOpenPDF(MainActivity.kt:50) 
        at by.roman.irtov.apachepoi.MainActivity.access$openOpenPDF(MainActivity.kt:15) 
        at by.roman.irtov.apachepoi.MainActivity$onCreate$1.onClick(MainActivity.kt:22) 
        at android.view.View.performClick(View.java:6608) 
        at android.view.View.performClickInternal(View.java:6585) 
        at android.view.View.access$3100(View.java:785) 
        at android.view.View$PerformClick.run(View.java:25921) 
        at android.os.Handler.handleCallback(Handler.java:873) 
        at android.os.Handler.dispatchMessage(Handler.java:99) 
        at android.os.Looper.loop(Looper.java:201) 
        at android.app.ActivityThread.main(ActivityThread.java:6810) 
        at java.lang.reflect.Method.invoke(Native Method) 
        at com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run(RuntimeInit.java:547) 
        at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:873) 

有谁知道如何正确使用 Apache POI?它能在 Android 上使用吗?有没有可行的示例?(使用 Apache POI 将 .docx 转换为文本)

2 个答案:

答案 0 :(得分:0)

答案 1 :(得分:0)

我尝试使用 xmlbeans-3.1.1:添加了 poi-bin-4.1.0-20190412.tar 中的所有 jar 文件(去掉 xmlbeans-3.1.0,改用 xmlbeans-3.1.1),并加上 implementation 'stax:stax-api:1.0.1',结果出现了新的错误。调用 val opcPackage = OPCPackage.open(yourFile, PackageAccess.READ_WRITE) 和 val docx = XWPFDocument(opcPackage) 时,在后一行捕获到如下错误:

2019-09-12 16:30:32.493 10006-10006/by.roman.irtov.apachepoi E/Roma: Error
    org.apache.poi.ooxml.POIXMLException: org.apache.xmlbeans.XmlException: error: The 'namespace-prefix' feature is not supported while the 'namespaces' feature is enabled.
        at org.apache.poi.xwpf.usermodel.XWPFDocument.onDocumentRead(XWPFDocument.java:267)
        at org.apache.poi.ooxml.POIXMLDocument.load(POIXMLDocument.java:184)
        at org.apache.poi.xwpf.usermodel.XWPFDocument.<init>(XWPFDocument.java:137)
        at by.roman.irtov.apachepoi.MainActivity.readDocx(MainActivity.kt:38)
        at by.roman.irtov.apachepoi.MainActivity.access$readDocx(MainActivity.kt:17)
        at by.roman.irtov.apachepoi.MainActivity$onCreate$1.onClick(MainActivity.kt:26)
        at android.view.View.performClick(View.java:6608)
        at android.view.View.performClickInternal(View.java:6585)
        at android.view.View.access$3100(View.java:785)
        at android.view.View$PerformClick.run(View.java:25921)
        at android.os.Handler.handleCallback(Handler.java:873)
        at android.os.Handler.dispatchMessage(Handler.java:99)
        at android.os.Looper.loop(Looper.java:201)
        at android.app.ActivityThread.main(ActivityThread.java:6810)
        at java.lang.reflect.Method.invoke(Native Method)
        at com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run(RuntimeInit.java:547)
        at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:873)
     Caused by: org.apache.xmlbeans.XmlException: error: The 'namespace-prefix' feature is not supported while the 'namespaces' feature is enabled.
        at org.apache.xmlbeans.impl.store.Locale$SaxLoader.load(Locale.java:3465)
        at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1276)
        at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1263)
        at org.apache.xmlbeans.impl.schema.SchemaTypeLoaderBase.parse(SchemaTypeLoaderBase.java:345)
        at org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument$Factory.parse(Unknown Source:6)
        at org.apache.poi.xwpf.usermodel.XWPFDocument.onDocumentRead(XWPFDocument.java:177)
        at org.apache.poi.ooxml.POIXMLDocument.load(POIXMLDocument.java:184) 
        at org.apache.poi.xwpf.usermodel.XWPFDocument.<init>(XWPFDocument.java:137) 
        at by.roman.irtov.apachepoi.MainActivity.readDocx(MainActivity.kt:38) 
        at by.roman.irtov.apachepoi.MainActivity.access$readDocx(MainActivity.kt:17) 
        at by.roman.irtov.apachepoi.MainActivity$onCreate$1.onClick(MainActivity.kt:26) 
        at android.view.View.performClick(View.java:6608) 
        at android.view.View.performClickInternal(View.java:6585) 
        at android.view.View.access$3100(View.java:785) 
        at android.view.View$PerformClick.run(View.java:25921) 
        at android.os.Handler.handleCallback(Handler.java:873) 
        at android.os.Handler.dispatchMessage(Handler.java:99) 
        at android.os.Looper.loop(Looper.java:201) 
        at android.app.ActivityThread.main(ActivityThread.java:6810) 
        at java.lang.reflect.Method.invoke(Native Method) 
        at com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run(RuntimeInit.java:547) 
        at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:873) 
     Caused by: org.xml.sax.SAXNotSupportedException: The 'namespace-prefix' feature is not supported while the 'namespaces' feature is enabled.
        at org.apache.harmony.xml.ExpatReader.parse(ExpatReader.java:258)
        at org.apache.xmlbeans.impl.store.Locale$SaxLoader.load(Locale.java:3431)
        at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1276) 
        at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1263) 
        at org.apache.xmlbeans.impl.schema.SchemaTypeLoaderBase.parse(SchemaTypeLoaderBase.java:345) 
        at org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument$Factory.parse(Unknown Source:6) 
        at org.apache.poi.xwpf.usermodel.XWPFDocument.onDocumentRead(XWPFDocument.java:177) 
        at org.apache.poi.ooxml.POIXMLDocument.load(POIXMLDocument.java:184) 
        at org.apache.poi.xwpf.usermodel.XWPFDocument.<init>(XWPFDocument.java:137) 
        at by.roman.irtov.apachepoi.MainActivity.readDocx(MainActivity.kt:38) 
        at by.roman.irtov.apachepoi.MainActivity.access$readDocx(MainActivity.kt:17) 
        at by.roman.irtov.apachepoi.MainActivity$onCreate$1.onClick(MainActivity.kt:26) 
        at android.view.View.performClick(View.java:6608) 
        at android.view.View.performClickInternal(View.java:6585) 
        at android.view.View.access$3100(View.java:785) 
        at android.view.View$PerformClick.run(View.java:25921) 
        at android.os.Handler.handleCallback(Handler.java:873) 
        at android.os.Handler.dispatchMessage(Handler.java:99) 
        at android.os.Looper.loop(Looper.java:201) 
        at android.app.ActivityThread.main(ActivityThread.java:6810) 
        at java.lang.reflect.Method.invoke(Native Method) 
        at com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run(RuntimeInit.java:547) 
        at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:873) 

我尝试了不同版本的库,但没有一个版本(4.1.0、4.0.1、4.0.0、3.12 以及其他版本)能够正常工作。不过,POI(ExtractorFactory)可以读取 .doc 文件,我能够从中获取文本,这部分工作正常。

 // Fragment for the .doc case: resolve Hello.doc under the external storage
 // directory, open it as a POIFSFileSystem, and ask ExtractorFactory for a
 // WordExtractor over it.
 val dir = Environment.getExternalStorageDirectory()
        val yourFile = File(dir, "Hello.doc")
        val fileSystem = POIFSFileSystem(yourFile)
        val textExtractor = ExtractorFactory.createExtractor<WordExtractor>(fileSystem)

XWPFDocument(.docx 文件)目前还无法在 Android 上运行。也许可以在服务器端部署 Apache POI 来处理并进行测试,但这不由我决定。