我已经在PHPStorm(IDE)上安装并配置了PHPWord。我正在尝试使用PHPWord从下面名为“helloWorld.docx”的Word文档中读取“Learn from yesterday, live for today, hope for tomorrow...”这一行。
这是到目前为止我加载和阅读文档的代码:
<?php
// Pull in PHPWord through its bootstrap file so the \PhpOffice\PhpWord classes can be used.
require_once 'PHPWord/bootstrap.php';

// Create a reader for the "Word2007" format and load the .docx document.
$objReader = \PhpOffice\PhpWord\IOFactory::createReader("Word2007");
$phpWord = $objReader->load("helloWorld.docx");

// getSection(0) yields one Section object (the first section), hence the singular name.
$section = $phpWord->getSection(0);

// var_dump() prints its argument itself and returns nothing, so wrapping it in echo is redundant.
var_dump($section);
输出:
/usr/bin/php7.2 /home/wade/PhpstormProjects/getWord/readDoc.php
object(PhpOffice\PhpWord\Element\Section)#21 (21) {
["container":protected]=>
string(7) "Section"
["style":"PhpOffice\PhpWord\Element\Section":private]=>
object(PhpOffice\PhpWord\Style\Section)#22 (32) {
["orientation":"PhpOffice\PhpWord\Style\Section":private]=>
string(8) "portrait"
["paper":"PhpOffice\PhpWord\Style\Section":private]=>
object(PhpOffice\PhpWord\Style\Paper)#14 (8) {
["sizes":"PhpOffice\PhpWord\Style\Paper":private]=>
array(7) {
["A3"]=>
array(3) {
[0]=>
int(297)
[1]=>
int(420)
[2]=>
string(2) "mm"
}
["A4"]=>
array(3) {
[0]=>
int(210)
[1]=>
int(297)
[2]=>
string(2) "mm"
}
["A5"]=>
array(3) {
[0]=>
int(148)
[1]=>
int(210)
[2]=>
string(2) "mm"
}
["B5"]=>
array(3) {
[0]=>
int(176)
[1]=>
int(250)
[2]=>
string(2) "mm"
}
["Folio"]=>
array(3) {
[0]=>
float(8.5)
[1]=>
int(13)
[2]=>
string(2) "in"
}
["Legal"]=>
array(3) {
[0]=>
float(8.5)
[1]=>
int(14)
[2]=>
string(2) "in"
}
["Letter"]=>
array(3) {
[0]=>
float(8.5)
[1]=>
int(11)
[2]=>
string(2) "in"
}
}
["size":"PhpOffice\PhpWord\Style\Paper":private]=>
string(2) "A4"
["width":"PhpOffice\PhpWord\Style\Paper":private]=>
float(11905.511811024)
["height":"PhpOffice\PhpWord\Style\Paper":private]=>
float(16837.795275591)
["styleName":protected]=>
NULL
["index":protected]=>
NULL
["aliases":protected]=>
array(0) {
}
["isAuto":"PhpOffice\PhpWord\Style\AbstractStyle":private]=>
bool(false)
}
["pageSizeW":"PhpOffice\PhpWord\Style\Section":private]=>
string(15) "11905.511811024"
["pageSizeH":"PhpOffice\PhpWord\Style\Section":private]=>
string(15) "16837.795275591"
["marginTop":"PhpOffice\PhpWord\Style\Section":private]=>
string(4) "1440"
["marginLeft":"PhpOffice\PhpWord\Style\Section":private]=>
string(4) "1440"
["marginRight":"PhpOffice\PhpWord\Style\Section":private]=>
string(4) "1440"
["marginBottom":"PhpOffice\PhpWord\Style\Section":private]=>
string(4) "1440"
["gutter":"PhpOffice\PhpWord\Style\Section":private]=>
string(1) "0"
["headerHeight":"PhpOffice\PhpWord\Style\Section":private]=>
string(3) "720"
["footerHeight":"PhpOffice\PhpWord\Style\Section":private]=>
string(3) "720"
["pageNumberingStart":"PhpOffice\PhpWord\Style\Section":private]=>
NULL
["colsNum":"PhpOffice\PhpWord\Style\Section":private]=>
int(1)
["colsSpace":"PhpOffice\PhpWord\Style\Section":private]=>
string(3) "720"
["breakType":"PhpOffice\PhpWord\Style\Section":private]=>
NULL
["lineNumbering":"PhpOffice\PhpWord\Style\Section":private]=>
NULL
["borderTopSize":protected]=>
NULL
["borderTopColor":protected]=>
NULL
["borderTopStyle":protected]=>
NULL
["borderLeftSize":protected]=>
NULL
["borderLeftColor":protected]=>
NULL
["borderLeftStyle":protected]=>
NULL
["borderRightSize":protected]=>
NULL
["borderRightColor":protected]=>
NULL
["borderRightStyle":protected]=>
NULL
["borderBottomSize":protected]=>
NULL
["borderBottomColor":protected]=>
NULL
["borderBottomStyle":protected]=>
NULL
["styleName":protected]=>
NULL
["index":protected]=>
NULL
["aliases":protected]=>
array(0) {
}
["isAuto":"PhpOffice\PhpWord\Style\AbstractStyle":private]=>
bool(false)
}
["headers":"PhpOffice\PhpWord\Element\Section":private]=>
array(0) {
}
["footers":"PhpOffice\PhpWord\Element\Section":private]=>
array(0) {
}
["footnoteProperties":"PhpOffice\PhpWord\Element\Section":private]=>
NULL
["elements":protected]=>
array(4) {
[0]=>
object(PhpOffice\PhpWord\Element\TextRun)#34 (18) {
["container":protected]=>
string(7) "TextRun"
["paragraphStyle":protected]=>
object(PhpOffice\PhpWord\Style\Paragraph)#35 (34) {
["aliases":protected]=>
array(1) {
["line-height"]=>
string(10) "lineHeight"
}
["basedOn":"PhpOffice\PhpWord\Style\Paragraph":private]=>
string(6) "Normal"
["next":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["alignment":"PhpOffice\PhpWord\Style\Paragraph":private]=>
string(0) ""
["indentation":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["spacing":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["lineHeight":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["widowControl":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(true)
["keepNext":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["keepLines":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["pageBreakBefore":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["numStyle":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["numLevel":"PhpOffice\PhpWord\Style\Paragraph":private]=>
int(0)
["tabs":"PhpOffice\PhpWord\Style\Paragraph":private]=>
array(0) {
}
["shading":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["contextualSpacing":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["bidi":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["textAlignment":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["suppressAutoHyphens":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["borderTopSize":protected]=>
NULL
["borderTopColor":protected]=>
NULL
["borderTopStyle":protected]=>
NULL
["borderLeftSize":protected]=>
NULL
["borderLeftColor":protected]=>
NULL
["borderLeftStyle":protected]=>
NULL
["borderRightSize":protected]=>
NULL
["borderRightColor":protected]=>
NULL
["borderRightStyle":protected]=>
NULL
["borderBottomSize":protected]=>
NULL
["borderBottomColor":protected]=>
NULL
["borderBottomStyle":protected]=>
NULL
["styleName":protected]=>
NULL
["index":protected]=>
NULL
["isAuto":"PhpOffice\PhpWord\Style\AbstractStyle":private]=>
bool(false)
}
["elements":protected]=>
array(1) {
[0]=>
object(PhpOffice\PhpWord\Element\Text)#41 (18) {
["text":protected]=>
string(134) ""Learn from yesterday, live for today, hope for tomorrow. The important thing is not to stop questioning." (Albert Einstein)"
["fontStyle":protected]=>
object(PhpOffice\PhpWord\Style\Font)#43 (28) {
["aliases":protected]=>
array(1) {
["line-height"]=>
string(10) "lineHeight"
}
["type":"PhpOffice\PhpWord\Style\Font":private]=>
string(4) "text"
["name":"PhpOffice\PhpWord\Style\Font":private]=>
string(15) "Times New Roman"
["hint":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["size":"PhpOffice\PhpWord\Style\Font":private]=>
int(20)
["color":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["bold":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["italic":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["underline":"PhpOffice\PhpWord\Style\Font":private]=>
string(4) "none"
["superScript":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["subScript":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["strikethrough":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["doubleStrikethrough":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["smallCaps":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["allCaps":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["fgColor":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["scale":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["spacing":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["kerning":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["paragraph":"PhpOffice\PhpWord\Style\Font":private]=>
object(PhpOffice\PhpWord\Style\Paragraph)#42 (34) {
["aliases":protected]=>
array(1) {
["line-height"]=>
string(10) "lineHeight"
}
["basedOn":"PhpOffice\PhpWord\Style\Paragraph":private]=>
string(6) "Normal"
["next":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["alignment":"PhpOffice\PhpWord\Style\Paragraph":private]=>
string(0) ""
["indentation":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["spacing":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["lineHeight":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["widowControl":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(true)
["keepNext":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["keepLines":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["pageBreakBefore":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
完整的输出内容太长,无法发布,但是如果您向下滚动,则可以在此代码段中看到我正在寻找的字符串
我的主要问题是:“有没有办法不必使用var_dump并在大量输出中搜索,就能获取这个字符串?”
答案 0 :(得分:1)
文本信息位于[text]
属性中,而该属性又嵌套在[elements]
属性中。只需使用浏览器的“在页面中查找”功能,在输出的对象中搜索这两个属性名,即可看到要查找的文本。
这两个属性受到保护,因此您必须将它们公开,才能访问/提取它们。
这些属性在PHPWord库中的定义位置见:https://stackoverflow.com/a/50989007/8510094
一旦将它们改为public
,就可以开始逐层剥离所得到的对象,直到得到一个[elements]->[text]
属性仅位于“树”中下一层的对象。
因此,算法是:1)找到这些[text]
属性,2)查看拥有这些属性的对象的路径,3)逐级切下更高级别的对象和数组,4)得到一个[elements]->[text]
属性只是第二级的对象,5)将[text]
属性的所有值收集在一个数组中。
请勿尝试使用foreach
循环、递归函数等方式来访问文本。生成的对象非常庞大,您不会有足够的内存或时间去迭代、展平或归约如此庞大的多维关联数组。
或者,可以对PHPWord库文件做一些修改,使得在将Word文件加载到PHPWord时,生成的对象中不再包含不必要的属性和值(样式、段落信息等)。
在PHPSpreadsheet中,他们实现了一种仅从Excel文件中获取实际数据的方法(格式、样式信息等均被剥离)。另一方面,PHPWord虽然也声明了$readDataOnly
属性,但仅止步于此,出于某种原因并未实现只读取实际文本数据的机制。