我找到了一个将.doc,.docx和.xls / .xlsx文件转换为文本格式的脚本。它成功地将.doc和.docx文件转换为文本格式。但是,当我尝试转换Excel文件时,它会显示以下错误消息。
你知道为什么它会告诉我这个错误信息,我该如何修复它?谢谢。
错误消息:
Warning: Missing argument 1 for DocxConversion::xlsx_to_text(), called in D:\software installed\xampp\htdocs\contact-management\class-free.php on line 105 and defined in D:\software installed\xampp\htdocs\contact-management\class-free.php on line 48
Notice: Undefined variable: input_file in D:\software installed\xampp\htdocs\contact-management\class-free.php on line 52
Warning: ZipArchive::open(): Empty string as source in D:\software installed\xampp\htdocs\contact-management\class-free.php on line 52
Php Class(class-free.php):
<?php
class DocxConversion{
private $filename;
public function __construct($filePath) {
$this->filename = $filePath;
}
private function read_doc() {
$doc = new doc;
$doc->read($this->filename);
return $doc->parse();
}
private function read_docx(){
$striped_content = '';
$content = '';
$zip = zip_open($this->filename);
if (!$zip || is_numeric($zip)) return false;
while ($zip_entry = zip_read($zip)) {
if (zip_entry_open($zip, $zip_entry) == FALSE) continue;
if (zip_entry_name($zip_entry) != "word/document.xml") continue;
$content .= zip_entry_read($zip_entry, zip_entry_filesize($zip_entry));
zip_entry_close($zip_entry);
}// end while
zip_close($zip);
$content = str_replace('</w:r></w:p></w:tc><w:tc>', " ", $content);
$content = str_replace('</w:r></w:p>', "\r\n", $content);
$striped_content = strip_tags($content);
return $striped_content;
}
/************************excel sheet************************************/
function xlsx_to_text($input_file){
$xml_filename = "xl/sharedStrings.xml"; //content file name
$zip_handle = new ZipArchive;
$output_text = "";
if(true === $zip_handle->open($input_file)){
if(($xml_index = $zip_handle->locateName($xml_filename)) !== false){
$xml_datas = $zip_handle->getFromIndex($xml_index);
$xml_handle = DOMDocument::loadXML($xml_datas, LIBXML_NOENT | LIBXML_XINCLUDE | LIBXML_NOERROR | LIBXML_NOWARNING);
$output_text = strip_tags($xml_handle->saveXML());
}else{
$output_text .="";
}
$zip_handle->close();
}else{
$output_text .="";
}
return $output_text;
}
/*************************power point files*****************************/
function pptx_to_text($input_file){
$zip_handle = new ZipArchive;
$output_text = "";
if(true === $zip_handle->open($input_file)){
$slide_number = 1; //loop through slide files
while(($xml_index = $zip_handle->locateName("ppt/slides/slide".$slide_number.".xml")) !== false){
$xml_datas = $zip_handle->getFromIndex($xml_index);
$xml_handle = DOMDocument::loadXML($xml_datas, LIBXML_NOENT | LIBXML_XINCLUDE | LIBXML_NOERROR | LIBXML_NOWARNING);
$output_text .= strip_tags($xml_handle->saveXML());
$slide_number++;
}
if($slide_number == 1){
$output_text .="";
}
$zip_handle->close();
}else{
$output_text .="";
}
return $output_text;
}
public function convertToText() {
if(isset($this->filename) && !file_exists($this->filename)) {
// return "File Not exists";
}
$fileArray = pathinfo($this->filename);
$file_ext = $fileArray['extension'];
if($file_ext == "doc" || $file_ext == "docx" || $file_ext == "xlsx" || $file_ext == "pptx")
{
if($file_ext == "doc") {
return $this->read_doc();
} elseif($file_ext == "docx") {
return $this->read_docx();
} elseif($file_ext == "xlsx") {
return $this->xlsx_to_text();
}elseif($file_ext == "pptx") {
return $this->pptx_to_text();
}
} else {
return "Invalid File Type";
}
}
}
//$docObj = new DocxConversion("test102.doc");
//$docObj = new DocxConversion("content.doc");
//$docObj = new DocxConversion("english.doc");
//$docObj = new DocxConversion("content.docx");
//$docObj = new DocxConversion("test.xlsx");
//$docObj = new DocxConversion("test.pptx");
//echo $docText= $docObj->convertToText();
?>
答案 0 :(得分:0)
问题在于convertToText()
方法,它调用不带参数的不同转换器,而它们都需要$input_file
参数。
此修复必须使其正常工作:
if($file_ext == "doc" || $file_ext == "docx" || $file_ext == "xlsx" || $file_ext == "pptx") {
if($file_ext == "doc") {
return $this->read_doc($this->filename);
} elseif($file_ext == "docx") {
return $this->read_docx($this->filename);
} elseif($file_ext == "xlsx") {
return $this->xlsx_to_text($this->filename);
}elseif($file_ext == "pptx") {
return $this->pptx_to_text($this->filename);
} else {
return "Invalid File Type";
}
}
另外,请更改此行:
$xml_handle = DOMDocument::loadXML($xml_datas, LIBXML_NOENT | LIBXML_XINCLUDE | LIBXML_NOERROR | LIBXML_NOWARNING);
使用此代码:
$xml_handle = new DOMDocument();
$xml_handle->loadXML($xml_datas, LIBXML_NOENT | LIBXML_XINCLUDE | LIBXML_NOERROR | LIBXML_NOWARNING);
因为DOMDocument::loadXML(..)
是一个实例方法。它的原型是:
public mixed loadXML ( string $source [, int $options = 0 ] )
有关详细信息,请参阅PHP:DOMDocument。