我已经在网站上查找答案,我似乎无法找到问题的解决方案。
我尝试过使用此
$cleaned = strval(str_replace("\0", "", $string));
我也尝试过:
但是我总是错误file_exists()
期望参数1是一个有效的路径,字符串在
但是我可以使用file_get_contents()
,它将打开该目录中的文档。但是,当我将docx文件传递给文本时,它将无法打开目录。
这是我的代码:
class DocxConversion{
private $filename;
public function __construct($filePath) {
$this->filename = $filePath;
}
private function read_doc() {
$fileHandle = fopen($this->filename, "r");
$line = @fread($fileHandle, filesize($this->filename));
$lines = explode(chr(0x0D),$line);
$outtext = "";
foreach($lines as $thisline)
{
$pos = strpos($thisline, chr(0x00));
if (($pos !== FALSE)||(strlen($thisline)==0))
{
} else {
$outtext .= $thisline." ";
}
}
$outtext = preg_replace("/[^a-zA-Z0-9\s\,\.\-\n\r\t@\/\_\(\)]/","",$outtext);
return $outtext;
}
private function read_docx(){
$striped_content = '';
$content = '';
$zip = zip_open($this->filename);
if (!$zip || is_numeric($zip)) return false;
while ($zip_entry = zip_read($zip)) {
if (zip_entry_open($zip, $zip_entry) == FALSE) continue;
if (zip_entry_name($zip_entry) != "word/document.xml") continue;
$content .= zip_entry_read($zip_entry, zip_entry_filesize($zip_entry));
zip_entry_close($zip_entry);
}// end while
zip_close($zip);
$content = str_replace('</w:r></w:p></w:tc><w:tc>', " ", $content);
$content = str_replace('</w:r></w:p>', "\r\n", $content);
$striped_content = strip_tags($content);
return $striped_content;
}
public function convertToText() {
if(isset($this->filename) && !file_exists($this->filename)) {
return "File Not exists";
}
$fileArray = pathinfo($this->filename);
$file_ext = $fileArray['extension'];
echo $file_ext;
if($ext == "doc" || $ext == "docx" || $ext == "xlsx" || $ext == "pptx")
{
if($file_ext == "doc") {
return $this->read_doc();
} elseif($file_ext == "docx") {
return $this->read_docx();
} elseif($file_ext == "xlsx") {
return $this->xlsx_to_text();
}elseif($file_ext == "pptx") {
return $this->pptx_to_text();
}
} else {
return "Invalid File Type";
}
}
}
//calling class to scrape text from file
//echo $attachment['name'] . "<br />";
$filenames = file_get_contents('../api/attachmentCV/' . $attachment['name']);
//echo $filename;
$docObj = new DocxConversion($filenames);
//converting string to text
$docText= $docObj->convertToText();
echo $docText;