我想创建一个类,利用Levenshtein距离函数,将指定文档的文本与目录中所有其他文档的文本逐一进行比较。
我有基本的思路,但不知道如何用PHP实现。我的背景是C#,所以我会尽可能提供详细的信息。
// Sketch of a class that compares one document against every document in a
// directory using the Levenshtein distance.
// NOTE(review): this is pseudo-code, not runnable PHP — `for each ... in`,
// `.add(...)`, `file.FilePath()` and the unnamed method below are placeholders.
class ComputeLevenshtein
{
// NOTE(review): `new Array()` is not accepted by PHP as a property default; an array literal is the usual form.
public $filePathList = new Array(); //The array that stores the absolute path of all documents within a specified directory
public $directory;
public $filePath; //This is the document that will be compared for each document in a directory
// Intended to set $this->directory; the assignment is left unfinished.
public function __construct() {
$this->directory = //;
/* I'm stuck here, once a user registers, a separate directory is
named after the user. I need to be able to read the username
from the Session Variable once the user logs in.
I'll just have to pass it in as a parameter.
Do I have to create a session wrapper?
If it's too complex,
then I'll just start off with a static directory */
}
// Returns the array containing each filePath for every document in a directory.
// NOTE(review): nothing is actually returned; paths are only appended to $this->filePathList.
function computeFilePathList($directory)
{
for each file in Directory
{
// NOTE(review): `$filepath` here vs `$filePath` on the next line — PHP variable names are case-sensitive.
$filepath = file.FilePath(); //store the filepath in a variable
$this->filePathList.add($filePath) //add the filepath to the array
}
}
// Compares the document at $docFilePath with each path in filePathList.
// NOTE(review): $lev is overwritten on every iteration, so only the distance
// for the last path is returned; the method also has no name.
function ($docFilePath) // returns the Levenshtein Distance
{
for each path in filePathList
{
// The reference document is re-read on every iteration.
$input= readDoc($docFilePath);
$lev = levenshtein($input, readDoc($path));
}
return $lev;
}
// Placeholder for the document reader; $text is never assigned in this sketch.
function readDoc($docFilePath) // Returns the raw text of that doc
{
//I Have the code for reading the doc in a separate function
return $text;
}
}
答案 0 :(得分:1)
这个怎么样:
/**
 * Holds two strings and exposes their Levenshtein distance on demand.
 *
 * Read-only magic properties:
 *  - input:   first string given to the constructor
 *  - compare: second string given to the constructor
 *  - dist:    levenshtein(input, compare), computed on first access and cached
 *
 * Any other property name yields null.
 */
class Levenshtein
{
    /** @var array Backing store for the magic properties. */
    private $_p = array();

    /**
     * @param string $input   Text to measure.
     * @param string $compare Text to check against.
     */
    public function __construct($input, $compare)
    {
        $this->_p = array(
            'input'   => $input,
            'compare' => $compare,
        );
    }

    /**
     * Magic reader for input, compare and the lazily computed dist.
     *
     * @param string $property Requested property name.
     * @return mixed Stored value, or null when the name is unknown.
     */
    public function __get($property)
    {
        // Compute the distance only once, the first time it is asked for.
        $needsDistance = ($property === 'dist') && !array_key_exists('dist', $this->_p);
        if ($needsDistance) {
            $this->_p['dist'] = levenshtein($this->_p['input'], $this->_p['compare']);
        }

        return array_key_exists($property, $this->_p) ? $this->_p[$property] : null;
    }
}
/**
 * Compares one reference file against files inside a directory.
 *
 * Each file of the directory is reachable as a magic property named after the
 * file (e.g. $dirLev->{'a.txt'}); the value is a Levenshtein object built from
 * the reference file's contents and that file's contents. Results are cached
 * per file name.
 */
class DirectoryLevenshtein
{
    /** @var string Directory path, always ending in '/'. */
    private $_directory;

    /** @var string|null Path of the reference file; can be set only once. */
    private $_filePath;

    /** @var array Cache of Levenshtein objects keyed by file name. */
    private $_distances = array();

    /**
     * @param string      $directoryPath Directory whose files are compared.
     * @param string|null $filePath      Optional reference file.
     * @throws Exception When the directory does not exist or the reference
     *                   file is not readable.
     */
    public function __construct($directoryPath, $filePath = null)
    {
        if (!is_dir($directoryPath)) {
            throw new Exception("Path '$directoryPath' does not exist");
        }
        // Normalise so file names can be appended directly.
        if (substr($directoryPath, -1) !== '/') {
            $directoryPath .= '/';
        }
        $this->_directory = $directoryPath;
        if ($filePath !== null) {
            if (!$this->setFilePath($filePath)) {
                throw new Exception("File '$filePath' is not readable");
            }
        }
    }

    /**
     * Returns the comparison object for a file of the directory.
     *
     * @param string $file File name relative to the directory.
     * @return Levenshtein|null Null when no reference file is set, when the
     *                          entry is not a readable regular file, or when
     *                          either file cannot be read.
     */
    public function __get($file)
    {
        if (array_key_exists($file, $this->_distances)) {
            return $this->_distances[$file];
        }
        if (empty($this->_filePath)) {
            return null;
        }

        $target = $this->_directory . $file;
        // is_readable() alone is also true for sub-directories, which
        // file_get_contents() cannot read; require a regular file.
        if (!is_file($target) || !is_readable($target)) {
            return null;
        }

        $input = file_get_contents($this->_filePath);
        $compare = file_get_contents($target);
        if ($input === false || $compare === false) {
            return null;
        }

        $this->_distances[$file] = new Levenshtein($input, $compare);
        return $this->_distances[$file];
    }

    /**
     * Lists the entries of the directory without the '.' and '..' links.
     *
     * @return array Sequentially indexed entry names, in scandir() order.
     */
    public function getDirectoryContents()
    {
        $entries = scandir($this->_directory);
        if ($entries === false) {
            return array();
        }
        // '.' and '..' are not guaranteed to come first: names starting with
        // characters such as '-' or '#' sort before them, so filter by value
        // instead of shifting from the front.
        return array_values(array_diff($entries, array('.', '..')));
    }

    /**
     * Sets the reference file, once.
     *
     * @param string $filePath Path of the reference file.
     * @return bool True when stored; false when a reference file is already
     *              set or the path is not readable.
     */
    public function setFilePath($filePath)
    {
        if (empty($this->_filePath) && is_readable($filePath)) {
            $this->_filePath = $filePath;
            return true;
        }
        return false;
    }
}
要使用它,请执行以下操作:
// Example usage. A session wrapper could be used instead of $_SESSION.
$userDir = '/path/to/user/dirs/' . $_SESSION['user'];
// File to compare all files with
$filePath = '/path/to/file.txt';
$dirLev = new DirectoryLevenshtein($userDir, $filePath);
// Files in directory
$files = $dirLev->getDirectoryContents();
// Distances
foreach ($files as $file) {
    // Variable property access: passes the file name itself to __get(),
    // not the literal name "file".
    $result = $dirLev->$file;
    if ($result === null) {
        // Entry could not be compared (e.g. not readable); skip it.
        continue;
    }
    echo "$file: {$result->dist}\n";
}