<?php
// Global tally shared by countWordsfrequency(): word => total occurrences across all processed files.
$wordFrequencyArray = [];
/**
 * Counts the words of one file and adds the counts to the global tally.
 *
 * A word is a maximal run of ASCII letters and digits; every other byte is a
 * separator. Matching is case-sensitive. Counts accumulate in the global
 * $wordFrequencyArray (word => total), so calling this once per file yields
 * totals across all files processed so far.
 *
 * @param string $filename Path of the file to read.
 * @return void
 */
function countWordsfrequency($filename) {
    global $wordFrequencyArray;

    $contentoffile = file_get_contents($filename);
    if ($contentoffile === false) {
        // Unreadable file: file_get_contents() has already raised a warning,
        // so simply contribute nothing to the tally.
        return;
    }

    // PREG_SPLIT_NO_EMPTY drops the empty strings produced by adjacent separators.
    $wordArray = preg_split('/[^a-zA-Z0-9]/', $contentoffile, -1, PREG_SPLIT_NO_EMPTY);
    if ($wordArray === false) {
        return;
    }

    foreach (array_count_values($wordArray) as $word => $count) {
        if (!isset($wordFrequencyArray[$word])) {
            $wordFrequencyArray[$word] = 0;
        }
        $wordFrequencyArray[$word] += $count;
    }
}
// Files whose words are tallied, processed in this order.
$filenames = [
    'file1.txt',
    'file2.txt',
    'file3.txt',
    'file4.txt',
];

// Feed each file into the shared tally.
foreach ($filenames as $filename) {
    countWordsfrequency($filename);
}

// Dump the accumulated word => count map.
print_r($wordFrequencyArray);
?>
这是我的代码,用于统计多个文件中每个单词的出现频率并打印出来。现在我想进一步找出单词与文件之间的对应关系(交集),即哪个单词出现在哪些文件中。例如,对于单词“stack”,我想打印出它出现在哪些文件中,以及它的频率(频率我认为已经计算出来了)。
最终结果应该类似于:每个单词的频率,以及出现该单词的文件。
我该如何处理?我是否应该在 countWords 函数本身的 for 循环中进行检查?
答案 0 :(得分:0)
您需要保存更多信息。这里我不打算使用类,因为您似乎并不需要那么重量级的方案。
<?php
// Global tally shared by countWordsFrequency(), keyed by word.
$wordFrequencies = [];
/**
 * Counts the words of one file, recording both overall and per-file totals.
 *
 * A word is a maximal run of ASCII letters and digits; every other byte is a
 * separator. Matching is case-sensitive. Results accumulate in the global
 * $wordFrequencies, whose entries have the shape:
 *
 *     $wordFrequencies[$word] = array(
 *         'total' => <occurrences across all processed files>,
 *         'files' => array(<filename> => <occurrences in that file>, ...),
 *     );
 *
 * so the keys of 'files' list the files in which the word appears.
 *
 * @param string $filename Path of the file to read; also used as the key under 'files'.
 * @return void
 */
function countWordsFrequency($filename) {
    global $wordFrequencies;

    $contentoffile = file_get_contents($filename);
    if ($contentoffile === false) {
        // Unreadable file: file_get_contents() has already raised a warning,
        // so simply contribute nothing to the tally.
        return;
    }

    // PREG_SPLIT_NO_EMPTY drops the empty strings produced by adjacent separators.
    $wordArray = preg_split('/[^a-zA-Z0-9]/', $contentoffile, -1, PREG_SPLIT_NO_EMPTY);
    if ($wordArray === false) {
        return;
    }

    foreach (array_count_values($wordArray) as $word => $count) {
        // Write straight into the global array: PHP arrays are copied on
        // assignment, so updating a local copy would be lost.
        if (!isset($wordFrequencies[$word])) {
            $wordFrequencies[$word] = array('total' => 0, 'files' => array());
        }

        // If this is the first occurrence of this word in the file, start a count.
        if (!isset($wordFrequencies[$word]['files'][$filename])) {
            $wordFrequencies[$word]['files'][$filename] = 0;
        }

        // Increment counts for both the total and the file.
        $wordFrequencies[$word]['total'] += $count;
        $wordFrequencies[$word]['files'][$filename] += $count;
    }
}
// Files whose words are tallied, processed in this order.
$filenames = [
    'file1.txt',
    'file2.txt',
    'file3.txt',
    'file4.txt',
];

// Feed each file into the shared tally.
foreach ($filenames as $filename) {
    countWordsFrequency($filename);
}

// Dump the accumulated per-word totals and per-file counts.
print_r($wordFrequencies);
?>