我有一个超过 100 万行的大文本文件(84 MB)。
我想把它拆分成多个文件,每个文件 5 万行。该怎么做?我在网上搜索过,但一无所获。
答案 0 :(得分:1)
这是我的脚本的修改版本:Read through huge text files and store each line in database
// Lift PHP's execution time limit (0 = no limit) so a long-running split is not aborted.
set_time_limit(0);

// Byte-based split of b.php into parts of 50 * 1024 bytes, written under ./test.
splitData('b.php', __DIR__ . '/test', 50 * 1024);

// Line-based split of the same file into parts of 50000 lines, written under ./test.
splitLine('b.php', __DIR__ . '/test', 50000);
/**
 * Split a file into consecutive parts of at most $chunkSize bytes each.
 *
 * Parts are written into $destination and named "<name>_<n>.<ext>", where
 * <name> and <ext> are taken from pathinfo($filename) and <n> starts at 1.
 * The ".<ext>" suffix is omitted when the source file has no extension.
 * An empty source file produces no parts.
 *
 * @param string $filename    Path of the file to split.
 * @param string $destination Directory receiving the parts (created if missing).
 * @param int    $chunkSize   Maximum size of one part in bytes; must be >= 1.
 *
 * @return bool True when every part was written and the source was closed;
 *              false on an invalid chunk size or any open/read/write failure.
 */
function splitData($filename, $destination, $chunkSize) {
    $chunkSize = (int) $chunkSize;
    if ($chunkSize < 1) {
        // fread() cannot read a non-positive length, so the loop could never reach EOF.
        return false;
    }

    $handle = fopen($filename, 'rb');
    if ($handle === false) {
        return false;
    }

    // The second is_dir() covers another process creating the directory concurrently.
    if (!is_dir($destination) && !mkdir($destination, 0777, true) && !is_dir($destination)) {
        fclose($handle);
        return false;
    }

    $pathInfo = pathinfo($filename);
    $prefix = $destination . DIRECTORY_SEPARATOR . $pathInfo['filename'] . '_';
    // pathinfo() only sets 'extension' when the name actually has one.
    $suffix = isset($pathInfo['extension']) ? '.' . $pathInfo['extension'] : '';

    $counter = 0;
    while (!feof($handle)) {
        $chunk = fread($handle, $chunkSize);
        if ($chunk === false) {
            fclose($handle);
            return false;
        }
        if ($chunk === '') {
            // EOF may only be detected by one extra read (e.g. when the size is an
            // exact multiple of $chunkSize); do not emit an empty trailing part.
            break;
        }

        $counter++;
        if (file_put_contents($prefix . $counter . $suffix, $chunk) === false) {
            fclose($handle);
            return false;
        }
    }

    return fclose($handle);
}
/**
 * Split a text file into consecutive parts of at most $lineSize lines each.
 *
 * Parts are written into $destination and named "<name>_<n>.<ext>", where
 * <name> and <ext> are taken from pathinfo($filename) and <n> starts at 1.
 * The ".<ext>" suffix is omitted when the source file has no extension.
 * Lines are copied unchanged, including their original line endings; the
 * final part holds whatever lines remain and may be shorter than $lineSize.
 * An empty source file produces no parts.
 *
 * @param string $filename    Path of the file to split.
 * @param string $destination Directory receiving the parts (created if missing).
 * @param int    $lineSize    Maximum number of lines per part; must be >= 1.
 *
 * @return bool True when every part was written and the source was closed;
 *              false on an invalid line count or any open/write failure.
 */
function splitLine($filename, $destination, $lineSize) {
    $lineSize = (int) $lineSize;
    if ($lineSize < 1) {
        return false;
    }

    $handle = fopen($filename, 'rb');
    if ($handle === false) {
        return false;
    }

    // The second is_dir() covers another process creating the directory concurrently.
    if (!is_dir($destination) && !mkdir($destination, 0777, true) && !is_dir($destination)) {
        fclose($handle);
        return false;
    }

    $pathInfo = pathinfo($filename);
    $prefix = $destination . DIRECTORY_SEPARATOR . $pathInfo['filename'] . '_';
    // pathinfo() only sets 'extension' when the name actually has one.
    $suffix = isset($pathInfo['extension']) ? '.' . $pathInfo['extension'] : '';

    $content = '';
    $counter = 0;
    $splitCount = 0;
    do {
        // No length limit: a length cap would make one long physical line
        // count as several lines.
        $buffer = fgets($handle);
        $atEnd = ($buffer === false);
        if (!$atEnd) {
            $content .= $buffer;
            $counter++;
        }

        // Flush a full group, or the leftover lines once the input is exhausted.
        if ($counter >= $lineSize || ($atEnd && $counter > 0)) {
            $splitCount++;
            if (file_put_contents($prefix . $splitCount . $suffix, $content) === false) {
                fclose($handle);
                return false;
            }
            $content = '';
            $counter = 0;
        }
    } while (!$atEnd);

    return fclose($handle);
}
答案 1 :(得分:0)
<?php
// Splits /tmp/inputfile.txt into filename1.txt, filename2.txt, ... (relative to
// the current working directory), each holding at most $maxLines lines.
// NOTE(review): the question asks for 50000-line parts; raise $maxLines if that is the goal.
$maxLines = 50;
$handle = fopen("/tmp/inputfile.txt", "r");
if ($handle) {
    $fileCount = 1;
    $lineCount = 0;
    $data = '';
    // No length limit on fgets(): a cap would count one long line as several.
    while (($buffer = fgets($handle)) !== false) {
        // Each line already carries its own line ending, so plain concatenation
        // reproduces the input exactly.
        $data .= $buffer;
        $lineCount++;
        if ($lineCount >= $maxLines) {
            file_put_contents("filename{$fileCount}.txt", $data);
            $data = '';
            $lineCount = 0;
            $fileCount++;
        }
    }
    // Write the last, possibly shorter, batch so no trailing lines are lost.
    if ($lineCount > 0) {
        file_put_contents("filename{$fileCount}.txt", $data);
    }
    if (!feof($handle)) {
        echo "Error: unexpected fgets() fail\n";
    }
    fclose($handle);
} else {
    echo "Error: unable to open /tmp/inputfile.txt\n";
}
?>
类似这样的代码应该可以工作,不过我并不推荐这种做法,因为这不是解决该问题的好办法。