我想编写一个代码来删除.csv文件的每一行的额外分隔符。使用另一个代码,我已经确定.csv文件只包含具有太多delmiters的行。我进一步知道哪个列(在第n个分隔符之后)有额外的分隔符。我已经编写了大部分代码,但它还没有工作。非常感谢帮助。
我的PHP技能仍然是基本的。
<?php
$delimiter = ';'; //type delimiter
$delimiter_start_column =23; //column-to-be-cleaned starts after this delimiter
$exp_delimiter =63; //expected delimiters per row
$total_delimiter =substr_count($line,$delimiter); //total delimiters in row
$delimiter_end_column =($exp_delimiter - $delimiter_start_column) + ($total_delimiter - $exp_delimiter); //column-to-be-cleaned ends before this delimiter
function splitleft($line,$delimiter,$delimiter_start_column){
$max = strlen($line);
$n = 0;
for($i=0;$i<$max;$i++){
if($line[$i]==$delimiter){
$n++;
if($n>=$delimiter_start_column){
break;
}
}
}
$arr[] = substr($line,0,$i);
return $arr;
}
function splitright($line,$delimiter,$delimiter_end_column){
$max = strlen($line);
$n = 0;
for($i=0;$i<$max;$i++){
if($line[$i]==$delimiter){
$n++;
if($n>=$delimiter_end_column){
break;
}
}
}
$arr[] = substr($line,$i,$max);
return $arr;
}
// determine start time in microseconds for runtime calculation
$file['datestamp'] = date("Y-m-d_H-i-s", $start);
$input['folder'] = 'input\\';
$input['file'] = ''; //enter filename
$output['folder'] = 'output\\';
$output['file_cleaned'] = $file['datestamp'].'_cleaned_';
// open input file read-only
$handle['input'] = @fopen($input['folder'].$input['file'], "r");
// initialize line, clean and dirty counters
$counter['total'] = 0;
$counter['cleaned'] = 0;
if($handle['input']) {
// open output files. set point to end of file.
$handle['cleaned'] = @fopen($output['folder'].$output['file_cleaned'].$input['file'], "a");
while(($line = fgets($handle['input'])) !== false) {
// increment line counter
$counter['total']++;
$result = substr_count($line, $delimiter);
if($result == $exp_delimiters AND $counter['line'] != 1 AND $line != $header) {
// if the number of delimiters matches the expected number as represented by $exp_delimiters
// increment clean lines counter
$counter['cleaned']++;
$output_file = $handle['cleaned'];
}
else {
// else, if the number of delimiters does not match the expectation
// remove extra delmiters from column
$line_cleaned = splitleft + str_replace(";","",substr($line,strlen(splitleft()),(strlen($line)-strlen(splitleft())-strlen(splitright()))) + splitright());
$output_file = $handle['cleaned'];
}
// prefix line number
$line = $counter['total'].$delimiter.$line;
// write line to correct output file
fwrite($output_file, $line_cleaned);
// output progress every 20.000 processed lines
if($counter['total'] % 20000 == 0) {
echo number_format($counter['total'], 0, ',', '.')."\r\n";
}
}
if(!feof($handle['input'])){
echo "Error: unexpected fgets() fail\n";
}
// close all input and output files
foreach($handle AS $close) {
fclose($close);
}
}
?>