我正在尝试解析并合并大量包含Ordnance Survey地址数据的csv文件。Ordnance Survey(OS)提供了一个批处理文件,但由于文件很大,我希望能够在Linux服务器上运行它。
我尝试参照这份非常方便的指南(very handy guide)将bat文件转换为sh脚本,但是在调用这个程序时遇到了困难。有人能帮忙检查一下哪里出了问题吗?
原始脚本(我在这里略微截断了它)
@rem -- run the :process subroutine once for every .csv file listed by dir
@rem -- (/b = bare names, /s = also search subfolders); the subroutine receives
@rem -- the listed path and the file's base name without its extension
FOR /F %%A IN ('dir *.csv /b/s') DO (CALL :process "%%A" "%%~NA")
@rem -- merge the individual record identifier files
@rem -- (copy with a wildcard source and a single target file appends every match into that file)
copy *_10_Records.csv Master_10_Records.out
copy *_11_Records.csv Master_11_Records.out
copy *_15_Records.csv Master_15_Records.out
copy *_21_Records.csv Master_21_Records.out
@rem -- the per-source pieces are no longer needed once merged
del *_Records.csv
@rem -- add header records to the individual record identifier files
@rem -- (copy A+B C writes A followed by B into C)
copy Record_10_HEADER_Header.csv+Master_10_Records.out ID10_Header_Records.csv
copy Record_11_STREET_Header.csv+Master_11_Records.out ID11_Street_Records.csv
copy Record_15_STREETDESCRIPTOR_Header.csv+Master_15_Records.out ID15_StreetDesc_Records.csv
copy Record_21_BLPU_Header.csv+Master_21_Records.out ID21_BLPU_Records.csv
@rem -- remove the merged intermediates (note: this deletes every .out file in the folder)
del *.out
@rem -- wait for a key press, then leave the script so execution does not fall through into :process
pause
exit
@rem -- split the source csv into individual files based on the record identifier
@rem -- subroutine arguments: first = source csv path, second = base name used as the output prefix
:process
@rem -- copy both arguments into variables with any surrounding quotes removed
SET tempvar1=%~1
SET tempvar2=%~2
@rem -- each gawk pass reads the whole source and keeps only the rows whose first
@rem -- comma-separated field equals one record identifier
gawk < %tempvar1% -F "," "{ if ($1 == \"10\") { print $0 } }" > %tempvar2%_10_Records.csv
gawk < %tempvar1% -F "," "{ if ($1 == \"11\") { print $0 } }" > %tempvar2%_11_Records.csv
gawk < %tempvar1% -F "," "{ if ($1 == \"15\") { print $0 } }" > %tempvar2%_15_Records.csv
gawk < %tempvar1% -F "," "{ if ($1 == \"21\") { print $0 } }" > %tempvar2%_21_Records.csv
@rem -- return to the caller of :process
GOTO :EOF
我将其翻译为shell脚本语法的(糟糕的)尝试:
# NOTE(review): the next line is still cmd.exe syntax. sh has no FOR /F, no
# CALL, no :label subroutines and no %%-style variables, so it cannot run as
# written; a shell "for ... in *.csv; do ...; done" loop is the counterpart.
FOR /F A IN ['dir *.csv /b/s'] DO (CALL :process "%%A" "%%~NA")
# merge the individual record identifier files
# NOTE(review): unlike the batch copy command, cp does not concatenate. With
# several matching sources it expects the last argument to be a directory.
cp *_10_Records.csv Master_10_Records.out
cp *_11_Records.csv Master_11_Records.out
cp *_15_Records.csv Master_15_Records.out
cp *_21_Records.csv Master_21_Records.out
rm *_Records.csv
# add header records to the individual record identifier files
# NOTE(review): "A+B" is not a concatenation operator for cp; the whole word
# is taken as one file name.
cp Record_10_HEADER_Header.csv+Master_10_Records.out ID10_Header_Records.csv
cp Record_11_STREET_Header.csv+Master_11_Records.out ID11_Street_Records.csv
cp Record_15_STREETDESCRIPTOR_Header.csv+Master_15_Records.out ID15_StreetDesc_Records.csv
cp Record_21_BLPU_Header.csv+Master_21_Records.out ID21_BLPU_Records.csv
rm *.out
# NOTE(review): sleep needs a duration operand and does not wait for a key
# press, so it is not a replacement for the batch pause command.
sleep
# Leaves the script here; everything below is only reachable as a subroutine
# in the batch original.
exit
# split the source csv into individual files based on the record identifier
# NOTE(review): ":process" is a batch label; sh has no labels, a shell
# function is the counterpart.
:process
# NOTE(review): %~1 and %~2 are batch parameter syntax; sh assigns them as
# literal text. The shell spelling of the arguments is "$1" and "$2".
export tempvar1=%~1
export tempvar2=%~2
# NOTE(review): %tempvar1% / %tempvar2% are not expanded by sh, and inside
# double quotes the shell itself expands $1 and $0 before gawk sees the
# program text.
gawk < %tempvar1% -F "," "{ if ($1 == \"10\") { print $0 } }" > %tempvar2%_10_Records.csv
gawk < %tempvar1% -F "," "{ if ($1 == \"11\") { print $0 } }" > %tempvar2%_11_Records.csv
gawk < %tempvar1% -F "," "{ if ($1 == \"15\") { print $0 } }" > %tempvar2%_15_Records.csv
gawk < %tempvar1% -F "," "{ if ($1 == \"21\") { print $0 } }" > %tempvar2%_21_Records.csv
# NOTE(review): GOTO :EOF is the batch "return from subroutine"; it is not a
# shell command.
GOTO :EOF
非常感谢任何帮助!我通过谷歌搜索了解了很多,但我还是新手。
答案 0 :(得分:2)
您可能想尝试以下内容。 (请注意,我实际上没有尝试过,但我希望它可以帮到你。)
#!/bin/bash
# split the source csv into individual files based on the record identifier
#######################################
# Split one CSV into four files, one per record identifier (10, 11, 15, 21).
# A row is selected when its first comma-separated field equals the
# identifier exactly (string comparison).
# Arguments:
#   $1 - path of the source CSV to read
#   $2 - output prefix; rows go to <prefix>_<id>_Records.csv
# Outputs:
#   Creates/truncates the four <prefix>_<id>_Records.csv files. A file is
#   left empty when the source has no row with that identifier.
# Returns:
#   0 on success, non-zero if the source is unreadable, an output cannot be
#   created, or awk fails.
#######################################
process() {
  local src=$1
  local prefix=$2
  local id

  if [[ ! -r "$src" ]]; then
    printf 'process: cannot read %s\n' "$src" >&2
    return 1
  fi

  # Create every output up front so a later "cat *_<id>_Records.csv" merge
  # always finds a file, even for identifiers that never occur in this source.
  for id in 10 11 15 21; do
    : > "${prefix}_${id}_Records.csv" || return 1
  done

  # One pass over the (potentially very large) source instead of four.
  # The prefix travels through the environment rather than "-v" because "-v"
  # would interpret backslash escapes in it. The source is fed on stdin so a
  # file name containing "=" is never mistaken for an awk assignment operand.
  OUT_PREFIX=$prefix awk -F ',' '
    $1 == "10" || $1 == "11" || $1 == "15" || $1 == "21" {
      print > (ENVIRON["OUT_PREFIX"] "_" $1 "_Records.csv")
    }
  ' < "$src"
}
# Driver: split every source CSV in the current directory by record
# identifier, then write one final file per identifier consisting of its
# header file followed by all matching rows.
#
# The three arrays are parallel: entry i of each belongs to the same record
# identifier.
record_ids=(10 11 15 21)
header_files=(
  Record_10_HEADER_Header.csv
  Record_11_STREET_Header.csv
  Record_15_STREETDESCRIPTOR_Header.csv
  Record_21_BLPU_Header.csv
)
output_files=(
  ID10_Header_Records.csv
  ID11_Street_Records.csv
  ID15_StreetDesc_Records.csv
  ID21_BLPU_Records.csv
)

# Fail before doing any heavy work if a header file is missing.
for header in "${header_files[@]}"; do
  if [[ ! -r "$header" ]]; then
    printf 'error: header file not found: %s\n' "$header" >&2
    exit 1
  fi
done

# Intermediate pieces go into a private scratch directory instead of the
# working directory, so they can never be confused with source files and no
# unrelated *.out / *_Records.csv file has to be deleted afterwards. It is
# created inside the current directory so the large pieces land on the same
# filesystem as the data.
work_dir=$(mktemp -d ./address_split.XXXXXX) || exit 1
trap 'rm -rf -- "${work_dir:?}"' EXIT

split_count=0
for src in *.csv; do
  # An unmatched glob stays as the literal pattern; skip it.
  [[ -f "$src" ]] || continue
  # Header files and *_Records.csv files (results of a previous run) are not
  # source data; feeding them back in would duplicate every record.
  case "$src" in
    Record_*_Header.csv | *_Records.csv) continue ;;
  esac
  process "$src" "${work_dir}/${src%.*}" || exit 1
  split_count=$((split_count + 1))
done

if ((split_count == 0)); then
  printf 'error: no source .csv files found in %s\n' "$PWD" >&2
  exit 1
fi

# merge the individual record identifier files and add the header records
for i in "${!record_ids[@]}"; do
  id=${record_ids[i]}
  cat -- "${header_files[i]}" "${work_dir}"/*_"${id}"_Records.csv \
    > "${output_files[i]}" || exit 1
done

# Counterpart of the batch "pause": prompt, then wait for ENTER. A closed or
# non-interactive stdin must not turn a successful run into a failure.
printf 'Press ENTER to continue...'
read -r _ || true
exit 0
它的作用/差异:
子例程process
已移动到for循环之前,并转换为bash函数。(请参阅http://tldp.org/LDP/abs/html/functions.html)bash函数收到的参数不带引号,因此用于去除引号的代字号扩展(%~1
)可以简化为"$1"
。
for循环只会选择扩展名为csv的所有文件。我认为%%~NA
扩展为CMD中没有扩展名的文件名。在bash中${A%.*}
将删除扩展名。请参阅Bash Reference Manual - Shell Parameter Expansion和https://www.microsoft.com/resources/documentation/windows/xp/all/proddocs/en-us/percent.mspx。
DOS copy
命令不仅复制(如cp
),还可以连接多个文件,这在bash脚本中是使用cat
完成的。请参阅http://support.microsoft.com/kb/240268。
pause命令打印提示并等待按下任意键。在bash中,您可以使用echo打印提示,然后用read读取一行(空的)虚拟输入,从而等待用户按下ENTER键。