如何从多个压缩存档中提取某个特定文件并合并?
folder/test_files_20160101.zip.gz
folder/test_files_20160102.zip.gz
.
.
.
folder/test_files_20170630.zip.gz
每个压缩包中包含以下文件:
file_1_20160101.txt
file_2_20160101.txt
file_3_20160101.txt
我需要得到单个文件 file_1_2016_2017.txt,
它由所有 file_1_<YYYYMMDD>.txt
文件依次拼接而成,并且要跳过每个文件的标题行。
答案 0 :(得分:0)
akshay@db-3325:~$ cd /tmp/
# create sample file
akshay@db-3325:/tmp$ seq 1 10 >file_1_20160101.txt
akshay@db-3325:/tmp$ seq 10 20 >file_2_20160101.txt
# create tar
akshay@db-3325:/tmp$ tar -zcvf test1.zip.gz *.txt
file_1_20160101.txt
file_2_20160101.txt
akshay@db-3325:/tmp$ tar -zcvf test2.zip.gz file_2_20160101.txt
file_2_20160101.txt
# make sure they are created
akshay@db-3325:/tmp$ ls *.zip.gz
test1.zip.gz test2.zip.gz
# oneliner
akshay@db-3325:/tmp$ for i in *.zip.gz; do tar -xOzf "$i" --wildcards --no-anchored "file_1_*.txt" >tmpfile 2>>errorfile; [ "$?" -eq 0 ] && awk 'NR>1' tmpfile >>out_file || echo "Filename : $i" >>errorfile ; done ; rm tmpfile
# output
akshay@db-3325:/tmp$ cat out_file
2
3
4
5
6
7
8
9
10
# error log
akshay@db-3325:/tmp$ cat errorfile
tar: file_1_*.txt: Not found in archive
tar: Exiting with failure status due to previous errors
Filename : test2.zip.gz
为了便于阅读,下面是同样逻辑的脚本版本:
#!/usr/bin/env bash
# Demo: build two sample archives in the current directory, then append the
# contents (minus the first line) of the file_1_*.txt member extracted from
# each *.zip.gz archive to out_file.
# Archives for which tar exits non-zero are recorded in errorfile, together
# with tar's own diagnostics.
# Requires GNU tar (--wildcards / --no-anchored).

# create sample files
seq 1 10 >file_1_20160101.txt
seq 10 20 >file_2_20160101.txt

# sample archives: test1 holds both files, test2 has no file_1_*.txt member
tar -zcvf test1.zip.gz *.txt
tar -zcvf test2.zip.gz file_2_20160101.txt

# list them
ls *.zip.gz

# start clean, because both files are only ever appended to below
rm -f errorfile out_file

# scratch file with an unpredictable name; the trap removes it on any exit path
tmpfile=$(mktemp) || exit 1
trap 'rm -f -- "$tmpfile"' EXIT

# loop through archives, gnu tar is used
for archive in *.zip.gz; do
  # an unmatched glob stays literal; skip it instead of feeding it to tar
  [ -e "$archive" ] || continue

  # Explicit if/else rather than `cmd && ok || fail`, so a failing awk is not
  # misreported as a tar failure.
  if tar -xOzf "$archive" --wildcards --no-anchored "file_1_*.txt" \
    >"$tmpfile" 2>>errorfile; then
    # Skip the header line and append.
    # NOTE(review): -O writes every matching member to one stream, so only the
    # first header is dropped; assumes one file_1_*.txt per archive.
    awk 'NR>1' "$tmpfile" >>out_file
  else
    # tar failed (e.g. no matching member): log which archive it was
    printf 'Filename : %s\n' "$archive" >>errorfile
  fi
done