从 gzip 压缩的归档中提取特定文件

时间:2017-07-13 17:22:00

标签: file unix awk process append

如何从一批数据归档中提取某个特定文件?

folder/test_files_20160101.zip.gz folder/test_files_20160102.zip.gz . . . folder/test_files_20170630.zip.gz

每个压缩文件夹包含 file_1_20160101.txt file_2_20160101.txt file_3_20160101.txt

我需要一个file_1_2016_2017.txt的单个文件作为所有file_1_<YYYYMMDD>.txt文件的串联,并跳过每个文件的标题行。

1 个答案:

答案 0 :(得分:0)

akshay@db-3325:~$ cd /tmp/

# create sample file
akshay@db-3325:/tmp$ seq 1 10 >file_1_20160101.txt
akshay@db-3325:/tmp$ seq 10 20 >file_2_20160101.txt

# create tar
akshay@db-3325:/tmp$ tar -zcvf test1.zip.gz *.txt
file_1_20160101.txt
file_2_20160101.txt

akshay@db-3325:/tmp$ tar -zcvf test2.zip.gz file_2_20160101.txt 
file_2_20160101.txt

# make sure they are created
akshay@db-3325:/tmp$ ls *.zip.gz
test1.zip.gz  test2.zip.gz

# oneliner
akshay@db-3325:/tmp$ for i in *.zip.gz; do tar -xOzf "$i" --wildcards --no-anchored "file_1_*.txt" >tmpfile 2>>errorfile; [ "$?" -eq 0 ] && awk 'NR>1' tmpfile >>out_file || echo "Filename : $i" >>errorfile   ;  done ; rm tmpfile 

# output
akshay@db-3325:/tmp/testdir$ cat out_file 
2
3
4
5
6
7
8
9
10

# error log
akshay@db-3325:/tmp$ cat errorfile 
tar: file_1_*.txt: Not found in archive
tar: Exiting with failure status due to previous errors
Filename : test2.zip.gz

为了便于阅读,下面是同一做法的脚本版本:

#!/usr/bin/env bash
#
# Demo: pull every member matching file_1_*.txt out of each gzip-compressed
# tar archive (*.zip.gz) in the current directory and concatenate them into
# out_file, dropping the first (header) line of what each archive yields.
#
# Files written in the current directory:
#   out_file   - concatenated data lines
#   errorfile  - tar's stderr plus a "Filename : <archive>" line for every
#                archive from which extraction failed
#
# Requires GNU tar (--wildcards / --no-anchored).

# create sample files
seq 1 10 >file_1_20160101.txt
seq 10 20 >file_2_20160101.txt

# sample tar archives: the first contains file_1_*, the second does not
# ("--" keeps a file name starting with "-" from being read as an option)
tar -zcvf test1.zip.gz -- *.txt
tar -zcvf test2.zip.gz file_2_20160101.txt

# list them
ls -- *.zip.gz

# start from a clean slate because the loop below only appends
rm -f -- errorfile out_file

# private scratch file instead of a fixed name in the working directory;
# the trap removes it on every exit path
tmpfile=$(mktemp) || { printf 'mktemp failed\n' >&2; exit 1; }
trap 'rm -f -- "$tmpfile"' EXIT

# loop through archives, gnu tar is used
for archive in *.zip.gz; do
  # an unmatched glob stays literal; skip it rather than feed it to tar
  [ -e "$archive" ] || continue

  # -O streams the matching member(s) to stdout instead of writing to disk.
  # A real if/else is used so that the error branch runs only when tar
  # fails, not when the awk/append step fails.
  if tar -xOzf "$archive" --wildcards --no-anchored "file_1_*.txt" \
      >"$tmpfile" 2>>errorfile; then
    # extraction ok: skip the header line and append the rest
    awk 'NR>1' "$tmpfile" >>out_file
  else
    # extraction failed: record which archive it was
    echo "Filename : $archive" >>errorfile
  fi
done