从 srt/txt 文件中删除部分换行符

时间:2014-03-09 14:01:44

标签: awk grep line-breaks

我有一个文本文件,其中包含编号条目、时间码和字幕文本。我想删除字幕文本内部的换行符,同时保留其他所有换行符。我正在尝试使用 grep 或 awk 来实现。

文件就像

  

1
00:00:27,160 --> 00:00:29,054
有时没有太多的对话

2
00:00:30,100 --> 00:00:31,090
但其他时候还有很多,
并将其格式化为两行

3
00:00:31,500 --> 00:00:33,700
我想只删除这些长行上的
换行符,留下所有其他格式

4
00:00:33,805 --> 00:00:37,285
所以所有对话最终都在一个单一的
行上,无论该行有多长。

输出如下:

  

1
00:00:27,160 --> 00:00:29,054
有时没有太多的对话

2
00:00:30,100 --> 00:00:31,090
但其他时间有相当多, 并将其格式化为两行

3
00:00:31,500 --> 00:00:33,700
我想只删除这些长行上的换行符,留下所有其他格式

4
00:00:33,805 --> 00:00:37,285
因此,无论该行有多长,所有对话最终都会在同一行上。

感谢所有提供帮助的人

5 个答案:

答案 0 :(得分:3)

不要依赖于使用任何特定字符开始(或不开始)的行 - 只需将每条记录中的第4行和后续行附加到该记录的第3行末尾:

$ awk '
# Paragraph mode: RS="" makes every blank-line-separated subtitle one record,
# and FS="\n" makes every line of that record one field.
# ORS="" stops print from appending anything after its output;
# OFS="\n" puts a newline between the arguments of print.
BEGIN { RS=ORS=""; FS=OFS="\n" }
{
    # Index, timecode and first text line, each kept on its own line.
    print $1,$2,$3
    # Every further text line is appended to the third line after one space.
    for (i=4;i<=NF;i++)
        printf " %s", $i
    # ORS is empty, so the end of line and the blank separator line
    # between records have to be written explicitly.
    print "\n\n"
}
' file
1
00:00:27,160 --> 00:00:29,054
Sometimes there's not much dialogue.

2
00:00:30,100 --> 00:00:31,090
But other times there is quite a bit, and it's formatted into two lines

3
00:00:31,500 --> 00:00:33,700
I want to remove the line breaks only on these long lines, leaving all other formatting.

4
00:00:33,805 --> 00:00:37,285
So that all dialogue ends up being on a single line no matter how long that line.

答案 1 :(得分:0)

删除所有紧跟在字母、空格或制表符之后的换行符:

perl -pe 's/([a-zA-Z \t])\n$/$1/'

答案 2 :(得分:0)

我认为你需要类似下面这样的东西:
awk '/[0-9]+/,/^$/{ if(NR<3) print $0; else {while($0!=""){ printf $0;next; }}}' file

这段代码并不能正常工作,但你应该能明白大致的思路。

答案 3 :(得分:0)

您可以使用awk尝试类似的内容:

awk '!NF{print}/[a-z]/{printf "%s ", $0;next}1' file

$ cat file
1
00:00:27,160 --> 00:00:29,054
Sometimes there's not much dialogue.

2
00:00:30,100 --> 00:00:31,090
But other times there is quite a bit,
and it's formatted into two lines

3
00:00:31,500 --> 00:00:33,700
I want to remove the line breaks only on
these long lines, leaving all other formatting.

4
00:00:33,805 --> 00:00:37,285
So that all dialogue ends up being on a single
line no matter how long that line.

$ awk '!NF{print}/[a-z]/{printf "%s ", $0;next}1' file
1
00:00:27,160 --> 00:00:29,054
Sometimes there's not much dialogue.

2
00:00:30,100 --> 00:00:31,090
But other times there is quite a bit,  and it's formatted into two lines

3
00:00:31,500 --> 00:00:33,700
I want to remove the line breaks only on these long lines, leaving all other formatting.

4
00:00:33,805 --> 00:00:37,285
So that all dialogue ends up being on a single line no matter how long that line.

答案 4 :(得分:0)

我遇到了同样的问题,写了这个小代码,解决了我的问题:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Position of the parser inside one blank-line-delimited subtitle record. */
enum srt_state { SRT_EXPECT_INDEX, SRT_EXPECT_TIMECODE, SRT_IN_TEXT };

/* Returns non-zero when s holds nothing but spaces, tabs, CR and LF. */
static int is_blank_line(const char *s)
{
    return s[strspn(s, " \t\r\n")] == '\0';
}

/* Returns how many CR/LF characters terminate s (works for LF and CRLF files). */
static size_t trailing_eol_length(const char *s)
{
    size_t len = strlen(s);
    size_t n = 0;

    while (n < len && (s[len - 1 - n] == '\n' || s[len - 1 - n] == '\r')) {
        n++;
    }
    return n;
}

/*
 * Reads one complete line, terminator included, into *buf, growing the
 * buffer with realloc so that arbitrarily long lines are never split.
 *
 * Returns 1 when a line was read (the last line of a file may lack a
 * terminator), 0 at end of file, -1 on a read error or allocation failure.
 */
static int read_line(FILE *in, char **buf, size_t *cap)
{
    size_t len = 0;

    for (;;) {
        if (*cap - len < 2) {
            size_t new_cap = (*cap != 0) ? *cap * 2 : 256;
            char *grown = realloc(*buf, new_cap);

            if (grown == NULL) {
                return -1;
            }
            *buf = grown;
            *cap = new_cap;
        }

        /* fgets takes an int size; clamp so a huge buffer cannot overflow it. */
        size_t room = *cap - len;
        int chunk = (room > 32767) ? 32767 : (int)room;

        if (fgets(*buf + len, chunk, in) == NULL) {
            if (ferror(in)) {
                return -1;
            }
            return (len > 0) ? 1 : 0;
        }
        len += strlen(*buf + len);
        if (len > 0 && (*buf)[len - 1] == '\n') {
            return 1;
        }
    }
}

/*
 * Copies up to max_subtitles subtitle records from in to out.
 *
 * A record is: index line, timecode line, one or more text lines, and a
 * blank separator line. Index, timecode and blank lines are copied
 * unchanged. All text lines of a record are merged into a single line:
 * every text line except the last loses its line terminator and gets one
 * space appended; the last text line keeps its original terminator.
 *
 * *processed receives the number of records handled.
 * Returns 0 on success, -1 on a read, write or allocation error.
 */
static int join_subtitle_lines(FILE *in, FILE *out, long max_subtitles,
                               long *processed)
{
    char *cur = NULL;
    char *pending = NULL;   /* most recent text line, not yet written */
    size_t cur_cap = 0;
    size_t pending_cap = 0;
    int have_pending = 0;
    int got = 0;
    long done = 0;
    enum srt_state state = SRT_EXPECT_INDEX;

    while (done < max_subtitles && (got = read_line(in, &cur, &cur_cap)) > 0) {
        if (is_blank_line(cur)) {
            /* End of record: the held-back text line is the last one. */
            if (have_pending) {
                fputs(pending, out);
                have_pending = 0;
            }
            fputs(cur, out);
            if (state != SRT_EXPECT_INDEX) {
                done++;
            }
            state = SRT_EXPECT_INDEX;
            continue;
        }

        switch (state) {
        case SRT_EXPECT_INDEX:
            fputs(cur, out);
            state = SRT_EXPECT_TIMECODE;
            break;
        case SRT_EXPECT_TIMECODE:
            fputs(cur, out);
            state = SRT_IN_TEXT;
            break;
        case SRT_IN_TEXT:
        default:
            /*
             * A text line is held back until we know whether another one
             * follows; only then can its terminator be replaced by a space.
             */
            if (have_pending) {
                size_t keep = strlen(pending) - trailing_eol_length(pending);

                fwrite(pending, 1, keep, out);
                fputc(' ', out);
            }
            {
                char *tmp_buf = pending;
                size_t tmp_cap = pending_cap;

                pending = cur;
                pending_cap = cur_cap;
                cur = tmp_buf;
                cur_cap = tmp_cap;
            }
            have_pending = 1;
            break;
        }
    }

    /* The file may end without a blank line after the final record. */
    if (have_pending) {
        fputs(pending, out);
    }
    if (state != SRT_EXPECT_INDEX) {
        done++;
    }

    free(cur);
    free(pending);
    *processed = done;
    return (got < 0 || ferror(out)) ? -1 : 0;
}

/*
 * srtlinejoin: joins the wrapped text lines of every subtitle in an SRT
 * file into one line and writes the result to "out.srt".
 *
 * Usage: srtlinejoin Filename CountOfSubtitles
 *   argv[1]  path of the SRT file to read
 *   argv[2]  number of subtitles to process (positive decimal integer)
 *
 * Records are delimited by blank lines, so subtitles with any number of
 * text lines are handled, LF and CRLF files both work, and the last
 * subtitle is processed like all the others.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE on bad arguments or any I/O error.
 */
int main(int argc, char *argv[])
{
    if (argc != 3) {
        printf("Usage: srtlinejoin Filename CountOfSubtitles\n");
        return EXIT_FAILURE;
    }

    char *end = NULL;
    long max_subtitles = strtol(argv[2], &end, 10);

    if (end == argv[2] || *end != '\0' || max_subtitles < 1) {
        fprintf(stderr, "Invalid subtitle count: %s\n", argv[2]);
        return EXIT_FAILURE;
    }

    FILE *in = fopen(argv[1], "r");
    if (in == NULL) {
        fprintf(stderr, "Can't open %s\n", argv[1]);
        return EXIT_FAILURE;
    }

    FILE *out = fopen("out.srt", "w");
    if (out == NULL) {
        fprintf(stderr, "Can't open out.srt\n");
        fclose(in);
        return EXIT_FAILURE;
    }

    long processed = 0;
    int status = EXIT_SUCCESS;

    if (join_subtitle_lines(in, out, max_subtitles, &processed) != 0) {
        fprintf(stderr, "Error while converting %s\n", argv[1]);
        status = EXIT_FAILURE;
    } else if (processed < max_subtitles) {
        fprintf(stderr, "Warning: expected %ld subtitles but found %ld\n",
                max_subtitles, processed);
    }

    fclose(in);
    /* fclose flushes buffered output, so a failure here means lost data. */
    if (fclose(out) != 0) {
        fprintf(stderr, "Can't write out.srt\n");
        status = EXIT_FAILURE;
    }
    return status;
}