我有一个文本文件,其中包含编号条目、时间码和对白文本。我想删除对白文本内部的换行符,同时保留其他所有换行符。我正在尝试用 grep 或 awk 来实现。
输入文件如下:
1
00:00:27,160 --> 00:00:29,054
有时没有太多的对话
2
00:00:30,100 --> 00:00:31,090
但其他时候还有很多,
并将其格式化为两行
3
00:00:31,500 --> 00:00:33,700
我只想删除这些长行上的
换行符,保留所有其他格式
4
00:00:33,805 --> 00:00:37,285
这样所有对话最终都在同一行上,
无论这一行有多长。
期望的输出如下:
1
00:00:27,160 --> 00:00:29,054
有时没有太多的对话
2
00:00:30,100 --> 00:00:31,090
但其他时候还有很多, 并将其格式化为两行
3
00:00:31,500 --> 00:00:33,700
我只想删除这些长行上的 换行符,保留所有其他格式
4
00:00:33,805 --> 00:00:37,285
这样所有对话最终都在同一行上, 无论这一行有多长。
感谢所有提供帮助的人
答案 0 :(得分:3)
不要依赖于使用任何特定字符开始(或不开始)的行 - 只需将每条记录中的第4行和后续行附加到该记录的第3行末尾:
$ awk '
# Paragraph mode: an empty RS makes blank lines the record separator, and
# FS="\n" makes every line of a record one field. ORS is empty so print adds
# nothing on its own; OFS="\n" puts a newline between printed fields.
BEGIN { RS=ORS=""; FS=OFS="\n" }
{
# Index, timecode and first text line, each on its own line, with no
# newline after the third so further text can be appended to it.
print $1,$2,$3
# Append every remaining text line to the third line, separated by a space.
for (i=4;i<=NF;i++)
printf " %s", $i
# Finish the joined text line and emit the blank line between records.
print "\n\n"
}
' file
1
00:00:27,160 --> 00:00:29,054
Sometimes there's not much dialogue.
2
00:00:30,100 --> 00:00:31,090
But other times there is quite a bit, and it's formatted into two lines
3
00:00:31,500 --> 00:00:33,700
I want to remove the line breaks only on these long lines, leaving all other formatting.
4
00:00:33,805 --> 00:00:37,285
So that all dialogue ends up being on a single line no matter how long that line.
答案 1 :(得分:0)
删除所有紧跟在字母、空格或制表符之后的换行符:
perl -pe 's/([a-zA-Z \t])\n$/$1/'
答案 2 :(得分:0)
我认为你需要类似这样的东西:
awk '/[0-9]+/,/^$/{ if(NR<3) print $0; else {while($0!=""){ printf $0;next; }}}' file
这段代码并不能直接运行,但你应该能明白其中的思路。
答案 3 :(得分:0)
您可以使用 awk,
尝试类似下面的命令:
awk '!NF{print}/[a-z]/{printf "%s ", $0;next}1' file
$ cat file
1
00:00:27,160 --> 00:00:29,054
Sometimes there's not much dialogue.
2
00:00:30,100 --> 00:00:31,090
But other times there is quite a bit,
and it's formatted into two lines
3
00:00:31,500 --> 00:00:33,700
I want to remove the line breaks only on
these long lines, leaving all other formatting.
4
00:00:33,805 --> 00:00:37,285
So that all dialogue ends up being on a single
line no matter how long that line.
$ awk '!NF{print}/[a-z]/{printf "%s ", $0;next}1' file
1
00:00:27,160 --> 00:00:29,054
Sometimes there's not much dialogue.
2
00:00:30,100 --> 00:00:31,090
But other times there is quite a bit, and it's formatted into two lines
3
00:00:31,500 --> 00:00:33,700
I want to remove the line breaks only on these long lines, leaving all other formatting.
4
00:00:33,805 --> 00:00:37,285
So that all dialogue ends up being on a single line no matter how long that line.
答案 4 :(得分:0)
我遇到了同样的问题,写了这个小代码,解决了我的问题:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Number of bytes requested from fgets() per call while assembling a line. */
enum { READ_CHUNK = 256 };

/*
 * Reads one complete line of arbitrary length from `in` into *buf, growing
 * the heap buffer (*buf, *cap) as needed. The line terminator, if present,
 * is kept in the buffer.
 *
 * Returns 1 when a line was read, 0 at end of file, -1 on a read error or
 * when memory could not be allocated.
 */
static int read_line(FILE *in, char **buf, size_t *cap)
{
    size_t len = 0;

    for (;;) {
        /* Always keep room for one more full chunk. */
        if (*cap - len < READ_CHUNK) {
            size_t new_cap = *cap ? *cap * 2 : 2 * READ_CHUNK;
            char *tmp = realloc(*buf, new_cap);
            if (tmp == NULL)
                return -1;
            *buf = tmp;
            *cap = new_cap;
        }
        if (fgets(*buf + len, READ_CHUNK, in) == NULL) {
            if (ferror(in))
                return -1;
            /* EOF: a final line without a newline still counts as a line. */
            return len > 0;
        }
        len += strlen(*buf + len);
        if (len > 0 && (*buf)[len - 1] == '\n')
            return 1;
        /* No newline yet: the line is longer than one chunk, keep reading. */
    }
}

/*
 * Cuts a trailing "\n" or "\r\n" off `line` and stores it in `eol`
 * (empty string when the line had no terminator).
 */
static void split_eol(char *line, char eol[3])
{
    size_t len = strlen(line);

    eol[0] = '\0';
    if (len == 0 || line[len - 1] != '\n')
        return;
    line[--len] = '\0';
    if (len > 0 && line[len - 1] == '\r') {
        line[--len] = '\0';
        strcpy(eol, "\r\n");
    } else {
        strcpy(eol, "\n");
    }
}

/*
 * srtlinejoin: copies an SRT subtitle file to "out.srt", joining the text
 * lines of every subtitle into one line (separated by a single space).
 *
 * Usage: srtlinejoin Filename [CountOfSubtitles]
 *
 * A subtitle block is a run of non-empty lines; blocks are separated by
 * empty lines. The first two lines of a block (index and timecode) are
 * copied unchanged, all following lines are merged. Blank lines and the
 * line-ending style (LF or CRLF) are preserved.
 *
 * CountOfSubtitles is optional; when given it must be a positive integer
 * and at most that many subtitle blocks are written. Unlike earlier
 * versions, the last subtitle is processed too, subtitles may have any
 * number of text lines, and lines may be arbitrarily long.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE on bad arguments or any
 * I/O or allocation failure.
 */
int main(int argc, char *argv[])
{
    FILE *quelle = NULL;      /* input (source) file */
    FILE *ziel = NULL;        /* output (target) file */
    char *line = NULL;
    size_t line_cap = 0;
    char held_eol[3] = "";    /* terminator of the last text line, not yet written */
    long limit = 0;           /* maximum number of blocks; 0 means no limit */
    long blocks = 0;          /* subtitle blocks started so far */
    int line_no = 0;          /* position inside the current block; 0 between blocks */
    int rc;
    int status = EXIT_FAILURE;

    if (argc < 2 || argc > 3) {
        fprintf(stderr, "Usage: srtlinejoin Filename [CountOfSubtitles]\n");
        return EXIT_FAILURE;
    }
    if (argc == 3) {
        char *end;
        limit = strtol(argv[2], &end, 10);
        if (end == argv[2] || *end != '\0' || limit < 1) {
            fprintf(stderr, "Invalid subtitle count: %s\n", argv[2]);
            return EXIT_FAILURE;
        }
    }

    if ((quelle = fopen(argv[1], "r")) == NULL) {
        fprintf(stderr, "Can't open %s\n", argv[1]);
        goto done;
    }
    if ((ziel = fopen("out.srt", "w")) == NULL) {
        fprintf(stderr, "Can't open out.srt\n");
        goto done;
    }

    while ((rc = read_line(quelle, &line, &line_cap)) == 1) {
        char eol[3];

        split_eol(line, eol);

        if (line[0] == '\0') {
            /* Blank line: finish the pending text line, then end the block. */
            if (line_no >= 3)
                fputs(held_eol, ziel);
            fputs(eol, ziel);
            line_no = 0;
            continue;
        }

        if (line_no == 0) {
            if (limit > 0 && blocks == limit)
                break;
            blocks++;
        }
        /* Lines 4 and up are all handled alike, so stop counting there. */
        if (line_no < 4)
            line_no++;

        if (line_no <= 2) {
            /* Subtitle index and timecode: copy verbatim. */
            fputs(line, ziel);
            fputs(eol, ziel);
        } else {
            /* Text line: append to the previous one instead of breaking. */
            if (line_no > 3)
                fputc(' ', ziel);
            fputs(line, ziel);
            strcpy(held_eol, eol);
        }
    }
    if (rc < 0) {
        fprintf(stderr, "Error reading %s\n", argv[1]);
        goto done;
    }

    /* Input ended inside a block: terminate its text line as the input did. */
    if (line_no >= 3)
        fputs(held_eol, ziel);

    if (ferror(ziel)) {
        fprintf(stderr, "Error writing out.srt\n");
        goto done;
    }
    status = EXIT_SUCCESS;

done:
    free(line);
    if (quelle != NULL)
        fclose(quelle);
    /* fclose flushes buffered output, so a failure here means lost data. */
    if (ziel != NULL && fclose(ziel) != 0) {
        fprintf(stderr, "Error writing out.srt\n");
        status = EXIT_FAILURE;
    }
    return status;
}