使用 CLONE_FILES 调用 clone(2) 会导致 fcntl 锁失效吗?

时间:2012-08-30 12:50:27

标签: c linux clone fcntl

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <errno.h>
#define __USE_GNU
#include <sched.h>

/*
 * Populate *f as a request for an exclusive (write) lock.
 *
 * The range starts at offset 0 measured from the beginning of the file and
 * has length 0, which fcntl() interprets as "up to end of file", so the
 * request covers the whole file.
 *
 * f: lock description to fill in; must not be NULL.
 */
void init_lock(struct flock *f)
{
    /* Locked range: whole file. */
    f->l_whence = SEEK_SET;
    f->l_start = 0;
    f->l_len = 0;

    /* Kind of lock and the id of the calling process. */
    f->l_type = F_WRLCK;
    f->l_pid = getpid();
}

/*
 * Acquire a write lock on fd, waiting if necessary.
 *
 * *f is (re)initialised by init_lock() and then handed to
 * fcntl(F_SETLKW), the waiting variant of the set-lock command.
 *
 * fd: open file descriptor to lock.
 * f:  caller-provided lock description; overwritten by this call.
 *
 * Returns 0 on success; on failure prints the errno text to stderr and
 * returns -1.
 */
int lock(int fd, struct flock *f)
{
    int rc;

    init_lock(f);

    rc = fcntl(fd, F_SETLKW, f);
    if (rc != -1) {
        return 0;
    }

    fprintf(stderr,"fcntl() failed: %s\n", strerror(errno));
    return -1;
}

/*
 * Release the lock described by *f on fd.
 *
 * Only l_type is changed (to F_UNLCK); the range fields already stored in
 * *f are reused, so the caller is expected to pass the same structure that
 * was used to take the lock.
 *
 * Returns 0 on success; on failure prints the errno text to stderr and
 * returns -1.
 */
int unlock(int fd, struct flock *f)
{
    int status = 0;

    f->l_type = F_UNLCK;

    if (fcntl(fd, F_SETLK, f) == -1) {
        fprintf(stderr, "fcntl() failed: %s\n", strerror(errno));
        status = -1;
    }

    return status;
}

/*
 * Entry point for each clone()d child: lock the file, read one
 * whitespace-delimited word from stdin, append it to the file, unlock.
 *
 * arg: the target file descriptor, passed as an integer packed into the
 *      void * argument (see the clone() call in main()).
 *
 * Returns 0 on success, -1 if the lock could not be taken or released or
 * if no input could be read. A failed write() is reported on stderr but
 * does not change the return value.
 */
int file_op(void *arg)
{
    char buff[256];
    /* Go through intptr_t so the pointer-to-int conversion is well defined. */
    int fd = (int)(intptr_t) arg;
    struct flock my_lock;

    printf("Trying to get lock\n");
    if (lock(fd, &my_lock) == -1) {
        return -1;
    }

    /*
     * getpid() reports the process (thread group) id, not a per-thread id,
     * so every child created with CLONE_THREAD prints the same number here.
     */
    printf("Got lock: %d\n", (int) getpid());
    printf("Enter string to write in file : ");

    /*
     * Bound the conversion to the buffer size (255 chars + NUL) and make
     * sure something was actually read before touching buff.
     */
    if (scanf("%255s", buff) != 1) {
        fprintf(stderr, "scanf() failed or reached end of input\n");
        unlock(fd, &my_lock);   /* do not leave the file locked on error */
        return -1;
    }

    if (write(fd, buff, strlen(buff)) == -1) {
        fprintf(stderr, "write() failed: %s\n", strerror(errno));
    }

    if (unlock(fd, &my_lock) == -1) {
        return -1;
    }
    printf("Lock Released: %d\n", (int) getpid());
    return 0;
}

/*
 * Open sample.txt for appending and start five children with clone(),
 * each running file_op() on the shared descriptor, then wait a fixed
 * 30 seconds before closing the file and exiting.
 *
 * NOTE: the children are created with CLONE_FILES, i.e. they share the
 * parent's file descriptor table. The observed behaviour with this flag
 * set is that the children's fcntl() locks do not block one another.
 */
int main(void)
{
    const size_t stacksize = 3 * 1024 * 1024;
    char *stack;
    int fd, i, cid;

    if ((fd = open("sample.txt", O_CREAT | O_WRONLY | O_APPEND, 0644)) == -1) {
        printf("Error in file opening\n");
        exit(1);
    }

    for (i = 0; i < 5; i++) {
        stack = malloc(stacksize);
        if (stack == NULL) {
            fprintf(stderr, "malloc() failed: %s\n", strerror(errno));
            break;
        }

        /*
         * The child gets the *top* of the allocation as its stack pointer
         * (stack + stacksize). The descriptor travels through the void *
         * argument via intptr_t so the int<->pointer conversion is well
         * defined.
         */
        cid = clone(&file_op, stack + stacksize,
                    CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD,
                    (void *)(intptr_t) fd);
        if (cid == -1) {
            fprintf(stderr,"clone() failed: %s\n", strerror(errno));
            free(stack);    /* no child is using this stack */
            break;
        }
        /*
         * On success the stack is intentionally not freed: the child is
         * running on it and this program never learns when it finishes.
         */
    }

    /* Crude synchronisation: give the children time to run. */
    sleep(30);
    close(fd);
    return 0;
}

我原以为每个由 clone() 创建的线程都会等待获取锁。但这段代码的输出却类似于下面这样:

Trying to get lock
Trying to get lock
Trying to get lock
Got lock: Got lock: 10287
Got lock: Got lock: 10287

Enter string to write in file : Trying to get lock
Enter string to wriGot lock: 10287
Got lock: 10287
Got lock: 10287
Enter string to write in file : Trying to get lock
Got lock: 10287
Got lock: Enter string to write in file :

但是当我从 clone(2) 的标志中去掉 CLONE_FILES 时,一切就正常了:其他克隆出来的线程会在 lock() 处等待。

输出:

Trying to get lock
Got lock: 10311
Trying to get lock
Trying to get lock
Trying to get lock
Trying to get lock

在保留 CLONE_FILES 的前提下,有没有其他替代方案?为什么会出现这种行为?

这个领域的初学者。

1 个答案:

答案 0 :(得分:2)

flock 提供的锁是以进程为单位的,而不是以线程为单位的。

来自http://linux.die.net/man/2/flock(强调我的):

  

如果 另一个进程 持有不兼容的锁,则对 flock() 的调用可能会阻塞。

     

对已经锁定的文件进行的后续flock()调用会将现有锁定转换为新的锁定模式。

     

flock()创建的锁与打开的文件表条目相关联。

虽然未明确提及线程,但多个线程共享文件表条目,而多个进程则不共享。将CLONE_FILES传递给clone会导致您的“进程”共享文件表。

解决方案可能是调用dup来创建更多文件描述符。来自文档:

  

如果一个进程使用 open(2)(或类似方式)为同一个文件获取了多个描述符,flock() 会独立地处理这些描述符。通过其中一个文件描述符尝试锁定文件时,可能会因为调用进程已经通过另一个描述符加了锁而被拒绝。