Question

我正在尝试使用ServiceStack-Redis库和described here提供的锁定机制来实现DLM，但我发现API似乎存在竞争条件，有时会授予相同的锁定多个客户。

BasicRedisClientManager mgr = new BasicRedisClientManager(redisConnStr);

using(var client = mgr.GetClient())
{
    client.Remove("touchcount");
    client.Increment("touchcount", 0);
}

Random rng = new Random();

Action<object> simulatedDistributedClientCode = (clientId) => {

    using(var redisClient = mgr.GetClient())
    {
        using(var mylock = redisClient.AcquireLock("mutex", TimeSpan.FromSeconds(2)))
        {
            long touches = redisClient.Get<long>("touchcount");
            Debug.WriteLine("client{0}: I acquired the lock! (touched: {1}x)", clientId, touches);
            if(touches > 0) {
                Debug.WriteLine("client{0}: Oh, but I see you've already been here. I'll release it.", clientId);
                return;
            }
            int arbitraryDurationOfExecutingCode = rng.Next(100, 2500);
            Thread.Sleep(arbitraryDurationOfExecutingCode); // do some work of arbitrary duration
            redisClient.Increment("touchcount", 1);
        }
        Debug.WriteLine("client{0}: Okay, I released my lock, your turn now.", clientId);
    }
};
Action<Task> exceptionWriter = (t) => {if(t.IsFaulted) Debug.WriteLine(t.Exception.InnerExceptions.First());};

int arbitraryDelayBetweenClients = rng.Next(5, 500);
var clientWorker1 = new Task(simulatedDistributedClientCode, 1);
var clientWorker2 = new Task(simulatedDistributedClientCode, 2);

clientWorker1.Start();
Thread.Sleep(arbitraryDelayBetweenClients);
clientWorker2.Start();

Task.WaitAll(
    clientWorker1.ContinueWith(exceptionWriter),
    clientWorker2.ContinueWith(exceptionWriter)
    );

using(var client = mgr.GetClient())
{
    var finaltouch = client.Get<long>("touchcount");
    Console.WriteLine("Touched a total of {0}x.", finaltouch);
}

mgr.Dispose();

当运行上面的代码来模拟两个客户端在短时间内连续尝试相同的操作时，有三种可能的输出。第一个是Mutex正常工作并且客户端按正确顺序进行的最佳情况。第二种情况是第二个客户端超时等待获取锁定;也是可以接受的结果。但问题是，当arbitraryDurationOfExecutingCode接近或超过获取锁定的超时时，很容易重现第二个客户端在第一个客户端释放它之前被授予锁定的情况，产生这样的输出：

client1：我获得了锁定！（触及：0x）
  client2：我获得了锁！（触及：0x）
  client1：好的，我现在轮到你的锁了。
  client2：好的，我现在轮到你的锁了。
  总共触及了2倍。

我对API及其文档的理解是，获取锁定时的timeOut参数只是 - 获取锁定的超时时间。如果我必须猜测一个timeOut值足够高，总是比执行代码的持续时间长，只是为了防止这种情况，这似乎很容易出错。除了传递null以永久等待锁定之外，是否还有其他工作？我绝对不想这样做，或者我知道我会找到来自坠毁工人的鬼锁。

Answer 1

mythz的答案（感谢您的快速响应！）确认ServiceStack.Redis中的内置AcquireLock方法并未区分锁获取期间与锁定到期期间。出于我们的目的，我们现有的代码期望分布式锁定机制在执行锁定时快速失败，但允许锁定范围内的长时间运行的进程。为了满足这些要求，我在ServiceStack RedisLock上派生了这种变体，允许区分两者。

// based on ServiceStack.Redis.RedisLock
// https://github.com/ServiceStack/ServiceStack.Redis/blob/master/src/ServiceStack.Redis/RedisLock.cs
internal class RedisDlmLock : IDisposable
{
    public static readonly TimeSpan DefaultLockAcquisitionTimeout = TimeSpan.FromSeconds(30);
    public static readonly TimeSpan DefaultLockMaxAge = TimeSpan.FromHours(2);
    public const string LockPrefix = "";    // namespace lock keys if desired

    private readonly IRedisClient _client; // note that the held reference to client means lock scope should always be within client scope

    private readonly string _lockKey;
    private string _lockValue;

    /// <summary>
    /// Acquires a distributed lock on the specified key.
    /// </summary>
    /// <param name="redisClient">The client to use to acquire the lock.</param>
    /// <param name="key">The key to acquire the lock on.</param>
    /// <param name="acquisitionTimeOut">The amount of time to wait while trying to acquire the lock. Defaults to <see cref="DefaultLockAcquisitionTimeout"/>.</param>
    /// <param name="lockMaxAge">After this amount of time expires, the lock will be invalidated and other clients will be allowed to establish a new lock on the same key. Deafults to <see cref="DefaultLockMaxAge"/>.</param>
    public RedisDlmLock(IRedisClient redisClient, string key, TimeSpan? acquisitionTimeOut = null, TimeSpan? lockMaxAge = null)
    {
        _client = redisClient;
        _lockKey = LockPrefix + key;

        ExecExtensions.RetryUntilTrue(
            () =>
            {
                //Modified from ServiceStack.Redis.RedisLock
                //This pattern is taken from the redis command for SETNX http://redis.io/commands/setnx
                //Calculate a unix time for when the lock should expire

                lockMaxAge = lockMaxAge ?? DefaultLockMaxAge; // hold the lock for the default amount of time if not specified.
                DateTime expireTime = DateTime.UtcNow.Add(lockMaxAge.Value);
                _lockValue = (expireTime.ToUnixTimeMs() + 1).ToString(CultureInfo.InvariantCulture);

                //Try to set the lock, if it does not exist this will succeed and the lock is obtained
                var nx = redisClient.SetEntryIfNotExists(_lockKey, _lockValue);
                if (nx)
                    return true;

                //If we've gotten here then a key for the lock is present. This could be because the lock is
                //correctly acquired or it could be because a client that had acquired the lock crashed (or didn't release it properly).
                //Therefore we need to get the value of the lock to see when it should expire
                string existingLockValue = redisClient.Get<string>(_lockKey);
                long lockExpireTime;
                if (!long.TryParse(existingLockValue, out lockExpireTime))
                    return false;
                //If the expire time is greater than the current time then we can't let the lock go yet
                if (lockExpireTime > DateTime.UtcNow.ToUnixTimeMs())
                    return false;

                //If the expire time is less than the current time then it wasn't released properly and we can attempt to 
                //acquire the lock. This is done by setting the lock to our timeout string AND checking to make sure
                //that what is returned is the old timeout string in order to account for a possible race condition.
                return redisClient.GetAndSetEntry(_lockKey, _lockValue) == existingLockValue;
            },
            acquisitionTimeOut ?? DefaultLockAcquisitionTimeout // loop attempting to get the lock for this amount of time.
            );
    }

    public override string ToString()
    {
        return String.Format("RedisDlmLock:{0}:{1}", _lockKey, _lockValue);
    }

    public void Dispose()
    {
        try
        {
            // only remove the entry if it still contains OUR value
            _client.Watch(_lockKey);
            var currentValue = _client.Get<string>(_lockKey);
            if (currentValue != _lockValue)
            {
                _client.UnWatch();
                return;
            }

            using (var tx = _client.CreateTransaction())
            {
                tx.QueueCommand(r => r.Remove(_lockKey));
                tx.Commit();
            }
        }
        catch (Exception ex)
        {
            // log but don't throw
        }
    }
}

为了尽可能地简化使用，我还公开了IRedisClient的一些扩展方法，以平行AcquireLock方法，沿着以下几行：

internal static class RedisClientLockExtensions
{
    public static IDisposable AcquireDlmLock(this IRedisClient client, string key, TimeSpan timeOut, TimeSpan maxAge)
    {
        return new RedisDlmLock(client, key, timeOut, maxAge);
    }
}

Answer 2

您的问题突出显示ServiceStack.Redis中分布式锁定的行为，如果超出指定的超时，超时客户端会将其视为无效锁定，并将尝试自动恢复锁定。如果没有自动恢复行为，崩溃的客户端将永远不会释放锁定，并且不允许等待该锁定的其他操作。

AcquireLock的锁定行为是encapsulated in the RedisLock class：

public IDisposable AcquireLock(string key, TimeSpan timeOut)
{
    return new RedisLock(this, key, timeOut);
}

您可以复制并修改以适合您喜欢的行为：

using (new MyRedisLock(client, key, timeout))
{
    //...
}

使用ServiceStack Redis进行分布式锁定的互斥锁违规

2 个答案: