GridGain:获取已存在的条目时间歇性失败

时间:2014-11-20 08:57:01

标签: gridgain

我们一直在将GridGain集成到一个现有的应用程序中。在标准开发和单元测试环境中,一切似乎都运行良好;但是当我们针对整个应用程序运行一组自动化回归/压力测试时,会出现一些错误,原因显然是应用程序从某些GridGain查找中收到了意外的值(null)。我们现在已经创建了一个简单的复现用例,相信它表现出的是同一个问题,希望能在这里得到一些帮助,以理解我们所看到的行为。

这个简单的测试用例首先用键1、2和3的三个条目初始化一个缓存。程序的两个实例(节点)同时启动。每个实例依次对各条目执行递增:先获取当前值,再将递增后的结果存回同一个键下。修改发生在使用可重复读(REPEATABLE_READ)隔离级别的悲观事务中。此操作在循环内重复多次。

尽管这三个条目从未被删除,但对cache.get(key)的调用偶尔会返回空值(null)。问题不是由条目被逐出引起的(在测试结束时,结果值加上在两个节点上观察到的错误次数,正好等于预期的递增次数)。由于某种原因,对cache.get(key)的调用间歇性地无法获取已存储的值。

此问题仅在我们运行配置了以下任一项的网格时出现:

  • CacheMode.REPLICATED
  • CacheMode.PARTITIONED +备份数> 0

当我们以CacheMode.PARTITIONED + 备份数 = 0 的配置运行完整的应用程序和测试用例时,问题不会发生。

应注意,网格使用了以下设置:

  • GridCacheAtomicityMode.TRANSACTIONAL
  • GridCacheWriteSynchronizationMode.FULL_SYNC
  • GridCacheMemoryMode.OFFHEAP_TIERED

测试用例的代码:

import java.util.Arrays;
import java.util.List;

import org.gridgain.grid.Grid;
import org.gridgain.grid.GridConfiguration;
import org.gridgain.grid.GridException;
import org.gridgain.grid.GridGain;
import org.gridgain.grid.cache.GridCache;
import org.gridgain.grid.cache.GridCacheAtomicityMode;
import org.gridgain.grid.cache.GridCacheConfiguration;
import org.gridgain.grid.cache.GridCacheMemoryMode;
import org.gridgain.grid.cache.GridCacheMode;
import org.gridgain.grid.cache.GridCacheTx;
import org.gridgain.grid.cache.GridCacheTxConcurrency;
import org.gridgain.grid.cache.GridCacheTxIsolation;
import org.gridgain.grid.cache.GridCacheWriteSynchronizationMode;
import org.gridgain.grid.logger.java.GridJavaLogger;
import org.gridgain.grid.spi.GridSpiException;
import org.gridgain.grid.spi.communication.tcp.GridTcpCommunicationSpi;
import org.gridgain.grid.spi.discovery.GridDiscoverySpi;
import org.gridgain.grid.spi.discovery.tcp.GridTcpDiscoverySpi;
import org.gridgain.grid.spi.discovery.tcp.ipfinder.vm.GridTcpDiscoveryVmIpFinder;
import org.gridgain.grid.spi.indexing.GridNoopIndexingSpi;

/**
 * Reproduction case for intermittent {@code null} results from {@code cache.get(key)}.
 *
 * <p>Usage: start two instances of the program at the same time. Two cases:
 * <ul>
 *   <li>{@code PARTITIONED_CASE = true}: {@code GridCacheMode.PARTITIONED} with one backup</li>
 *   <li>{@code PARTITIONED_CASE = false}: {@code GridCacheMode.REPLICATED}</li>
 * </ul>
 *
 * <p>Issue: why do the {@code cache.get(key)} calls inside the repeated pessimistic write
 * transaction sometimes return {@code null} for a key whose value is known to have been set?
 *
 * <p>The problem presents for both {@code GridCacheMode.REPLICATED} and
 * {@code GridCacheMode.PARTITIONED} with backups = 1.
 */
public class GridGainSuddenNullValue {

    /** Selects the cache mode under test: PARTITIONED with one backup when true, REPLICATED otherwise. */
    private static final boolean PARTITIONED_CASE = true;

    /** Name of the grid started by {@link #main(String[])} and looked up again by the test. */
    private static final String GRID_NAME = "TEST";

    /** Name of the cache holding the three counters. */
    private static final String CACHE_NAME = "TESTCACHE";

    /** Number of increment transactions attempted by each instance. */
    private static final int ROUNDS = 100000;

    /** Pause (milliseconds) taken before the increment loop and again before returning from the test. */
    private static final long PAUSE_MILLIS = 10000;

    /** Cache keys of the three counters. */
    private static final Integer A = Integer.valueOf(1);
    private static final Integer B = Integer.valueOf(2);
    private static final Integer C = Integer.valueOf(3);

    /**
     * Starts a grid node, runs the test and stops the node again.
     *
     * <p>The node is stopped in a {@code finally} block so that a failure inside the test
     * (for example a {@code GridException} from the seeding transaction, or an interrupt
     * during one of the pauses) does not leave the started node running.
     *
     * @param args ignored
     * @throws Exception if the test fails or is interrupted
     */
    public static void main(String[] args) throws Exception {
        GridConfiguration configuration = getGridConfiguration();

        try {
            GridGain.start(configuration);
        } catch (GridException e) {
            throw new RuntimeException(e);
        }

        try {
            runTest();
        } finally {
            GridGain.stopAll(true);
        }
    }

    /**
     * Builds the grid configuration used by the reproduction: a grid named {@code "TEST"}
     * with a single transactional, FULL_SYNC, OFFHEAP_TIERED cache named {@code "TESTCACHE"}.
     * The cache mode is chosen by {@code PARTITIONED_CASE}.
     *
     * @return the grid configuration, including the cache configuration
     */
    public static GridConfiguration getGridConfiguration() {
        GridConfiguration configuration = new GridConfiguration();
        configuration.setGridName(GRID_NAME);
        configuration.setGridGainHome("/tmp");
        configuration.setGridLogger(new GridJavaLogger(false));
        configuration.setIndexingSpi(new GridNoopIndexingSpi());
        configuration.setDiscoverySpi(configureSystemServersBasedDiscovery());

        GridTcpCommunicationSpi commSpi = new GridTcpCommunicationSpi();
        commSpi.setSharedMemoryPort(-1); // Disables shared memory communication
        configuration.setCommunicationSpi(commSpi);

        GridCacheConfiguration cacheConfiguration = new GridCacheConfiguration();
        cacheConfiguration.setName(CACHE_NAME);
        // NOTE(review): 0 presumably means "no off-heap size limit" - confirm against the
        // GridCacheConfiguration documentation.
        cacheConfiguration.setOffHeapMaxMemory(0);
        cacheConfiguration.setMemoryMode(GridCacheMemoryMode.OFFHEAP_TIERED);

        // These are only defaults; every transaction in runTest() explicitly requests
        // PESSIMISTIC / REPEATABLE_READ when it is started.
        cacheConfiguration.setDefaultTxIsolation(GridCacheTxIsolation.READ_COMMITTED);
        cacheConfiguration.setDefaultTxConcurrency(GridCacheTxConcurrency.OPTIMISTIC);
        cacheConfiguration.setAtomicityMode(GridCacheAtomicityMode.TRANSACTIONAL);

        if (PARTITIONED_CASE) {
            cacheConfiguration.setCacheMode(GridCacheMode.PARTITIONED);
            cacheConfiguration.setBackups(1);
        } else {
            cacheConfiguration.setCacheMode(GridCacheMode.REPLICATED);
        }

        cacheConfiguration.setWriteSynchronizationMode(GridCacheWriteSynchronizationMode.FULL_SYNC);

        System.out.println("CacheMode: " + cacheConfiguration.getCacheMode());
        System.out.println("WriteSyncMode: " + cacheConfiguration.getWriteSynchronizationMode());

        configuration.setCacheConfiguration(cacheConfiguration);
        return configuration;
    }

    /**
     * Creates a TCP discovery SPI whose static IP finder lists only {@code "localhost"}.
     *
     * @return the configured discovery SPI
     */
    private static GridDiscoverySpi configureSystemServersBasedDiscovery() {
        List<String> serverNames = Arrays.asList("localhost");
        GridTcpDiscoveryVmIpFinder ipFinder = new GridTcpDiscoveryVmIpFinder();

        try {
            ipFinder.setAddresses(serverNames);
        } catch (GridSpiException e) {
            throw new RuntimeException(e);
        }

        GridTcpDiscoverySpi tcpDiscovery = new GridTcpDiscoverySpi();
        tcpDiscovery.setIpFinder(ipFinder);
        return tcpDiscovery;
    }

    /**
     * Seeds the three counters, increments them {@code ROUNDS} times (each round in its own
     * pessimistic REPEATABLE_READ transaction), then prints the final values and the number
     * of rounds in which at least one {@code get} returned {@code null}.
     *
     * <p>Stopping the grid is left to the caller so that it also happens when this method
     * throws.
     *
     * @throws InterruptedException if interrupted during one of the pauses
     * @throws GridException if the seeding or the final read transaction fails
     */
    private static void runTest() throws InterruptedException, GridException {
        Grid grid = GridGain.grid(GRID_NAME);
        GridCache<Integer, Integer> cache = grid.cache(CACHE_NAME);
        int errors = 0;

        // Seed the counters; putxIfAbsent leaves a value already stored under the key in place.
        try (GridCacheTx tx = cache.txStart(GridCacheTxConcurrency.PESSIMISTIC, GridCacheTxIsolation.REPEATABLE_READ)) {
            cache.putxIfAbsent(A, 0);
            cache.putxIfAbsent(B, 0);
            cache.putxIfAbsent(C, 0);
            tx.commit();
        }

        // Presumably gives the second instance time to start before the increments begin.
        System.out.println("WAITING");
        Thread.sleep(PAUSE_MILLIS);
        System.out.println("RUNNING");

        for (int i = 0; i < ROUNDS; i++) {
            try (GridCacheTx tx = cache.txStart(GridCacheTxConcurrency.PESSIMISTIC, GridCacheTxIsolation.REPEATABLE_READ)) {
                Integer a = cache.get(A);
                Integer b = cache.get(B);
                Integer c = cache.get(C);

                if (a == null || b == null || c == null) {
                    // The symptom under investigation: count it and leave the transaction
                    // without committing, so nothing is written in this round.
                    errors++;
                    System.err.println("Null after " + i + " local write rounds. Aborting tx");
                    System.err.printf("\tGets = A: %s, B: %s, C: %s\n", a, b, c);
                    continue;
                }

                a += 1;
                b += 2;
                c += 4;

                cache.put(A, a);
                cache.put(B, b);
                cache.put(C, c);

                tx.commit();
            } catch (GridException e) {
                // The round is skipped and the loop continues. Note that such a round is NOT
                // counted in 'errors', which only tracks null reads.
                e.printStackTrace();
            }
        }

        // Read-only transaction: report the final counter values; there is nothing to commit.
        try (GridCacheTx tx = cache.txStart(GridCacheTxConcurrency.PESSIMISTIC, GridCacheTxIsolation.REPEATABLE_READ)) {
            Integer a = cache.get(A);
            Integer b = cache.get(B);
            Integer c = cache.get(C);

            System.out.printf("Result = A: %s, B: %s, C: %s\n", a, b, c);
            System.out.printf("Errors = %d\n", errors);
        }

        System.out.println("DONE");

        // Keep the node up a little longer before the caller stops it - presumably so the
        // other instance can finish its own rounds against a complete topology.
        Thread.sleep(PAUSE_MILLIS);
    }

}

谢谢

0 个答案:

没有答案