任务失败或完成后,如何将其重新提交给 ExecutorService

时间:2017-11-03 11:37:58

标签: java multithreading concurrency parallel-processing

我有一个线程,每次作业完成或由于错误而崩溃时都必须重新提交。

不得阻塞主线程。

必要时必须取消线程。

什么是最佳解决方案?

/**
 * Proof of concept from the question: starts a long-running job on a single-thread executor and
 * returns to the caller right away. Restarting the job once it ends is deliberately left open.
 */
public class POC {

    public static void main(String[] args) {
        System.out.println("Init");
        SomeService service = new SomeService();
        service.waitForEvents();
        System.out.println("main not blocked and do other stuff");
    }

    /** Owns the executor that runs the event-waiting job. */
    static class SomeService {

        /** Submits one {@link AlwaysRunningJob} and returns without waiting for it. */
        public void waitForEvents() {
            ExecutorService singleWorker = Executors.newSingleThreadExecutor();
            Runnable job = new AlwaysRunningJob("importantParamForJob");
            singleWorker.submit(job);
            // MISSING LOGIC (the open question):
            //   when AlwaysRunningJob hits an error or finishes, wait 2 seconds, then create a
            //   new AlwaysRunningJob("importantParamForJob") and submit it to the executor again.
        }

        /** Job that keeps looping until the thread running it is interrupted. */
        class AlwaysRunningJob implements Runnable {
            String importantParamForJob;

            public AlwaysRunningJob(String importantParamForJob) {
                this.importantParamForJob = importantParamForJob;
            }

            @Override
            public void run() {
                Thread self = Thread.currentThread();
                self.setName("AlwaysRunningJob Job");
                for (;;) {
                    if (self.isInterrupted()) {
                        break;
                    }
                    // Stand-in for "waiting for events": an interrupt while sleeping is treated
                    // as a failure and ends the job without the final message.
                    try {
                        Thread.sleep(5000);
                    } catch (InterruptedException e) {
                        return;
                    }
                    System.out.println("keep working on" + importantParamForJob);
                }
                System.out.println("Finished run!");
            }
        }
    }

}

4 个答案:

答案 0 :(得分:3)

我会扩展 ThreadPoolExecutor 并重写 protected void afterExecute(Runnable r, Throwable t)。该方法的默认实现什么都不做,但你可以这样做:

/**
 * A {@link ThreadPoolExecutor} that puts a task back on the queue when an attempt fails, allowing
 * up to {@code maxRetries} additional attempts per task.
 *
 * <p>This variant is meant for fire-and-forget work handed over with {@link #execute(Runnable)}.
 * Tasks passed to {@code submit(...)} are wrapped in a standard {@code FutureTask}, which does
 * nothing when run a second time, so re-queuing such a task does not repeat the work.
 */
public class RetryExecutor extends ThreadPoolExecutor {

    private final long maxRetries;
    /** Retries already used per task; the entry is dropped once the task is settled. */
    private final Map<Runnable, Integer> retries = new ConcurrentHashMap<>();

    /**
     * @param maxRetries how many times a failed task is re-queued after its first attempt; the
     *     remaining parameters are passed unchanged to {@link ThreadPoolExecutor}
     */
    public RetryExecutor(int corePoolSize, int maximumPoolSize, long maxRetries,
                     long keepAliveTime, TimeUnit unit, BlockingQueue<Runnable> workQueue) {
        super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue);
        this.maxRetries = maxRetries;
    }

    /**
     * Re-queues {@code r} when the attempt failed and retries are left; otherwise forgets its
     * retry count. Never throws, so the task's own exception is the one that reaches the worker
     * thread's uncaught-exception handler.
     */
    @Override
    protected void afterExecute(Runnable r, Throwable t) {
        super.afterExecute(r, t);
        if (hasFailed(r, t) && shouldRetry(r) && retry(r)) {
            return;
        }
        retries.remove(r);
    }

    /**
     * Tells whether the attempt ended in failure: either the worker saw an exception, or
     * {@code r} is a completed {@link Future} holding one. Cancelled futures do not count as
     * failed, because retrying them would undo the cancellation.
     */
    private static boolean hasFailed(Runnable r, Throwable t) {
        if (t != null) {
            return true;
        }
        if (!(r instanceof Future<?>)) {
            return false;
        }
        Future<?> future = (Future<?>) r;
        // Only inspect finished futures: get() on an unfinished one would block this worker.
        if (!future.isDone() || future.isCancelled()) {
            return false;
        }
        try {
            future.get();
            return false;
        } catch (ExecutionException e) {
            return true;
        } catch (CancellationException e) {
            return false;
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /** A task may be retried while the pool accepts work and its retry budget is not used up. */
    private boolean shouldRetry(Runnable r) {
        return !isShutdown() && retries.getOrDefault(r, 0) < maxRetries;
    }

    /**
     * Counts one more retry for {@code r} and queues it again.
     *
     * @return {@code false} when the pool refused the task (shut down meanwhile or saturated)
     */
    private boolean retry(Runnable r) {
        retries.merge(r, 1, Integer::sum); // atomic increment
        try {
            execute(r);
            return true;
        } catch (java.util.concurrent.RejectedExecutionException rejected) {
            return false;
        }
    }

}

但是,这样一来返回的 Future 就没有什么用处了,它更像是一种“即发即弃”(fire and forget)的用法。

正如 @NikitataGorbatchevski 所指出的,使用 Callable 时它将无法工作。所以下面是一个同时支持 Runnable 和 Callable 的版本。实际上,FutureTask 在发生错误后确实无法再次运行(我复用了 FutureTask 中等待任务结束的代码,对这部分不太有把握):

/**
 * A {@link ThreadPoolExecutor} that re-runs a task when an attempt fails, allowing up to
 * {@code maxRetries} additional attempts per task. Works for {@code execute(Runnable)} as well as
 * for {@code submit(Runnable)} / {@code submit(Callable)}.
 *
 * <p>Futures returned by {@code submit(...)} complete exactly once, with the <em>final</em>
 * outcome: the value of the first successful attempt, the failure of the last attempt once the
 * retries are used up (or the pool refuses to take the task back), or cancellation. A cancelled
 * task is never retried.
 */
public class RetryExecutor extends ThreadPoolExecutor {
    private final long maxRetries;
    /** Retries already used per task; the entry is dropped once the task is settled. */
    private final Map<Runnable, Integer> retries = new ConcurrentHashMap<>();

    /**
     * @param maxRetries how many times a failed task is re-queued after its first attempt; the
     *     remaining parameters are passed unchanged to {@link ThreadPoolExecutor}
     */
    public RetryExecutor(int corePoolSize, int maximumPoolSize, long maxRetries,
                         long keepAliveTime, TimeUnit unit, BlockingQueue<Runnable> workQueue) {
        super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue);
        this.maxRetries = maxRetries;
    }

    @Override
    protected <T> RunnableFuture<T> newTaskFor(Runnable runnable, T value) {
        return new RetryFutureTask<>(runnable, value);
    }

    @Override
    protected <T> RunnableFuture<T> newTaskFor(Callable<T> callable) {
        return new RetryFutureTask<>(callable);
    }

    /**
     * Decides after every attempt whether the task is re-queued or settled. Never throws, so a
     * rejected re-queue cannot kill the worker or leave a future incomplete.
     */
    @Override
    protected void afterExecute(Runnable r, Throwable t) {
        super.afterExecute(r, t);
        if (r instanceof RetryFutureTask<?>) {
            settle((RetryFutureTask<?>) r);
        } else if (!(hasFailed(r, t) && shouldRetry(r) && retry(r))) {
            retries.remove(r);
        }
    }

    /** Re-queues a failed {@link RetryFutureTask}, or publishes its failure when no retry is possible. */
    private void settle(RetryFutureTask<?> task) {
        Throwable failure = task.lastFailure();
        if (task.isDone() || failure == null) {
            // Succeeded, was cancelled, or this call did not own the attempt: nothing to retry.
            retries.remove(task);
            return;
        }
        if (shouldRetry(task) && retry(task)) {
            return;
        }
        retries.remove(task);
        task.fail(failure);
    }

    /**
     * Failure check for tasks that are not {@link RetryFutureTask}s: either the worker saw an
     * exception, or {@code r} is a completed, non-cancelled {@link Future} holding one.
     */
    private static boolean hasFailed(Runnable r, Throwable t) {
        if (t != null) {
            return true;
        }
        if (!(r instanceof Future<?>)) {
            return false;
        }
        Future<?> future = (Future<?>) r;
        // Only inspect finished futures: get() on an unfinished one would block this worker.
        if (!future.isDone() || future.isCancelled()) {
            return false;
        }
        try {
            future.get();
            return false;
        } catch (ExecutionException e) {
            return true;
        } catch (CancellationException e) {
            return false;
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /** A task may be retried while the pool accepts work and its retry budget is not used up. */
    private boolean shouldRetry(Runnable r) {
        return !isShutdown() && retries.getOrDefault(r, 0) < maxRetries;
    }

    /**
     * Counts one more retry for {@code r} and queues it again.
     *
     * @return {@code false} when the pool refused the task (shut down meanwhile or saturated)
     */
    private boolean retry(Runnable r) {
        retries.merge(r, 1, Integer::sum); // atomic increment
        try {
            execute(r);
            return true;
        } catch (java.util.concurrent.RejectedExecutionException rejected) {
            return false;
        }
    }

    /**
     * A future whose computation can be run more than once. A failed attempt is only remembered
     * (see {@link #lastFailure()}); the future itself completes on success, on cancellation, or
     * when the executor calls {@link #fail(Throwable)} after giving up.
     */
    private static final class RetryFutureTask<V> implements RunnableFuture<V> {
        private final Callable<V> callable;
        /** Holds the final outcome and does all waiting/wake-up for {@code get}. */
        private final java.util.concurrent.CompletableFuture<V> outcome =
                new java.util.concurrent.CompletableFuture<>();
        /** Thread currently inside {@link #run()}, or {@code null}. */
        private final AtomicReference<Thread> runner = new AtomicReference<>();
        /** Failure of the most recent attempt, {@code null} if it has not failed. */
        private volatile Throwable lastFailure;

        public RetryFutureTask(Runnable runnable, V result) {
            this.callable = Executors.callable(runnable, result);
        }

        public RetryFutureTask(Callable<V> callable) {
            this.callable = callable;
        }

        /** Runs one attempt unless the task is already settled or another thread is running it. */
        @Override
        public void run() {
            if (outcome.isDone() || !runner.compareAndSet(null, Thread.currentThread())) {
                return;
            }
            try {
                lastFailure = null;
                outcome.complete(callable.call());
            } catch (Throwable failure) {
                // Like FutureTask, capture everything; the executor decides about a retry.
                lastFailure = failure;
            } finally {
                runner.set(null); // safe: only the thread that won the CAS gets here
            }
        }

        /**
         * Cancels the task if it has not completed yet. With {@code mayInterruptIfRunning} the
         * thread currently running an attempt is interrupted as well.
         */
        @Override
        public boolean cancel(boolean mayInterruptIfRunning) {
            if (outcome.isDone() || !outcome.cancel(false)) {
                return false;
            }
            if (mayInterruptIfRunning) {
                Thread active = runner.get();
                if (active != null) {
                    active.interrupt();
                }
            }
            return true;
        }

        @Override
        public boolean isCancelled() {
            return outcome.isCancelled();
        }

        @Override
        public boolean isDone() {
            return outcome.isDone();
        }

        @Override
        public V get() throws InterruptedException, ExecutionException {
            return outcome.get();
        }

        @Override
        public V get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException {
            return outcome.get(timeout, unit);
        }

        /** Failure of the most recent attempt, or {@code null}. */
        Throwable lastFailure() {
            return lastFailure;
        }

        /** Completes the future with {@code cause}; called once no further retry will happen. */
        void fail(Throwable cause) {
            outcome.completeExceptionally(cause);
        }
    }
}

答案 1 :(得分:1)

我建议采用类似下面的做法:

/**
 * Starts the repeating worker in the background, lets the main thread do its own work for a
 * while, then stops the worker by interrupting it.
 */
public static void main(String[] args) throws InterruptedException {
    ExecutorService background = Executors.newSingleThreadExecutor();
    Runnable repeatingJob = new RepeatableWorker();
    background.submit(repeatingJob);

    System.out.println("Main does other work");
    Thread.sleep(3300);
    System.out.println("Main work was finished, time to exit");

    // shutdownNow() interrupts the thread that is running the worker;
    // afterwards wait at most one second for it to wind down.
    background.shutdownNow();
    background.awaitTermination(1, TimeUnit.SECONDS);
  }

  /**
   * Re-runs the parent {@link Worker} task over and over on the same thread, pausing briefly
   * between runs, until that thread is interrupted.
   */
  public static class RepeatableWorker extends Worker {
    @Override
    public void run() {
      Thread self = Thread.currentThread();
      while (!self.isInterrupted()) {
        Exception failure = attempt();

        if (self.isInterrupted()) {
          // The last task was interrupted: leave the loop instead of reporting or pausing.
          System.out.println("worker was interrupted");
          break;
        }

        System.out.println(failure == null
            ? "worker task was finished normally"
            : "worker task was finished due to error " + failure.getMessage());

        pauseBeforeRestart();
      }
      System.out.println("repeatable task was finished");
    }

    /**
     * Runs the parent task once on the current thread and returns the exception it threw, or
     * {@code null} if it returned normally. (It can make sense to run the task on a separate
     * thread instead, e.g. to give it some time to complete before interrupting it from here.)
     */
    private Exception attempt() {
      try {
        super.run();
        return null;
      } catch (Exception e) {
        return e;
      }
    }

    /** Waits a moment before the next run; an interrupt is kept so the loop can end. */
    private void pauseBeforeRestart() {
      try {
        Thread.sleep(100);
      } catch (InterruptedException e) {
        System.out.println("Repeatable worker was interrupted");
        // Restore the interrupted status so the loop condition sees it.
        Thread.currentThread().interrupt();
      }
    }
  }

  /** Simulated unit of work: sleeps for half a second, then fails at random. */
  public static class Worker implements Runnable {
    @Override
    public void run() {
      // emulate some work
      try {
        Thread.sleep(500L);
      } catch (InterruptedException interrupted) {
        // Keep the interrupted status visible to the caller and stop here.
        Thread.currentThread().interrupt();
        return;
      }
      boolean fails = new Random().nextBoolean();
      if (fails) {
        throw new RuntimeException("ouch");
      }
    }
  }

答案 2 :(得分:1)

如果您想继续使用 ExecutorService,我会把 Runnable 换成 Callable。提交 Callable 后会得到一个 Future,通过它可以获取 Callable 中作业代码的结果(或异常)。

因此,您可以检测作业是否已成功执行,并在需要时重新提交作业:

/**
 * Runs the long-lived job as a {@link Callable} so that its outcome can be observed through the
 * {@link Future} returned by the executor.
 */
static class SomeService {

    /**
     * Submits the job and blocks the calling thread until the job has ended, has failed, or the
     * calling thread is interrupted. The executor is always shut down before returning, so its
     * worker thread cannot outlive this call; if the caller is interrupted, its interrupted
     * status is restored and the job is stopped.
     */
    public void waitForEvents() {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        try {
            Future<Void> future = executor.submit(new AlwaysRunningJob("importantParamForJob"));
            future.get(); // this call waits until the Callable has finished (or failed)
            // No error -> do something else
        } catch (ExecutionException e) {
            // Error -> resubmit
            e.printStackTrace();
        } catch (InterruptedException e) {
            // The caller was asked to stop waiting: keep that signal visible to it.
            Thread.currentThread().interrupt();
        } finally {
            // Interrupts the job if it is still running and lets the worker thread exit.
            executor.shutdownNow();
        }
    }

    /** Job that keeps looping until the thread running it is interrupted. */
    class AlwaysRunningJob implements Callable<Void> {
        String importantParamForJob;

        public AlwaysRunningJob(String importantParamForJob) {
            this.importantParamForJob = importantParamForJob;
        }

        /**
         * Loops until the running thread is interrupted.
         *
         * @return always {@code null}
         */
        @Override
        public Void call() throws Exception {
            Thread.currentThread().setName("AlwaysRunningJob Job");
            while (!Thread.currentThread().isInterrupted()) {
                // keep waiting for events until
                // exception is thrown. or something bad happened
                try {
                    Thread.sleep(5000);
                    System.out.println("keep working on" + importantParamForJob);
                } catch (InterruptedException e) {
                    // exit if it failed
                    return null;
                }
            }
            System.out.println("Finished run!");
            return null;
        }
    }
}

答案 3 :(得分:1)

为完整起见,这里给出一个简单的解决方案(附有大量注释,希望对你有用),它既不需要扩展执行器,也不需要自己编写线程/睡眠处理代码(这类代码很容易出错……)。

import java.util.Random;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

import static java.lang.System.currentTimeMillis;
import static java.util.concurrent.Executors.newSingleThreadScheduledExecutor;
import static java.util.concurrent.TimeUnit.SECONDS;

/**
 * This is an example for task re-scheduling using {@link ScheduledExecutorService}. If you copy this for actual use make
 * <em>sure to manage the lifecycle</em> of the {@link RetryingJobExecutor}. It needs to be shut down when your app shuts down.
 * Also consider the thread pool size of the executor service.
 */
class Scratch {
    // We'll just use this to simulate job behavior.
    private static final Random RANDOM = new Random(currentTimeMillis());

    public static void main(String[] args) throws ExecutionException, InterruptedException {

        RetryingJobExecutor retryingExecutor = new RetryingJobExecutor();
        try {
            Callable<String> potentiallyFailingJob = () -> {
                if (shouldFail()) {
                    System.out.println("Job fails");
                    throw new RuntimeException();
                }
                System.out.println("Job succeeds");
                return "result";
            };

            Future<String> result = retryingExecutor.execute(potentiallyFailingJob);

            System.out.println("Job scheduled, awaiting result...");
            System.out.println("Job completed and returned: " + result.get());
        } finally {
            // Shut down even when get() throws; otherwise the executor thread keeps the JVM alive.
            retryingExecutor.shutdown();
        }
    }

    /**
     * Runs a task again and again, with a fixed delay between attempts, until it succeeds, the
     * returned future is cancelled, or the executor is shut down.
     */
    public static class RetryingJobExecutor {
        // We are using a scheduled executor so we can define an arbitrary re-execution delay
        // without having to care for Thread.sleep + InterruptedExceptions etc.
        // Since this service is single-threaded, only one job will be executed at a time (this is not relevant
        // to this example, though).
        private final ScheduledExecutorService executorService = newSingleThreadScheduledExecutor();

        // Futures handed out that have not completed yet; shutdown() cancels them so that
        // nobody stays blocked in get() on a job that will never run again.
        private final Set<CompletableFuture<?>> inFlight = ConcurrentHashMap.newKeySet();

        private final long retryDelay;
        private final TimeUnit retryUnit;

        /** Creates an executor that waits one second between attempts. */
        public RetryingJobExecutor() {
            this(1, SECONDS);
        }

        /**
         * @param retryDelay time to wait after a failed attempt before the next one; not negative
         * @param retryUnit unit of {@code retryDelay}
         * @throws IllegalArgumentException if {@code retryDelay} is negative
         * @throws NullPointerException if {@code retryUnit} is {@code null}
         */
        public RetryingJobExecutor(long retryDelay, TimeUnit retryUnit) {
            if (retryDelay < 0) {
                throw new IllegalArgumentException("retryDelay must not be negative: " + retryDelay);
            }
            if (retryUnit == null) {
                throw new NullPointerException("retryUnit");
            }
            this.retryDelay = retryDelay;
            this.retryUnit = retryUnit;
        }

        /**
         * Submits the task for asap execution and retries it until it succeeds.
         *
         * @return a future that completes with the task's result once an attempt succeeds; it is
         *     cancelled if this executor is shut down first, and cancelling it stops the retries
         * @throws RejectedExecutionException if this executor has already been shut down
         */
        public <K> Future<K> execute(Callable<K> task) {
            // We will use a CompletableFuture to provide the result when
            // the job eventually succeeds.
            final CompletableFuture<K> f = new CompletableFuture<>();
            inFlight.add(f);
            f.whenComplete((value, failure) -> inFlight.remove(f));
            try {
                executorService.submit(() -> runUntilSuccess(task, f));
            } catch (RejectedExecutionException e) {
                f.completeExceptionally(e); // also unregisters f
                throw e;
            }
            return f;
        }

        private <K> void runUntilSuccess(Callable<K> task, CompletableFuture<K> f) {
            if (f.isDone()) {
                // Cancelled by the caller or by shutdown(): stop retrying.
                return;
            }
            try {
                f.complete(task.call());
            } catch (Exception e) {
                // This is where we decide whether to re-schedule a job.

                // It is important to consider what to catch here. For instance, catching "Exception"
                // will also catch "InterruptedException". However, if you have a task that might be interrupted,
                // you might actually want to terminate the task, since interruption may signal that a shutdown is in progress.
                // Catching Exception also does not catch all Throwables - Errors (e.g. NoSuchMethodError) are not caught.
                // However, these usually represent unresolvable programming errors.
                if (f.isDone()) {
                    return;
                }

                // For simplicity we will simply re-submit the task with a fixed delay.
                // Note that this does not mean the job will be executed exactly after the delay.
                // The task will be executed when the delay has passed and an executor thread becomes available.
                try {
                    executorService.schedule(() -> runUntilSuccess(task, f), retryDelay, retryUnit);
                } catch (RejectedExecutionException rejected) {
                    // Shut down in the meantime: report it instead of leaving the future pending.
                    rejected.addSuppressed(e);
                    f.completeExceptionally(rejected);
                }
            }
        }

        /** Stops the executor thread and cancels every future that has not completed yet. */
        public void shutdown() {
            // Never forget that executor services create and run threads, and *must* be shut down for an app to terminate properly.
            executorService.shutdownNow();
            // Retries that were still waiting for their delay are discarded by shutdownNow().
            for (CompletableFuture<?> pending : inFlight) {
                pending.cancel(false);
            }
        }
    }

    private static boolean shouldFail() {
        return RANDOM.nextInt(10) < 8;
    }
}