创建具有给定分布的无限迭代器

时间:2009-08-19 13:48:13

标签: java collections random statistics iterator

给定java.util.Collection创建无限java.util.Iterator的最简单方法是什么,它返回这些元素,使它们根据给定的分布(org.apache.commons.math.distribution)显示?

2 个答案:

答案 0 :(得分:4)

List<Object> l = new ArrayList<Object>(coll);
Iterator<Object> i = new Iterator<Object>() {
    public boolean hasNext() { return true; }

    public Object next() {
        return coll.get(distribution.nextInt(0, l.size());
    }
} 

您的问题是如何转换apache库中的Distribution类来实现nextInt方法。我不得不说,对我来说,如何从Distribution界面实际执行此操作并不明显。

我能想到的一种(略带垃圾)方法是使用实​​际分布定义的概率生成EmpiricalDistribution(在random包)数据集中,然后使用经验分析作为distribution(上图)

答案 1 :(得分:1)

高斯分布的解决方案

import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.SortedMap;
import java.util.Map.Entry;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.ImmutableSortedMap.Builder;

/**
 * Endless sequence with gaussian distribution.
 * 
 * @param <T> the type of the elements
 * @author Michael Locher
 */
public class GaussianSequence<T> implements Iterable<T>, Iterator<T> {

  private static final int HISTOGRAMM_SAMPLES = 50000;

  private static final int HISTOGRAMM_ELEMENTS = 100;

  private static final int HISTOGRAMM_LENGTH = 80;

  private static final double DEFAULT_CUTOFF = 4.0;

  private final List<T> elements;
  private final int maxIndex;
  private final Random rnd;
  private final double scaling;
  private final double halfCount;

  /**
   * Creates this.
   * @param rnd the source of randomness to use
   * @param elements the elements to deliver
   */
  public GaussianSequence(final Random rnd, final Collection<T> elements) {
    this(rnd, DEFAULT_CUTOFF, elements);
  }

  private GaussianSequence(final Random rnd, final double tailCutOff, final Collection<T> elements) {
    super();
    this.rnd = rnd;
    this.elements = new ArrayList<T>(elements);
    if (this.elements.isEmpty()) {
      throw new IllegalArgumentException("no elements provided");
    }
    this.maxIndex = this.elements.size() - 1;
    this.halfCount = this.elements.size() / 2.0;
    this.scaling = this.halfCount / tailCutOff;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public Iterator<T> iterator() {
    return this;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public boolean hasNext() {
    return true;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void remove() {
    throw new UnsupportedOperationException();
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public T next() {
    return this.elements.get(sanitizeIndex(determineNextIndex()));
  }

  private int determineNextIndex() {
    final double z = this.rnd.nextGaussian();
    return (int) (this.halfCount + (this.scaling * z));
  }

  private int sanitizeIndex(final int index) {
    if (index < 0) {
      return 0;
    }
    if (index > this.maxIndex) {
      return this.maxIndex;
    }
    return index;
  }

  /**
   * Prints a histogramm to stdout.
   * @param args not used
   */
  public static void main(final String[] args) {
    final PrintWriter out = new PrintWriter(new OutputStreamWriter(System.out, Charset.forName("UTF-8")), true);
    plotHistogramm(new Random(), out);
  }

  private static void plotHistogramm(final Random rnd, final PrintWriter out) {
    // build elements
    final Multimap<Integer, Integer> results = ArrayListMultimap.create();
    final List<Integer> elements = Lists.newArrayListWithCapacity(HISTOGRAMM_ELEMENTS);
    for (int i = 1; i < HISTOGRAMM_ELEMENTS; i++) {
      elements.add(i);
    }
    // sample sequence
    final Iterator<Integer> randomSeq = new GaussianSequence<Integer>(rnd, elements);
    for (int j = 0; j < HISTOGRAMM_SAMPLES; j++) {
      final Integer sampled = randomSeq.next();
      results.put(sampled, sampled);
    }
    // count and sort results
    final Builder<Integer, Integer> r = ImmutableSortedMap.naturalOrder();
    for (final Entry<Integer, Collection<Integer>> e : results.asMap().entrySet()) {
      final int count = e.getValue().size();
      r.put(e.getKey(), count);
    }
    // plot results
    final SortedMap<Integer, Integer> sortedAndCounted = r.build();
    final double histogramScale = (double) HISTOGRAMM_LENGTH / Collections.max(sortedAndCounted.values());
    for (final Entry<Integer, Integer> e : sortedAndCounted.entrySet()) {
      out.format("%3d [%4d]", e.getKey(), e.getValue());
      final StringBuilder c = new StringBuilder();
      final int lineLength = (int) (histogramScale * e.getValue());
      for (int i = 0; i < lineLength; i++) {
        c.append('*');
      }
      out.println(c);
    }
  }

}