从排序数据中获取中值

时间:2014-11-13 03:26:15

标签: java median

我有一组以下列格式排序(升序)的数据:

| Category | Value | S.D. |
|     A    |  0.1  | 0.1  |
|     A    |  0.2  | 0.05 |
|     A    |  1.3  | 0.08 |
|     B    |  0.1  | 0.01 |
|     B    |  0.2  | 0.08 |
|     B    |  0.6  | 0.9  |
|     B    |  0.7  | 0.01 |
|     B    |  0.9  | 0.05 |
|     B    |  1.1  | 0.6  |
|     C    |  0.5  | 0.3  |
|     C    |  0.9  | 0.04 |
|     C    |  1.0  | 0.14 |
|     C    |  2.1  | 0.1  | etc...

数据大约有300行。我已经从csv导入这些数据,并将其排序后存入一个List。例如,data.get(1).getCategory()会返回"A",data.get(2).getValue()会返回"0.2"(返回值是String,因为我正在使用一个库)。

数据可能会发生变化。我需要计算每个类别的中值,并连同类别名称一起打印每个中值。如果某个类别的条目数为偶数,则应使用两个中间值中S.D.较小的那一个。例如,对于以上数据:

"A: 0.2"
"B: 0.7"
"C: 0.9"

1 个答案:

答案 0 :(得分:1)

以下是一个只需遍历一次已排序列表的解决方案:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Prints the median value of every category found in a list of rows.
 *
 * <p>When a category holds an even number of rows there are two middle rows; the one with the
 * strictly smaller standard deviation supplies the median, and the upper one is used on a tie.
 */
public class Medians {
  /**
   * Sorts {@code rows} in place (by category, then by value) and prints one
   * {@code "<category>: <median>"} line per category to standard output, with the median
   * formatted to one decimal place.
   *
   * @param rows the rows to summarise; the list is reordered by this call
   */
  public static void printMedians(List<Row> rows) {
    if (rows.isEmpty()) {
      return;
    }
    Collections.sort(rows);
    int groupStart = 0;
    for (int i = 0; i < rows.size(); i++) {
      // A category ends at the last row or where the next row's category differs.
      boolean endsGroup = i == rows.size() - 1
          || !rows.get(i).category.equals(rows.get(i + 1).category);
      if (endsGroup) {
        System.out.printf("%s: %.1f%n",
            rows.get(groupStart).category, medianOf(rows, groupStart, i + 1));
        groupStart = i + 1;
      }
    }
  }

  /** Returns the median value of the sorted, non-empty slice {@code rows[from, to)}. */
  private static double medianOf(List<Row> rows, int from, int to) {
    int size = to - from;
    int middle = from + size / 2;
    if (size % 2 != 0) {
      return rows.get(middle).value;
    }
    Row lower = rows.get(middle - 1);
    Row upper = rows.get(middle);
    return lower.stdDev < upper.stdDev ? lower.value : upper.value;
  }

  /**
   * One data row: a category, a value and the standard deviation of that value.
   *
   * <p>Public so that callers outside this class can build the argument of
   * {@link #printMedians(List)}. Rows order by category first and by value second.
   */
  public static final class Row implements Comparable<Row> {
    private final String category;
    private final double value;
    private final double stdDev;

    /**
     * @param category the category name
     * @param value the measured value
     * @param standardDeviation the standard deviation attached to {@code value}
     */
    public Row(String category, double value, double standardDeviation) {
      this.category = category;
      this.value = value;
      this.stdDev = standardDeviation;
    }

    @Override
    public int compareTo(Row o) {
      int byCategory = category.compareTo(o.category);
      if (byCategory != 0) {
        return byCategory;
      }
      // Double.compare is a total order (NaN sorts last); comparing with == and > is not,
      // and a single NaN value would otherwise scramble the order of the finite values.
      return Double.compare(value, o.value);
    }
  }

  /** Runs the calculation on the sample data set and prints the three medians. */
  public static void main(String[] args) {
    List<Row> rows = new ArrayList<>();
    rows.add(new Row("A", 0.2, 0.05));
    rows.add(new Row("A", 1.3, 0.08));
    rows.add(new Row("A", 0.1, 0.1));

    rows.add(new Row("B", 0.6, 0.9));
    rows.add(new Row("B", 1.1, 0.6));
    rows.add(new Row("B", 0.7, 0.01));
    rows.add(new Row("B", 0.9, 0.05));
    rows.add(new Row("B", 0.1, 0.01));
    rows.add(new Row("B", 0.2, 0.08));

    rows.add(new Row("C", 0.5, 0.3));
    rows.add(new Row("C", 1.0, 0.14));
    rows.add(new Row("C", 2.1, 0.1));
    rows.add(new Row("C", 0.9, 0.04));
    printMedians(rows);
  }
}

但我更喜欢这个:

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

/**
 * Collects rows grouped by category and reports the median value of each category.
 *
 * <p>When a category holds an even number of rows there are two middle rows; the one with the
 * strictly smaller standard deviation supplies the median, and the upper one is used on a tie.
 */
public class CategoryMedianCalculator {
  private final Map<String, List<Row>> categories = new HashMap<>();

  /**
   * Records one row under the given category.
   *
   * @param category the category the row belongs to
   * @param value the measured value
   * @param stdDev the standard deviation attached to {@code value}
   */
  public void addRow(String category, double value, double stdDev) {
    List<Row> rows = categories.get(category);
    if (rows == null) {
      rows = new ArrayList<>();
      categories.put(category, rows);
    }
    rows.add(new Row(value, stdDev));
  }

  /**
   * Computes the median of every category added so far.
   *
   * @return a new map from category name to median, iterated in ascending key order
   */
  public Map<String, Double> getMedians() {
    Map<String, Double> result = new TreeMap<>();
    for (Map.Entry<String, List<Row>> entry : categories.entrySet()) {
      result.put(entry.getKey(), getMedian(entry.getValue()));
    }
    return result;
  }

  /** Sorts the non-empty {@code rows} by value in place and returns their median value. */
  private static double getMedian(List<Row> rows) {
    Collections.sort(rows);
    int middle = rows.size() / 2;
    if (rows.size() % 2 != 0) {
      return rows.get(middle).value;
    }
    Row lower = rows.get(middle - 1);
    Row upper = rows.get(middle);
    return lower.stdDev < upper.stdDev ? lower.value : upper.value;
  }

  /** A value with its standard deviation; rows order by value. */
  private static final class Row implements Comparable<Row> {
    private final double value;
    private final double stdDev;

    Row(double value, double stdDev) {
      this.value = value;
      this.stdDev = stdDev;
    }

    @Override
    public int compareTo(Row o) {
      // Double.compare is a total order (NaN sorts last); comparing with == and > is not,
      // and a single NaN value would otherwise scramble the order of the finite values.
      return Double.compare(value, o.value);
    }
  }

  /** Runs the calculation on the sample data set and prints the three medians. */
  public static void main(String[] args) {
    CategoryMedianCalculator calc = new CategoryMedianCalculator();
    calc.addRow("A", 0.2, 0.05);
    calc.addRow("A", 1.3, 0.08);
    calc.addRow("A", 0.1, 0.1);

    calc.addRow("B", 0.6, 0.9);
    calc.addRow("B", 1.1, 0.6);
    calc.addRow("B", 0.7, 0.01);
    calc.addRow("B", 0.9, 0.05);
    calc.addRow("B", 0.1, 0.01);
    calc.addRow("B", 0.2, 0.08);

    calc.addRow("C", 0.5, 0.3);
    calc.addRow("C", 1.0, 0.14);
    calc.addRow("C", 2.1, 0.1);
    calc.addRow("C", 0.9, 0.04);

    for (Map.Entry<String, Double> median : calc.getMedians().entrySet()) {
      System.out.printf("%s: %.1f%n", median.getKey(), median.getValue());
    }
  }
}