我正在尝试找到一种更好的方法来实现这些方法,因为在非常大的集合中它们需要很长时间,任何想法?
import java.util.HashMap;
import java.util.HashSet;
public class Multiset<E> extends HashSet<E> {
private static final long serialVersionUID = -9013417064272046980L;
private HashMap<E, Integer> multiplicities = new HashMap<E, Integer>();
@Override
public boolean add(E element){
if(multiplicities.containsKey(element)){
int x = (int) multiplicities.get(element);
multiplicities.put(element, ++x);
}else{
multiplicities.put(element, 1);
}
return super.add(element);
}
/**
* Adds all of the elements of another multiset to this one.
* This method allows the preservation of multiplicities
* which would not occur using the superclass's addAll().
* @param elements
* @return true if all elements were successfully added
*/
public boolean addAll(Multiset<E> elements) {
boolean flag = false;
for(E element : elements){
for(int i = 0; i < elements.multiplicity(element); i++)
flag = add(element);
}
return flag;
}
/**
* The set-view of a multiset is the ordinary set of all
* elements with multiplicity >= 1.
* @return all elements that have multiplicity >= 1
*/
public Multiset<E> setView(){
Multiset<E> set = new Multiset<E>();
for(E o : multiplicities.keySet()){
set.add(o);
}
return set;
}
/**
* provides a union of two multisets whereby the multiplicity of each
* element is the larger of the two
* @param second
* @return
*/
public Multiset<E> union(Multiset<E> second){
Multiset<E> union = new Multiset<E>();
Multiset<E> join = new Multiset<E>();
join.addAll(this);
join.addAll(second);
for(E o : join){
int i = this.multiplicity(o);
int j = second.multiplicity(o);
i = i > j ? i : j;
for(int c = 0; c < i; c++){
union.add(o);
}
}
return union;
}
/**
* provides an intersection of two multisets whereby
* the multiplicity of each element is the smaller of the two
* @param second
* @return The multiset containing the intersection of two multisets
*/
public Multiset<E> intersect(Multiset<E> second){
Multiset<E> intersection = new Multiset<E>();
for(E o : this.setView()){
if (second.setView().contains(o)) {
int i = this.multiplicity(o);
int j = second.multiplicity(o);
i = i < j ? i : j;
for(int c = 0; c < i; c++){
intersection.add(o);
}
}
}
return intersection;
}
/**
* The Multiplicity is the number of occurrences of an object
* in the multiset
* @param o
* @return number of occurrences of o
*/
public int multiplicity(E o){
return (multiplicities.containsKey(o)) ? multiplicities.get(o) : 0;
}
public int cardinality(){
int card = 0;
for(Integer i : multiplicities.values()){
card += i;
}
return card;
}
/**
* Measures the similarity between two multisets
* @param A
* @param B
* @return the cardinality of the difference of A and B
*/
public int similarityOfMultisets(Multiset<E> second){
Multiset<E> union, intersection;
int difference;
union = union(second);
intersection = intersect(second);
difference = union.cardinality() - intersection.cardinality();
return difference;
}
}
编辑:
我相信我找到了一种更快的方法来计算similarityOfMultisets方法:
public int similarityOfMultisets(Multiset<E> second){
int c = 0;
for(E elem: this.setView()){
c += Math.min(this.multiplicity(elem), second.multiplicity(elem));
}
Multiset<E> union = this.union(second);
return union.cardinality() - c;
}
答案 0 :(得分:2)
这是课程的重构。不一定更快 - 除了不在循环中重新运行setView() - 但在某些方面可能更干净。 FWIW。
import java.util.HashMap;
import java.util.HashSet;
public class Multiset<E> extends HashSet<E> {
private static final long serialVersionUID = -9013417064272046980L;
private final HashMap<E, Integer> multiplicities = new HashMap<E, Integer>();
public boolean add(E element) {
return add(element, 1);
}
private boolean add(E element, int copies) {
if (!contains(element))
multiplicities.put(element, 0);
int n = multiplicities.get(element);
multiplicities.put(element, n + copies);
return super.add(element);
}
/**
* Adds all of the elements of another multiset to this one. This method allows the preservation of multiplicities which would not occur
* using the superclass's addAll().
*
* @param that
* @return true if all elements were successfully added
*/
public boolean addAll(Multiset<E> that) {
boolean flag = false;
for (E element : that)
flag = add(element, that.multiplicity(element));
return flag;
}
/**
* The set-view of a multiset is the ordinary set of all elements with multiplicity >= 1.
*
* @return all elements that have multiplicity >= 1
*/
public Multiset<E> setView() {
Multiset<E> set = new Multiset<E>();
for (E o : multiplicities.keySet())
set.add(o);
return set;
}
/**
* provides a union of two multisets whereby the multiplicity of each element is the larger of the two
*
* @param that
* @return
*/
public Multiset<E> union(Multiset<E> that) {
HashSet<E> both = new HashSet<E>();
both.addAll(this);
both.addAll(that);
Multiset<E> union = new Multiset<E>();
for (E element : both)
union.add(element, Math.max(this.multiplicity(element), that.multiplicity(element)));
return union;
}
/**
* provides an intersection of two multisets whereby the multiplicity of each element is the smaller of the two
*
* @param that
* @return The multiset containing the intersection of two multisets
*/
public Multiset<E> intersect(Multiset<E> that) {
Multiset<E> intersection = new Multiset<E>();
final Multiset<E> other = that.setView();
for (E element : this.setView())
if (other.contains(element))
intersection.add(element, Math.min(this.multiplicity(element), that.multiplicity(element)));
return intersection;
}
/**
* The Multiplicity is the number of occurrences of an object in the multiset
*
* @param element
* @return number of occurrences of o
*/
public int multiplicity(E element) {
return contains(element) ? multiplicities.get(element) : 0;
}
public int cardinality() {
int card = 0;
for (Integer n : multiplicities.values())
card += n;
return card;
}
/**
* Measures the similarity between two multisets
*
* @param that
* @return the cardinality of the difference of A and B
*/
public int similarityOfMultisets(Multiset<E> that) {
return union(that).cardinality() - intersect(that).cardinality();
}
}
答案 1 :(得分:2)
我们算法的第一个变体的性能测试结果:
Robert-Union: 2263374 us Robert-Intersection: 603134 us Robert-Similarity: 2926389 us Carl-Union: 3372 us Carl-Intersection: 5097 us Carl-Similarity: 6913 us David-Union: 5182 us David-Intersection: 2527 us David-Similarity: 5270 us卡尔的工会击败了我的工会。
测试代码here。我没有验证计算输出的正确性。
答案 2 :(得分:0)
这就是我提出的G-C:
import com.google.common.collect.Multiset;
import com.google.common.collect.Multisets;
import com.google.common.collect.Multiset.Entry;
public class MultisetOp {
public static void main(String[] args) {
Multiset<Integer> ms1 = Multisets.newHashMultiset(1, 1, 2, 3, 4, 4, 4);
Multiset<Integer> ms2 = Multisets.newHashMultiset(1, 2, 3, 3,
4, 5, 5, 5);
Multiset<Integer> mu = Multisets.newHashMultiset();
Multiset<Integer> mi = Multisets.newHashMultiset();
// -------- UNION START -----------
for (Entry<Integer> e : ms1.entrySet()) {
int j = ms2.count(e.getElement());
mu.add(e.getElement(), Math.max(e.getCount(), j));
}
for (Entry<Integer> e : ms2.entrySet()) {
int j = ms1.count(e.getElement());
if (j == 0) {
mu.add(e.getElement(), e.getCount());
}
}
// -------- UNION END -----------
// -------- INTERSECT START -----------
for (Entry<Integer> e : ms1.entrySet()) {
int j = ms2.count(e.getElement());
if (j > 0) {
mi.add(e.getElement(), Math.min(e.getCount(), j));
}
}
// -------- INTERSECT END -----------
System.out.printf("Union: %s%n", mu);
System.out.printf("Intersection: %s%n", mi);
System.out.printf("Cardinality: %d%n", mu.size() - mi.size());
}
}
结果:
[1 x 2, 2, 3 x 2, 4 x 3, 5 x 3] [1, 2, 3, 4]
没有基准。 看来,你的基数可以通过两次遍历来计算,而不是三次。
答案 3 :(得分:0)
我不明白setView方法的目的......似乎你只是返回自己的副本,但每个键的多重性设置为1。
对于联盟尝试这可能(可能无法编译):
public Multiset<E> union(Multiset<E> second) {
Multiset<E> union = new Multiset<E>();
union.addAll(this);
union.addAll(second);
for(E o : union){
int multiplicity = Math.max (this.multiplicity(o), second.multiplicity(o));
union.multiplicities.put (o, multiplicity);
}
return union;
}
答案 4 :(得分:0)
我认为问题在于你正在为this.setView()中的每个元素调用second.setView() - 重新创建该集合。试试这个:
/**
* provides an intersection of two multisets whereby
* the multiplicity of each element is the smaller of the two
* @param second
* @return The multiset containing the intersection of two multisets
*/
public Multiset<E> intersect(Multiset<E> second){
Multiset<E> intersection = new Multiset<E>();
Set<E> other = second.setView();
for(E o : this.setView()){
if (other.contains(o)) {
int i = this.multiplicity(o);
int j = second.multiplicity(o);
i = i < j ? i : j;
for(int c = 0; c < i; c++){
intersection.add(o);
}
}
}
return intersection;
}