Problem description
I found that there is an implementation of a Set that uses hashing (with all the useful consequences, like O(1) contains() etc.) and that is claimed to be more efficient than java.util.HashSet in every respect:
http://ontopia.wordpress.com/2009/09/23/a-faster-and-more-compact-set/
http://alias-i.com/lingpipe/docs/api/com/aliasi/util/CompactHashSet.html
Would it then be a good idea to quit using java.util.HashSet completely wherever I need a java.util.Set in favor of com.aliasi.util.CompactHashSet?
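For context, the swap itself would be purely mechanical, since these classes implement java.util.Set. A minimal sketch using the Ontopia implementation from the first link (the LingPipe class should be analogous; constructor details are an assumption, check the respective Javadoc):

import net.ontopia.utils.CompactHashSet;
import java.util.Set;

public class SwapSketch {
    public static void main(String[] args) {
        // Only the constructor call changes; the rest of the code keeps
        // talking to the java.util.Set interface.
        Set<String> names = new CompactHashSet<String>();  // was: new HashSet<String>()
        names.add("alice");
        System.out.println(names.contains("alice"));       // average O(1), as with HashSet
    }
}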
Let's start a little benchmark game.
The benchmarks below are based on those from the original article, but use modern tools: JMH for timing and JOL for measuring memory layout.
package tests;

import com.carrotsearch.hppc.ObjectOpenHashSet;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import com.google.common.collect.GuavaCompactHashSet;
import net.ontopia.utils.CompactHashSet;
import net.openhft.koloboke.collect.set.hash.HashObjSet;
import net.openhft.koloboke.collect.set.hash.HashObjSets;
import org.openjdk.jmh.annotations.*;

import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;

import static java.util.Arrays.stream;
import static org.openjdk.jol.info.GraphLayout.parseInstance;

@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@OperationsPerInvocation(TestHashSet.TIMES)
@Threads(1)
@Fork(1)
@State(Scope.Thread)
public class TestHashSet {

    public static final int TIMES = 1000000;
    private static final int MAX = 5000000;
    private static long ELEMENTS_SIZE;

    static Long[] add = new Long[TIMES], lookup = new Long[TIMES], remove = new Long[TIMES];

    static {
        // new Long(...) deliberately creates fresh instances (bypassing the boxed cache)
        for (int ix = 0; ix < TIMES; ix++)
            add[ix] = new Long(Math.round(Math.random() * MAX));
        // Total size of the distinct elements themselves, subtracted later
        // so that only the set's own overhead is reported.
        ELEMENTS_SIZE = stream(add).distinct().count() * parseInstance(add[0]).totalSize();
        for (int ix = 0; ix < TIMES; ix++)
            lookup[ix] = new Long(Math.round(Math.random() * MAX));
        for (int ix = 0; ix < TIMES; ix++)
            remove[ix] = new Long(Math.round(Math.random() * MAX));
    }

    @Benchmark
    public int hashSet() {
        Set<Long> set = new HashSet<Long>();
        for (Long o : add) {
            set.add(o);
        }
        int r = 0;
        for (Long o : lookup) {
            r ^= set.contains(o) ? 1 : 0;
        }
        for (Long o : set) {
            r += o.intValue();
        }
        for (Long o : remove) {
            set.remove(o);
        }
        return r + set.size();
    }

    @Benchmark
    public int compactHashSet() {
        Set<Long> set = new CompactHashSet<Long>();
        for (Long o : add) {
            set.add(o);
        }
        int r = 0;
        for (Long o : lookup) {
            r ^= set.contains(o) ? 1 : 0;
        }
        for (Long o : set) {
            r += o.intValue();
        }
        for (Long o : remove) {
            set.remove(o);
        }
        return r + set.size();
    }

    @Benchmark
    public int hppcSet() {
        ObjectOpenHashSet<Long> set = new ObjectOpenHashSet<Long>();
        for (Long o : add) {
            set.add(o);
        }
        int r = 0;
        for (Long o : lookup) {
            r ^= set.contains(o) ? 1 : 0;
        }
        for (ObjectCursor<Long> cur : set) {
            r += cur.value.intValue();
        }
        for (Long o : remove) {
            set.remove(o);
        }
        return r + set.size();
    }

    @Benchmark
    public int kolobokeSet() {
        Set<Long> set = HashObjSets.newMutableSet();
        for (Long o : add) {
            set.add(o);
        }
        int r = 0;
        for (Long o : lookup) {
            r ^= set.contains(o) ? 1 : 0;
        }
        for (Long o : set) {
            r += o.intValue();
        }
        for (Long o : remove) {
            set.remove(o);
        }
        return r + set.size();
    }

    @Benchmark
    public int guavaCompactHashSet() {
        // fair comparison -- growing table
        Set<Long> set = new GuavaCompactHashSet<>(10);
        for (Long o : add) {
            set.add(o);
        }
        int r = 0;
        for (Long o : lookup) {
            r ^= set.contains(o) ? 1 : 0;
        }
        for (Long o : set) {
            r += o.intValue();
        }
        for (Long o : remove) {
            set.remove(o);
        }
        return r + set.size();
    }

    // Memory-footprint measurement (not a JMH benchmark)
    public static void main(String[] argv) {
        HashSet hashSet = new HashSet();
        test("HashSet", hashSet, hashSet::add);
        CompactHashSet compactHashSet = new CompactHashSet();
        test("CompactHashSet", compactHashSet, compactHashSet::add);
        HashObjSet<Object> kolobokeSet = HashObjSets.newMutableSet();
        test("KolobokeSet", kolobokeSet, kolobokeSet::add);
        ObjectOpenHashSet hppcSet = new ObjectOpenHashSet();
        test("HPPC set", hppcSet, hppcSet::add);
        GuavaCompactHashSet guavaCompactHashSet = new GuavaCompactHashSet(10);
        test("GuavaCompactHashSet", guavaCompactHashSet, guavaCompactHashSet::add);
    }

    public static void test(String name, Object set, Consumer setAdd) {
        for (Long o : add) {
            setAdd.accept(o);
        }
        System.out.printf("%s: %.1f bytes per element\n", name,
                ((parseInstance(set).totalSize() - ELEMENTS_SIZE) * 1.0 / TIMES));
    }
}
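For reference, the timing part can be launched from a plain main() method. A minimal sketch using the standard JMH runner API (build-tool setup omitted; class name RunTestHashSet is my own):

import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

public class RunTestHashSet {
    public static void main(String[] args) throws RunnerException {
        Options opts = new OptionsBuilder()
                .include(TestHashSet.class.getSimpleName())  // run all @Benchmark methods above
                .build();
        new Runner(opts).run();
    }
}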
Results:
Set implementation    Speed, ns/op    Memory footprint, bytes/elem
                                      +UseCompressedOops    -UseCompressedOops
CompactHashSet        828             8.4                   16.8
HashSet               676             37.4                  60.3
HPPC Set              853             10.5                  18.9
Koloboke Set          587             8.4                   16.8
GuavaCompactHashSet   874             25.9                  37.4
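The two memory columns come from JOL's GraphLayout (the same call the test() helper uses), with the JVM run once with -XX:+UseCompressedOops and once with -XX:-UseCompressedOops. A standalone sketch of that measurement, assuming a small HashSet:

import org.openjdk.jol.info.GraphLayout;
import java.util.HashSet;
import java.util.Set;

public class FootprintSketch {
    public static void main(String[] args) {
        Set<Long> set = new HashSet<Long>();
        for (long i = 0; i < 1000; i++) {
            set.add(i);
        }
        // Walks the object graph reachable from `set` and sums instance sizes;
        // compare the output under +/-UseCompressedOops to see how reference
        // width changes the footprint.
        System.out.println(GraphLayout.parseInstance(set).totalSize() + " bytes");
    }
}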
It appears that CompactHashSet is actually slower than good old HashSet, even though it uses much less memory.
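The memory gap is easy to explain: open-addressing sets of this kind store keys directly in a flat array instead of allocating a node object per element the way java.util.HashSet (via HashMap) does, at the cost of probing several slots on collisions. A minimal sketch of such a lookup (illustrative only, not Ontopia's actual code; it ignores deleted-slot markers and assumes the load factor keeps the table from filling up):

// Linear-probing contains() over a power-of-two-sized table.
static boolean probeContains(Object[] table, Object key) {
    int mask = table.length - 1;        // table length is a power of two
    int ix = key.hashCode() & mask;     // home slot for this key
    while (table[ix] != null) {         // probe until a free slot proves absence
        if (table[ix].equals(key)) {
            return true;
        }
        ix = (ix + 1) & mask;           // step to the next slot, wrapping around
    }
    return false;
}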