分析Java中Map的遍历性能问题

一、引言

我们知道java hashmap的扩容是有成本的，为了减少扩容的次数和成本，可以给hashmap设置初始容量大小，如下所示：

				?

									hashmap<string, integer=""> map0 = new hashmap<string, integer="">(100000);

但是在实际使用的过程中，发现性能不但没有提升，反而显著下降了！代码里对hashmap的操作也只有遍历了，看来是遍历出了问题，于是做了一番测试，得到如下结果：

hashmap的迭代器遍历性能与 initial capacity 有关，与size无关

二、迭代器测试

贴上测试代码：

				?

									public class mapforeachtest {

									    public static void main(string[] args) {

									        hashmap<string, integer=""> map0 = new hashmap<string, integer="">(100000);

									        initdataandprint(map0);

									        hashmap<string, integer=""> map1 = new hashmap<string, integer="">();

									        initdataandprint(map1);

									    }

									    private static void initdataandprint(hashmap map) {

									        initdata(map);

									        long start = system.currenttimemillis();

									        for (int i = 0; i < 100; i++) {

									            foreach(map);

									        }

									        long end = system.currenttimemillis();

									        system.out.println("");

									        system.out.println("hashmap size: " + map.size() +  " 耗时: " + (end - start) + " ms");

									    }

									    private static void foreach(hashmap map) {

									        for (iterator<map.entry<string, integer="">> it = map.entryset().iterator(); it.hasnext();){

									            map.entry<string, integer=""> item = it.next();

									            system.out.print(item.getkey());

									            // do something

									        }

									    }

									    private static void initdata(hashmap map) {

									        map.put("a", 0);

									        map.put("b", 1);

									        map.put("c", 2);

									        map.put("d", 3);

									        map.put("e", 4);

									        map.put("f", 5);

									    }

									}

这是运行结果：

分析Java中Map的遍历性能问题

我们将第一个map初始化10w大小，第二个map不指定大小(实际16)，两个存储相同的数据，但是用迭代器遍历100次的时候发现性能迥异，一个36ms一个4ms，实际上性能差距更大，这里的4ms是600次system.out.print的耗时，这里将print注掉再试下

				?

									for (iterator<map.entry<string, integer="">> it = map.entryset().iterator(); it.hasnext();){

									    map.entry<string, integer=""> item = it.next();

									    // system.out.print(item.getkey());

									    // do something

									}

输出结果如下：

分析Java中Map的遍历性能问题

可以发现第二个map耗时几乎为0，第一个达到了28ms，遍历期间没有进行任何操作，既然石锤了和 initial capacity 有关，下一步我们去看看为什么会这样，找找map迭代器的源码看看。

三、迭代器源码探究

我们来看看map.entryset().iterator()的源码；

				?

									public final iterator<map.entry<k,v>> iterator() {

									    return new entryiterator();

									}

其中entryiterator是hashmap的内部抽象类，源码并不多，我全部贴上来并附上中文注释

				?

									abstract class hashiterator {

									    // 下一个node

									    node<k,v> next; // next entry to return

									    // 当前node

									    node<k,v> current;     // current entry

									    // 预期的map大小，也就是说每个hashmap可以有多个迭代器(每次调用 iterator() 会new 一个迭代器出来)，但是只能有一个迭代器对他remove，否则会直接报错(快速失败)

									    int expectedmodcount;  // for fast-fail

									    // 当前节点所在的数组下标，hashmap内部是使用数组来存储数据的，不了解的先去看看hashmap的源码吧

									    int index;             // current slot

									    hashiterator() {

									        // 初始化 expectedmodcount

									        expectedmodcount = modcount;

									        // 浅拷贝一份map的数据

									        node<k,v>[] t = table;

									        current = next = null;

									        index = 0;

									        // 如果 map 中数据不为空，遍历数组找到第一个实际存储的素，赋值给next

									        if (t != null && size > 0) { // advance to first entry

									            do {} while (index < t.length && (next = t[index++]) == null);

									        }

									    }

									    public final boolean hasnext() {

									        return next != null;

									    }

									    final node<k,v> nextnode() {

									        // 用来浅拷贝table，和别名的作用差不多，没啥用

									        node<k,v>[] t;

									        // 定义一个e指存储next，并在找到下一值时返它自己

									        node<k,v> e = next;

									        if (modcount != expectedmodcount)

									            throw new concurrentmodificationexception();

									        if (e == null)

									            throw new nosuchelementexception();

									        // 使current指向e，也就是next，这次要找的值，并且让next = current.next，一般为null

									        if ((next = (current = e).next) == null && (t = table) != null) {

									            do {} while (index < t.length && (next = t[index++]) == null);

									        }

									        return e;

									    }

									    /**

									     * 删除元素，这里不讲了，调的是hashmap的removenode，没啥特别的

									     **/

									    public final void remove() {

									        node<k,v> p = current;

									        if (p == null)

									            throw new illegalstateexception();

									        if (modcount != expectedmodcount)

									            throw new concurrentmodificationexception();

									        current = null;

									        k key = p.key;

									        removenode(hash(key), key, null, false, false);

									        // 用来保证快速失败的

									        expectedmodcount = modcount;

									    }

									}

上面的代码一看就明白了，迭代器每次寻找下一个元素都会去遍历数组，如果 initial capacity 特别大的话，也就是说 threshold 也大，table.length就大，所以遍历比较耗性能。

table数组的大小设置是在resize()方法里：

				?

									node<k,v>[] newtab = (node<k,v>[])new node[newcap];

									table = newtab;

四、其他遍历方法

注意代码里我们用的是map.entryset().iterator()，实际上和keys().iterator(), values().iterator() 一样，源码如下：

				?

									final class keyiterator extends hashiterator

									    implements iterator<k> {

									    public final k next() { return nextnode().key; }

									}

									final class valueiterator extends hashiterator

									    implements iterator<v> {

									    public final v next() { return nextnode().value; }

									}

									final class entryiterator extends hashiterator

									    implements iterator<map.entry<k,v>> {

									    public final map.entry<k,v> next() { return nextnode(); }

									}

这两个就不分析了，性能一样。

实际使用中对集合的遍历还有几种方法：

普通for循环+下标
增强型for循环
map.foreach
stream.foreach

普通for循环+下标的方法不适用于map，这里不讨论了。

4.1、增强型for循环

增强行for循环实际上是通过迭代器来实现的，我们来看两者的联系

源码：

				?

									private static void foreach(hashmap map) {

									    for (iterator<map.entry<string, integer="">> it = map.entryset().iterator(); it.hasnext();){

									        map.entry<string, integer=""> item = it.next();

									        system.out.print(item.getkey());

									        // do something

									    }

									}

									private static void foreach0(hashmap<string, integer=""> map) {

									    for (map.entry entry : map.entryset()) {

									        system.out.print(entry.getkey());

									    }

									}

编译后的字节码：

				?

									// access flags 0xa

									  private static foreach(ljava/util/hashmap;)v

									   l0

									    linenumber 41 l0

									    aload 0

									    invokevirtual java/util/hashmap.entryset ()ljava/util/set;

									    invokeinterface java/util/set.iterator ()ljava/util/iterator; (itf)

									    astore 1

									   l1

									   frame append [java/util/iterator]

									    aload 1

									    invokeinterface java/util/iterator.hasnext ()z (itf)

									    ifeq l2

									   l3

									    linenumber 42 l3

									    aload 1

									    invokeinterface java/util/iterator.next ()ljava/lang/object; (itf)

									    checkcast java/util/map$entry

									    astore 2

									   l4

									    linenumber 43 l4

									    getstatic java/lang/system.out : ljava/io/printstream;

									    aload 2

									    invokeinterface java/util/map$entry.getkey ()ljava/lang/object; (itf)

									    checkcast java/lang/string

									    invokevirtual java/io/printstream.print (ljava/lang/string;)v

									   l5

									    linenumber 45 l5

									    goto l1

									   l2

									    linenumber 46 l2

									   frame chop 1

									    return

									   l6

									    localvariable item ljava/util/map$entry; l4 l5 2

									    // signature ljava/util/map$entry<ljava lang="" string;ljava="" integer;="">;

									    // declaration: item extends java.util.map$entry<java.lang.string, java.lang.integer="">

									    localvariable it ljava/util/iterator; l1 l2 1

									    // signature ljava/util/iterator<ljava util="" map$entry<ljava="" lang="" string;ljava="" integer;="">;>;

									    // declaration: it extends java.util.iterator<java.util.map$entry<java.lang.string, java.lang.integer="">>

									    localvariable map ljava/util/hashmap; l0 l6 0

									    maxstack = 2

									    maxlocals = 3

									  // access flags 0xa

									  // signature (ljava/util/hashmap<ljava lang="" string;ljava="" integer;="">;)v

									  // declaration: void foreach0(java.util.hashmap<java.lang.string, java.lang.integer="">)

									  private static foreach0(ljava/util/hashmap;)v

									   l0

									    linenumber 50 l0

									    aload 0

									    invokevirtual java/util/hashmap.entryset ()ljava/util/set;

									    invokeinterface java/util/set.iterator ()ljava/util/iterator; (itf)

									    astore 1

									   l1

									   frame append [java/util/iterator]

									    aload 1

									    invokeinterface java/util/iterator.hasnext ()z (itf)

									    ifeq l2

									    aload 1

									    invokeinterface java/util/iterator.next ()ljava/lang/object; (itf)

									    checkcast java/util/map$entry

									    astore 2

									   l3

									    linenumber 51 l3

									    getstatic java/lang/system.out : ljava/io/printstream;

									    aload 2

									    invokeinterface java/util/map$entry.getkey ()ljava/lang/object; (itf)

									    invokevirtual java/io/printstream.print (ljava/lang/object;)v

									   l4

									    linenumber 52 l4

									    goto l1

									   l2

									    linenumber 53 l2

									   frame chop 1

									    return

									   l5

									    localvariable entry ljava/util/map$entry; l3 l4 2

									    localvariable map ljava/util/hashmap; l0 l5 0

									    // signature ljava/util/hashmap<ljava lang="" string;ljava="" integer;="">;

									    // declaration: map extends java.util.hashmap<java.lang.string, java.lang.integer="">

									    maxstack = 2

									    maxlocals = 3

都不用耐心观察，两个方法的字节码除了局部变量不一样其他都几乎一样，由此可以得出增强型for循环性能与迭代器一样，实际运行结果也一样，我不展示了，感兴趣的自己去copy文章开头和结尾的代码试下。

分析Java中Map的遍历性能问题

4.2、map.foreach

先说一下为什么不把各种方法一起运行同时打印性能，这是因为cpu缓存的原因和jvm的一些优化会干扰到性能的判断，附录全部测试结果有说明

直接来看源码吧

				?

									@override

									public void foreach(biconsumer<!--? super k, ? super v--> action) {

									    node<k,v>[] tab;

									    if (action == null)

									        throw new nullpointerexception();

									    if (size > 0 && (tab = table) != null) {

									        int mc = modcount;

									        for (int i = 0; i < tab.length; ++i) {

									            for (node<k,v> e = tab[i]; e != null; e = e.next)

									                action.accept(e.key, e.value);

									        }

									        if (modcount != mc)

									            throw new concurrentmodificationexception();

									    }

									}

很简短的源码，就不打注释了，从源码我们不难获取到以下信息：

该方法也是快速失败的，遍历期间不能删除元素
需要遍历整个数组
biconsumer加了@functionalinterface注解，用了 lambda

第三点和性能无关，这里只是提下

通过以上信息我们能确定这个性能与table数组的大小有关。

但是在实际测试的时候却发现性能比迭代器差了不少：

分析Java中Map的遍历性能问题

4.3、stream.foreach

stream与map.foreach的共同点是都使用了lambda表达式。但两者的源码没有任何复用的地方。

不知道你有没有看累，先上测试结果吧：

分析Java中Map的遍历性能问题

耗时比map.foreach还要高点。

下面讲讲straam.foreach顺序流的源码，这个也不复杂，不过累的话先去看看总结吧。

stream.foreach的执行者是分流器，hashmap的分流器源码就在hashmap类中，是一个静态内部类，类名叫 entryspliterator

下面是顺序流执行的方法

				?

									public void foreachremaining(consumer<!--? super map.entry<k,v-->> action) {

									    int i, hi, mc;

									    if (action == null)

									        throw new nullpointerexception();

									    hashmap<k,v> m = map;

									    node<k,v>[] tab = m.table;

									    if ((hi = fence) < 0) {

									        mc = expectedmodcount = m.modcount;

									        hi = fence = (tab == null) ? 0 : tab.length;

									    }

									    else

									        mc = expectedmodcount;

									    if (tab != null && tab.length >= hi &&

									        (i = index) >= 0 && (i < (index = hi) || current != null)) {

									        node<k,v> p = current;

									        current = null;

									        do {

									            if (p == null)

									                p = tab[i++];

									            else {

									                action.accept(p);

									                p = p.next;

									            }

									        } while (p != null || i < hi);

									        if (m.modcount != mc)

									            throw new concurrentmodificationexception();

									    }

									}

从以上源码中我们也可以轻易得出遍历需要顺序扫描所有数组

五、总结

至此，map的四种遍历方法都测试完了，我们可以简单得出两个结论

map的遍历性能与内部table数组大小有关，也就是说与常用参数 initial capacity 有关，不管哪种遍历方式都是的
性能（由高到低）：迭代器 == 增强型for循环 > map.foreach > stream.foreach

这里就不说什么多少倍多少倍的性能差距了，抛开数据集大小都是扯淡，当我们不指定initial capacity的时候，四种遍历方法耗时都是3ms，这3ms还是输入输出流的耗时，实际遍历耗时都是0，所以数据集不大的时候用哪种都无所谓，就像不加输入输出流耗时不到1ms一样，很多时候性能消耗是在遍历中的业务操作，这篇文章不是为了让你去优化代码把foreach改成迭代器的，在大多数场景下并不需要关注迭代本身的性能，stream与lambda带来的可读性提升更加重要。

所以此文的目的就当是知识拓展吧，除了以上说到的遍历性能问题，你还应该从中能获取到的知识点有：

hashmap的数组是存储在table数组里的
table数组是resize方法初始化的，new map不会初始化数组
map遍历是table数组从下标0递增排序的，所以他是无序的
keyset().iterator，values.iterator， entryset.iterator 来说没有本质区别，用的都是同一个迭代器
各种遍历方法里，只有迭代器可以remove，虽然增强型for循环底层也是迭代器，但这个语法糖隐藏了 remove 方法
每次调用迭代器方法都会new 一个迭代器，但是只有一个可以修改
map.foreach与stream.foreach看上去一样，实际实现是不一样的

附：四种遍历源码

				?

									private static void foreach(hashmap map) {

									    for (iterator<map.entry<string, integer="">> it = map.entryset().iterator(); it.hasnext();){

									        map.entry<string, integer=""> item = it.next();

									        // system.out.print(item.getkey());

									        // do something

									    }

									}

									private static void foreach0(hashmap<string, integer=""> map) {

									    for (map.entry entry : map.entryset()) {

									        system.out.print(entry.getkey());

									    }

									}

									private static void foreach1(hashmap<string, integer=""> map) {

									    map.foreach((key, value) -> {

									        system.out.print(key);

									    });

									}

									private static void foreach2(hashmap<string, integer=""> map) {

									    map.entryset().stream().foreach(e -> {

									        system.out.print(e.getkey());

									    });

									}

附：完整测试类与测试结果+一个奇怪的问题

				?

									public class mapforeachtest {

									    public static void main(string[] args) {

									        hashmap<string, integer=""> map0 = new hashmap<string, integer="">(100000);

									        hashmap<string, integer=""> map1 = new hashmap<string, integer="">();

									        initdata(map0);

									        initdata(map1);

									        testiterator(map0);

									        testiterator(map1);

									        testfor(map0);

									        testfor(map1);

									        testmapforeach(map0);

									        testmapforeach(map1);

									        testmapstreamforeach(map0);

									        testmapstreamforeach(map1);

									    }

									    private static void testiterator(hashmap map) {

									        long start = system.currenttimemillis();

									        for (int i = 0; i < 100; i++) {

									            foreach(map);

									        }

									        long end = system.currenttimemillis();

									        system.out.println("");

									        system.out.println("hashmap size: " + map.size() +  " 迭代器 耗时: " + (end - start) + " ms");

									    }

									    private static void testfor(hashmap map) {

									        long start = system.currenttimemillis();

									        for (int i = 0; i < 100; i++) {

									            foreach0(map);

									        }

									        long end = system.currenttimemillis();

									        system.out.println("");

									        system.out.println("hashmap size: " + map.size() +  " 增强型for 耗时: " + (end - start) + " ms");

									    }

									    private static void testmapforeach(hashmap map) {

									        long start = system.currenttimemillis();

									        for (int i = 0; i < 100; i++) {

									            foreach1(map);

									        }

									        long end = system.currenttimemillis();

									        system.out.println("");

									        system.out.println("hashmap size: " + map.size() +  " mapforeach 耗时: " + (end - start) + " ms");

									    }

									    private static void testmapstreamforeach(hashmap map) {

									        long start = system.currenttimemillis();

									        for (int i = 0; i < 100; i++) {

									            foreach2(map);

									        }

									        long end = system.currenttimemillis();

									        system.out.println("");

									        system.out.println("hashmap size: " + map.size() +  " mapstreamforeach 耗时: " + (end - start) + " ms");

									    }

									    private static void foreach(hashmap map) {

									        for (iterator<map.entry<string, integer="">> it = map.entryset().iterator(); it.hasnext();){

									            map.entry<string, integer=""> item = it.next();

									            system.out.print(item.getkey());

									            // do something

									        }

									    }

									    private static void foreach0(hashmap<string, integer=""> map) {

									        for (map.entry entry : map.entryset()) {

									            system.out.print(entry.getkey());

									        }

									    }

									    private static void foreach1(hashmap<string, integer=""> map) {

									        map.foreach((key, value) -> {

									            system.out.print(key);

									        });

									    }

									    private static void foreach2(hashmap<string, integer=""> map) {

									        map.entryset().stream().foreach(e -> {

									            system.out.print(e.getkey());

									        });

									    }

									    private static void initdata(hashmap map) {

									        map.put("a", 0);

									        map.put("b", 1);

									        map.put("c", 2);

									        map.put("d", 3);

									        map.put("e", 4);

									        map.put("f", 5);

									    }

									}