yutopp's blog

サンドバッグになりたい

goroutineのstackが再確保されたとき、stack上のポインタの値はどうなるのか

goroutineはstackful coroutine[1]でありcontiguous stack[2]であるから、stackの延伸が必要なときは新たな別の領域のstackへのコピーが発生するはず。このとき、スタック上のポインタの値はどうなってしまうのだろう?という疑問を調べたメモです。

実験の環境は以下。

$ uname -v
Darwin Kernel Version 22.1.0: Sun Oct  9 20:14:30 PDT 2022; root:xnu-8792.41.9~2/RELEASE_ARM64_T8103
$ go version
go version go1.19.4 darwin/arm64

見てみる

Go: How Does the Goroutine Stack Size Evolve? | by Vincent Blanchon | A Journey With Go | Medium の記事を参考に、stackのポインタを保存してprintlnするコードを実行してみる。

(fmt.Printlnではなくprintlnを使っているのは、前者を用いるとxがescapeされるため。)

package main

import (
    "unsafe"
)

// main records the address of the local array x in several forms (*int,
// unsafe.Pointer, an element of a pointer array, and uintptr), prints them,
// calls a, and then prints them again. Finally it writes through both the
// typed pointer and the uintptr-derived pointer and prints x[0] after each
// write so the effect of each can be compared.
//
// The built-in println is used for all output.
func main() {
    var x [10]int
    p := &x[0]
    pp := unsafe.Pointer(&x[0])
    // up keeps the same address as a plain integer value.
    up := uintptr(pp)

    var px [10]*int // also keep the pointer inside an array of pointers
    px[5] = p

    println("=== origin")
    println(&x)    // same address
    println(&x[0]) // same address
    println(p)     // same address
    println(pp)    // same address
    println(px[5]) // same address
    println(up)    // same address (printed as a decimal integer)

    a(x) // triggers a stack move

    println("=== after copy")
    println(&x[0]) // the address has changed
    println(p)     // matches the changed address
    println(pp)    // matches the changed address
    println(px[5]) // matches the changed address
    println(up)    // *still the previous value*

    println("=== write")
    println(x[0]) // 0

    // Write through the typed pointer.
    *(*int)(unsafe.Pointer(p)) = 10

    println(x[0]) // 10

    // Write through the address that was saved as a uintptr before the call to a.
    *(*int)(unsafe.Pointer(up)) = 20 // ends up writing to some other location

    println(x[0]) // 10
}

// a ignores its argument, declares a 1000-element int array and passes it by
// value to c. main calls a in order to provoke a stack move.
// NOTE(review): presumably the large local array plus the by-value argument
// are what push the frame past the current stack size — confirm against the
// runtime's stack-growth check.
//
//go:noinline
func a(x [10]int) {
    var y [1000]int
    c(y)
}

// c accepts a 1000-element int array by value and does nothing with it.
// The noinline directive keeps the call from a as a real function call.
//
//go:noinline
func c(x [1000]int) {
}

ビルドは以下のコマンドで行う。compile command - cmd/compile - Go Packages [3] の通り -m を指定して、エスケープされていないことを確認しておく(buildの出力が空であればok)。

$ go build -gcflags "-m" main.go && ./main

実行結果は以下。

=== origin
0x14000098ec0
0x14000098ec0
0x14000098ec0
0x14000098ec0
0x14000098ec0
1374390161088
=== after copy
0x1400006fec0
0x1400006fec0
0x1400006fec0
0x1400006fec0
1374390161088
=== write
0
10
10

最初に取得したアドレスは 0x14000098ec0 で、after copy の後のアドレスは 0x1400006fec0 になっている。

つまり、スタックに書き込まれたポインタの値も変化しているのが分かる。一方で uintptr に変換した値 1374390161088 は変化がない。

というわけで、runtimeのstack.goを覗いてみると、 stack mapを取得してスタックフレームを書き換えていそうな一連の処理がある。

動作が気になるので、runtime.stackDebug を 4 に書き換えた改造runtimeで再度出力を見てみる。

const (
    // stackDebug == 0: no logging
    //            == 1: logging of per-stack operations
    //            == 2: logging of per-frame operations
    //            == 3: logging of per-word updates
    //            == 4: logging of per-word reads
-   stackDebug       = 0
+   stackDebug       = 4
    stackFromSystem  = 0 // allocate stacks from system memory instead of the heap

$ go/bin/go build main.go && ./main
stackalloc 32768
  allocated 0x14000004000
stackalloc 2048
stackcacherefill order=0
  allocated 0x14000046000
stackalloc 32768
  allocated 0x1400004c000
stackalloc 2048
  allocated 0x14000046800
stackalloc 32768
  allocated 0x14000054000
runtime: newstack sp=0x14000046770 stack=[0x14000046000, 0x14000046800]
    morebuf={pc:0x1044a22d4 sp:0x14000046770 lr:0x0}
    sched={pc:0x104485684 sp:0x14000046770 lr:0x1044a22d4 ctxt:0x0}
stackalloc 32768
  allocated 0x14000084000
stackalloc 32768
  allocated 0x1400005c000
stackalloc 32768
  allocated 0x14000104000
stackalloc 2048
stackcacherefill order=0
  allocated 0x14000042000
stackalloc 2048
  allocated 0x14000042800
runtime: newstack sp=0x14000046390 stack=[0x14000046000, 0x14000046800]
    morebuf={pc:0x104482108 sp:0x14000046390 lr:0x0}
    sched={pc:0x104481728 sp:0x14000046390 lr:0x104482108 ctxt:0x0}
stackalloc 4096
stackcacherefill order=1
  allocated 0x14000118000
copystack gp=0x140000021a0 [0x14000046000 0x14000046390 0x14000046800] -> [0x14000118000 0x14000118b90 0x14000119000]/4096
        0x140000021f0:0x0
        0x14000002208:0x14000046388
        adjust ptr 0x14000002208:0x14000046388 -> 0x14000118b88
        0x140000021c8:0x0
        0x140000021c0:0x0
    adjusting runtime.heapBits.forwardOrBoundary frame=[0x14000118b90,0x14000118b90] pc=0x104481728 continpc=0x104481728
      args
        0x14000118b98:ptr:0x10b774900 # 0 5
        0x14000118ba0:scalar:0x500000000000 # 0 5
        0x14000118ba8:ptr:0x10b96bfff # 0 5
        0x14000118bb0:scalar:0x400 # 0 5
        0x14000118bb8:scalar:0x140000463d0 # 0 5
        0x14000118bc0:scalar:0x0 # 0 5
        0x14000118bc8:scalar:0x400 # 0 5
        0x14000118bd0:scalar:0x1044941a0 # 0 5
    adjusting runtime.heapBits.initSpan frame=[0x14000118b90,0x14000118c00] pc=0x104482108 continpc=0x104482108
      locals 1/4 3 words 0x1044d003d
        0x14000118be0:scalar:0x1 # 0 4
        0x14000118be8:scalar:0x10 # 0 4
        0x14000118bf0:ptr:0x10b774900 # 0 4
        0x14000118bf8:scalar:0x14000046448 # 0 4
        0x14000118c00:scalar:0x1044844bc # 0 4
        0x14000118c08:scalar:0x10b774900 # 0 4
        0x14000118c10:scalar:0x0 # 0 4
        0x14000118c18:scalar:0x10b96bfff # 0 4
      args
        0x14000118c08:scalar:0x10b774900 # 0 0
        0x14000118c10:scalar:0x0 # 0 0
        0x14000118c18:scalar:0x10b96bfff # 0 0
        0x14000118c20:scalar:0x104484214 # 0 0
        0x14000118c28:scalar:0x14000046448 # 0 0
        0x14000118c30:scalar:0x600001044841c8 # 0 0
        0x14000118c38:scalar:0x10baa32a8 # 0 0
        0x14000118c40:scalar:0x1 # 0 0
    adjusting runtime.(*mcentral).grow frame=[0x14000118c00,0x14000118c50] pc=0x104484704 continpc=0x104484704
      no locals to adjust
    adjusting runtime.(*mcentral).cacheSpan frame=[0x14000118c50,0x14000118cc0] pc=0x1044844bc continpc=0x1044844bc
      locals 0/3 2 words 0x1044cfae4
        0x14000118ca8:scalar:0x10b96bfff # 0 0
        0x14000118cb0:scalar:0x10455c348 # 0 0
        0x14000118cb8:scalar:0x14000046508 # 0 0
        0x14000118cc0:scalar:0x10447d804 # 0 0
        0x14000118cc8:scalar:0x10455c340 # 0 0
        0x14000118cd0:scalar:0x10447d7d0 # 0 0
        0x14000118cd8:scalar:0x10b774700 # 0 0
        0x14000118ce0:scalar:0x104494164 # 0 0
    adjusting runtime.(*mcache).refill frame=[0x14000118cc0,0x14000118d10] pc=0x104483a88 continpc=0x104483a88
      locals 1/2 1 words 0x1044cf845
        0x14000118d00:ptr:0x1045bca90 # 0 1
        0x14000118d08:scalar:0x14000046558 # 0 1
        0x14000118d10:scalar:0x10447de08 # 0 1
        0x14000118d18:scalar:0x1045bca68 # 0 1
        0x14000118d20:scalar:0x10b96bfff # 0 1
        0x14000118d28:scalar:0x10b774800 # 0 1
        0x14000118d30:scalar:0x0 # 0 1
        0x14000118d38:scalar:0x0 # 0 1
    adjusting runtime.(*mcache).nextFree frame=[0x14000118d10,0x14000118d60] pc=0x10447d804 continpc=0x10447d804
      locals 1/2 1 words 0x1044cf845
        0x14000118d50:ptr:0x1045bca90 # 0 1
        0x14000118d58:scalar:0x140000465c8 # 0 1
        0x14000118d60:scalar:0x10447e39c # 0 1
        0x14000118d68:scalar:0x1045bca68 # 0 1
        0x14000118d70:scalar:0x10000104484110 # 0 1
        0x14000118d78:scalar:0x10baa33c8 # 0 1
        0x14000118d80:scalar:0x1 # 0 1
        0x14000118d88:scalar:0x600100000065f8 # 0 1
    adjusting runtime.mallocgc frame=[0x14000118d60,0x14000118dd0] pc=0x10447de08 continpc=0x10447de08
      locals 3/8 4 words 0x1044d090f
        0x14000118da8:ptr:0x1045339a0 # 0 5
        0x14000118db0:scalar:0x0 # 0 5
        0x14000118db8:ptr:0x0 # 0 5
        0x14000118dc0:scalar:0x1044a28fc # 0 5
        0x14000118dc8:scalar:0x140000465f8 # 0 5
        0x14000118dd0:scalar:0x1044a2af8 # 0 5
        0x14000118dd8:scalar:0x58 # 0 5
        0x14000118de0:scalar:0x1044eec20 # 0 5
      args
        0x14000118dd8:scalar:0x58 # 0 2
        0x14000118de0:ptr:0x1044eec20 # 0 2
        0x14000118de8:scalar:0x14000046601 # 0 2
        0x14000118df0:scalar:0x140000021a0 # 0 2
        0x14000118df8:scalar:0x14000046678 # 0 2
        0x14000118e00:scalar:0x10447816c # 0 2
        0x14000118e08:scalar:0x14000046648 # 0 2
        0x14000118e10:scalar:0x10447d818 # 0 2
    adjusting runtime.newobject frame=[0x14000118dd0,0x14000118e00] pc=0x10447e39c continpc=0x10447e39c
      no locals to adjust
      args
        0x14000118e08:scalar:0x14000046648 # 0 0
        0x14000118e10:scalar:0x10447d818 # 0 0
        0x14000118e18:scalar:0x10baa3458 # 0 0
        0x14000118e20:scalar:0x49 # 0 0
        0x14000118e28:scalar:0x10baa33c8 # 0 0
        0x14000118e30:scalar:0x200 # 0 0
        0x14000118e38:scalar:0x14000046678 # 0 0
        0x14000118e40:scalar:0x104478000 # 0 0
    adjusting runtime.acquireSudog frame=[0x14000118e00,0x14000118e80] pc=0x1044a2af8 continpc=0x1044a2af8
      locals 3/4 4 words 0x1044cfddb
        0x14000118e58:ptr:0x1045339a0 # 0 13
        0x14000118e60:scalar:0x0 # 0 13
        0x14000118e68:ptr:0x1400002b3b0 # 0 13
        0x14000118e70:ptr:0x1400002aa00 # 0 13
        0x14000118e78:scalar:0x14000046708 # 0 13
        0x14000118e80:scalar:0x104477ed4 # 0 13
        0x14000118e88:scalar:0x20 # 0 13
        0x14000118e90:scalar:0x1045bca90 # 0 13
    adjusting runtime.chanrecv frame=[0x14000118e80,0x14000118f10] pc=0x10447816c continpc=0x10447816c
      locals 3/6 9 words 0x1044d137e
        0x14000118ec0:scalar:0x0 # 0 12
        0x14000118ec8:scalar:0x1000000000ca68 # 0 12
        0x14000118ed0:ptr:0x1400010c058 # 0 12
        0x14000118ed8:ptr:0x140000021a0 # 0 12
        0x14000118ee0:scalar:0x10 # 0 12
        0x14000118ee8:scalar:0x140000466f8 # 0 12
        0x14000118ef0:scalar:0x1044aadb8 # 0 12
        0x14000118ef8:scalar:0x14000046738 # 0 12
        0x14000118f00:scalar:0x104485648 # 8 0
        0x14000118f08:scalar:0x14000046738 # 8 0
        0x14000118f10:scalar:0x104485654 # 8 0
        0x14000118f18:scalar:0x1400010c000 # 8 0
        0x14000118f20:scalar:0x0 # 8 0
        0x14000118f28:scalar:0x14000002101 # 8 0
        0x14000118f30:scalar:0x104485648 # 8 0
        0x14000118f38:scalar:0x14000046768 # 8 0
      args
        0x14000118f18:ptr:0x1400010c000 # 0 3
        0x14000118f20:ptr:0x0 # 0 3
        0x14000118f28:scalar:0x14000002101 # 0 3
        0x14000118f30:scalar:0x104485648 # 0 3
        0x14000118f38:scalar:0x14000046768 # 0 3
        0x14000118f40:scalar:0x1044a22d4 # 0 3
        0x14000118f48:scalar:0x0 # 0 3
        0x14000118f50:scalar:0x2 # 0 3
        0x14000118f00:0x104485648
    adjusting runtime.chanrecv1 frame=[0x14000118f10,0x14000118f40] pc=0x104477ed4 continpc=0x104477ed4
      no locals to adjust
      args
        0x14000118f48:scalar:0x0 # 0 0
        0x14000118f50:scalar:0x2 # 0 0
        0x14000118f58:scalar:0x1045304e0 # 0 0
        0x14000118f60:scalar:0x1400010c000 # 0 0
        0x14000118f68:scalar:0x0 # 0 0
        0x14000118f70:scalar:0x1044c7a24 # 0 0
        0x14000118f78:scalar:0x1045304c0 # 0 0
        0x14000118f80:scalar:0x0 # 0 0
    adjusting runtime.gcenable frame=[0x14000118f40,0x14000118f70] pc=0x104485654 continpc=0x104485654
      locals 1/2 1 words 0x1044cf845
        0x14000118f60:ptr:0x1400010c000 # 0 1
        0x14000118f68:scalar:0x0 # 0 1
        0x14000118f70:scalar:0x1044c7a24 # 0 1
        0x14000118f78:scalar:0x1045304c0 # 0 1
        0x14000118f80:scalar:0x0 # 0 1
        0x14000118f88:scalar:0x0 # 0 1
        0x14000118f90:scalar:0x0 # 0 1
        0x14000118f98:scalar:0x101000000000000 # 0 1
    adjusting runtime.main frame=[0x14000118f70,0x14000118fd0] pc=0x1044a22d4 continpc=0x1044a22d4
      locals 2/3 4 words 0x1044cfbb2
        0x14000118fa8:scalar:0x140000021a0 # 0 8
        0x14000118fb0:scalar:0x1044a2510 # 0 8
        0x14000118fb8:scalar:0x1400004679e # 0 8
        0x14000118fc0:ptr:0x140000467b0 # 0 8
        0x14000118fc8:scalar:0x0 # 0 8
        0x14000118fd0:scalar:0x0 # 0 8
        0x14000118fd8:scalar:0x0 # 0 8
        0x14000118fe0:scalar:0x0 # 0 8
adjust ptr 0x140000467b0 runtime.main
        0x14000118fb8:0x1400004679e
        adjust ptr 0x14000118fb8:0x1400004679e -> 0x14000118f9e
    adjusting runtime.goexit frame=[0x14000118fd0,0x14000118fd0] pc=0x1044c7a24 continpc=0x1044c7a24
stackfree 0x14000046000 2048
stack grow done
=== origin
0x14000118ec0
0x14000118ec0
0x14000118ec0
0x14000118ec0
0x14000118ec0
1374390685376
runtime: newstack sp=0x14000118e50 stack=[0x14000118000, 0x14000119000]
    morebuf={pc:0x1044c9b40 sp:0x14000118e50 lr:0x0}
    sched={pc:0x1044c9ce8 sp:0x14000118e50 lr:0x1044c9b40 ctxt:0x0}
stackalloc 32768
  allocated 0x14000090000
copystack gp=0x140000021a0 [0x14000118000 0x14000118e50 0x14000119000] -> [0x14000090000 0x14000097e50 0x14000098000]/32768
        0x140000021f0:0x0
        0x14000002208:0x14000118e48
        adjust ptr 0x14000002208:0x14000118e48 -> 0x14000097e48
        0x140000021c8:0x0
        0x140000021c0:0x0
    adjusting main.a frame=[0x14000097e50,0x14000097e50] pc=0x1044c9ce8 continpc=0x1044c9ce8
    adjusting main.main frame=[0x14000097e50,0x14000097f70] pc=0x1044c9b40 continpc=0x1044c9b40
      locals 1/4 11 words 0x1044d08ae
        0x14000097f10:scalar:0x14000118ec0 # 0 254
        0x14000097f18:ptr:0x0 # 0 254
        0x14000097f20:ptr:0x0 # 0 254
        0x14000097f28:ptr:0x0 # 0 254
        0x14000097f30:ptr:0x0 # 0 254
        0x14000097f38:ptr:0x0 # 0 254
        0x14000097f40:ptr:0x14000118ec0 # 0 254
        0x14000097f48:ptr:0x0 # 0 254
adjust ptr 0x14000118ec0 main.main
        0x14000097f50:ptr:0x0 # 8 7
        0x14000097f58:ptr:0x0 # 8 7
        0x14000097f60:ptr:0x0 # 8 7
        0x14000097f68:scalar:0x0 # 8 7
        0x14000097f70:scalar:0x1044c7a24 # 8 7
        0x14000097f78:scalar:0x1400008e000 # 8 7
        0x14000097f80:scalar:0x0 # 8 7
        0x14000097f88:scalar:0x0 # 8 7
    adjusting runtime.main frame=[0x14000097f70,0x14000097fd0] pc=0x1044a23dc continpc=0x1044a23dc
      locals 2/3 4 words 0x1044cfbb2
        0x14000097fa8:scalar:0x140000021a0 # 0 8
        0x14000097fb0:scalar:0x1044a2510 # 0 8
        0x14000097fb8:scalar:0x14000118f9e # 0 8
        0x14000097fc0:ptr:0x14000118fb0 # 0 8
        0x14000097fc8:scalar:0x0 # 0 8
        0x14000097fd0:scalar:0x0 # 0 8
        0x14000097fd8:scalar:0x0 # 0 8
        0x14000097fe0:scalar:0x0 # 0 8
adjust ptr 0x14000118fb0 runtime.main
        0x14000097fb8:0x14000118f9e
        adjust ptr 0x14000097fb8:0x14000118f9e -> 0x14000097f9e
    adjusting runtime.goexit frame=[0x14000097fd0,0x14000097fd0] pc=0x1044c7a24 continpc=0x1044c7a24
stackfree 0x14000118000 4096
stack grow done
=== after copy
0x14000097ec0
0x14000097ec0
0x14000097ec0
0x14000097ec0
1374390685376
=== write
0
10
10

というわけで、goroutineに紐づくstackの全traceのframeのアドレスを書き換えていそうな雰囲気を感じられました。(雰囲気と言っているのは、そんなにしっかり挙動を追ったわけではないため...)

stackの値を書き換えるとしてもgoroutineの実行状態など気を使わないといけないでしょうし、それっぽいコード片があるのでもう少し詳しく追ってみたいですが、ひとまずstackの値は書き換わっていそうということが分かった。

結構実行時のコストにもなるのかなと思いましたが、そうでもないのかな〜。普通に使っていればエスケープされてheapに載っているケースのほうが多く、そこまで書き換えコストにならないとかなんでしょうか。


[1] 少なくともstackless coroutineのような仕組みではないのでこう書いたけれど、語弊があるかもしれん。

[2] 「Changing segmented stacks to contiguous stacks」に「Contiguous stacks」へのリンクが貼られている。

[3] 余談ですが、noescape directiveを使った高速化の攻めたライブラリがあって面白かった。 GitHub - lukechampine/noescape: Promise to the Go compiler that your Reads and Writes are well-behaved