go进程启动分析
| 阅读 | 共 4075 字,阅读约
Overview
Go进程启动分析
- 我们都知道,go语言执行的入口为:main包下面的main()函数,但是底层指令真的是从这开始执行的吗?
- 这一篇内容用到上一篇《go协程原理》的GMP模型
Go进程启动概述
- main包的main函数并不是go语言的入口函数,入口函数是在汇编文件中定义的(每种架构各有一份,如asm_amd64.s、asm_arm64.s,本文以asm_arm64.s为例)
- main包的main函数是由runtime.main函数启动的
- go进程启动后,会调用runtime·rt0_go来执行程序的初始化和启动系统调度
go进程启动的四大步骤
- runtime.osinit:获取系统cpu个数
- runtime.schedinit:初始化调度系统,完成P的初始化,并把m0和某个P绑定
- runtime.newproc:新建goroutine,用来执行runtime.main,建好后插入P的本地队列
- runtime.mstart:启动m,进入调度循环
概念介绍
m0
表示进程启动的第一个线程,也叫主线程。它是个全局变量,在进程启动时通过汇编赋值
g0
- 每个m都有一个g0,因为每个线程都有一个系统堆栈。
- 和其他g的区别是栈的区别。
- g0上的栈是系统分配的,在linux上默认大小为8M,不能扩展也不能缩小。而普通g默认2k,可扩展
- g0上没有任何任务函数,也没有任何状态,它不能被调度程序抢占。
- 调度是在g0上跑的
源码位置:src/runtime/proc.go
1// 全局变量,赋值是汇编实现的
2var (
3 // 主线程
4 m0 m
5 // 和m0绑定的g0,也可以理解成m0的堆栈
6 g0 g
7 raceprocctx0 uintptr
8)
汇编入口
源码位置:src/runtime/asm_arm64.s
1TEXT runtime·rt0_go(SB),NOSPLIT,$0
2
3 // 进程启动时的主线程
4 // 当前栈和资源保存在全局变量runtime.g0中
5 MOVD $runtime·g0(SB), g
6
7 // 当前线程保存在m0
8 MOVD $runtime·m0(SB), R0
9
10 // g0绑定到m0
11 MOVD g, m_g0(R0)
12
13 // m0绑定到g0
14 MOVD R0, g_m(g)
15
16 // os初始化,获取cpu数量
17 BL runtime·osinit(SB)
18
19 // 调度器初始化
20 BL runtime·schedinit(SB)
21
22 // 取runtime.main的入口地址(mainPC),作为新goroutine要执行的函数,用户程序由它启动
23 MOVD $runtime·mainPC(SB), R0 // entry
24
25 // runtime.newproc启动一个goroutine
26 BL runtime·newproc(SB)
27 // 启动线程,启动调度系统
28 BL runtime·mstart(SB)
Go进程启动源码解析
osinit
- 源码位置:src/runtime/os_linux.go
1func osinit() {
2 // 获取cpu数量
3 ncpu = getproccount()
4 physHugePageSize = getHugePageSize()
5 osArchInit()
6}
schedinit
1// 调度系统的初始化
2// 进行P的初始化
3// 也会把M0和某个P绑定
4func schedinit() {
5 // raceinit must be the first call to race detector.
6 // In particular, it must be done before mallocinit below calls racemapshadow.
7 _g_ := getg()
8 if raceenabled {
9 _g_.racectx, raceprocctx0 = raceinit()
10 }
11
12 sched.maxmcount = 10000
13
14 tracebackinit()
15 moduledataverify()
16 stackinit()
17 mallocinit()
18 fastrandinit() // must run before mcommoninit
19 mcommoninit(_g_.m)
20 cpuinit() // must run before alginit
21 alginit() // maps must not be used before this call
22 modulesinit() // provides activeModules
23 typelinksinit() // uses maps, activeModules
24 itabsinit() // uses activeModules
25
26 msigsave(_g_.m)
27 initSigmask = _g_.m.sigmask
28
29 goargs()
30 goenvs()
31 parsedebugvars()
32 gcinit()
33
34 sched.lastpoll = uint64(nanotime())
35
36 // 确认P的个数,默认为CPU个数,可以通过 GOMAXPROCS 环境变量更改
37 procs := ncpu
38 if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 {
39 procs = n
40 }
41
42 // 一次性分配procs个P
43 if procresize(procs) != nil {
44 throw("unknown runnable goroutine during bootstrap")
45 }
46
47 // For cgocheck > 1, we turn on the write barrier at all times
48 // and check all pointer writes. We can't do this until after
49 // procresize because the write barrier needs a P.
50 if debug.cgocheck > 1 {
51 writeBarrier.cgo = true
52 writeBarrier.enabled = true
53 for _, p := range allp {
54 p.wbBuf.reset()
55 }
56 }
57
58 if buildVersion == "" {
59 // Condition should never trigger. This code just serves
60 // to ensure runtime·buildVersion is kept in the resulting binary.
61 buildVersion = "unknown"
62 }
63 if len(modinfo) == 1 {
64 // Condition should never trigger. This code just serves
65 // to ensure runtime·modinfo is kept in the resulting binary.
66 modinfo = ""
67 }
68}
mstart
1// 启动线程,并且启动调度系统
2func mstart() {
3 // 这里获取的是g0,在系统堆栈
4 _g_ := getg()
5
6 osStack := _g_.stack.lo == 0
7 if osStack {
8 // Initialize stack bounds from system stack.
9 // Cgo may have left stack size in stack.hi.
10 // minit may update the stack bounds.
11 size := _g_.stack.hi
12 if size == 0 {
13 size = 8192 * sys.StackGuardMultiplier
14 }
15 _g_.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
16 _g_.stack.lo = _g_.stack.hi - size + 1024
17 }
18 // Initialize stack guard so that we can start calling regular
19 // Go code.
20 _g_.stackguard0 = _g_.stack.lo + _StackGuard
21 // This is the g0, so we can also call go:systemstack
22 // functions, which check stackguard1.
23 _g_.stackguard1 = _g_.stackguard0
24 mstart1()
25
26 // Exit this thread.
27 switch GOOS {
28 case "windows", "solaris", "illumos", "plan9", "darwin", "aix":
29 // Windows, Solaris, illumos, Darwin, AIX and Plan 9 always system-allocate
30 // the stack, but put it in _g_.stack before mstart,
31 // so the logic above hasn't set osStack yet.
32 osStack = true
33 }
34 mexit(osStack)
35}
36
37func mstart1() {
38 _g_ := getg()
39
40 // 确保g是系统栈上的g0,调度器只在g0上执行
41 if _g_ != _g_.m.g0 {
42 throw("bad runtime·mstart")
43 }
44
45 // Record the caller for use as the top of stack in mcall and
46 // for terminating the thread.
47 // We're never coming back to mstart1 after we call schedule,
48 // so other calls can reuse the current frame.
49 save(getcallerpc(), getcallersp())
50 asminit()
51
52 // 初始化m
53 minit()
54
55 // Install signal handlers; after minit so that minit can
56 // prepare the thread to be able to handle the signals.
57 // 如果当前g的m是m0,执行mstartm0
58 if _g_.m == &m0 {
59 // 对于初始m,需要一些特殊处理
60 mstartm0()
61 }
62
63 // 如果有m的起始函数执行,先执行它
64 if fn := _g_.m.mstartfn; fn != nil {
65 fn()
66 }
67
68 if _g_.m != &m0 {
69 // 如果不是m0,需要绑定p
70 acquirep(_g_.m.nextp.ptr())
71 _g_.m.nextp = 0
72 }
73
74 // 开始进入调度
75 schedule()
76}
schedule
调度的本质是:尽力找可以运行的G,然后运行G上的任务函数
具体流程包括:
- 如果GC需要STW,就休眠M
- 每调度61次,就从全局队列获取一次G,避免全局队列的G被饿死
- 从p的本地队列获取G
- 调用findrunnable 找G,找不到的话就将M休眠,等待唤醒
- 找到G后,调用execute去执行G
main函数也是放入到G中的
1func schedule() {
2 _g_ := getg()
3
4top:
5 pp := _g_.m.p.ptr()
6 pp.preempt = false
7
8 // GC(垃圾回收)需要STW(stop the world),休眠当前m
9 if sched.gcwaiting != 0 {
10 gcstopm()
11 goto top
12 }
13 if pp.runSafePointFn != 0 {
14 runSafePointFn()
15 }
16
17 // Sanity check: if we are spinning, the run queue should be empty.
18 // Check this before calling checkTimers, as that might call
19 // goready to put a ready goroutine on the local run queue.
20 if _g_.m.spinning && (pp.runnext != 0 || pp.runqhead != pp.runqtail) {
21 throw("schedule: spinning with local work")
22 }
23
24 checkTimers(pp, 0)
25
26 var gp *g
27 var inheritTime bool
28
29 // Normal goroutines will check for need to wakeP in ready,
30 // but GCworkers and tracereaders will not, so the check must
31 // be done here instead.
32 tryWakeP := false
33 if trace.enabled || trace.shutdown {
34 gp = traceReader()
35 if gp != nil {
36 casgstatus(gp, _Gwaiting, _Grunnable)
37 traceGoUnpark(gp, 0)
38 tryWakeP = true
39 }
40 }
41 if gp == nil && gcBlackenEnabled != 0 {
42 gp = gcController.findRunnableGCWorker(_g_.m.p.ptr())
43 tryWakeP = tryWakeP || gp != nil
44 }
45 if gp == nil {
46 // Check the global runnable queue once in a while to ensure fairness.
47 // Otherwise two goroutines can completely occupy the local runqueue
48 // by constantly respawning each other.
49 // 每隔61次调度,从全局队列获取G
50 if _g_.m.p.ptr().schedtick%61 == 0 && sched.runqsize > 0 {
51 lock(&sched.lock)
52 gp = globrunqget(_g_.m.p.ptr(), 1)
53 unlock(&sched.lock)
54 }
55 }
56 if gp == nil {
57 // 从P的本地队列获取G
58 gp, inheritTime = runqget(_g_.m.p.ptr())
59 // We can see gp != nil here even if the M is spinning,
60 // if checkTimers added a local goroutine via goready.
61 }
62 if gp == nil {
63 // 阻塞住,直到找到G
64 gp, inheritTime = findrunnable() // blocks until work is available
65 }
66
67 // This thread is going to run a goroutine and is not spinning anymore,
68 // so if it was marked as spinning we need to reset it now and potentially
69 // start a new spinning M.
70 if _g_.m.spinning {
71 resetspinning()
72 }
73
74 if sched.disable.user && !schedEnabled(gp) {
75 // Scheduling of this goroutine is disabled. Put it on
76 // the list of pending runnable goroutines for when we
77 // re-enable user scheduling and look again.
78 lock(&sched.lock)
79 if schedEnabled(gp) {
80 // Something re-enabled scheduling while we
81 // were acquiring the lock.
82 unlock(&sched.lock)
83 } else {
84 sched.disable.runnable.pushBack(gp)
85 sched.disable.n++
86 unlock(&sched.lock)
87 goto top
88 }
89 }
90
91 // If about to schedule a not-normal goroutine (a GCworker or tracereader),
92 // wake a P if there is one.
93 if tryWakeP {
94 if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
95 wakep()
96 }
97 }
98 if gp.lockedm != 0 {
99 // Hands off own p to the locked m,
100 // then blocks waiting for a new p.
101 startlockedm(gp)
102 goto top
103 }
104 // 执行G上的任务
105 execute(gp, inheritTime)
106}
findrunnable
findrunnable函数的实现非常复杂,这个 300 多行的函数通过以下过程获取可运行的 Goroutine,获取不到就阻塞住:
- 从本地运行队列、全局运行队列查找
- 从网络轮询器中查找是否有等待运行的goroutine
- 通过 runtime.runqsteal 函数尝试从其他随机的处理器中窃取待运行的 Goroutine,在该过程中还可能窃取处理器中的计时器
execute
- execute中调用gogo函数将goroutine调度到当前线程上
1func execute(gp *g, inheritTime bool) {
2 _g_ := getg()
3
4 // Assign gp.m before entering _Grunning so running Gs have an
5 // M.
6 _g_.m.curg = gp
7 gp.m = _g_.m
8 casgstatus(gp, _Grunnable, _Grunning)
9 gp.waitsince = 0
10 gp.preempt = false
11 gp.stackguard0 = gp.stack.lo + _StackGuard
12 if !inheritTime {
13 _g_.m.p.ptr().schedtick++
14 }
15
16 // Check whether the profiler needs to be turned on or off.
17 hz := sched.profilehz
18 if _g_.m.profilehz != hz {
19 setThreadCPUProfiler(hz)
20 }
21
22 if trace.enabled {
23 // GoSysExit has to happen when we have a P, but before GoStart.
24 // So we emit it here.
25 if gp.syscallsp != 0 && gp.sysblocktraced {
26 traceGoSysExit(gp.sysexitticks)
27 }
28 traceGoStart()
29 }
30
31 gogo(&gp.sched)
32}
main函数
前面介绍过,执行runtime.main的goroutine(main goroutine)被放在和m0绑定的P的本地队列中。因此在调度时,先将main goroutine从本地队列取出来,然后传给execute,就可以执行main goroutine的任务了。main goroutine的任务函数为runtime.main()
主要流程包括:
- 新建线程执行sysmon,用于系统后台监控(定期GC和调度抢占)
- 确保在主线程上执行
- 执行runtime包下所有的init函数
- 启动执行GC的协程
- 执行用户定义的所有的init函数
- 然后再真正执行用户编写的main函数
- 最后exit(0)系统退出
- 如果没有退出,for循环一直访问非法地址,让操作系统杀死进程
源码位置:src/runtime/proc.go
1func main() {
2 g := getg()
3
4 // Racectx of m0->g0 is used only as the parent of the main goroutine.
5 // It must not be used for anything else.
6 g.m.g0.racectx = 0
7
8 // Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
9 // Using decimal instead of binary GB and MB because
10 // they look nicer in the stack overflow failure message.
11 if sys.PtrSize == 8 {
12 maxstacksize = 1000000000
13 } else {
14 maxstacksize = 250000000
15 }
16
17 // Allow newproc to start new Ms.
18 mainStarted = true
19
20 if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon
21 // 系统栈上分配一个新的m,运行sysmon(系统后台监控,定期垃圾回收和系统抢占)
22 // p里的G是按照顺序执行的,sysmon用来防止某个G执行时间过长,阻塞其他的G
23 systemstack(func() {
24 newm(sysmon, nil)
25 })
26 }
27
28 // Lock the main goroutine onto this, the main OS thread,
29 // during initialization. Most programs won't care, but a few
30 // do require certain calls to be made by the main thread.
31 // Those can arrange for main.main to run in the main thread
32 // by calling runtime.LockOSThread during initialization
33 // to preserve the lock.
34 lockOSThread()
35
36 // 确保在主线程上执行
37 if g.m != &m0 {
38 throw("runtime.main not on m0")
39 }
40
41 // 执行runtime包下所有的init
42 doInit(&runtime_inittask) // must be before defer
43 if nanotime() == 0 {
44 throw("nanotime returning zero")
45 }
46
47 // Defer unlock so that runtime.Goexit during init does the unlock too.
48 needUnlock := true
49 defer func() {
50 if needUnlock {
51 unlockOSThread()
52 }
53 }()
54
55 // Record when the world started.
56 runtimeInitTime = nanotime()
57
58 // 启动一个goroutine进行GC
59 gcenable()
60
61 main_init_done = make(chan bool)
62 if iscgo {
63 if _cgo_thread_start == nil {
64 throw("_cgo_thread_start missing")
65 }
66 if GOOS != "windows" {
67 if _cgo_setenv == nil {
68 throw("_cgo_setenv missing")
69 }
70 if _cgo_unsetenv == nil {
71 throw("_cgo_unsetenv missing")
72 }
73 }
74 if _cgo_notify_runtime_init_done == nil {
75 throw("_cgo_notify_runtime_init_done missing")
76 }
77 // Start the template thread in case we enter Go from
78 // a C-created thread and need to create a new thread.
79 startTemplateThread()
80 cgocall(_cgo_notify_runtime_init_done, nil)
81 }
82
83 // 执行用户定义的所有init函数
84 doInit(&main_inittask)
85
86 close(main_init_done)
87
88 needUnlock = false
89 unlockOSThread()
90
91 if isarchive || islibrary {
92 // A program compiled with -buildmode=c-archive or c-shared
93 // has a main, but it is not executed.
94 return
95 }
96
97 // 真正执行main包下的main函数
98 fn := main_main // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
99 fn()
100 if raceenabled {
101 racefini()
102 }
103
104 // Make racy client program work: if panicking on
105 // another goroutine at the same time as main returns,
106 // let the other goroutine finish printing the panic trace.
107 // Once it does, it will exit. See issues 3934 and 20018.
108 if atomic.Load(&runningPanicDefers) != 0 {
109 // Running deferred functions should not take long.
110 for c := 0; c < 1000; c++ {
111 if atomic.Load(&runningPanicDefers) == 0 {
112 break
113 }
114 Gosched()
115 }
116 }
117 if atomic.Load(&panicking) != 0 {
118 gopark(nil, nil, waitReasonPanicWait, traceEvGoStop, 1)
119 }
120
121 // 退出程序
122 exit(0)
123
124 // 确保程序崩溃,这样程序就一定会退出
125 // for循环一直访问非法地址,让操作系统杀死进程
126 for {
127 var x *int32
128 *x = 0
129 }
130}