提交 b9846c76 编写于 作者: D Derek Parker

command (next): Improvements for parallel programs

This patch aims to improve how Delve tracks the current goroutine,
especially in very highly parallel programs. The main spirit of this
patch is to ensure that even in situations where the goroutine we care
about is not executing (common for len(g) > len(m)) we still end up back
on that goroutine as a result of executing the 'next' command.

We accomplish this by tracking our original goroutine id, and any time a
breakpoint is hit or a thread stops, we examine the stopped threads and
see if any are executing the goroutine we care about. If not, we set
'next' breakpoints for them again and continue them. This is done so that
one of those threads can eventually pick up the goroutine we care about
and begin executing it again.
上级 71845350
package main
import (
"fmt"
"sync"
)
func sayhi(n int, wg *sync.WaitGroup) { // sayhi prints "hi" and n twice, then marks one unit of work on wg as done.
fmt.Println("hi", n) // NOTE(review): TestNextConcurrent appears to step over these lines by line number; comments are kept trailing so no line moves.
fmt.Println("hi", n)
wg.Done() // releases one of the counts that main waits on
}
func main() { // main starts ten sayhi goroutines and blocks until each has called wg.Done.
var wg sync.WaitGroup
wg.Add(10) // one count per goroutine started in the loop below
for i := 0; i < 10; i++ {
go sayhi(i, &wg) // each goroutine receives a distinct n
}
wg.Wait()
}
......@@ -252,7 +252,7 @@ func (dbp *Process) Next() error {
return dbp.run(dbp.next)
}
func (dbp *Process) next() error {
func (dbp *Process) next() (err error) {
// Make sure we clean up the temp breakpoints created by thread.Next
defer dbp.clearTempBreakpoints()
......@@ -260,63 +260,78 @@ func (dbp *Process) next() error {
// blocked trying to read from a channel. This is so that
// if control flow switches to that goroutine, we end up
// somewhere useful instead of in runtime code.
chanRecvCount, err := dbp.setChanRecvBreakpoints()
if err != nil {
if _, err := dbp.setChanRecvBreakpoints(); err != nil {
return err
}
// Get the goroutine for the current thread. We will
// use it later in order to ensure we are on the same
// goroutine.
g, err := dbp.CurrentThread.GetG()
if err != nil {
return err
}
if g.DeferPC != 0 {
_, err = dbp.SetTempBreakpoint(g.DeferPC)
if err != nil {
return err
var goroutineExiting bool
threadNext := func(thread *Thread) error {
if err = thread.setNextBreakpoints(); err != nil {
switch t := err.(type) {
case ThreadBlockedError, NoReturnAddr: // Noop
case GoroutineExitingError:
goroutineExiting = t.goid == g.Id
default:
return err
}
}
return thread.Continue()
}
var goroutineExiting bool
var waitCount int
// Make sure that we halt the process at the end of this
// function. We could get into a situation where we have
// started some, but not all threads.
//
// The Halt error is only surfaced when the function is not already
// returning an error: assigning it unconditionally would replace an
// earlier failure with Halt's result and hide it whenever Halt succeeds.
defer func() {
	if herr := dbp.Halt(); err == nil {
		err = herr
	}
}()
// Set next breakpoints and then continue each thread.
for _, th := range dbp.Threads {
// Ignore threads that aren't running go code.
if !th.blocked() {
waitCount++
if err = th.SetNextBreakpoints(); err != nil {
if gerr, ok := err.(GoroutineExitingError); ok {
waitCount = waitCount - 1 + chanRecvCount
if gerr.goid == g.Id {
goroutineExiting = true
}
} else {
return err
}
}
}
if err = th.Continue(); err != nil {
if err := threadNext(th); err != nil {
return err
}
}
for waitCount > 0 {
thread, err := dbp.trapWait(-1)
if err != nil {
return err
}
tg, err := thread.GetG()
if err != nil {
for {
if _, err := dbp.trapWait(-1); err != nil {
return err
}
// Make sure we're on the same goroutine, unless it has exited.
if tg.Id == g.Id || goroutineExiting {
if dbp.CurrentThread != thread {
dbp.SwitchThread(thread.Id)
// We need to wait for our goroutine to execute, which may not happen
// immediately.
//
// Loop through all threads, and for each stopped thread
// see if it is the thread that we care about (thread.g == original.g).
// If so, we're done. Otherwise set next temp breakpoints for
// each thread and continue them. The reason we do this is because
// if our goroutine is paused, we must execute other threads in order
// for them to get to a scheduling point, so they can pick up the
// goroutine we care about and begin executing it.
for _, thr := range dbp.Threads {
if !thr.Stopped() {
continue
}
tg, err := thr.GetG()
if err != nil {
return err
}
// Make sure we're on the same goroutine, unless it has exited.
if tg.Id == g.Id || goroutineExiting {
if dbp.CurrentThread != thr {
dbp.SwitchThread(thr.Id)
}
return nil
}
if err := threadNext(thr); err != nil {
return err
}
}
waitCount--
}
return dbp.Halt()
}
func (dbp *Process) setChanRecvBreakpoints() (int, error) {
......
......@@ -67,7 +67,7 @@ func (dbp *Process) Kill() (err error) {
if dbp.exited {
return nil
}
if !stopped(dbp.Pid) {
if !dbp.Threads[dbp.Pid].Stopped() {
return errors.New("process must be stopped in order to kill it")
}
if err = sys.Kill(-dbp.Pid, sys.SIGKILL); err != nil {
......@@ -322,14 +322,6 @@ func status(pid int) rune {
return state
}
// stopped reports whether the status of the process or thread identified
// by pid is STATUS_TRACE_STOP.
func stopped(pid int) bool {
	return status(pid) == STATUS_TRACE_STOP
}
func wait(pid, tgid, options int) (int, *sys.WaitStatus, error) {
var s sys.WaitStatus
if (pid != tgid) || (options != 0) {
......
......@@ -130,6 +130,9 @@ func TestHalt(t *testing.T) {
// actually stopped, err will not be nil if the process
// is still running.
for _, th := range p.Threads {
if !th.Stopped() {
t.Fatal("expected thread to be stopped, but was not")
}
if th.running != false {
t.Fatal("expected running = false for thread", th.Id)
}
......@@ -297,6 +300,36 @@ func TestNextGeneral(t *testing.T) {
testnext("testnextprog", testcases, "main.testnext", t)
}
// TestNextConcurrent runs the "parallel_next" fixture to a breakpoint in
// main.sayhi and then issues Next twice. After every step it checks both
// the resulting line number and that the variable n still has the value
// it had when the breakpoint was first hit, i.e. that the debugger ended
// up back on the same goroutine.
func TestNextConcurrent(t *testing.T) {
	steps := []nextTest{
		{begin: 9, end: 10},
		{begin: 10, end: 11},
	}
	withTestProcess("parallel_next", t, func(p *Process, fixture protest.Fixture) {
		_, err := setFunctionBreakpoint(p, "main.sayhi")
		assertNoError(err, t, "SetBreakpoint")
		assertNoError(p.Continue(), t, "Continue")

		file, line := currentLineNumber(p, t)

		// Every sayhi goroutine is started with a different n, so its
		// value identifies the goroutine we initially stopped on.
		startN, err := p.EvalVariable("n")
		assertNoError(err, t, "EvalVariable")

		for i := range steps {
			step := steps[i]
			if line != step.begin {
				t.Fatalf("Program not stopped at correct spot expected %d was %s:%d", step.begin, filepath.Base(file), line)
			}

			assertNoError(p.Next(), t, "Next() returned an error")

			file, line = currentLineNumber(p, t)
			if line != step.end {
				t.Fatalf("Program did not continue to correct next location expected %d was %s:%d", step.end, filepath.Base(file), line)
			}

			curN, err := p.EvalVariable("n")
			assertNoError(err, t, "EvalVariable")
			if curN.Value != startN.Value {
				t.Fatal("Did not end up on same goroutine")
			}
		}
	})
}
func TestNextGoroutine(t *testing.T) {
testcases := []nextTest{
{47, 42},
......
......@@ -5,6 +5,14 @@ import (
"fmt"
)
// NoReturnAddr is the error value used when a return address cannot be
// found for a function. fn holds the name of that function.
type NoReturnAddr struct {
	fn string
}

// Error implements the error interface, naming the function for which
// no return address was found.
func (e NoReturnAddr) Error() string {
	const format = "could not find return address for %s"
	return fmt.Sprintf(format, e.fn)
}
// Takes an offset from RSP and returns the address of the
// instruction the current function is going to return to.
func (thread *Thread) ReturnAddress() (uint64, error) {
......@@ -13,7 +21,7 @@ func (thread *Thread) ReturnAddress() (uint64, error) {
return 0, err
}
if len(locations) < 2 {
return 0, fmt.Errorf("could not find return address for %s", locations[0].Fn.BaseName())
return 0, NoReturnAddr{locations[0].Fn.BaseName()}
}
return locations[1].PC, nil
}
......
......@@ -115,6 +115,12 @@ func (thread *Thread) Location() (*Location, error) {
return &Location{PC: pc, File: f, Line: l, Fn: fn}, nil
}
// ThreadBlockedError is returned by setNextBreakpoints when the thread
// is blocked, in which case no 'next' breakpoints are set for it.
type ThreadBlockedError struct{}

// Error implements the error interface. The message is deliberately
// non-empty so that the error remains identifiable if it is ever logged
// or returned to a caller instead of being handled by a type switch.
func (tbe ThreadBlockedError) Error() string {
	return "thread blocked"
}
// Set breakpoints for potential next lines.
//
// There are two modes of operation for this method. First,
......@@ -129,11 +135,23 @@ func (thread *Thread) Location() (*Location, error) {
// at every single line within the current function, and
// another at the functions return address, in case we're at
// the end.
func (thread *Thread) SetNextBreakpoints() (err error) {
func (thread *Thread) setNextBreakpoints() (err error) {
if thread.blocked() {
return ThreadBlockedError{}
}
curpc, err := thread.PC()
if err != nil {
return err
}
g, err := thread.GetG()
if err != nil {
return err
}
if g.DeferPC != 0 {
if _, err = thread.dbp.SetTempBreakpoint(g.DeferPC); err != nil {
return err
}
}
// Grab info on our current stack frame. Used to determine
// whether we may be stepping outside of the current function.
......@@ -148,15 +166,11 @@ func (thread *Thread) SetNextBreakpoints() (err error) {
return err
}
if filepath.Ext(loc.File) == ".go" {
if err = thread.next(curpc, fde, loc.File, loc.Line); err != nil {
return err
}
err = thread.next(curpc, fde, loc.File, loc.Line)
} else {
if err = thread.cnext(curpc, fde); err != nil {
return err
}
err = thread.cnext(curpc, fde)
}
return nil
return err
}
// Go routine is exiting.
......@@ -278,3 +292,10 @@ func (thread *Thread) GetG() (g *G, err error) {
}
return
}
// Stopped reports whether the thread is stopped at
// the operating system level. The actual implementation
// is OS dependent; see stopped in the OS-specific thread file.
func (thread *Thread) Stopped() bool {
return thread.stopped()
}
......@@ -123,3 +123,15 @@ clear_trap_flag(thread_act_t thread) {
return thread_set_state(thread, x86_THREAD_STATE64, (thread_state_t)&regs, count);
}
// Queries THREAD_BASIC_INFO for the given thread and returns its
// suspend count, or -1 if thread_info does not return KERN_SUCCESS.
int
thread_blocked(thread_act_t thread) {
	struct thread_basic_info basic;
	unsigned int count = THREAD_BASIC_INFO_COUNT;

	if (thread_info((thread_t)thread, THREAD_BASIC_INFO, (thread_info_t)&basic, &count) != KERN_SUCCESS) {
		return -1;
	}
	return basic.suspend_count;
}
......@@ -12,14 +12,22 @@ type OSSpecificDetails struct {
registers C.x86_thread_state64_t
}
func (t *Thread) Halt() error {
var kret C.kern_return_t
kret = C.thread_suspend(t.os.thread_act)
func (t *Thread) Halt() (err error) {
defer func() {
if err == nil {
t.running = false
}
}()
if t.Stopped() {
return
}
kret := C.thread_suspend(t.os.thread_act)
if kret != C.KERN_SUCCESS {
return fmt.Errorf("could not suspend thread %d", t.Id)
errStr := C.GoString(C.mach_error_string(C.mach_error_t(kret)))
err = fmt.Errorf("could not suspend thread %d %s", t.Id, errStr)
return
}
t.running = false
return nil
return
}
func (t *Thread) singleStep() error {
......@@ -50,10 +58,13 @@ func (t *Thread) resume() error {
return nil
}
func (t *Thread) blocked() bool {
func (thread *Thread) blocked() bool {
// TODO(dp) cache the func pc to remove this lookup
pc, _ := t.PC()
fn := t.dbp.goSymTable.PCToFunc(pc)
pc, err := thread.PC()
if err != nil {
return false
}
fn := thread.dbp.goSymTable.PCToFunc(pc)
if fn == nil {
return false
}
......@@ -65,6 +76,10 @@ func (t *Thread) blocked() bool {
}
}
// stopped reports whether C.thread_blocked returns a value greater than
// zero for this thread's thread_act. A result of zero or below (including
// the -1 that thread_blocked returns on failure) is reported as not stopped.
func (thread *Thread) stopped() bool {
return C.thread_blocked(thread.os.thread_act) > C.int(0)
}
func (thread *Thread) writeMemory(addr uintptr, data []byte) (int, error) {
if len(data) == 0 {
return 0, nil
......
......@@ -30,3 +30,6 @@ set_registers(mach_port_name_t, x86_thread_state64_t*);
kern_return_t
get_identity(mach_port_name_t, thread_identifier_info_data_t *);
int
thread_blocked(thread_act_t thread);
......@@ -12,20 +12,31 @@ type OSSpecificDetails struct {
registers sys.PtraceRegs
}
func (t *Thread) Halt() error {
if stopped(t.Id) {
return nil
func (t *Thread) Halt() (err error) {
defer func() {
if err == nil {
t.running = false
}
}()
if t.Stopped() {
return
}
err := sys.Tgkill(t.dbp.Pid, t.Id, sys.SIGSTOP)
err = sys.Tgkill(t.dbp.Pid, t.Id, sys.SIGSTOP)
if err != nil {
return fmt.Errorf("halt err %s on thread %d", err, t.Id)
err = fmt.Errorf("halt err %s on thread %d", err, t.Id)
return
}
_, _, err = wait(t.Id, t.dbp.Pid, 0)
if err != nil {
return fmt.Errorf("wait err %s on thread %d", err, t.Id)
err = fmt.Errorf("wait err %s on thread %d", err, t.Id)
return
}
t.running = false
return nil
return
}
// stopped reports whether the status looked up for this thread's id
// is STATUS_TRACE_STOP.
func (thread *Thread) stopped() bool {
	return status(thread.Id) == STATUS_TRACE_STOP
}
func (t *Thread) resume() (err error) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册