diff --git a/_fixtures/debug_line_benchmark_data b/_fixtures/debug_line_benchmark_data new file mode 100644 index 0000000000000000000000000000000000000000..706fe5a2a666e71f345eae56143290a29f5f3af3 Binary files /dev/null and b/_fixtures/debug_line_benchmark_data differ diff --git a/pkg/dwarf/line/line_parser.go b/pkg/dwarf/line/line_parser.go index 6506c3d34353b6ce118b051f11c4d5f0f18f9c75..da4b383318394ba87c59e09de575c598cd46490e 100644 --- a/pkg/dwarf/line/line_parser.go +++ b/pkg/dwarf/line/line_parser.go @@ -26,6 +26,12 @@ type DebugLineInfo struct { FileNames []*FileEntry Instructions []byte Lookup map[string]*FileEntry + + // stateMachineCache[pc] is a state machine stopped at pc; PCToLine keys it by basePC and copies it before running it further + stateMachineCache map[uint64]*StateMachine + + // lastMachineCache[pc] is the state machine PCToLine last ran for base address pc; it is reused until its lastAddress is past the requested pc + lastMachineCache map[uint64]*StateMachine } type FileEntry struct { @@ -61,6 +67,9 @@ func Parse(compdir string, buf *bytes.Buffer) *DebugLineInfo { dbl.IncludeDirs = append(dbl.IncludeDirs, compdir) } + dbl.stateMachineCache = make(map[uint64]*StateMachine) + dbl.lastMachineCache = make(map[uint64]*StateMachine) + parseDebugLinePrologue(dbl, buf) parseIncludeDirs(dbl, buf) parseFileEntries(dbl, buf) diff --git a/pkg/dwarf/line/line_parser_test.go b/pkg/dwarf/line/line_parser_test.go index 3bfd3e9533ac320744ea0fafd4833707e867cc6b..4ee83bd398ec3c882bff5df6c7783d349be0ac84 100644 --- a/pkg/dwarf/line/line_parser_test.go +++ b/pkg/dwarf/line/line_parser_test.go @@ -5,11 +5,14 @@ import ( "debug/macho" "debug/pe" "flag" + "fmt" + "io/ioutil" "os" "os/exec" "path/filepath" "strings" "testing" + "time" "github.com/pkg/profile" ) @@ -157,3 +160,116 @@ func BenchmarkLineParser(b *testing.B) { _ = ParseAll(data) } } + +func loadBenchmarkData(tb testing.TB) DebugLines { + p, err := filepath.Abs("../../../_fixtures/debug_line_benchmark_data") + if err != nil { + tb.Fatal("Could not find test data", p, err) + } + + data, err := ioutil.ReadFile(p) + if err != nil { + 
tb.Fatal("Could not read test data", err) + } + + return ParseAll(data) +} + +func BenchmarkStateMachine(b *testing.B) { + lineInfos := loadBenchmarkData(b) + b.ResetTimer() + + for i := 0; i < b.N; i++ { + sm := newStateMachine(lineInfos[0], lineInfos[0].Instructions) + + for { + if err := sm.next(); err != nil { + break + } + } + } +} + +type pctolineEntry struct { + pc uint64 + file string + line int +} + +func (entry *pctolineEntry) match(file string, line int) bool { + if entry.file == "" { + return true + } + return entry.file == file && entry.line == line +} + +func setupTestPCToLine(t testing.TB, lineInfos DebugLines) []pctolineEntry { + entries := []pctolineEntry{} + + sm := newStateMachine(lineInfos[0], lineInfos[0].Instructions) + for { + if err := sm.next(); err != nil { + break + } + if sm.valid { + if len(entries) == 0 || entries[len(entries)-1].pc != sm.address { + entries = append(entries, pctolineEntry{pc: sm.address, file: sm.file, line: sm.line}) + } else if len(entries) > 0 { + // having two entries at the same PC address messes up the test; clearing file makes match accept any file:line for this entry + entries[len(entries)-1].file = "" + } + } + } + + for i := 1; i < len(entries); i++ { + if entries[i].pc <= entries[i-1].pc { + t.Fatalf("not monotonically increasing %d %x", i, entries[i].pc) + } + } + + return entries +} + +func runTestPCToLine(t testing.TB, lineInfos DebugLines, entries []pctolineEntry, log bool, testSize uint64) { + const samples = 1000 + t0 := time.Now() + + i := 0 + for pc := entries[0].pc; pc <= entries[0].pc+testSize; pc++ { + file, line := lineInfos[0].PCToLine(pc/0x1000*0x1000, pc) + if pc == entries[i].pc { + if i%samples == 0 && log { + fmt.Printf("match %x / %x (%v)\n", pc, entries[len(entries)-1].pc, time.Since(t0)/samples) + t0 = time.Now() + } + + if !entries[i].match(file, line) { + t.Fatalf("Mismatch at PC %#x, expected %s:%d got %s:%d", pc, entries[i].file, entries[i].line, file, line) + } + i++ + } else { + if !entries[i-1].match(file, line) { + t.Fatalf("Mismatch at PC 
%#x, expected %s:%d (from previous valid entry) got %s:%d", pc, entries[i-1].file, entries[i-1].line, file, line) + } + } + } +} + +func TestPCToLine(t *testing.T) { + lineInfos := loadBenchmarkData(t) + + entries := setupTestPCToLine(t, lineInfos) + runTestPCToLine(t, lineInfos, entries, true, 0x50000) + t.Logf("restart form beginning") + runTestPCToLine(t, lineInfos, entries, true, 0x10000) +} + +func BenchmarkPCToLine(b *testing.B) { + lineInfos := loadBenchmarkData(b) + + entries := setupTestPCToLine(b, lineInfos) + b.ResetTimer() + for i := 0; i < b.N; i++ { + runTestPCToLine(b, lineInfos, entries, false, 0x10000) + } +} diff --git a/pkg/dwarf/line/state_machine.go b/pkg/dwarf/line/state_machine.go index 6ca3540817a02cfaf51c425b2b9bcd7a6060e09b..b7ecbef02d07c5d44919ddad8964a459ebbdb202 100644 --- a/pkg/dwarf/line/state_machine.go +++ b/pkg/dwarf/line/state_machine.go @@ -32,6 +32,15 @@ type StateMachine struct { // value of the state machine should be appended to the matrix representing // the compilation unit) valid bool + + started bool + + buf *bytes.Buffer // remaining instructions + opcodes []opcodefn + + lastAddress uint64 + lastFile string + lastLine int } type opcodefn func(*StateMachine, *bytes.Buffer) @@ -47,11 +56,6 @@ const ( DW_LNS_set_basic_block = 7 DW_LNS_const_add_pc = 8 DW_LNS_fixed_advance_pc = 9 - - // DWARF v4 - DW_LNS_set_prologue_end = 10 - DW_LNS_set_epilouge_begin = 11 - DW_LNS_set_isa = 12 ) // Extended opcodes @@ -71,11 +75,6 @@ var standardopcodes = map[byte]opcodefn{ DW_LNS_set_basic_block: setbasicblock, DW_LNS_const_add_pc: constaddpc, DW_LNS_fixed_advance_pc: fixedadvancepc, - - // DWARF v4 - DW_LNS_set_prologue_end: donothing0, - DW_LNS_set_epilouge_begin: donothing0, - DW_LNS_set_isa: donothing1, } var extendedopcodes = map[byte]opcodefn{ @@ -84,8 +83,13 @@ var extendedopcodes = map[byte]opcodefn{ DW_LINE_define_file: definefile, } -func newStateMachine(dbl *DebugLineInfo) *StateMachine { - return &StateMachine{dbl: dbl, file: 
dbl.FileNames[0].Path, line: 1} +func newStateMachine(dbl *DebugLineInfo, instructions []byte) *StateMachine { + opcodes := make([]opcodefn, len(standardopcodes)+1) + opcodes[0] = execExtendedOpcode + for op := range standardopcodes { + opcodes[op] = standardopcodes[op] + } + return &StateMachine{dbl: dbl, file: dbl.FileNames[0].Path, line: 1, buf: bytes.NewBuffer(instructions), opcodes: opcodes} } // Returns all PCs for a given file/line. Useful for loops where the 'for' line @@ -98,12 +102,13 @@ func (lineInfo *DebugLineInfo) AllPCsForFileLine(f string, l int) (pcs []uint64) var ( foundFile bool lastAddr uint64 - sm = newStateMachine(lineInfo) - buf = bytes.NewBuffer(lineInfo.Instructions) + sm = newStateMachine(lineInfo, lineInfo.Instructions) ) - for b, err := buf.ReadByte(); err == nil; b, err = buf.ReadByte() { - findAndExecOpcode(sm, buf, b) + for { + if err := sm.next(); err != nil { + break + } if foundFile && sm.file != f { return } @@ -116,8 +121,10 @@ func (lineInfo *DebugLineInfo) AllPCsForFileLine(f string, l int) (pcs []uint64) // Keep going until we're on a different line. We only care about // when a line comes back around (i.e. for loop) so get to next line, // and try to find the line we care about again. 
- for b, err := buf.ReadByte(); err == nil; b, err = buf.ReadByte() { - findAndExecOpcode(sm, buf, b) + for { + if err := sm.next(); err != nil { + break + } if line < sm.line { break } @@ -137,12 +144,13 @@ func (lineInfo *DebugLineInfo) AllPCsBetween(begin, end uint64) ([]uint64, error var ( pcs []uint64 lastaddr uint64 - sm = newStateMachine(lineInfo) - buf = bytes.NewBuffer(lineInfo.Instructions) + sm = newStateMachine(lineInfo, lineInfo.Instructions) ) - for b, err := buf.ReadByte(); err == nil; b, err = buf.ReadByte() { - findAndExecOpcode(sm, buf, b) + for { + if err := sm.next(); err != nil { + break + } if !sm.valid { continue } @@ -157,34 +165,76 @@ func (lineInfo *DebugLineInfo) AllPCsBetween(begin, end uint64) ([]uint64, error return pcs, nil } +// copy returns a copy of this state machine; running the returned state +// machine will not affect sm because the copy reads from its own bytes.Buffer. +func (sm *StateMachine) copy() *StateMachine { + var r StateMachine + r = *sm + r.buf = bytes.NewBuffer(sm.buf.Bytes()) + return &r +} + // PCToLine returns the filename and line number associated with pc. // If pc isn't found inside lineInfo's table it will return the filename and // line number associated with the closest PC address preceding pc. -func (lineInfo *DebugLineInfo) PCToLine(pc uint64) (string, int) { +// basePC is used as the cache key; it's normally the entry point for the +// function containing pc. A basePC of 0 bypasses both caches; basePC > pc panics. 
+func (lineInfo *DebugLineInfo) PCToLine(basePC, pc uint64) (string, int) { if lineInfo == nil { return "", 0 } + if basePC > pc { + panic(fmt.Errorf("basePC after pc %#x %#x", basePC, pc)) + } - var ( - buf = bytes.NewBuffer(lineInfo.Instructions) - sm = newStateMachine(lineInfo) - lastFilename string - lastLineno int - ) - for b, err := buf.ReadByte(); err == nil; b, err = buf.ReadByte() { - findAndExecOpcode(sm, buf, b) - if !sm.valid { - continue + var sm *StateMachine + if basePC == 0 { + sm = newStateMachine(lineInfo, lineInfo.Instructions) + } else { + // Try to use the last state machine that we used for this function, if + // there isn't one or it's already past pc try to clone the cached state + // machine stopped at the entry point of the function. + // As a last resort start from the start of the debug_line section. + sm = lineInfo.lastMachineCache[basePC] + if sm == nil || sm.lastAddress > pc { + sm = lineInfo.stateMachineCache[basePC] + if sm == nil { + sm = newStateMachine(lineInfo, lineInfo.Instructions) + sm.PCToLine(basePC) + lineInfo.stateMachineCache[basePC] = sm + } + sm = sm.copy() + lineInfo.lastMachineCache[basePC] = sm } - if sm.address > pc { - return lastFilename, lastLineno + } + + file, line, _ := sm.PCToLine(pc) + return file, line +} + +func (sm *StateMachine) PCToLine(pc uint64) (string, int, bool) { + if !sm.started { + if err := sm.next(); err != nil { + return "", 0, false } - if sm.address == pc { - return sm.file, sm.line + } + if sm.lastAddress > pc { + return "", 0, false + } + for { + if sm.valid { + if sm.address > pc { + return sm.lastFile, sm.lastLine, true + } + if sm.address == pc { + return sm.file, sm.line, true + } + } + if err := sm.next(); err != nil { + break } - lastFilename, lastLineno = sm.file, sm.line } - return "", 0 + return "", 0, false } // LineToPC returns the first PC address associated with filename:lineno. 
@@ -195,12 +245,13 @@ func (lineInfo *DebugLineInfo) LineToPC(filename string, lineno int) uint64 { var ( foundFile bool - sm = newStateMachine(lineInfo) - buf = bytes.NewBuffer(lineInfo.Instructions) + sm = newStateMachine(lineInfo, lineInfo.Instructions) ) - for b, err := buf.ReadByte(); err == nil; b, err = buf.ReadByte() { - findAndExecOpcode(sm, buf, b) + for { + if err := sm.next(); err != nil { + break + } if foundFile && sm.file != filename { break } @@ -214,15 +265,30 @@ func (lineInfo *DebugLineInfo) LineToPC(filename string, lineno int) uint64 { return 0 } -func findAndExecOpcode(sm *StateMachine, buf *bytes.Buffer, b byte) { - switch { - case b == 0: - execExtendedOpcode(sm, b, buf) - case b < sm.dbl.Prologue.OpcodeBase: - execStandardOpcode(sm, b, buf) - default: +func (sm *StateMachine) next() error { + sm.started = true + if sm.valid { + sm.lastAddress, sm.lastFile, sm.lastLine = sm.address, sm.file, sm.line + } + b, err := sm.buf.ReadByte() + if err != nil { + return err + } + if int(b) < len(sm.opcodes) { + sm.lastWasStandard = b != 0 + sm.valid = false + sm.opcodes[b](sm, sm.buf) + } else if b < sm.dbl.Prologue.OpcodeBase { + // unimplemented standard opcode: read the number of arguments specified + // in the prologue (StdOpLengths) and do nothing with them. NOTE(review): unlike the branch above, this path does not reset sm.valid - confirm that is intended + opnum := sm.dbl.Prologue.StdOpLengths[b-1] + for i := 0; i < int(opnum); i++ { + util.DecodeSLEB128(sm.buf) + } + } else { execSpecialOpcode(sm, b) } + return nil } func execSpecialOpcode(sm *StateMachine, instr byte) { @@ -243,28 +309,12 @@ func execSpecialOpcode(sm *StateMachine, instr byte) { sm.valid = true } -func execExtendedOpcode(sm *StateMachine, instr byte, buf *bytes.Buffer) { +func execExtendedOpcode(sm *StateMachine, buf *bytes.Buffer) { _, _ = util.DecodeULEB128(buf) b, _ := buf.ReadByte() - fn, ok := extendedopcodes[b] - if !ok { - panic(fmt.Sprintf("Encountered unknown extended opcode %#v\n", b)) + if fn, ok := extendedopcodes[b]; ok { + fn(sm, buf) } - sm.lastWasStandard = false - sm.valid 
= false - - fn(sm, buf) -} - -func execStandardOpcode(sm *StateMachine, instr byte, buf *bytes.Buffer) { - fn, ok := standardopcodes[instr] - if !ok { - panic(fmt.Sprintf("Encountered unknown standard opcode %#v\n", instr)) - } - sm.lastWasStandard = true - sm.valid = false - - fn(sm, buf) } func copyfn(sm *StateMachine, buf *bytes.Buffer) { @@ -312,15 +362,6 @@ func fixedadvancepc(sm *StateMachine, buf *bytes.Buffer) { sm.address += uint64(operand) } -func donothing0(sm *StateMachine, buf *bytes.Buffer) { - // does nothing, no operands -} - -func donothing1(sm *StateMachine, buf *bytes.Buffer) { - // does nothing, consumes one operand - util.DecodeSLEB128(buf) -} - func endsequence(sm *StateMachine, buf *bytes.Buffer) { sm.endSeq = true sm.valid = true diff --git a/pkg/proc/bininfo.go b/pkg/proc/bininfo.go index 810405df22e61398817c1388918a05f7d53b3ac8..42aac9dfe293ee9e8e4e7ade34fae9fccba1e7d2 100644 --- a/pkg/proc/bininfo.go +++ b/pkg/proc/bininfo.go @@ -175,7 +175,7 @@ func (bi *BinaryInfo) PCToLine(pc uint64) (string, int, *Function) { if fn == nil { return "", 0, nil } - f, ln := fn.cu.lineInfo.PCToLine(pc) + f, ln := fn.cu.lineInfo.PCToLine(fn.Entry, pc) return f, ln, fn } diff --git a/pkg/proc/proc.go b/pkg/proc/proc.go index e840d569de9123e6e31cf29da06cbdf94a032943..02632a64dbd80c1de319689369d2ec6fa384f25d 100644 --- a/pkg/proc/proc.go +++ b/pkg/proc/proc.go @@ -55,7 +55,7 @@ func FindFunctionLocation(p Process, funcName string, firstLine bool, lineOffset if firstLine { return FirstPCAfterPrologue(p, origfn, false) } else if lineOffset > 0 { - filename, lineno := origfn.cu.lineInfo.PCToLine(origfn.Entry) + filename, lineno := origfn.cu.lineInfo.PCToLine(origfn.Entry, origfn.Entry) breakAddr, _, err := bi.LineToPC(filename, lineno+lineOffset) return breakAddr, err }