Skip to content

Commit

Permalink
Capture Go errors automatically
Browse files Browse the repository at this point in the history
  • Loading branch information
marctc committed Jul 16, 2024
1 parent 27dc018 commit b4f9c50
Show file tree
Hide file tree
Showing 68 changed files with 590 additions and 144 deletions.
27 changes: 27 additions & 0 deletions bpf/errors.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#ifndef __ERRORS_H_
#define __ERRORS_H_

#ifndef TASK_COMM_LEN
#define TASK_COMM_LEN 16
#endif

#ifndef ERR_MSG_LEN
#define ERR_MSG_LEN 128
#endif

#ifndef MAX_STACK_DEPTH
#define MAX_STACK_DEPTH 32
#endif

typedef __u64 stack_trace_t[MAX_STACK_DEPTH];

typedef struct error_event {
__u32 pid;
__u32 cpu_id;
char comm[TASK_COMM_LEN];
__s32 ustack_sz;
stack_trace_t ustack;
u8 err_msg[ERR_MSG_LEN];
} error_event;

#endif /* __ERRORS_H_ */
84 changes: 83 additions & 1 deletion bpf/go_nethttp.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "tracing.h"
#include "hpack.h"
#include "ringbuf.h"
#include "errors.h"

typedef struct http_func_invocation {
u64 start_monotime_ns;
Expand All @@ -46,6 +47,13 @@ struct {
__uint(max_entries, MAX_CONCURRENT_REQUESTS);
} ongoing_http_client_requests SEC(".maps");

struct {
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__type(key, void *); // key: pointer to the request goroutine
__type(value, struct error_event);
__uint(max_entries, MAX_CONCURRENT_REQUESTS);
} last_error SEC(".maps");

struct {
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__type(key, void *); // key: pointer to the request goroutine
Expand Down Expand Up @@ -198,6 +206,9 @@ int uprobe_ServeHTTPReturns(struct pt_regs *ctx) {
resp_ptr = deref_resp_ptr;
}

struct error_event *error = bpf_map_lookup_elem(&last_error, &goroutine_addr);
bpf_map_delete_elem(&last_error, &goroutine_addr);

http_request_trace *trace = bpf_ringbuf_reserve(&events, sizeof(http_request_trace), 0);
if (!trace) {
bpf_dbg_printk("can't reserve space in the ringbuffer");
Expand All @@ -208,6 +219,8 @@ int uprobe_ServeHTTPReturns(struct pt_regs *ctx) {
trace->type = EVENT_HTTP_REQUEST;
trace->start_monotime_ns = invocation->start_monotime_ns;
trace->end_monotime_ns = bpf_ktime_get_ns();
if (error)
trace->error = *error;

goroutine_metadata *g_metadata = bpf_map_lookup_elem(&ongoing_goroutines, &goroutine_addr);
if (g_metadata) {
Expand Down Expand Up @@ -436,6 +449,75 @@ int uprobe_roundTripReturn(struct pt_regs *ctx) {
return 0;
}


SEC("uprobe/error")
int uprobe_error(struct pt_regs *ctx) {
bpf_dbg_printk("=== uprobe/proc error === ");

void *goroutine_addr = GOROUTINE_PTR(ctx);
bpf_dbg_printk("goroutine_addr %lx", goroutine_addr);

int pid = bpf_get_current_pid_tgid() >> 32;
int cpu_id = bpf_get_smp_processor_id();
int BPF_F_USER_STACK = (1ULL << 8);
struct error_event event = {
.pid = pid,
.cpu_id = cpu_id,
};

if (bpf_get_current_comm(event.comm, sizeof(event.comm)))
event.comm[0] = 0;

// Read the stack trace
event.ustack_sz = bpf_get_stack(ctx, event.ustack, sizeof(event.ustack), BPF_F_USER_STACK);

// Get the caller of the error function and store it in the first slot of the stack
void *sp_caller = STACK_PTR(ctx);
u64 caller = 0;
bpf_probe_read(&caller, sizeof(u64), sp_caller);
bpf_dbg_printk("sp_caller %lx caller %lx", sp_caller, caller);
event.ustack[0] = caller;

// Write event
if (bpf_map_update_elem(&last_error, &goroutine_addr, &event, BPF_ANY)) {
bpf_dbg_printk("can't update event error map element");
}
return 0;
}

SEC("uprobe/error_return")
int uprobe_errorReturn(struct pt_regs *ctx) {
bpf_dbg_printk("=== uprobe/proc error return === ");

void *goroutine_addr = GOROUTINE_PTR(ctx);
bpf_dbg_printk("goroutine_addr %lx", goroutine_addr);

error_event *event = bpf_map_lookup_elem(&last_error, &goroutine_addr);
if (event == NULL) {
bpf_dbg_printk("can't read error event");
return 0;
}

// Read the error message
// GO_PARAM1(ctx) is the pointer to the error message
// GO_PARAM2(ctx) is the length of the error message
void *msg_ptr = GO_PARAM1(ctx);
u64 len = (u64)GO_PARAM2(ctx);
u64 max_size = sizeof(event->err_msg);
u64 size = max_size < len ? max_size : len;
bpf_probe_read(&event->err_msg, size, msg_ptr);
if (size < max_size) {
((char *)event->err_msg)[size] = 0;
}
bpf_dbg_printk("error msg %llx, %s", msg_ptr, event->err_msg);

// Write event
if (bpf_map_update_elem(&last_error, &goroutine_addr, event, BPF_ANY)) {
bpf_dbg_printk("can't update event error map element");
}
return 0;
}

#ifndef NO_HEADER_PROPAGATION
// Context propagation through HTTP headers
SEC("uprobe/header_writeSubset")
Expand Down Expand Up @@ -938,4 +1020,4 @@ int uprobe_queryReturn(struct pt_regs *ctx) {
bpf_dbg_printk("can't reserve space in the ringbuffer");
}
return 0;
}
}
1 change: 0 additions & 1 deletion bpf/go_nethttp.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,4 @@ volatile const u64 rwc_conn_pos;
volatile const u64 rws_conn_pos;
volatile const u64 http2_server_conn_pos;
volatile const u64 cc_tconn_pos;

#endif
1 change: 1 addition & 0 deletions bpf/go_str.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#define GO_STR_H

#include "utils.h"
#include "bpf_dbg.h"

static __always_inline int read_go_str_n(char *name, void *base_ptr, u64 len, void *field, u64 max_size) {
u64 size = max_size < len ? max_size : len;
Expand Down
3 changes: 2 additions & 1 deletion bpf/headers/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
// In x86, current goroutine is pointed by r14, according to
// https://go.googlesource.com/go/+/refs/heads/dev.regabi/src/cmd/compile/internal-abi.md#amd64-architecture
#define GOROUTINE_PTR(x) ((void*)(x)->r14)

#define STACK_PTR(x) ((void*)(x)->sp)
#elif defined(__TARGET_ARCH_arm64)

#define GO_PARAM1(x) ((void*)((PT_REGS_ARM64 *)(x))->regs[0])
Expand All @@ -50,6 +50,7 @@
// In arm64, current goroutine is pointed by R28 according to
// https://github.com/golang/go/blob/master/src/cmd/compile/abi-internal.md#arm64-architecture
#define GOROUTINE_PTR(x) ((void*)((PT_REGS_ARM64 *)(x))->regs[28])
#define STACK_PTR(x) ((void*)((PT_REGS_ARM64 *)(x))->regs[13])

#endif /*defined(__TARGET_ARCH_arm64)*/

Expand Down
2 changes: 2 additions & 0 deletions bpf/http_trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "pid_types.h"
#include "utils.h"
#include "errors.h"
#include "http_types.h"

#define PATH_MAX_LEN 100
Expand All @@ -39,6 +40,7 @@ typedef struct http_request_trace_t {
u16 status;
connection_info_t conn __attribute__ ((aligned (8)));
s64 content_length;
error_event error;
tp_info_t tp;

pid_info pid;
Expand Down
11 changes: 6 additions & 5 deletions pkg/beyla/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,12 @@ var DefaultConfig = Config{
TTL: defaultMetricsTTL,
},
Traces: otel.TracesConfig{
Protocol: otel.ProtocolUnset,
TracesProtocol: otel.ProtocolUnset,
MaxQueueSize: 4096,
MaxExportBatchSize: 4096,
ReportersCacheLen: ReporterLRUSize,
Protocol: otel.ProtocolUnset,
TracesProtocol: otel.ProtocolUnset,
MaxQueueSize: 4096,
MaxExportBatchSize: 4096,
ReportersCacheLen: ReporterLRUSize,
ReportExceptionEvents: false,
Instrumentations: []string{
instrumentations.InstrumentationALL,
},
Expand Down
2 changes: 1 addition & 1 deletion pkg/internal/discover/typer.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ func (t *typer) inspectOffsets(execElf *exec.FileInfo) (*goexec.Offsets, bool, e
t.log.Debug("skipping inspection for Go functions", "pid", execElf.Pid, "comm", execElf.CmdExePath)
} else {
t.log.Debug("inspecting", "pid", execElf.Pid, "comm", execElf.CmdExePath)
offsets, err := goexec.InspectOffsets(execElf, t.allGoFunctions)
offsets, err := goexec.InspectOffsets(&t.cfg.Traces, execElf, t.allGoFunctions)
if err != nil {
t.log.Debug("couldn't find go specific tracers", "error", err)
return nil, false, err
Expand Down
Binary file added pkg/internal/ebpf/common/bpf_bpf.o
Binary file not shown.
11 changes: 10 additions & 1 deletion pkg/internal/ebpf/common/bpf_bpfel_arm64.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file modified pkg/internal/ebpf/common/bpf_bpfel_arm64.o
Binary file not shown.
11 changes: 10 additions & 1 deletion pkg/internal/ebpf/common/bpf_bpfel_x86.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file modified pkg/internal/ebpf/common/bpf_bpfel_x86.o
Binary file not shown.
3 changes: 1 addition & 2 deletions pkg/internal/ebpf/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,7 @@ func ReadBPFTraceAsSpan(record *ringbuf.Record, filter ServiceFilter) (request.S
if err != nil {
return request.Span{}, true, err
}

return HTTPRequestTraceToSpan(&event), false, nil
return HTTPRequestTraceToSpan(&event, filter), false, nil
}

func ReadSQLRequestTraceAsSpan(record *ringbuf.Record) (request.Span, bool, error) {
Expand Down
31 changes: 26 additions & 5 deletions pkg/internal/ebpf/common/pids.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package ebpfcommon

import (
"debug/gosym"
"log/slog"
"sync"

Expand Down Expand Up @@ -31,11 +32,12 @@ type PIDInfo struct {
}

type ServiceFilter interface {
AllowPID(uint32, uint32, svc.ID, PIDType)
AllowPID(uint32, uint32, svc.ID, PIDType, *gosym.Table)
BlockPID(uint32, uint32)
ValidPID(uint32, uint32, PIDType) bool
Filter(inputSpans []request.Span) []request.Span
CurrentPIDs(PIDType) map[uint32]map[uint32]svc.ID
GetSymTab(uint32) *gosym.Table
}

// PIDsFilter keeps a thread-safe copy of the PIDs whose traces are allowed to
Expand All @@ -44,6 +46,7 @@ type ServiceFilter interface {
type PIDsFilter struct {
log *slog.Logger
current map[uint32]map[uint32]PIDInfo
symTabs map[uint32]*gosym.Table
mux *sync.RWMutex
}

Expand All @@ -55,6 +58,7 @@ func NewPIDsFilter(log *slog.Logger) *PIDsFilter {
log: log,
current: map[uint32]map[uint32]PIDInfo{},
mux: &sync.RWMutex{},
symTabs: make(map[uint32]*gosym.Table),
}
}

Expand All @@ -73,16 +77,17 @@ func CommonPIDsFilter(systemWide bool) ServiceFilter {
return commonPIDsFilter
}

func (pf *PIDsFilter) AllowPID(pid, ns uint32, svc svc.ID, pidType PIDType) {
func (pf *PIDsFilter) AllowPID(pid, ns uint32, svc svc.ID, pidType PIDType, symTab *gosym.Table) {
pf.mux.Lock()
defer pf.mux.Unlock()
pf.addPID(pid, ns, svc, pidType)
pf.addPID(pid, ns, svc, pidType, symTab)
}

func (pf *PIDsFilter) BlockPID(pid, ns uint32) {
pf.mux.Lock()
defer pf.mux.Unlock()
pf.removePID(pid, ns)

}

func (pf *PIDsFilter) ValidPID(userPID, ns uint32, pidType PIDType) bool {
Expand Down Expand Up @@ -151,13 +156,24 @@ func (pf *PIDsFilter) Filter(inputSpans []request.Span) []request.Span {
return outputSpans
}

func (pf *PIDsFilter) addPID(pid, nsid uint32, s svc.ID, t PIDType) {
func (pf *PIDsFilter) GetSymTab(pid uint32) *gosym.Table {
pf.mux.RLock()
defer pf.mux.RUnlock()
return pf.symTabs[pid]
}

func (pf *PIDsFilter) addPID(pid, nsid uint32, s svc.ID, t PIDType, symTab *gosym.Table) {
ns, nsExists := pf.current[nsid]
if !nsExists {
ns = make(map[uint32]PIDInfo)
pf.current[nsid] = ns
}

_, stExists := pf.symTabs[pid]
if !stExists {
pf.symTabs[pid] = symTab
}

allPids, err := readNamespacePIDs(int32(pid))

if err != nil {
Expand All @@ -180,13 +196,14 @@ func (pf *PIDsFilter) removePID(pid, nsid uint32) {
if len(ns) == 0 {
delete(pf.current, nsid)
}
delete(pf.symTabs, pid)
}

// IdentityPidsFilter is a PIDsFilter that does not filter anything. It is feasible
// for system-wide instrumenation
type IdentityPidsFilter struct{}

func (pf *IdentityPidsFilter) AllowPID(_ uint32, _ uint32, _ svc.ID, _ PIDType) {}
func (pf *IdentityPidsFilter) AllowPID(_ uint32, _ uint32, _ svc.ID, _ PIDType, _ *gosym.Table) {}

func (pf *IdentityPidsFilter) BlockPID(_ uint32, _ uint32) {}

Expand All @@ -206,6 +223,10 @@ func (pf *IdentityPidsFilter) Filter(inputSpans []request.Span) []request.Span {
return inputSpans
}

func (pf *IdentityPidsFilter) GetSymTab(_ uint32) *gosym.Table {
return nil
}

func serviceInfo(pid uint32) svc.ID {
cached, ok := activePids.Get(pid)
if ok {
Expand Down
Loading

0 comments on commit b4f9c50

Please sign in to comment.