Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(observability) Trace and log everything #438

Draft
wants to merge 21 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
034856c
Pass config for collector tracing to sats
javorszky May 12, 2023
9c6a536
Add curl to the e2core image
javorszky May 15, 2023
14feee6
Add the process ID to the returned arguments, use it in logs
javorszky May 15, 2023
3562751
Adjust how often things happen and how much timeouts we have
javorszky May 15, 2023
4c6dd1e
Break up parsing environment variables and constructing options into …
javorszky May 15, 2023
ae99094
Point sat to the correct collector
javorszky May 15, 2023
ad64625
Guard against a nil pointer on shutdown if source server doesn't exist
javorszky May 15, 2023
1102d55
Fix an import shadow variable issue
javorszky May 15, 2023
da8160d
Move putting a job on a syncer as the same step
javorszky May 15, 2023
b1beb8f
Add logger to pod
javorszky May 15, 2023
f24c273
Make an error chan buffered
javorszky May 15, 2023
1cedf20
Rework hub scan for failed messages loop
javorszky May 15, 2023
b757de7
Absolute metric ton of logging adds and adjusts
javorszky May 15, 2023
fa1f247
Fix tracing in e2core
javorszky May 15, 2023
a98dda7
Add spans to websocket transport handlerfunc
javorszky May 15, 2023
1d2c476
Add tracing to most of e2core
javorszky May 15, 2023
b31f994
Add request ID to main trace in handler func
javorszky May 16, 2023
4fd8d7e
Add a way to propagate trace data between service boundaries on message
javorszky May 16, 2023
6dbb514
Add contexts and start spans to connect the dots
javorszky May 16, 2023
7d8819c
Add context to the Job type
javorszky May 16, 2023
7b7b506
Trace all the way down to execution
javorszky May 16, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ RUN groupadd -g 999 e2core && \
chown -R e2core /home/e2core && \
chmod -R 700 /home/e2core
RUN apt-get update \
&& apt-get install -y ca-certificates
&& apt-get install -y ca-certificates \
&& apt-get install -y curl

# e2core binary
COPY --from=builder /go/src/github.com/suborbital/e2core/.bin/e2core /usr/local/bin/
Expand Down
6 changes: 6 additions & 0 deletions e2core/auth/access.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (

"github.com/suborbital/e2core/e2core/options"
"github.com/suborbital/e2core/foundation/common"
"github.com/suborbital/e2core/foundation/tracing"
"github.com/suborbital/systemspec/system"
)

Expand All @@ -28,6 +29,11 @@ func AuthorizationMiddleware(opts *options.Options) echo.MiddlewareFunc {

return func(next echo.HandlerFunc) echo.HandlerFunc {
return func(c echo.Context) error {
ctx, span := tracing.Tracer.Start(c.Request().Context(), "authorization-middleware")
defer span.End()

c.SetRequest(c.Request().WithContext(ctx))

identifier := c.Param("ident")
namespace := c.Param("namespace")
name := c.Param("name")
Expand Down
6 changes: 3 additions & 3 deletions e2core/backend/satbackend/exec/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type WaitFunc func() error

// Run runs a command, outputting to terminal and returning the process UUID, the process ID, and/or error
// a cancel function is returned which, when called, will terminate the process that was started, along with a wait function
func Run(cmd []string, env ...string) (string, context.CancelCauseFunc, WaitFunc, error) {
func Run(cmd []string, env ...string) (string, int, context.CancelCauseFunc, WaitFunc, error) {
procUUID := uuid.New().String()
uuidEnv := fmt.Sprintf("%s_UUID=%s", strings.ToUpper(cmd[0]), procUUID)
env = append(env, uuidEnv)
Expand All @@ -33,10 +33,10 @@ func Run(cmd []string, env ...string) (string, context.CancelCauseFunc, WaitFunc

err := command.Start()
if err != nil {
return "", nil, nil, errors.Wrap(err, "command.Start()")
return "", -1, nil, nil, errors.Wrap(err, "command.Start()")
}

return procUUID, cxl, command.Wait, nil
return procUUID, command.Process.Pid, cxl, command.Wait, nil
}

// this is unused but we may want to do logging-to-specific-directory some time in the
Expand Down
39 changes: 32 additions & 7 deletions e2core/backend/satbackend/orchestrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (o *Orchestrator) Start() error {

var err error

ticker := time.NewTicker(time.Second)
ticker := time.NewTicker(5 * time.Second)
loop:
for {
select {
Expand Down Expand Up @@ -144,11 +144,15 @@ func (o *Orchestrator) reconcileConstellation(syncer *syncer.Syncer) {
}

// repeat forever in case the command does error out
processUUID, cxl, wait, err := exec.Run(
processUUID, pid, cxl, wait, err := exec.Run(
cmd,
"SAT_HTTP_PORT="+port,
"SAT_CONTROL_PLANE="+o.opts.ControlPlane,
"SAT_CONNECTIONS="+connectionsEnv,
"SAT_TRACER_TYPE=collector",
"SAT_TRACER_SERVICENAME=e2core_bebby-"+port,
"SAT_TRACER_PROBABILITY=1",
"SAT_TRACER_COLLECTOR_ENDPOINT=collector:4317",
)
if err != nil {
ll.Err(err).Str("moduleFQMN", module.FQMN).Msg("exec.Run failed for sat instance")
Expand All @@ -158,20 +162,41 @@ func (o *Orchestrator) reconcileConstellation(syncer *syncer.Syncer) {
go func() {
err := wait()
if err != nil {
ll.Err(err).Str("moduleFQMN", module.FQMN).Str("port", port).Msg("calling waitfunc for the module failed")
ll.Err(err).
Str("moduleFQMN", module.FQMN).
Str("port", port).
Int("pid", pid).
Str("uuid", processUUID).
Msg("waitfunc returned with an error")
}

ll.Info().
Str("moduleFQMN", module.FQMN).
Str("port", port).
Int("pid", pid).
Str("uuid", processUUID).
Msg("adding port to dead list")

err = satWatcher.addToDead(port)
if err != nil {
ll.Err(err).Str("moduleFQMN", module.FQMN).Str("port", port).Msg("adding the port to the dead list")
ll.Err(err).
Str("moduleFQMN", module.FQMN).
Str("port", port).
Int("pid", pid).
Str("uuid", processUUID).
Msg("adding the port to the dead list failed")
}

ll.Info().Str("moduleFQMN", module.FQMN).Str("port", port).Msg("added port to dead list")
}()

satWatcher.add(module.FQMN, port, processUUID, cxl)
satWatcher.add(module.FQMN, port, processUUID, pid, cxl)

ll.Debug().Str("moduleFQMN", module.FQMN).Str("port", port).Msg("successfully started sat")
ll.Info().
Str("moduleFQMN", module.FQMN).
Str("port", port).
Int("pid", pid).
Str("uuid", processUUID).
Msg("successfully started sat")
}

// we want to max out at 8 threads per instance
Expand Down
4 changes: 3 additions & 1 deletion e2core/backend/satbackend/watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ type instance struct {
fqmn string
metrics *MetricsResponse
uuid string
pid int
cxl context.CancelCauseFunc
}

Expand Down Expand Up @@ -76,7 +77,7 @@ func (w *watcher) addToDead(port string) error {
}

// add inserts a new instance to the watched pool.
func (w *watcher) add(fqmn, port, uuid string, cxl context.CancelCauseFunc) {
func (w *watcher) add(fqmn, port, uuid string, pid int, cxl context.CancelCauseFunc) {
w.log.Info().Str("port", port).Str("fqmn", fqmn).Msg("adding one to the waitgroup port")
w.instancesRunning.Add(1)

Expand All @@ -88,6 +89,7 @@ func (w *watcher) add(fqmn, port, uuid string, cxl context.CancelCauseFunc) {
w.instances[port] = &instance{
fqmn: fqmn,
uuid: uuid,
pid: pid,
cxl: cxl,
}
}
Expand Down
31 changes: 24 additions & 7 deletions e2core/command/mod_start.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,14 @@ import (

"github.com/pkg/errors"
"github.com/rs/zerolog"
"github.com/sethvargo/go-envconfig"
"github.com/spf13/cobra"

"github.com/suborbital/e2core/e2core/release"
"github.com/suborbital/e2core/foundation/tracing"
"github.com/suborbital/e2core/sat/sat"
"github.com/suborbital/e2core/sat/sat/metrics"
satOptions "github.com/suborbital/e2core/sat/sat/options"
)

func ModStart() *cobra.Command {
Expand All @@ -30,13 +33,23 @@ func ModStart() *cobra.Command {
path = args[0]
}

opts, err := satOptions.Resolve(envconfig.OsLookuper())
if err != nil {
return errors.Wrap(err, "options.Resolve")
}

zerolog.TimeFieldFormat = zerolog.TimeFormatUnix
l := zerolog.New(os.Stderr).With().
Timestamp().
Str("command", "mod start").
Logger().Level(zerolog.InfoLevel)

config, err := sat.ConfigFromModuleArg(l, path)
Str("port", string(opts.Port)).
Str("procuuid", string(opts.ProcUUID)).
Int("pid", os.Getpid()).
Int("ppid", os.Getppid()).
Str("mode", "bebby").
Str("fqmn", path).
Logger()

config, err := sat.ConfigFromModuleArg(l, opts, path)
if err != nil {
return errors.Wrap(err, "failed to ConfigFromModuleArg")
}
Expand All @@ -49,14 +62,18 @@ func ModStart() *cobra.Command {
}
if httpPort > 0 {
config.Port = httpPort
l.Debug().Int("port", httpPort).Msg(fmt.Sprintf("Using port :%d for the sat backend", httpPort))
l.Info().Int("port", httpPort).Msg(fmt.Sprintf("Using port :%d for the sat backend", httpPort))
}

traceProvider, err := sat.SetupTracing(config.TracerConfig, l)
l.Info().Interface("sdkTrace-config", config.TracerConfig).Msg("this is the sdkTrace config we're using")

traceProvider, err := tracing.SetupTracing(config.TracerConfig, l)
if err != nil {
return errors.Wrap(err, "setup tracing")
}

l.Info().Msg("successfully set up tracing")

mctx, mcancel := context.WithTimeout(context.Background(), 5*time.Second)
defer mcancel()

Expand All @@ -67,7 +84,7 @@ func ModStart() *cobra.Command {

defer traceProvider.Shutdown(context.Background())

satInstance, err := sat.New(config, l, traceProvider, mtx)
satInstance, err := sat.New(config, l, mtx)
if err != nil {
return errors.Wrap(err, "failed to sat.New")
}
Expand Down
16 changes: 9 additions & 7 deletions e2core/command/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import (
)

const (
shutdownWaitTime = time.Second * 3
shutdownWaitTime = time.Second * 10
)

func Start() *cobra.Command {
Expand Down Expand Up @@ -116,8 +116,10 @@ func Start() *cobra.Command {
return errors.Wrap(err, "srv.Shutdown")
}

if err := sourceSrv.Shutdown(ctx); err != nil {
return errors.Wrap(err, "sourceSrv.Shutdown")
if sourceSrv != nil {
if err := sourceSrv.Shutdown(ctx); err != nil {
return errors.Wrap(err, "sourceSrv.Shutdown")
}
}

backend.Shutdown()
Expand All @@ -141,7 +143,7 @@ func setupLogger() zerolog.Logger {

logger := zerolog.New(os.Stderr).With().
Timestamp().
Str("command", "start").
Str("mode", "mothership").
Str("version", release.Version).
Logger().Level(zerolog.InfoLevel)

Expand Down Expand Up @@ -197,14 +199,14 @@ func setupSourceServer(logger zerolog.Logger, opts *options.Options) (*echo.Echo

ll.Debug().Msg("creating sourceserver from bundle: " + opts.BundlePath)

server, err := sourceserver.FromBundle(opts.BundlePath)
sourceSrv, err := sourceserver.FromBundle(opts.BundlePath)
if err != nil {
return nil, errors.Wrap(err, "failed to sourceserver.FromBundle")
}

server.HideBanner = true
sourceSrv.HideBanner = true

return server, nil
return sourceSrv, nil
}

// a nil server is ok if we don't need to run one
Expand Down
35 changes: 32 additions & 3 deletions e2core/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (

"github.com/pkg/errors"
"github.com/sethvargo/go-envconfig"

"github.com/suborbital/e2core/foundation/tracing"
)

const (
Expand All @@ -29,7 +31,8 @@ type Options struct {
Domain string `env:"E2CORE_DOMAIN"`
HTTPPort int `env:"E2CORE_HTTP_PORT,default=8080"`
TLSPort int `env:"E2CORE_TLS_PORT,default=443"`
TracerConfig TracerConfig `env:",prefix=E2CORE_TRACER_"`
EnvTracerConfig TracerConfig `env:",prefix=E2CORE_TRACER_"`
TracerConfig tracing.Config
}

// TracerConfig holds values specific to setting up the tracer. It's only used in proxy mode. All configuration options
Expand Down Expand Up @@ -149,11 +152,37 @@ func (o *Options) finalize() error {

o.Features = envOpts.Features
o.EnvironmentToken = ""
o.TracerConfig = TracerConfig{}
o.EnvTracerConfig = TracerConfig{}
o.StaticPeers = envOpts.StaticPeers

o.EnvironmentToken = envOpts.EnvironmentToken
o.TracerConfig = envOpts.TracerConfig
o.EnvTracerConfig = envOpts.EnvTracerConfig

tc := tracing.Config{
ServiceName: envOpts.EnvTracerConfig.ServiceName,
Probability: envOpts.EnvTracerConfig.Probability,
}

switch envOpts.EnvTracerConfig.TracerType {
case "collector":
tc.Type = tracing.ExporterCollector
case "honeycomb":
tc.Type = tracing.ExporterHoneycomb
}

if envOpts.EnvTracerConfig.HoneycombConfig != nil {
tc.Honeycomb = tracing.HoneycombConfig{
Endpoint: envOpts.EnvTracerConfig.HoneycombConfig.Endpoint,
APIKey: envOpts.EnvTracerConfig.HoneycombConfig.APIKey,
Dataset: envOpts.EnvTracerConfig.HoneycombConfig.Dataset,
}
}

if envOpts.EnvTracerConfig.Collector != nil {
tc.Collector = tracing.CollectorConfig{Endpoint: envOpts.EnvTracerConfig.Collector.Endpoint}
}

o.TracerConfig = tc

return nil
}
Loading