From 66fe21028ed30ab058b6883a01a19f47a77302d3 Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Mon, 9 Dec 2024 17:00:55 -0500 Subject: [PATCH 01/22] factor out runPlatformProvisioning, setupProvisioningPaths, ignoredSignal (to platform-specific implementations) --- cmd/viam-agent/main.go | 90 +++++++--------------------- cmd/viam-agent/subsystems_linux.go | 83 +++++++++++++++++++++++++ cmd/viam-agent/subsystems_windows.go | 16 +++++ 3 files changed, 120 insertions(+), 69 deletions(-) create mode 100644 cmd/viam-agent/subsystems_linux.go create mode 100644 cmd/viam-agent/subsystems_windows.go diff --git a/cmd/viam-agent/main.go b/cmd/viam-agent/main.go index ccccb74..63eecf1 100644 --- a/cmd/viam-agent/main.go +++ b/cmd/viam-agent/main.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "fmt" - "io/fs" "os" "os/exec" "os/signal" @@ -19,8 +18,6 @@ import ( "github.com/nightlyone/lockfile" "github.com/pkg/errors" "github.com/viamrobotics/agent" - "github.com/viamrobotics/agent/subsystems/provisioning" - _ "github.com/viamrobotics/agent/subsystems/syscfg" "github.com/viamrobotics/agent/subsystems/viamagent" "github.com/viamrobotics/agent/subsystems/viamserver" "go.viam.com/rdk/logging" @@ -34,6 +31,18 @@ var ( globalLogger = logging.NewLogger("viam-agent") ) +//nolint:lll +type agentOpts struct { + Config string `default:"/etc/viam.json" description:"Path to config file" long:"config" short:"c"` + ProvisioningConfig string `default:"/etc/viam-provisioning.json" description:"Path to provisioning (customization) config file" long:"provisioning" short:"p"` + Debug bool `description:"Enable debug logging (agent only)" env:"VIAM_AGENT_DEBUG" long:"debug" short:"d"` + Fast bool `description:"Enable fast start mode" env:"VIAM_AGENT_FAST_START" long:"fast" short:"f"` + Help bool `description:"Show this help message" long:"help" short:"h"` + Version bool `description:"Show version" long:"version" short:"v"` + Install bool `description:"Install systemd service" long:"install"` + DevMode bool `description:"Allow non-root and non-service" env:"VIAM_AGENT_DEVMODE" long:"dev-mode"` +} + //nolint:gocognit func main() { ctx, cancel := setupExitSignalHandling() @@ -43,17 +52,7 @@ func main() { activeBackgroundWorkers.Wait() }() - //nolint:lll - var opts struct { - Config string `default:"/etc/viam.json" description:"Path to config file" long:"config" short:"c"` - ProvisioningConfig string `default:"/etc/viam-provisioning.json" description:"Path to provisioning (customization) config file" long:"provisioning" short:"p"` - Debug bool `description:"Enable debug logging (agent only)" env:"VIAM_AGENT_DEBUG" long:"debug" short:"d"` - Fast bool `description:"Enable fast start mode" env:"VIAM_AGENT_FAST_START" long:"fast" short:"f"` - Help bool `description:"Show this help message" long:"help" short:"h"` - Version bool `description:"Show version" long:"version" short:"v"` - Install bool `description:"Install systemd service" long:"install"` - DevMode bool `description:"Allow non-root and non-service" env:"VIAM_AGENT_DEVMODE" long:"dev-mode"` - } + var opts agentOpts parser := flags.NewParser(&opts, flags.IgnoreUnknown) parser.Usage = "runs as a background service and manages updates and the process lifecycle for viam-server." @@ -117,63 +116,16 @@ func main() { } }() - // pass the provisioning path arg to the subsystem - absProvConfigPath, err := filepath.Abs(opts.ProvisioningConfig) - exitIfError(err) - provisioning.ProvisioningConfigFilePath = absProvConfigPath - globalLogger.Infof("provisioning config file path: %s", absProvConfigPath) - - // tie the manager config to the viam-server config - absConfigPath, err := filepath.Abs(opts.Config) - exitIfError(err) - viamserver.ConfigFilePath = absConfigPath - provisioning.AppConfigFilePath = absConfigPath - globalLogger.Infof("config file path: %s", absConfigPath) + absConfigPath := setupProvisioningPaths(opts) // main manager structure manager, err := agent.NewManager(ctx, globalLogger) exitIfError(err) - err = manager.LoadConfig(absConfigPath) + loadConfigErr := manager.LoadConfig(absConfigPath) //nolint:nestif - if err != nil { - // If the local /etc/viam.json config is corrupted, invalid, or missing (due to a new install), we can get stuck here. - // Rename the file (if it exists) and wait to provision a new one. - if !errors.Is(err, fs.ErrNotExist) { - if err := os.Rename(absConfigPath, absConfigPath+".old"); err != nil { - // if we can't rename the file, we're up a creek, and it's fatal - globalLogger.Error(errors.Wrapf(err, "removing invalid config file %s", absConfigPath)) - globalLogger.Error("unable to continue with provisioning, exiting") - manager.CloseAll() - return - } - } - - // We manually start the provisioning service to allow the user to update it and wait. - // The user may be updating it soon, so better to loop quietly than to exit and let systemd keep restarting infinitely. - globalLogger.Infof("main config file %s missing or corrupt, entering provisioning mode", absConfigPath) - - if err := manager.StartSubsystem(ctx, provisioning.SubsysName); err != nil { - if errors.Is(err, agent.ErrSubsystemDisabled) { - globalLogger.Warn("provisioning subsystem disabled, please manually update /etc/viam.json and connect to internet") - } else { - globalLogger.Error(errors.Wrapf(err, - "could not start provisioning subsystem, please manually update /etc/viam.json and connect to internet")) - manager.CloseAll() - return - } - } - - for { - globalLogger.Warn("waiting for user provisioning") - if !utils.SelectContextOrWait(ctx, time.Second*10) { - manager.CloseAll() - return - } - if err := manager.LoadConfig(absConfigPath); err == nil { - break - } - } + if loadConfigErr != nil { + runPlatformProvisioning(ctx, manager, loadConfigErr, absConfigPath) } netAppender, err := manager.CreateNetAppender() if err != nil { @@ -268,12 +220,11 @@ func setupExitSignalHandling() (context.Context, func()) { // this will eventually be handled elsewhere as a restart, not exit case syscall.SIGHUP: - // ignore SIGURG entirely, it's used for real-time scheduling notifications - case syscall.SIGURG: - // log everything else default: - globalLogger.Debugw("received unknown signal", "signal", sig) + if !ignoredSignal(sig) { + globalLogger.Debugw("received unknown signal", "signal", sig) + } } } }() @@ -282,6 +233,7 @@ func setupExitSignalHandling() (context.Context, func()) { return ctx, cancel } +// helper to log.Fatal if error is non-nil. func exitIfError(err error) { if err != nil { globalLogger.Fatal(err) diff --git a/cmd/viam-agent/subsystems_linux.go b/cmd/viam-agent/subsystems_linux.go new file mode 100644 index 0000000..fca14a6 --- /dev/null +++ b/cmd/viam-agent/subsystems_linux.go @@ -0,0 +1,83 @@ +package main + +import ( + "context" + "io/fs" + "os" + "path/filepath" + "syscall" + "time" + + "github.com/pkg/errors" + "github.com/viamrobotics/agent" + "github.com/viamrobotics/agent/subsystems/provisioning" + // register-only. + _ "github.com/viamrobotics/agent/subsystems/syscfg" + "github.com/viamrobotics/agent/subsystems/viamserver" + "go.viam.com/utils" +) + +// platform-specific provisioning logic. +func runPlatformProvisioning(ctx context.Context, manager *agent.Manager, loadConfigErr error, absConfigPath string) { + // If the local /etc/viam.json config is corrupted, invalid, or missing (due to a new install), we can get stuck here. + // Rename the file (if it exists) and wait to provision a new one. + if !errors.Is(loadConfigErr, fs.ErrNotExist) { + if err := os.Rename(absConfigPath, absConfigPath+".old"); err != nil { + // if we can't rename the file, we're up a creek, and it's fatal + globalLogger.Error(errors.Wrapf(err, "removing invalid config file %s", absConfigPath)) + globalLogger.Error("unable to continue with provisioning, exiting") + manager.CloseAll() + return + } + } + + // We manually start the provisioning service to allow the user to update it and wait. + // The user may be updating it soon, so better to loop quietly than to exit and let systemd keep restarting infinitely. + globalLogger.Infof("main config file %s missing or corrupt, entering provisioning mode", absConfigPath) + + if err := manager.StartSubsystem(ctx, provisioning.SubsysName); err != nil { + if errors.Is(err, agent.ErrSubsystemDisabled) { + globalLogger.Warn("provisioning subsystem disabled, please manually update /etc/viam.json and connect to internet") + } else { + globalLogger.Error(errors.Wrapf(err, + "could not start provisioning subsystem, please manually update /etc/viam.json and connect to internet")) + manager.CloseAll() + return + } + } + + for { + globalLogger.Warn("waiting for user provisioning") + if !utils.SelectContextOrWait(ctx, time.Second*10) { + manager.CloseAll() + return + } + if err := manager.LoadConfig(absConfigPath); err == nil { + break + } + } +} + +// platform-specific path setup. +func setupProvisioningPaths(opts agentOpts) string { + // pass the provisioning path arg to the subsystem + absProvConfigPath, err := filepath.Abs(opts.ProvisioningConfig) + exitIfError(err) + provisioning.ProvisioningConfigFilePath = absProvConfigPath + globalLogger.Infof("provisioning config file path: %s", absProvConfigPath) + + // tie the manager config to the viam-server config + absConfigPath, err := filepath.Abs(opts.Config) + exitIfError(err) + viamserver.ConfigFilePath = absConfigPath + provisioning.AppConfigFilePath = absConfigPath + globalLogger.Infof("config file path: %s", absConfigPath) + + return absConfigPath +} + +// return true if this error is safe to ignore on this platform. +func ignoredSignal(sig os.Signal) bool { + // ignore SIGURG entirely, it's used for real-time scheduling notifications + return sig == syscall.SIGURG +} diff --git a/cmd/viam-agent/subsystems_windows.go b/cmd/viam-agent/subsystems_windows.go new file mode 100644 index 0000000..7fa94f9 --- /dev/null +++ b/cmd/viam-agent/subsystems_windows.go @@ -0,0 +1,16 @@ +package main + +import ( + "context" + "os" + + "github.com/viamrobotics/agent" +) + +func runPlatformProvisioning(ctx context.Context, manager *agent.Manager, loadConfigErr error, absConfigPath string) { + panic("todo fancy error for provisioning on windows") +} + +func setupProvisioningPaths(opts agentOpts) string { return "" } + +func ignoredSignal(sig os.Signal) bool { return false } From 758477b2ccc77854ace8a1e0b73a143b60732d3a Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Mon, 9 Dec 2024 18:14:18 -0500 Subject: [PATCH 02/22] utils.go platform imp --- utils.go | 23 ++--------------------- utils_linux.go | 37 +++++++++++++++++++++++++++++++++++++ utils_windows.go | 25 +++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 21 deletions(-) create mode 100644 utils_linux.go create mode 100644 utils_windows.go diff --git a/utils.go b/utils.go index 4872259..d4400f4 100644 --- a/utils.go +++ b/utils.go @@ -17,12 +17,10 @@ import ( "path" "path/filepath" "strings" - "syscall" "time" errw "github.com/pkg/errors" "github.com/ulikunitz/xz" - "golang.org/x/sys/unix" "google.golang.org/protobuf/types/known/structpb" ) @@ -71,13 +69,8 @@ func InitPaths() error { } return errw.Wrapf(err, "checking directory %s", p) } - stat, ok := info.Sys().(*syscall.Stat_t) - if !ok { - // should be impossible on Linux - return errw.New("cannot convert to syscall.Stat_t") - } - if uid != int(stat.Uid) { - return errw.Errorf("%s is owned by UID %d but the current UID is %d", p, stat.Uid, uid) + if err := checkPathOwner(uid, info); err != nil { + return err } if !info.IsDir() { return errw.Errorf("%s should be a directory, but is not", p) @@ -283,18 +276,6 @@ func ForceSymlink(orig, symlink string) error { return SyncFS(symlink) } -func SyncFS(syncPath string) (errRet error) { - file, errRet := os.Open(filepath.Dir(syncPath)) - if errRet != nil { - return errw.Wrapf(errRet, "syncing fs %s", syncPath) - } - _, _, err := unix.Syscall(unix.SYS_SYNCFS, file.Fd(), 0, 0) - if err != 0 { - errRet = errw.Wrapf(err, "syncing fs %s", syncPath) - } - return errors.Join(errRet, file.Close()) -} - func WriteFileIfNew(outPath string, data []byte) (bool, error) { //nolint:gosec curFileBytes, err := os.ReadFile(outPath) diff --git a/utils_linux.go b/utils_linux.go new file mode 100644 index 0000000..bafef5a --- /dev/null +++ b/utils_linux.go @@ -0,0 +1,37 @@ +package agent + +import ( + "errors" + "io/fs" + "os" + "path/filepath" + "syscall" + + errw "github.com/pkg/errors" + "golang.org/x/sys/unix" +) + +// platform-specific UID check. +func checkPathOwner(uid int, info fs.FileInfo) error { + stat, ok := info.Sys().(*syscall.Stat_t) + if !ok { + // should be impossible on Linux + return errw.New("cannot convert to syscall.Stat_t") + } + if uid != int(stat.Uid) { + return errw.Errorf("%s is owned by UID %d but the current UID is %d", info.Name(), stat.Uid, uid) + } + return nil +} + +func SyncFS(syncPath string) (errRet error) { + file, errRet := os.Open(filepath.Dir(syncPath)) + if errRet != nil { + return errw.Wrapf(errRet, "syncing fs %s", syncPath) + } + _, _, err := unix.Syscall(unix.SYS_SYNCFS, file.Fd(), 0, 0) + if err != 0 { + errRet = errw.Wrapf(err, "syncing fs %s", syncPath) + } + return errors.Join(errRet, file.Close()) +} diff --git a/utils_windows.go b/utils_windows.go new file mode 100644 index 0000000..f54c1ed --- /dev/null +++ b/utils_windows.go @@ -0,0 +1,25 @@ +package agent + +import ( + "io/fs" + "syscall" +) + +// platform-specific UID check. +func checkPathOwner(uid int, info fs.FileInfo) error { + // todo: figure this out on windows. + return nil +} + +func SyncFS(syncPath string) error { + handle, err := syscall.Open(syncPath, syscall.O_RDWR, 0) + if err != nil { + return err + } + defer syscall.CloseHandle(handle) + err = syscall.Fsync(handle) + if err != nil { + return err + } + return nil +} From 7f339562542af414fe5a6a2d06960c17980951aa Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Thu, 12 Dec 2024 17:52:14 -0500 Subject: [PATCH 03/22] windows build working --- Makefile | 7 +++++ subsystem.go | 46 ++------------------------- subsystem_linux.go | 48 +++++++++++++++++++++++++++++ subsystem_windows.go | 10 ++++++ subsystems/viamserver/viamserver.go | 8 ++--- utils/utils_linux.go | 21 +++++++++++++ utils/utils_windows.go | 13 ++++++++ 7 files changed, 105 insertions(+), 48 deletions(-) create mode 100644 subsystem_linux.go create mode 100644 subsystem_windows.go create mode 100644 utils/utils_linux.go create mode 100644 utils/utils_windows.go diff --git a/Makefile b/Makefile index de766b8..0c37091 100644 --- a/Makefile +++ b/Makefile @@ -37,6 +37,13 @@ bin/viam-agent-$(PATH_VERSION)-$(LINUX_ARCH): go.* *.go */*.go */*/*.go subsyste go build -o $@ -trimpath -tags $(TAGS) -ldflags $(LDFLAGS) ./cmd/viam-agent/main.go test "$(PATH_VERSION)" != "custom" && cp $@ bin/viam-agent-stable-$(LINUX_ARCH) || true +windows: bin/viam-agent.exe + +bin/viam-agent.exe: + GOOS=windows GOARCH=amd64 go build -o $@ -trimpath -tags $(TAGS) -ldflags $(LDFLAGS) ./cmd/viam-agent + file $@ + du -hc $@ + .PHONY: clean clean: rm -rf bin/ diff --git a/subsystem.go b/subsystem.go index 013a795..ca0ac80 100644 --- a/subsystem.go +++ b/subsystem.go @@ -18,6 +18,7 @@ import ( "time" errw "github.com/pkg/errors" + autils "github.com/viamrobotics/agent/utils" pb "go.viam.com/api/app/agent/v1" "go.viam.com/rdk/logging" ) @@ -438,7 +439,7 @@ func (is *InternalSubsystem) Start(ctx context.Context) error { //nolint:gosec is.cmd = exec.Command(path.Join(ViamDirs["bin"], is.name), is.cmdArgs...) is.cmd.Dir = ViamDirs["viam"] - is.cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + autils.PlatformSubprocessSettings(is.cmd) is.cmd.Stdout = stdio is.cmd.Stderr = stderr @@ -522,10 +523,7 @@ func (is *InternalSubsystem) Stop(ctx context.Context) error { } is.logger.Warnf("%s refused to exit, killing", is.name) - err = syscall.Kill(-is.cmd.Process.Pid, syscall.SIGKILL) - if err != nil { - is.logger.Error(err) - } + autils.PlatformKill(is.logger, is.cmd) if is.waitForExit(ctx, StopKillTimeout) { is.logger.Infof("%s successfully killed", is.name) @@ -555,44 +553,6 @@ func (is *InternalSubsystem) waitForExit(ctx context.Context, timeout time.Durat } } -// HealthCheck sends a USR1 signal to the subsystem process, which should cause it to log "HEALTHY" to stdout. -func (is *InternalSubsystem) HealthCheck(ctx context.Context) (errRet error) { - is.startStopMu.Lock() - defer is.startStopMu.Unlock() - is.mu.Lock() - defer is.mu.Unlock() - if !is.running { - return errw.Errorf("%s not running", is.name) - } - - is.logger.Debugf("starting healthcheck for %s", is.name) - - checkChan, err := is.cmd.Stdout.(*MatchingLogger).AddMatcher("healthcheck", regexp.MustCompile(`HEALTHY`), true) - if err != nil { - return err - } - defer func() { - matcher, ok := is.cmd.Stdout.(*MatchingLogger) - if ok { - matcher.DeleteMatcher("healthcheck") - } - }() - - err = is.cmd.Process.Signal(syscall.SIGUSR1) - if err != nil { - is.logger.Error(err) - } - - select { - case <-time.After(time.Second * 30): - case <-ctx.Done(): - case <-checkChan: - is.logger.Debugf("healthcheck for %s is good", is.name) - return nil - } - return errw.Errorf("timeout waiting for healthcheck on %s", is.name) -} - func (is *InternalSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConfig, newVersion bool) (bool, error) { jsonBytes, err := cfg.GetAttributes().MarshalJSON() if err != nil { diff --git a/subsystem_linux.go b/subsystem_linux.go new file mode 100644 index 0000000..10789fa --- /dev/null +++ b/subsystem_linux.go @@ -0,0 +1,48 @@ +package agent + +import ( + "context" + "regexp" + "syscall" + "time" + + errw "github.com/pkg/errors" +) + +// HealthCheck sends a USR1 signal to the subsystem process, which should cause it to log "HEALTHY" to stdout. +func (is *InternalSubsystem) HealthCheck(ctx context.Context) (errRet error) { + is.startStopMu.Lock() + defer is.startStopMu.Unlock() + is.mu.Lock() + defer is.mu.Unlock() + if !is.running { + return errw.Errorf("%s not running", is.name) + } + + is.logger.Debugf("starting healthcheck for %s", is.name) + + checkChan, err := is.cmd.Stdout.(*MatchingLogger).AddMatcher("healthcheck", regexp.MustCompile(`HEALTHY`), true) + if err != nil { + return err + } + defer func() { + matcher, ok := is.cmd.Stdout.(*MatchingLogger) + if ok { + matcher.DeleteMatcher("healthcheck") + } + }() + + err = is.cmd.Process.Signal(syscall.SIGUSR1) + if err != nil { + is.logger.Error(err) + } + + select { + case <-time.After(time.Second * 30): + case <-ctx.Done(): + case <-checkChan: + is.logger.Debugf("healthcheck for %s is good", is.name) + return nil + } + return errw.Errorf("timeout waiting for healthcheck on %s", is.name) +} diff --git a/subsystem_windows.go b/subsystem_windows.go new file mode 100644 index 0000000..1f199ae --- /dev/null +++ b/subsystem_windows.go @@ -0,0 +1,10 @@ +package agent + +import ( + "context" +) + +func (is *InternalSubsystem) HealthCheck(ctx context.Context) (errRet error) { + // todo: flesh this out. SIGUSR1 isn't available on windows. + return nil +} diff --git a/subsystems/viamserver/viamserver.go b/subsystems/viamserver/viamserver.go index efbba06..58514e7 100644 --- a/subsystems/viamserver/viamserver.go +++ b/subsystems/viamserver/viamserver.go @@ -20,6 +20,7 @@ import ( "github.com/viamrobotics/agent" "github.com/viamrobotics/agent/subsystems" "github.com/viamrobotics/agent/subsystems/registry" + autils "github.com/viamrobotics/agent/utils" pb "go.viam.com/api/app/agent/v1" "go.viam.com/rdk/logging" "go.viam.com/utils" @@ -131,7 +132,7 @@ func (s *viamServer) Start(ctx context.Context) error { //nolint:gosec s.cmd = exec.Command(path.Join(agent.ViamDirs["bin"], SubsysName), "-config", ConfigFilePath) s.cmd.Dir = agent.ViamDirs["viam"] - s.cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + autils.PlatformSubprocessSettings(s.cmd) s.cmd.Stdout = stdio s.cmd.Stderr = stderr @@ -222,10 +223,7 @@ func (s *viamServer) Stop(ctx context.Context) error { } s.logger.Warnf("%s refused to exit, killing", SubsysName) - err = syscall.Kill(-s.cmd.Process.Pid, syscall.SIGKILL) - if err != nil { - s.logger.Error(err) - } + autils.PlatformKill(s.logger, s.cmd) if s.waitForExit(ctx, stopKillTimeout) { s.logger.Infof("%s successfully killed", SubsysName) diff --git a/utils/utils_linux.go b/utils/utils_linux.go new file mode 100644 index 0000000..1353327 --- /dev/null +++ b/utils/utils_linux.go @@ -0,0 +1,21 @@ +package utils + +import ( + "os/exec" + "syscall" + + "go.viam.com/rdk/logging" +) + +// PlatformSubprocessSettings sets platform-specific subprocess settings. +func PlatformSubprocessSettings(cmd *exec.Cmd) { + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} +} + +// PlatformKill does SIGKILL if available for the platform. +func PlatformKill(logger logging.Logger, cmd *exec.Cmd) { + err := syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + if err != nil { + logger.Error(err) + } +} diff --git a/utils/utils_windows.go b/utils/utils_windows.go new file mode 100644 index 0000000..1b89279 --- /dev/null +++ b/utils/utils_windows.go @@ -0,0 +1,13 @@ +package utils + +import ( + "os/exec" + + "go.viam.com/rdk/logging" +) + +// PlatformSubprocessSettings sets platform-specific subprocess settings. +func PlatformSubprocessSettings(cmd *exec.Cmd) {} + +// PlatformKill does SIGKILL if available for the platform. +func PlatformKill(logger logging.Logger, cmd *exec.Cmd) {} From 12110a7b6836f41d52ed42bf48a8566d03a9e7ae Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Thu, 12 Dec 2024 18:09:27 -0500 Subject: [PATCH 04/22] agent starts on windows, fails at starting viam-server --- cmd/viam-agent/main.go | 5 +++-- cmd/viam-agent/subsystems_windows.go | 2 +- utils.go | 17 +++++++++++++++-- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/cmd/viam-agent/main.go b/cmd/viam-agent/main.go index 63eecf1..5678619 100644 --- a/cmd/viam-agent/main.go +++ b/cmd/viam-agent/main.go @@ -9,6 +9,7 @@ import ( "os/signal" "os/user" "path/filepath" + "runtime" "strings" "sync" "syscall" @@ -81,7 +82,7 @@ func main() { // need to be root to go any further than this curUser, err := user.Current() exitIfError(err) - if curUser.Uid != "0" && !opts.DevMode { + if runtime.GOOS != "windows" && curUser.Uid != "0" && !opts.DevMode { //nolint:forbidigo fmt.Printf("viam-agent must be run as root (uid 0), but current user is %s (uid %s)\n", curUser.Username, curUser.Uid) return @@ -92,7 +93,7 @@ func main() { return } - if !opts.DevMode { + if !opts.DevMode && runtime.GOOS != "windows" { // confirm that we're running from a proper install if !strings.HasPrefix(os.Args[0], agent.ViamDirs["viam"]) { //nolint:forbidigo diff --git a/cmd/viam-agent/subsystems_windows.go b/cmd/viam-agent/subsystems_windows.go index 7fa94f9..a3d7985 100644 --- a/cmd/viam-agent/subsystems_windows.go +++ b/cmd/viam-agent/subsystems_windows.go @@ -8,7 +8,7 @@ import ( ) func runPlatformProvisioning(ctx context.Context, manager *agent.Manager, loadConfigErr error, absConfigPath string) { - panic("todo fancy error for provisioning on windows") + globalLogger.Warn("provisioning not available on windows yet") } func setupProvisioningPaths(opts agentOpts) string { return "" } diff --git a/utils.go b/utils.go index d4400f4..750e0d8 100644 --- a/utils.go +++ b/utils.go @@ -16,6 +16,7 @@ import ( "os" "path" "path/filepath" + "runtime" "strings" "time" @@ -49,6 +50,14 @@ func GetRevision() string { } func init() { + if runtime.GOOS == "windows" { + // note: forward slash isn't an abs path on windows, but resolves to one. + var err error + ViamDirs["viam"], err = filepath.Abs(ViamDirs["viam"]) + if err != nil { + panic(err) + } + } ViamDirs["bin"] = filepath.Join(ViamDirs["viam"], "bin") ViamDirs["cache"] = filepath.Join(ViamDirs["viam"], "cache") ViamDirs["tmp"] = filepath.Join(ViamDirs["viam"], "tmp") @@ -57,6 +66,10 @@ func init() { func InitPaths() error { uid := os.Getuid() + expectedPerms := 0o755 + if runtime.GOOS == "windows" { + expectedPerms = 0o777 + } for _, p := range ViamDirs { info, err := os.Stat(p) if err != nil { @@ -75,8 +88,8 @@ func InitPaths() error { if !info.IsDir() { return errw.Errorf("%s should be a directory, but is not", p) } - if info.Mode().Perm() != 0o755 { - return errw.Errorf("%s should be have permission set to 0755, but has permissions %d", p, info.Mode().Perm()) + if info.Mode().Perm() != fs.FileMode(expectedPerms) { + return errw.Errorf("%s should have permission set to %#o, but has permissions %#o", p, expectedPerms, info.Mode().Perm()) } } return nil From 767f492e6c39ccc5cb706ae1ccb2ffeb67b55c89 Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Thu, 12 Dec 2024 18:31:48 -0500 Subject: [PATCH 05/22] factor out WaitOnline --- cmd/viam-agent/main.go | 21 ++------------------- cmd/viam-agent/subsystems_windows.go | 6 +++--- utils/utils_linux.go | 24 ++++++++++++++++++++++++ utils/utils_windows.go | 5 +++++ 4 files changed, 34 insertions(+), 22 deletions(-) diff --git a/cmd/viam-agent/main.go b/cmd/viam-agent/main.go index 5678619..bae9807 100644 --- a/cmd/viam-agent/main.go +++ b/cmd/viam-agent/main.go @@ -5,7 +5,6 @@ import ( "context" "fmt" "os" - "os/exec" "os/signal" "os/user" "path/filepath" @@ -21,8 +20,8 @@ import ( "github.com/viamrobotics/agent" "github.com/viamrobotics/agent/subsystems/viamagent" "github.com/viamrobotics/agent/subsystems/viamserver" + autils "github.com/viamrobotics/agent/utils" "go.viam.com/rdk/logging" - "go.viam.com/utils" ) var ( @@ -152,23 +151,7 @@ func main() { // wait to be online timeoutCtx, cancel := context.WithTimeout(ctx, time.Minute) defer cancel() - for { - cmd := exec.CommandContext(timeoutCtx, "systemctl", "is-active", "network-online.target") - _, err := cmd.CombinedOutput() - - if err == nil { - break - } - - if e := (&exec.ExitError{}); !errors.As(err, &e) { - // if it's not an ExitError, that means it didn't even start, so bail out - globalLogger.Error(errors.Wrap(err, "running 'systemctl is-active network-online.target'")) - break - } - if !utils.SelectContextOrWait(timeoutCtx, time.Second) { - break - } - } + autils.WaitOnline(globalLogger, timeoutCtx) // Check for self-update and restart if needed. needRestart, err := manager.SelfUpdate(ctx) diff --git a/cmd/viam-agent/subsystems_windows.go b/cmd/viam-agent/subsystems_windows.go index a3d7985..bf1cced 100644 --- a/cmd/viam-agent/subsystems_windows.go +++ b/cmd/viam-agent/subsystems_windows.go @@ -7,10 +7,10 @@ import ( "github.com/viamrobotics/agent" ) -func runPlatformProvisioning(ctx context.Context, manager *agent.Manager, loadConfigErr error, absConfigPath string) { +func runPlatformProvisioning(context.Context, *agent.Manager, error, string) { globalLogger.Warn("provisioning not available on windows yet") } -func setupProvisioningPaths(opts agentOpts) string { return "" } +func setupProvisioningPaths(agentOpts) string { return "" } -func ignoredSignal(sig os.Signal) bool { return false } +func ignoredSignal(os.Signal) bool { return false } diff --git a/utils/utils_linux.go b/utils/utils_linux.go index 1353327..6d8374b 100644 --- a/utils/utils_linux.go +++ b/utils/utils_linux.go @@ -1,10 +1,14 @@ package utils import ( + "context" "os/exec" "syscall" + "time" + "github.com/pkg/errors" "go.viam.com/rdk/logging" + "go.viam.com/utils" ) // PlatformSubprocessSettings sets platform-specific subprocess settings. @@ -19,3 +23,23 @@ func PlatformKill(logger logging.Logger, cmd *exec.Cmd) { logger.Error(err) } } + +func WaitOnline(logger logging.Logger, ctx context.Context) { + for { + cmd := exec.CommandContext(ctx, "systemctl", "is-active", "network-online.target") + _, err := cmd.CombinedOutput() + + if err == nil { + break + } + + if e := (&exec.ExitError{}); !errors.As(err, &e) { + // if it's not an ExitError, that means it didn't even start, so bail out + logger.Error(errors.Wrap(err, "running 'systemctl is-active network-online.target'")) + break + } + if !utils.SelectContextOrWait(ctx, time.Second) { + break + } + } +} diff --git a/utils/utils_windows.go b/utils/utils_windows.go index 1b89279..5fe98d8 100644 --- a/utils/utils_windows.go +++ b/utils/utils_windows.go @@ -1,6 +1,7 @@ package utils import ( + "context" "os/exec" "go.viam.com/rdk/logging" @@ -11,3 +12,7 @@ func PlatformSubprocessSettings(cmd *exec.Cmd) {} // PlatformKill does SIGKILL if available for the platform. func PlatformKill(logger logging.Logger, cmd *exec.Cmd) {} + +func WaitOnline(logger logging.Logger, ctx context.Context) { + logger.Warn("WaitOnline not available on windows yet") +} From d6368fc45bf7f2b4716c81d6e37e8fd61a5cca66 Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Mon, 16 Dec 2024 12:41:11 -0500 Subject: [PATCH 06/22] checkpoint --- manager.go | 2 ++ subsystem.go | 1 + subsystems/viamserver/viamserver.go | 13 ++++++++++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/manager.go b/manager.go index 55fb34e..1df9452 100644 --- a/manager.go +++ b/manager.go @@ -156,6 +156,7 @@ func (m *Manager) SubsystemUpdates(ctx context.Context, cfg map[string]*pb.Devic defer m.subsystemsMu.Unlock() // check updates and (re)start + println("SubsystemUpdates loop") for name, sub := range m.loadedSubsystems { if ctx.Err() != nil { return @@ -201,6 +202,7 @@ func (m *Manager) CheckUpdates(ctx context.Context) time.Duration { // SubsystemHealthChecks makes sure all subsystems are responding, and restarts them if not. func (m *Manager) SubsystemHealthChecks(ctx context.Context) { + println("top of subsys health checks") defer m.handlePanic() if ctx.Err() != nil { return diff --git a/subsystem.go b/subsystem.go index ca0ac80..b5a1c18 100644 --- a/subsystem.go +++ b/subsystem.go @@ -231,6 +231,7 @@ func (s *AgentSubsystem) SaveCache() error { // //nolint:gocognit func (s *AgentSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConfig) (bool, error) { + println("update for", s.name) s.mu.Lock() defer s.mu.Unlock() diff --git a/subsystems/viamserver/viamserver.go b/subsystems/viamserver/viamserver.go index 58514e7..80fa4bc 100644 --- a/subsystems/viamserver/viamserver.go +++ b/subsystems/viamserver/viamserver.go @@ -7,6 +7,7 @@ import ( "encoding/json" "errors" "net/http" + "os" "os/exec" "path" "regexp" @@ -110,6 +111,12 @@ func configFromProto(logger logging.Logger, updateConf *pb.DeviceSubsystemConfig return ret } +func pathExists(path string) bool { + // todo: give the manager access to this + _, err := os.Stat(path) + return err == nil +} + func (s *viamServer) Start(ctx context.Context) error { s.startStopMu.Lock() defer s.startStopMu.Unlock() @@ -129,8 +136,12 @@ func (s *viamServer) Start(ctx context.Context) error { stdio := agent.NewMatchingLogger(s.logger, false, false) stderr := agent.NewMatchingLogger(s.logger, true, false) + binPath := path.Join(agent.ViamDirs["bin"], SubsysName) + if !pathExists(binPath) { + println("PATH DOESN'T EXIST") + } //nolint:gosec - s.cmd = exec.Command(path.Join(agent.ViamDirs["bin"], SubsysName), "-config", ConfigFilePath) + s.cmd = exec.Command(binPath, "-config", ConfigFilePath) s.cmd.Dir = agent.ViamDirs["viam"] autils.PlatformSubprocessSettings(s.cmd) s.cmd.Stdout = stdio From ad4966a8e3f2c426b0f7583a15da73d20f27409c Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Tue, 31 Dec 2024 18:44:57 -0500 Subject: [PATCH 07/22] don't return zero interval --- manager.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/manager.go b/manager.go index 1df9452..5cfc8fe 100644 --- a/manager.go +++ b/manager.go @@ -180,11 +180,14 @@ func (m *Manager) SubsystemUpdates(ctx context.Context, cfg map[string]*pb.Devic } } +const minInterval = 5*time.Second + // CheckUpdates retrieves an updated config from the cloud, and then passes it to SubsystemUpdates(). func (m *Manager) CheckUpdates(ctx context.Context) time.Duration { defer m.handlePanic() m.logger.Debug("Checking cloud for update") cfg, interval, err := m.GetConfig(ctx) + interval = max(interval, minInterval) // because zero causes bad loop in caller // randomly fuzz the interval by +/- 5% interval = fuzzTime(interval, 0.05) From 5f8fa2b5c85627d3403057163cdc71301dcc801c Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Tue, 31 Dec 2024 18:46:35 -0500 Subject: [PATCH 08/22] cleaner error when bin missing --- manager.go | 1 - subsystems/viamserver/viamserver.go | 11 +++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/manager.go b/manager.go index 5cfc8fe..abd3376 100644 --- a/manager.go +++ b/manager.go @@ -205,7 +205,6 @@ func (m *Manager) CheckUpdates(ctx context.Context) time.Duration { // SubsystemHealthChecks makes sure all subsystems are responding, and restarts them if not. func (m *Manager) SubsystemHealthChecks(ctx context.Context) { - println("top of subsys health checks") defer m.handlePanic() if ctx.Err() != nil { return diff --git a/subsystems/viamserver/viamserver.go b/subsystems/viamserver/viamserver.go index 80fa4bc..ddb1f49 100644 --- a/subsystems/viamserver/viamserver.go +++ b/subsystems/viamserver/viamserver.go @@ -127,6 +127,13 @@ func (s *viamServer) Start(ctx context.Context) error { s.mu.Unlock() return nil } + binPath := path.Join(agent.ViamDirs["bin"], SubsysName) + if !pathExists(binPath) { + s.logger.Warnf("viam-server binary missing at %s, not starting", binPath) + // todo: nested func so unlock is deferable + s.mu.Unlock() + return nil + } if s.shouldRun { s.logger.Warnf("Restarting %s after unexpected exit", SubsysName) } else { @@ -136,10 +143,6 @@ func (s *viamServer) Start(ctx context.Context) error { stdio := agent.NewMatchingLogger(s.logger, false, false) stderr := agent.NewMatchingLogger(s.logger, true, false) - binPath := path.Join(agent.ViamDirs["bin"], SubsysName) - if !pathExists(binPath) { - println("PATH DOESN'T EXIST") - } //nolint:gosec s.cmd = exec.Command(binPath, "-config", ConfigFilePath) s.cmd.Dir = agent.ViamDirs["viam"] From e852518648dcba80a034c3eba967e164f10e1003 Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Tue, 31 Dec 2024 19:17:50 -0500 Subject: [PATCH 09/22] setupProvisioningPaths for win (which isn't just for provisioning it turns out) --- cmd/viam-agent/subsystems_windows.go | 13 ++++++++++++- utils/utils_linux.go | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/cmd/viam-agent/subsystems_windows.go b/cmd/viam-agent/subsystems_windows.go index bf1cced..e023f19 100644 --- a/cmd/viam-agent/subsystems_windows.go +++ b/cmd/viam-agent/subsystems_windows.go @@ -3,14 +3,25 @@ package main import ( "context" "os" + "path/filepath" "github.com/viamrobotics/agent" + "github.com/viamrobotics/agent/subsystems/viamserver" ) func runPlatformProvisioning(context.Context, *agent.Manager, error, string) { globalLogger.Warn("provisioning not available on windows yet") } -func setupProvisioningPaths(agentOpts) string { return "" } +// platform-specific path setup. +func setupProvisioningPaths(opts agentOpts) string { + // tie the manager config to the viam-server config + absConfigPath, err := filepath.Abs(opts.Config) + exitIfError(err) + viamserver.ConfigFilePath = absConfigPath + globalLogger.Infof("config file path: %s", absConfigPath) + + return absConfigPath +} func ignoredSignal(os.Signal) bool { return false } diff --git a/utils/utils_linux.go b/utils/utils_linux.go index 6d8374b..85560b1 100644 --- a/utils/utils_linux.go +++ b/utils/utils_linux.go @@ -24,6 +24,7 @@ func PlatformKill(logger logging.Logger, cmd *exec.Cmd) { } } +// WaitOnline attempts to wait until the network comes up, with various bailout conditions. func WaitOnline(logger logging.Logger, ctx context.Context) { for { cmd := exec.CommandContext(ctx, "systemctl", "is-active", "network-online.target") From 0984c6e54b0e25a0326189077cbaa2a00f4a4a17 Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Wed, 1 Jan 2025 15:11:11 -0500 Subject: [PATCH 10/22] return minInterval in nil GetConfig case --- manager.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/manager.go b/manager.go index abd3376..bbe8856 100644 --- a/manager.go +++ b/manager.go @@ -180,14 +180,11 @@ func (m *Manager) SubsystemUpdates(ctx context.Context, cfg map[string]*pb.Devic } } -const minInterval = 5*time.Second - // CheckUpdates retrieves an updated config from the cloud, and then passes it to SubsystemUpdates(). func (m *Manager) CheckUpdates(ctx context.Context) time.Duration { defer m.handlePanic() m.logger.Debug("Checking cloud for update") cfg, interval, err := m.GetConfig(ctx) - interval = max(interval, minInterval) // because zero causes bad loop in caller // randomly fuzz the interval by +/- 5% interval = fuzzTime(interval, 0.05) @@ -450,7 +447,7 @@ func (m *Manager) processConfig(cfg map[string]*pb.DeviceSubsystemConfig) { // GetConfig retrieves the configuration from the cloud, or returns a cached version if unable to communicate. func (m *Manager) GetConfig(ctx context.Context) (map[string]*pb.DeviceSubsystemConfig, time.Duration, error) { if m.cloudConfig == nil { - return nil, 0, errors.New("can't GetConfig until successful LoadConfig") + return nil, minimalCheckInterval, errors.New("can't GetConfig until successful LoadConfig") } timeoutCtx, cancelFunc := context.WithTimeout(ctx, defaultNetworkTimeout) defer cancelFunc() From 9c09ede0d6636361748de96aaa4eba35f641eeb5 Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Wed, 1 Jan 2025 15:15:09 -0500 Subject: [PATCH 11/22] nil updateInfo is what is breaking download, aha --- subsystem.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/subsystem.go b/subsystem.go index b5a1c18..e6fda95 100644 --- a/subsystem.go +++ b/subsystem.go @@ -30,7 +30,10 @@ const ( StopKillTimeout = time.Second * 10 ) -var ErrSubsystemDisabled = errors.New("subsystem disabled") +var ( + ErrSubsystemDisabled = errors.New("subsystem disabled") + errNilUpdateInfo = errors.New("updateInfo is nil; are you on an unsupported platform?") +) // BasicSubsystem is the minimal interface. type BasicSubsystem interface { @@ -249,6 +252,9 @@ func (s *AgentSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConf } updateInfo := cfg.GetUpdateInfo() + if updateInfo == nil { + return false, errNilUpdateInfo + } // check if we already have the version given by the cloud verData, ok := s.CacheData.Versions[updateInfo.GetVersion()] From fbfc8e20b080c4ed325d076ccdb50a0b79d461c6 Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Wed, 1 Jan 2025 16:41:59 -0500 Subject: [PATCH 12/22] try hardcoding win path --- subsystem.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/subsystem.go b/subsystem.go index e6fda95..4449860 100644 --- a/subsystem.go +++ b/subsystem.go @@ -13,6 +13,7 @@ import ( "path" "path/filepath" "regexp" + "runtime" "sync" "syscall" "time" @@ -230,6 +231,16 @@ func (s *AgentSubsystem) SaveCache() error { return s.saveCache() } +// hardcoding for now pending release process for windows rdk (not sure how discovery loop works). +// replace with normal process asap. +var staticWindowsViamServer = &pb.SubsystemUpdateInfo{ + Filename: "viam-server-amd64.exe", + Url: "https://storage.googleapis.com/packages.viam.com/temp/viam-server-windows-amd64-alpha-1-546e6603.exe", + Version: "alpha-1", + Sha256: []byte("b8b999ab26c7156a62beccbb7e039b66f5fdb762b535dbbc369c3865832ed179"), + Format: pb.PackageFormat_PACKAGE_FORMAT_EXECUTABLE, +} + // Update is the main function of the AgentSubsystem wrapper, as it's shared between subsystems. Returns true if a restart is needed. // //nolint:gocognit @@ -253,7 +264,11 @@ func (s *AgentSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConf updateInfo := cfg.GetUpdateInfo() if updateInfo == nil { - return false, errNilUpdateInfo + if runtime.GOOS == "windows" { + updateInfo = staticWindowsViamServer + } else { + return false, errNilUpdateInfo + } } // check if we already have the version given by the cloud From 9415af166f22ad6a89d6a8047d0d6ed80f356297 Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Thu, 2 Jan 2025 00:21:44 -0500 Subject: [PATCH 13/22] correct sha, correct downloadfile control flow --- manager.go | 1 - subsystem.go | 17 +++++++++-------- utils.go | 12 +++++++++++- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/manager.go b/manager.go index bbe8856..253c9e7 100644 --- a/manager.go +++ b/manager.go @@ -156,7 +156,6 @@ func (m *Manager) SubsystemUpdates(ctx context.Context, cfg map[string]*pb.Devic defer m.subsystemsMu.Unlock() // check updates and (re)start - println("SubsystemUpdates loop") for name, sub := range m.loadedSubsystems { if ctx.Err() != nil { return diff --git a/subsystem.go b/subsystem.go index 4449860..c738503 100644 --- a/subsystem.go +++ b/subsystem.go @@ -31,10 +31,7 @@ const ( StopKillTimeout = time.Second * 10 ) -var ( - ErrSubsystemDisabled = errors.New("subsystem disabled") - errNilUpdateInfo = errors.New("updateInfo is nil; are you on an unsupported platform?") -) +var ErrSubsystemDisabled = errors.New("subsystem disabled") // BasicSubsystem is the minimal interface. type BasicSubsystem interface { @@ -231,13 +228,18 @@ func (s *AgentSubsystem) SaveCache() error { return s.saveCache() } +func mustDecodeBase64(s string) []byte { + ret, _ := base64.StdEncoding.DecodeString(s) + return ret +} + // hardcoding for now pending release process for windows rdk (not sure how discovery loop works). // replace with normal process asap. var staticWindowsViamServer = &pb.SubsystemUpdateInfo{ Filename: "viam-server-amd64.exe", Url: "https://storage.googleapis.com/packages.viam.com/temp/viam-server-windows-amd64-alpha-1-546e6603.exe", Version: "alpha-1", - Sha256: []byte("b8b999ab26c7156a62beccbb7e039b66f5fdb762b535dbbc369c3865832ed179"), + Sha256: mustDecodeBase64("uLmZqybHFWpivsy7fgObZvX9t2K1Ndu8Npw4ZYMu0Xk="), Format: pb.PackageFormat_PACKAGE_FORMAT_EXECUTABLE, } @@ -245,7 +247,6 @@ var staticWindowsViamServer = &pb.SubsystemUpdateInfo{ // //nolint:gocognit func (s *AgentSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConfig) (bool, error) { - println("update for", s.name) s.mu.Lock() defer s.mu.Unlock() @@ -264,10 +265,10 @@ func (s *AgentSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConf updateInfo := cfg.GetUpdateInfo() if updateInfo == nil { - if runtime.GOOS == "windows" { + if runtime.GOOS == "windows" && s.name == "viam-server" { updateInfo = staticWindowsViamServer } else { - return false, errNilUpdateInfo + return false, fmt.Errorf("updateInfo for %s is nil. are you on an unsupported platform?", s.name) } } diff --git a/utils.go b/utils.go index 750e0d8..6cf20f5 100644 --- a/utils.go +++ b/utils.go @@ -164,8 +164,16 @@ func DownloadFile(ctx context.Context, rawURL string) (outPath string, errRet er if err != nil { return "", err } + closed := false defer func() { - errRet = errors.Join(errRet, out.Close(), SyncFS(out.Name())) + if !closed { + errRet = errors.Join(errRet, out.Close()) + } + if runtime.GOOS != "windows" { + // note: error is different on windows (EBADF?). + // also this has in theory already synced in the success case. + errRet = errors.Join(errRet, SyncFS(out.Name())) + } if err := os.Remove(out.Name()); err != nil && !os.IsNotExist(err) { errRet = errors.Join(errRet, err) } @@ -175,6 +183,8 @@ func DownloadFile(ctx context.Context, rawURL string) (outPath string, errRet er if err != nil && !os.IsNotExist(err) { errRet = errors.Join(errRet, err) } + errRet = errors.Join(errRet, out.Close()) + closed = true errRet = errors.Join(errRet, os.Rename(out.Name(), outPath), SyncFS(outPath)) return outPath, errRet From 914166ecf1c4a859623a17258484b2f34b858bd7 Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Thu, 2 Jan 2025 00:36:14 -0500 Subject: [PATCH 14/22] rdk installing from network --- subsystem.go | 2 +- subsystems/viamserver/viamserver.go | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/subsystem.go b/subsystem.go index c738503..b0e2871 100644 --- a/subsystem.go +++ b/subsystem.go @@ -236,7 +236,7 @@ func mustDecodeBase64(s string) []byte { // hardcoding for now pending release process for windows rdk (not sure how discovery loop works). // replace with normal process asap. var staticWindowsViamServer = &pb.SubsystemUpdateInfo{ - Filename: "viam-server-amd64.exe", + Filename: "viam-server.exe", Url: "https://storage.googleapis.com/packages.viam.com/temp/viam-server-windows-amd64-alpha-1-546e6603.exe", Version: "alpha-1", Sha256: mustDecodeBase64("uLmZqybHFWpivsy7fgObZvX9t2K1Ndu8Npw4ZYMu0Xk="), diff --git a/subsystems/viamserver/viamserver.go b/subsystems/viamserver/viamserver.go index ddb1f49..a277d8a 100644 --- a/subsystems/viamserver/viamserver.go +++ b/subsystems/viamserver/viamserver.go @@ -11,6 +11,7 @@ import ( "os/exec" "path" "regexp" + "runtime" "strings" "sync" "sync/atomic" @@ -128,6 +129,9 @@ func (s *viamServer) Start(ctx context.Context) error { return nil } binPath := path.Join(agent.ViamDirs["bin"], SubsysName) + if runtime.GOOS == "windows" { + binPath += ".exe" + } if !pathExists(binPath) { s.logger.Warnf("viam-server binary missing at %s, not starting", binPath) // todo: nested func so unlock is deferable From 19db1cf9e02694f88947fe0e524c1010095dd648 Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Thu, 2 Jan 2025 01:16:51 -0500 Subject: [PATCH 15/22] nonworking service install, notes for fixing --- Makefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Makefile b/Makefile index 0c37091..3adfbfd 100644 --- a/Makefile +++ b/Makefile @@ -64,3 +64,10 @@ upload-stable: bin/viam-agent-$(PATH_VERSION)-x86_64 bin/viam-agent-$(PATH_VERSI .PHONY: upload-installer upload-installer: gsutil -h "Cache-Control:no-cache" cp preinstall.sh install.sh uninstall.sh gs://packages.viam.com/apps/viam-agent/ + +sc-create: + # create a windows service + # todo: move this to an installer script + # note: this works in cmd.exe but not powershell + # nope: follow example here https://pkg.go.dev/golang.org/x/sys/windows/svc/example + sc create viam-agent binpath= c:\opt\viam\bin\viam-agent.exe start= auto From f21dbb27a95761ad494879289515723c746443bd Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Thu, 2 Jan 2025 16:48:05 -0500 Subject: [PATCH 16/22] service stub --- cmd/viam-agent-win/README.md | 3 ++ cmd/viam-agent-win/main.go | 57 ++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 cmd/viam-agent-win/README.md create mode 100644 cmd/viam-agent-win/main.go diff --git a/cmd/viam-agent-win/README.md b/cmd/viam-agent-win/README.md new file mode 100644 index 0000000..5cdfd68 --- /dev/null +++ b/cmd/viam-agent-win/README.md @@ -0,0 +1,3 @@ +# viam-agent-win + +This is a windows service wrapper for the agent. diff --git a/cmd/viam-agent-win/main.go b/cmd/viam-agent-win/main.go new file mode 100644 index 0000000..0d1b4a3 --- /dev/null +++ b/cmd/viam-agent-win/main.go @@ -0,0 +1,57 @@ +package main + +import ( + "fmt" + + "golang.org/x/sys/windows/svc" + "golang.org/x/sys/windows/svc/debug" + "golang.org/x/sys/windows/svc/eventlog" +) + +var elog debug.Log + +const serviceName = "viam-agent" + +type agentService struct{} + +// control loop for a windows service +func (*agentService) Execute(args []string, r <-chan svc.ChangeRequest, changes chan<- svc.Status) (ssec bool, errno uint32) { + changes <- svc.Status{State: svc.Running, Accepts: svc.AcceptStop | svc.AcceptShutdown} + for { + c := <-r + if c.Cmd == svc.Stop || c.Cmd == svc.Shutdown { + // testOutput := strings.Join(args, "-") + // testOutput += fmt.Sprintf("-%d", c.Context) + // elog.Info(1, testOutput) + break + } else { + elog.Error(1, fmt.Sprintf("unexpected control request #%d", c)) + } + } + changes <- svc.Status{State: svc.StopPending} + return +} + +func main() { + if inService, err := svc.IsWindowsService(); err != nil { + panic(err) + } else if !inService { + panic("this can only be run as a windows service") + } + + var err error + elog, err = eventlog.Open(serviceName) + if err != nil { + return + } + defer elog.Close() + + elog.Info(1, fmt.Sprintf("starting %s service", serviceName)) + err = svc.Run(serviceName, &agentService{}) + if err != nil { + elog.Error(1, fmt.Sprintf("%s service failed: %v", serviceName, err)) + return + } + // todo: start gorouting + elog.Info(1, fmt.Sprintf("%s service stopped", serviceName)) +} From 17bbf000fe8f746c7a0f820fdba8efac24eada81 Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Thu, 2 Jan 2025 18:20:32 -0500 Subject: [PATCH 17/22] windows service working --- Makefile | 1 - cmd/viam-agent-win/README.md | 3 --- cmd/viam-agent/main.go | 2 +- cmd/{viam-agent-win/main.go => viam-agent/main_windows.go} | 6 ++++-- cmd/viam-agent/subsystems_linux.go | 5 +++++ 5 files changed, 10 insertions(+), 7 deletions(-) delete mode 100644 cmd/viam-agent-win/README.md rename cmd/{viam-agent-win/main.go => viam-agent/main_windows.go} (92%) diff --git a/Makefile b/Makefile index 3adfbfd..e335b9f 100644 --- a/Makefile +++ b/Makefile @@ -69,5 +69,4 @@ sc-create: # create a windows service # todo: move this to an installer script # note: this works in cmd.exe but not powershell - # nope: follow example here https://pkg.go.dev/golang.org/x/sys/windows/svc/example sc create viam-agent binpath= c:\opt\viam\bin\viam-agent.exe start= auto diff --git a/cmd/viam-agent-win/README.md b/cmd/viam-agent-win/README.md deleted file mode 100644 index 5cdfd68..0000000 --- a/cmd/viam-agent-win/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# viam-agent-win - -This is a windows service wrapper for the agent. diff --git a/cmd/viam-agent/main.go b/cmd/viam-agent/main.go index bae9807..ad699df 100644 --- a/cmd/viam-agent/main.go +++ b/cmd/viam-agent/main.go @@ -44,7 +44,7 @@ type agentOpts struct { } //nolint:gocognit -func main() { +func commonMain() { ctx, cancel := setupExitSignalHandling() defer func() { diff --git a/cmd/viam-agent-win/main.go b/cmd/viam-agent/main_windows.go similarity index 92% rename from cmd/viam-agent-win/main.go rename to cmd/viam-agent/main_windows.go index 0d1b4a3..62c268b 100644 --- a/cmd/viam-agent-win/main.go +++ b/cmd/viam-agent/main_windows.go @@ -36,7 +36,9 @@ func main() { if inService, err := svc.IsWindowsService(); err != nil { panic(err) } else if !inService { - panic("this can only be run as a windows service") + println("no service detected -- running as normal process") + commonMain() + return } var err error @@ -48,10 +50,10 @@ func main() { elog.Info(1, fmt.Sprintf("starting %s service", serviceName)) err = svc.Run(serviceName, &agentService{}) + go commonMain() if err != nil { elog.Error(1, fmt.Sprintf("%s service failed: %v", serviceName, err)) return } - // todo: start gorouting elog.Info(1, fmt.Sprintf("%s service stopped", serviceName)) } diff --git a/cmd/viam-agent/subsystems_linux.go b/cmd/viam-agent/subsystems_linux.go index fca14a6..d771862 100644 --- a/cmd/viam-agent/subsystems_linux.go +++ b/cmd/viam-agent/subsystems_linux.go @@ -11,12 +11,17 @@ import ( "github.com/pkg/errors" "github.com/viamrobotics/agent" "github.com/viamrobotics/agent/subsystems/provisioning" + // register-only. _ "github.com/viamrobotics/agent/subsystems/syscfg" "github.com/viamrobotics/agent/subsystems/viamserver" "go.viam.com/utils" ) +func main() { + commonMain() +} + // platform-specific provisioning logic. func runPlatformProvisioning(ctx context.Context, manager *agent.Manager, loadConfigErr error, absConfigPath string) { // If the local /etc/viam.json config is corrupted, invalid, or missing (due to a new install), we can get stuck here. From 4cd3f44f6169fdec44da3329f9d96fa8f4463837 Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Mon, 6 Jan 2025 00:36:02 -0500 Subject: [PATCH 18/22] rm static viam-server info on windows, working with pinURL --- subsystem.go | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/subsystem.go b/subsystem.go index b0e2871..2350bdb 100644 --- a/subsystem.go +++ b/subsystem.go @@ -228,21 +228,6 @@ func (s *AgentSubsystem) SaveCache() error { return s.saveCache() } -func mustDecodeBase64(s string) []byte { - ret, _ := base64.StdEncoding.DecodeString(s) - return ret -} - -// hardcoding for now pending release process for windows rdk (not sure how discovery loop works). -// replace with normal process asap. -var staticWindowsViamServer = &pb.SubsystemUpdateInfo{ - Filename: "viam-server.exe", - Url: "https://storage.googleapis.com/packages.viam.com/temp/viam-server-windows-amd64-alpha-1-546e6603.exe", - Version: "alpha-1", - Sha256: mustDecodeBase64("uLmZqybHFWpivsy7fgObZvX9t2K1Ndu8Npw4ZYMu0Xk="), - Format: pb.PackageFormat_PACKAGE_FORMAT_EXECUTABLE, -} - // Update is the main function of the AgentSubsystem wrapper, as it's shared between subsystems. Returns true if a restart is needed. // //nolint:gocognit @@ -265,11 +250,7 @@ func (s *AgentSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConf updateInfo := cfg.GetUpdateInfo() if updateInfo == nil { - if runtime.GOOS == "windows" && s.name == "viam-server" { - updateInfo = staticWindowsViamServer - } else { - return false, fmt.Errorf("updateInfo for %s is nil. are you on an unsupported platform?", s.name) - } + return false, fmt.Errorf("updateInfo for %s is nil. are you on an unsupported platform?", s.name) } // check if we already have the version given by the cloud @@ -364,6 +345,9 @@ func (s *AgentSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConf // symlink the extracted file to bin verData.SymlinkPath = path.Join(ViamDirs["bin"], updateInfo.GetFilename()) + if runtime.GOOS == "windows" { + verData.SymlinkPath += ".exe" + } if err = ForceSymlink(verData.UnpackedPath, verData.SymlinkPath); err != nil { return needRestart, errw.Wrap(err, "creating symlink") } From 6534c3e3280aa07cbf5bd0926f03e2915eeed75b Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Mon, 6 Jan 2025 00:50:16 -0500 Subject: [PATCH 19/22] service working (move up goroutine start) --- cmd/viam-agent/main_windows.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/viam-agent/main_windows.go b/cmd/viam-agent/main_windows.go index 62c268b..bc00a12 100644 --- a/cmd/viam-agent/main_windows.go +++ b/cmd/viam-agent/main_windows.go @@ -49,8 +49,8 @@ func main() { defer elog.Close() elog.Info(1, fmt.Sprintf("starting %s service", serviceName)) - err = svc.Run(serviceName, &agentService{}) go commonMain() + err = svc.Run(serviceName, &agentService{}) if err != nil { elog.Error(1, fmt.Sprintf("%s service failed: %v", serviceName, err)) return From 1a29820fee3e1902137ae276cc2cc78cc970e354 Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Mon, 6 Jan 2025 01:21:07 -0500 Subject: [PATCH 20/22] working windows install script --- Makefile | 6 ------ agent.bat | 10 ++++++++++ subsystems/viamserver/viamserver.go | 3 ++- 3 files changed, 12 insertions(+), 7 deletions(-) create mode 100644 agent.bat diff --git a/Makefile b/Makefile index e335b9f..0c37091 100644 --- a/Makefile +++ b/Makefile @@ -64,9 +64,3 @@ upload-stable: bin/viam-agent-$(PATH_VERSION)-x86_64 bin/viam-agent-$(PATH_VERSI .PHONY: upload-installer upload-installer: gsutil -h "Cache-Control:no-cache" cp preinstall.sh install.sh uninstall.sh gs://packages.viam.com/apps/viam-agent/ - -sc-create: - # create a windows service - # todo: move this to an installer script - # note: this works in cmd.exe but not powershell - sc create viam-agent binpath= c:\opt\viam\bin\viam-agent.exe start= auto diff --git a/agent.bat b/agent.bat new file mode 100644 index 0000000..c093df1 --- /dev/null +++ b/agent.bat @@ -0,0 +1,10 @@ +@echo off +:: installer for agent on windows + +mkdir \opt\viam\cache +mkdir \opt\viam\bin +curl https://storage.googleapis.com/packages.viam.com/temp/viam-agent-windows-amd64-alpha-1-17bbf00.exe -o \opt\viam\cache\viam-agent-windows-amd64-alpha-1-17bbf00.exe +del \opt\viam\bin\viam-agent.exe +mklink \opt\viam\bin\viam-agent.exe \opt\viam\cache\viam-agent-windows-amd64-alpha-1-17bbf00.exe +sc create viam-agent binpath= c:\opt\viam\bin\viam-agent.exe start= auto +sc start viam-agent diff --git a/subsystems/viamserver/viamserver.go b/subsystems/viamserver/viamserver.go index a277d8a..095a1d7 100644 --- a/subsystems/viamserver/viamserver.go +++ b/subsystems/viamserver/viamserver.go @@ -232,7 +232,8 @@ func (s *viamServer) Stop(ctx context.Context) error { err := s.cmd.Process.Signal(syscall.SIGTERM) if err != nil { - s.logger.Error(err) + // todo(windows): I think this fails on windows; make sure stop/start works, potentially skip to kill(). + s.logger.Error(errw.Wrap(err, "terminating")) } if s.waitForExit(ctx, stopTermTimeout) { From 4a98019861e814762b67f962ef86cebdb09fd335 Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Mon, 6 Jan 2025 02:43:37 -0500 Subject: [PATCH 21/22] use vars in batch script --- agent.bat | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/agent.bat b/agent.bat index c093df1..5baf9ff 100644 --- a/agent.bat +++ b/agent.bat @@ -1,10 +1,13 @@ @echo off :: installer for agent on windows -mkdir \opt\viam\cache -mkdir \opt\viam\bin -curl https://storage.googleapis.com/packages.viam.com/temp/viam-agent-windows-amd64-alpha-1-17bbf00.exe -o \opt\viam\cache\viam-agent-windows-amd64-alpha-1-17bbf00.exe -del \opt\viam\bin\viam-agent.exe -mklink \opt\viam\bin\viam-agent.exe \opt\viam\cache\viam-agent-windows-amd64-alpha-1-17bbf00.exe -sc create viam-agent binpath= c:\opt\viam\bin\viam-agent.exe start= auto +set root=\opt\viam +set fname=viam-agent-windows-amd64-alpha-1-17bbf00.exe +mkdir %root%\cache +mkdir %root%\bin +curl https://storage.googleapis.com/packages.viam.com/temp/%fname% -o %root%\cache\%fname% +:: netsh %root%\cache\%fname% +del %root%\bin\viam-agent.exe +mklink %root%\bin\viam-agent.exe %root%\cache\%fname% +sc create viam-agent binpath= c:%root%\bin\viam-agent.exe start= auto sc start viam-agent From d637d1d2196cafaf151b3f645b202aace0bc4924 Mon Sep 17 00:00:00 2001 From: Abe Winter Date: Mon, 6 Jan 2025 19:23:21 -0500 Subject: [PATCH 22/22] skip healthcheck error --- subsystems/viamserver/viamserver.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/subsystems/viamserver/viamserver.go b/subsystems/viamserver/viamserver.go index 095a1d7..17930e7 100644 --- a/subsystems/viamserver/viamserver.go +++ b/subsystems/viamserver/viamserver.go @@ -281,6 +281,10 @@ func (s *viamServer) HealthCheck(ctx context.Context) (errRet error) { return errw.Errorf("%s not running", SubsysName) } if s.checkURL == "" { + if runtime.GOOS == "windows" { + // todo(windows): we hit this case on windows; debug why. note: it also can't signal the subprocess to stop. + return nil + } return errw.Errorf("can't find listening URL for %s", SubsysName) }