diff --git a/Makefile b/Makefile index de766b8..0c37091 100644 --- a/Makefile +++ b/Makefile @@ -37,6 +37,13 @@ bin/viam-agent-$(PATH_VERSION)-$(LINUX_ARCH): go.* *.go */*.go */*/*.go subsyste go build -o $@ -trimpath -tags $(TAGS) -ldflags $(LDFLAGS) ./cmd/viam-agent/main.go test "$(PATH_VERSION)" != "custom" && cp $@ bin/viam-agent-stable-$(LINUX_ARCH) || true +windows: bin/viam-agent.exe + +bin/viam-agent.exe: + GOOS=windows GOARCH=amd64 go build -o $@ -trimpath -tags $(TAGS) -ldflags $(LDFLAGS) ./cmd/viam-agent + file $@ + du -hc $@ + .PHONY: clean clean: rm -rf bin/ diff --git a/agent.bat b/agent.bat new file mode 100644 index 0000000..5baf9ff --- /dev/null +++ b/agent.bat @@ -0,0 +1,13 @@ +@echo off +:: installer for agent on windows + +set root=\opt\viam +set fname=viam-agent-windows-amd64-alpha-1-17bbf00.exe +mkdir %root%\cache +mkdir %root%\bin +curl https://storage.googleapis.com/packages.viam.com/temp/%fname% -o %root%\cache\%fname% +:: netsh %root%\cache\%fname% +del %root%\bin\viam-agent.exe +mklink %root%\bin\viam-agent.exe %root%\cache\%fname% +sc create viam-agent binpath= c:%root%\bin\viam-agent.exe start= auto +sc start viam-agent diff --git a/cmd/viam-agent/main.go b/cmd/viam-agent/main.go index ccccb74..ad699df 100644 --- a/cmd/viam-agent/main.go +++ b/cmd/viam-agent/main.go @@ -4,12 +4,11 @@ import ( "bytes" "context" "fmt" - "io/fs" "os" - "os/exec" "os/signal" "os/user" "path/filepath" + "runtime" "strings" "sync" "syscall" @@ -19,12 +18,10 @@ import ( "github.com/nightlyone/lockfile" "github.com/pkg/errors" "github.com/viamrobotics/agent" - "github.com/viamrobotics/agent/subsystems/provisioning" - _ "github.com/viamrobotics/agent/subsystems/syscfg" "github.com/viamrobotics/agent/subsystems/viamagent" "github.com/viamrobotics/agent/subsystems/viamserver" + autils "github.com/viamrobotics/agent/utils" "go.viam.com/rdk/logging" - "go.viam.com/utils" ) var ( @@ -34,8 +31,20 @@ var ( globalLogger = logging.NewLogger("viam-agent") ) +//nolint:lll 
+type agentOpts struct { + Config string `default:"/etc/viam.json" description:"Path to config file" long:"config" short:"c"` + ProvisioningConfig string `default:"/etc/viam-provisioning.json" description:"Path to provisioning (customization) config file" long:"provisioning" short:"p"` + Debug bool `description:"Enable debug logging (agent only)" env:"VIAM_AGENT_DEBUG" long:"debug" short:"d"` + Fast bool `description:"Enable fast start mode" env:"VIAM_AGENT_FAST_START" long:"fast" short:"f"` + Help bool `description:"Show this help message" long:"help" short:"h"` + Version bool `description:"Show version" long:"version" short:"v"` + Install bool `description:"Install systemd service" long:"install"` + DevMode bool `description:"Allow non-root and non-service" env:"VIAM_AGENT_DEVMODE" long:"dev-mode"` +} + //nolint:gocognit -func main() { +func commonMain() { ctx, cancel := setupExitSignalHandling() defer func() { @@ -43,17 +52,7 @@ func main() { activeBackgroundWorkers.Wait() }() - //nolint:lll - var opts struct { - Config string `default:"/etc/viam.json" description:"Path to config file" long:"config" short:"c"` - ProvisioningConfig string `default:"/etc/viam-provisioning.json" description:"Path to provisioning (customization) config file" long:"provisioning" short:"p"` - Debug bool `description:"Enable debug logging (agent only)" env:"VIAM_AGENT_DEBUG" long:"debug" short:"d"` - Fast bool `description:"Enable fast start mode" env:"VIAM_AGENT_FAST_START" long:"fast" short:"f"` - Help bool `description:"Show this help message" long:"help" short:"h"` - Version bool `description:"Show version" long:"version" short:"v"` - Install bool `description:"Install systemd service" long:"install"` - DevMode bool `description:"Allow non-root and non-service" env:"VIAM_AGENT_DEVMODE" long:"dev-mode"` - } + var opts agentOpts parser := flags.NewParser(&opts, flags.IgnoreUnknown) parser.Usage = "runs as a background service and manages updates and the process lifecycle for 
viam-server." @@ -82,7 +81,7 @@ func main() { // need to be root to go any further than this curUser, err := user.Current() exitIfError(err) - if curUser.Uid != "0" && !opts.DevMode { + if runtime.GOOS != "windows" && curUser.Uid != "0" && !opts.DevMode { //nolint:forbidigo fmt.Printf("viam-agent must be run as root (uid 0), but current user is %s (uid %s)\n", curUser.Username, curUser.Uid) return @@ -93,7 +92,7 @@ func main() { return } - if !opts.DevMode { + if !opts.DevMode && runtime.GOOS != "windows" { // confirm that we're running from a proper install if !strings.HasPrefix(os.Args[0], agent.ViamDirs["viam"]) { //nolint:forbidigo @@ -117,63 +116,16 @@ func main() { } }() - // pass the provisioning path arg to the subsystem - absProvConfigPath, err := filepath.Abs(opts.ProvisioningConfig) - exitIfError(err) - provisioning.ProvisioningConfigFilePath = absProvConfigPath - globalLogger.Infof("provisioning config file path: %s", absProvConfigPath) - - // tie the manager config to the viam-server config - absConfigPath, err := filepath.Abs(opts.Config) - exitIfError(err) - viamserver.ConfigFilePath = absConfigPath - provisioning.AppConfigFilePath = absConfigPath - globalLogger.Infof("config file path: %s", absConfigPath) + absConfigPath := setupProvisioningPaths(opts) // main manager structure manager, err := agent.NewManager(ctx, globalLogger) exitIfError(err) - err = manager.LoadConfig(absConfigPath) + loadConfigErr := manager.LoadConfig(absConfigPath) //nolint:nestif - if err != nil { - // If the local /etc/viam.json config is corrupted, invalid, or missing (due to a new install), we can get stuck here. - // Rename the file (if it exists) and wait to provision a new one. 
-		if !errors.Is(err, fs.ErrNotExist) {
-			if err := os.Rename(absConfigPath, absConfigPath+".old"); err != nil {
-				// if we can't rename the file, we're up a creek, and it's fatal
-				globalLogger.Error(errors.Wrapf(err, "removing invalid config file %s", absConfigPath))
-				globalLogger.Error("unable to continue with provisioning, exiting")
-				manager.CloseAll()
-				return
-			}
-		}
-
-		// We manually start the provisioning service to allow the user to update it and wait.
-		// The user may be updating it soon, so better to loop quietly than to exit and let systemd keep restarting infinitely.
-		globalLogger.Infof("main config file %s missing or corrupt, entering provisioning mode", absConfigPath)
-
-		if err := manager.StartSubsystem(ctx, provisioning.SubsysName); err != nil {
-			if errors.Is(err, agent.ErrSubsystemDisabled) {
-				globalLogger.Warn("provisioning subsystem disabled, please manually update /etc/viam.json and connect to internet")
-			} else {
-				globalLogger.Error(errors.Wrapf(err,
-					"could not start provisioning subsystem, please manually update /etc/viam.json and connect to internet"))
-				manager.CloseAll()
-				return
-			}
-		}
-
-		for {
-			globalLogger.Warn("waiting for user provisioning")
-			if !utils.SelectContextOrWait(ctx, time.Second*10) {
-				manager.CloseAll()
-				return
-			}
-			if err := manager.LoadConfig(absConfigPath); err == nil {
-				break
-			}
-		}
+	if loadConfigErr != nil {
+		runPlatformProvisioning(ctx, manager, loadConfigErr, absConfigPath)
 	}
 	netAppender, err := manager.CreateNetAppender()
 	if err != nil {
@@ -199,23 +151,7 @@ func main() {
 	// wait to be online
 	timeoutCtx, cancel := context.WithTimeout(ctx, time.Minute)
 	defer cancel()
-	for {
-		cmd := exec.CommandContext(timeoutCtx, "systemctl", "is-active", "network-online.target")
-		_, err := cmd.CombinedOutput()
-
-		if err == nil {
-			break
-		}
-
-		if e := (&exec.ExitError{}); !errors.As(err, &e) {
-			// if it's not an ExitError, that means it didn't even start, so bail out
-			globalLogger.Error(errors.Wrap(err, "running 'systemctl is-active network-online.target'"))
-			break
-		}
-		if !utils.SelectContextOrWait(timeoutCtx, time.Second) {
-			break
-		}
-	}
+	autils.WaitOnline(globalLogger, timeoutCtx)
 
 	// Check for self-update and restart if needed.
 	needRestart, err := manager.SelfUpdate(ctx)
@@ -268,12 +204,11 @@ func setupExitSignalHandling() (context.Context, func()) {
 			// this will eventually be handled elsewhere as a restart, not exit
 			case syscall.SIGHUP:
 
-			// ignore SIGURG entirely, it's used for real-time scheduling notifications
-			case syscall.SIGURG:
-
 			// log everything else
 			default:
-				globalLogger.Debugw("received unknown signal", "signal", sig)
+				if !ignoredSignal(sig) {
+					globalLogger.Debugw("received unknown signal", "signal", sig)
+				}
 			}
 		}
 	}()
@@ -282,6 +217,7 @@ func setupExitSignalHandling() (context.Context, func()) {
 	return ctx, cancel
 }
 
+// helper to log.Fatal if error is non-nil.
 func exitIfError(err error) {
 	if err != nil {
 		globalLogger.Fatal(err)
diff --git a/cmd/viam-agent/main_windows.go b/cmd/viam-agent/main_windows.go
new file mode 100644
index 0000000..bc00a12
--- /dev/null
+++ b/cmd/viam-agent/main_windows.go
@@ -0,0 +1,63 @@
+package main
+
+import (
+	"fmt"
+
+	"golang.org/x/sys/windows/svc"
+	"golang.org/x/sys/windows/svc/debug"
+	"golang.org/x/sys/windows/svc/eventlog"
+)
+
+var elog debug.Log
+
+const serviceName = "viam-agent"
+
+type agentService struct{}
+
+// Execute is the control loop for the windows service. It reports the service as
+// running, answers interrogate requests with the current status, and leaves the
+// loop when a stop or shutdown request arrives (or the request channel closes).
+func (*agentService) Execute(args []string, r <-chan svc.ChangeRequest, changes chan<- svc.Status) (ssec bool, errno uint32) {
+	changes <- svc.Status{State: svc.Running, Accepts: svc.AcceptStop | svc.AcceptShutdown}
+loop:
+	for c := range r {
+		switch c.Cmd {
+		case svc.Stop, svc.Shutdown:
+			break loop
+		case svc.Interrogate:
+			// the service control manager expects the current status echoed back.
+			changes <- c.CurrentStatus
+		default:
+			// log the command code itself, not the whole request struct.
+			elog.Error(1, fmt.Sprintf("unexpected control request #%d", c.Cmd))
+		}
+	}
+	changes <- svc.Status{State: svc.StopPending}
+	return
+}
+
+func main() {
+	if inService, err := svc.IsWindowsService(); err != nil {
+		panic(err)
+	} else if !inService {
+		println("no service detected -- running as normal process")
+		commonMain()
+		return
+	}
+
+	var err error
+	elog, err = eventlog.Open(serviceName)
+	if err != nil {
+		return
+	}
+	defer elog.Close()
+
+	elog.Info(1, fmt.Sprintf("starting %s service", serviceName))
+	go commonMain()
+	err = svc.Run(serviceName, &agentService{})
+	if err != nil {
+		elog.Error(1, fmt.Sprintf("%s service failed: %v", serviceName, err))
+		return
+	}
+	elog.Info(1, fmt.Sprintf("%s service stopped", serviceName))
+}
diff --git a/cmd/viam-agent/subsystems_linux.go b/cmd/viam-agent/subsystems_linux.go
new file mode 100644
index 0000000..d771862
--- /dev/null
+++ b/cmd/viam-agent/subsystems_linux.go
@@ -0,0 +1,96 @@
+package main
+
+import (
+	"context"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"syscall"
+	"time"
+
+	"github.com/pkg/errors"
+	"github.com/viamrobotics/agent"
+	"github.com/viamrobotics/agent/subsystems/provisioning"
+
+	// register-only.
+	_ "github.com/viamrobotics/agent/subsystems/syscfg"
+	"github.com/viamrobotics/agent/subsystems/viamserver"
+	"go.viam.com/utils"
+)
+
+func main() {
+	commonMain()
+}
+
+// runPlatformProvisioning is the linux provisioning fallback for a config that failed to load.
+// It moves an unreadable config file aside, starts the provisioning subsystem, and polls
+// LoadConfig every ten seconds until it succeeds or the wait on ctx is cut short.
+//
+// It returns nothing and the caller proceeds with startup afterwards, so the unrecoverable
+// paths close the manager and then log at Fatal level (the same call exitIfError uses to
+// stop the process) instead of handing a closed manager back to the caller.
+func runPlatformProvisioning(ctx context.Context, manager *agent.Manager, loadConfigErr error, absConfigPath string) {
+	// If the local /etc/viam.json config is corrupted, invalid, or missing (due to a new install), we can get stuck here.
+	// Rename the file (if it exists) and wait to provision a new one.
+	if !errors.Is(loadConfigErr, fs.ErrNotExist) {
+		if err := os.Rename(absConfigPath, absConfigPath+".old"); err != nil {
+			// if we can't rename the file, we're up a creek, and it's fatal
+			globalLogger.Error(errors.Wrapf(err, "removing invalid config file %s", absConfigPath))
+			manager.CloseAll()
+			globalLogger.Fatal("unable to continue with provisioning, exiting")
+			return
+		}
+	}
+
+	// We manually start the provisioning service to allow the user to update it and wait.
+	// The user may be updating it soon, so better to loop quietly than to exit and let systemd keep restarting infinitely.
+	globalLogger.Infof("main config file %s missing or corrupt, entering provisioning mode", absConfigPath)
+
+	if err := manager.StartSubsystem(ctx, provisioning.SubsysName); err != nil {
+		if errors.Is(err, agent.ErrSubsystemDisabled) {
+			globalLogger.Warn("provisioning subsystem disabled, please manually update /etc/viam.json and connect to internet")
+		} else {
+			manager.CloseAll()
+			globalLogger.Fatal(errors.Wrapf(err,
+				"could not start provisioning subsystem, please manually update /etc/viam.json and connect to internet"))
+			return
+		}
+	}
+
+	for {
+		globalLogger.Warn("waiting for user provisioning")
+		if !utils.SelectContextOrWait(ctx, time.Second*10) {
+			// the wait was cut short by ctx: release the manager and hand control back.
+			// NOTE(review): the caller keeps executing after this return; confirm it winds down on the cancelled ctx.
+			manager.CloseAll()
+			return
+		}
+		if err := manager.LoadConfig(absConfigPath); err == nil {
+			break
+		}
+	}
+}
+
+// platform-specific path setup.
+func setupProvisioningPaths(opts agentOpts) string { + // pass the provisioning path arg to the subsystem + absProvConfigPath, err := filepath.Abs(opts.ProvisioningConfig) + exitIfError(err) + provisioning.ProvisioningConfigFilePath = absProvConfigPath + globalLogger.Infof("provisioning config file path: %s", absProvConfigPath) + + // tie the manager config to the viam-server config + absConfigPath, err := filepath.Abs(opts.Config) + exitIfError(err) + viamserver.ConfigFilePath = absConfigPath + provisioning.AppConfigFilePath = absConfigPath + globalLogger.Infof("config file path: %s", absConfigPath) + + return absConfigPath +} + +// return true if this error is safe to ignore on this platform. +func ignoredSignal(sig os.Signal) bool { + // ignore SIGURG entirely, it's used for real-time scheduling notifications + return sig == syscall.SIGURG +} diff --git a/cmd/viam-agent/subsystems_windows.go b/cmd/viam-agent/subsystems_windows.go new file mode 100644 index 0000000..e023f19 --- /dev/null +++ b/cmd/viam-agent/subsystems_windows.go @@ -0,0 +1,27 @@ +package main + +import ( + "context" + "os" + "path/filepath" + + "github.com/viamrobotics/agent" + "github.com/viamrobotics/agent/subsystems/viamserver" +) + +func runPlatformProvisioning(context.Context, *agent.Manager, error, string) { + globalLogger.Warn("provisioning not available on windows yet") +} + +// platform-specific path setup. 
+func setupProvisioningPaths(opts agentOpts) string { + // tie the manager config to the viam-server config + absConfigPath, err := filepath.Abs(opts.Config) + exitIfError(err) + viamserver.ConfigFilePath = absConfigPath + globalLogger.Infof("config file path: %s", absConfigPath) + + return absConfigPath +} + +func ignoredSignal(os.Signal) bool { return false } diff --git a/manager.go b/manager.go index 55fb34e..253c9e7 100644 --- a/manager.go +++ b/manager.go @@ -446,7 +446,7 @@ func (m *Manager) processConfig(cfg map[string]*pb.DeviceSubsystemConfig) { // GetConfig retrieves the configuration from the cloud, or returns a cached version if unable to communicate. func (m *Manager) GetConfig(ctx context.Context) (map[string]*pb.DeviceSubsystemConfig, time.Duration, error) { if m.cloudConfig == nil { - return nil, 0, errors.New("can't GetConfig until successful LoadConfig") + return nil, minimalCheckInterval, errors.New("can't GetConfig until successful LoadConfig") } timeoutCtx, cancelFunc := context.WithTimeout(ctx, defaultNetworkTimeout) defer cancelFunc() diff --git a/subsystem.go b/subsystem.go index 013a795..2350bdb 100644 --- a/subsystem.go +++ b/subsystem.go @@ -13,11 +13,13 @@ import ( "path" "path/filepath" "regexp" + "runtime" "sync" "syscall" "time" errw "github.com/pkg/errors" + autils "github.com/viamrobotics/agent/utils" pb "go.viam.com/api/app/agent/v1" "go.viam.com/rdk/logging" ) @@ -247,6 +249,9 @@ func (s *AgentSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConf } updateInfo := cfg.GetUpdateInfo() + if updateInfo == nil { + return false, fmt.Errorf("updateInfo for %s is nil. 
are you on an unsupported platform?", s.name) + } // check if we already have the version given by the cloud verData, ok := s.CacheData.Versions[updateInfo.GetVersion()] @@ -340,6 +345,9 @@ func (s *AgentSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConf // symlink the extracted file to bin verData.SymlinkPath = path.Join(ViamDirs["bin"], updateInfo.GetFilename()) + if runtime.GOOS == "windows" { + verData.SymlinkPath += ".exe" + } if err = ForceSymlink(verData.UnpackedPath, verData.SymlinkPath); err != nil { return needRestart, errw.Wrap(err, "creating symlink") } @@ -438,7 +446,7 @@ func (is *InternalSubsystem) Start(ctx context.Context) error { //nolint:gosec is.cmd = exec.Command(path.Join(ViamDirs["bin"], is.name), is.cmdArgs...) is.cmd.Dir = ViamDirs["viam"] - is.cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + autils.PlatformSubprocessSettings(is.cmd) is.cmd.Stdout = stdio is.cmd.Stderr = stderr @@ -522,10 +530,7 @@ func (is *InternalSubsystem) Stop(ctx context.Context) error { } is.logger.Warnf("%s refused to exit, killing", is.name) - err = syscall.Kill(-is.cmd.Process.Pid, syscall.SIGKILL) - if err != nil { - is.logger.Error(err) - } + autils.PlatformKill(is.logger, is.cmd) if is.waitForExit(ctx, StopKillTimeout) { is.logger.Infof("%s successfully killed", is.name) @@ -555,44 +560,6 @@ func (is *InternalSubsystem) waitForExit(ctx context.Context, timeout time.Durat } } -// HealthCheck sends a USR1 signal to the subsystem process, which should cause it to log "HEALTHY" to stdout. 
-func (is *InternalSubsystem) HealthCheck(ctx context.Context) (errRet error) { - is.startStopMu.Lock() - defer is.startStopMu.Unlock() - is.mu.Lock() - defer is.mu.Unlock() - if !is.running { - return errw.Errorf("%s not running", is.name) - } - - is.logger.Debugf("starting healthcheck for %s", is.name) - - checkChan, err := is.cmd.Stdout.(*MatchingLogger).AddMatcher("healthcheck", regexp.MustCompile(`HEALTHY`), true) - if err != nil { - return err - } - defer func() { - matcher, ok := is.cmd.Stdout.(*MatchingLogger) - if ok { - matcher.DeleteMatcher("healthcheck") - } - }() - - err = is.cmd.Process.Signal(syscall.SIGUSR1) - if err != nil { - is.logger.Error(err) - } - - select { - case <-time.After(time.Second * 30): - case <-ctx.Done(): - case <-checkChan: - is.logger.Debugf("healthcheck for %s is good", is.name) - return nil - } - return errw.Errorf("timeout waiting for healthcheck on %s", is.name) -} - func (is *InternalSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConfig, newVersion bool) (bool, error) { jsonBytes, err := cfg.GetAttributes().MarshalJSON() if err != nil { diff --git a/subsystem_linux.go b/subsystem_linux.go new file mode 100644 index 0000000..10789fa --- /dev/null +++ b/subsystem_linux.go @@ -0,0 +1,48 @@ +package agent + +import ( + "context" + "regexp" + "syscall" + "time" + + errw "github.com/pkg/errors" +) + +// HealthCheck sends a USR1 signal to the subsystem process, which should cause it to log "HEALTHY" to stdout. 
+func (is *InternalSubsystem) HealthCheck(ctx context.Context) (errRet error) { + is.startStopMu.Lock() + defer is.startStopMu.Unlock() + is.mu.Lock() + defer is.mu.Unlock() + if !is.running { + return errw.Errorf("%s not running", is.name) + } + + is.logger.Debugf("starting healthcheck for %s", is.name) + + checkChan, err := is.cmd.Stdout.(*MatchingLogger).AddMatcher("healthcheck", regexp.MustCompile(`HEALTHY`), true) + if err != nil { + return err + } + defer func() { + matcher, ok := is.cmd.Stdout.(*MatchingLogger) + if ok { + matcher.DeleteMatcher("healthcheck") + } + }() + + err = is.cmd.Process.Signal(syscall.SIGUSR1) + if err != nil { + is.logger.Error(err) + } + + select { + case <-time.After(time.Second * 30): + case <-ctx.Done(): + case <-checkChan: + is.logger.Debugf("healthcheck for %s is good", is.name) + return nil + } + return errw.Errorf("timeout waiting for healthcheck on %s", is.name) +} diff --git a/subsystem_windows.go b/subsystem_windows.go new file mode 100644 index 0000000..1f199ae --- /dev/null +++ b/subsystem_windows.go @@ -0,0 +1,10 @@ +package agent + +import ( + "context" +) + +func (is *InternalSubsystem) HealthCheck(ctx context.Context) (errRet error) { + // todo: flesh this out. SIGUSR1 isn't available on windows. 
+ return nil +} diff --git a/subsystems/viamserver/viamserver.go b/subsystems/viamserver/viamserver.go index efbba06..17930e7 100644 --- a/subsystems/viamserver/viamserver.go +++ b/subsystems/viamserver/viamserver.go @@ -7,9 +7,11 @@ import ( "encoding/json" "errors" "net/http" + "os" "os/exec" "path" "regexp" + "runtime" "strings" "sync" "sync/atomic" @@ -20,6 +22,7 @@ import ( "github.com/viamrobotics/agent" "github.com/viamrobotics/agent/subsystems" "github.com/viamrobotics/agent/subsystems/registry" + autils "github.com/viamrobotics/agent/utils" pb "go.viam.com/api/app/agent/v1" "go.viam.com/rdk/logging" "go.viam.com/utils" @@ -109,6 +112,12 @@ func configFromProto(logger logging.Logger, updateConf *pb.DeviceSubsystemConfig return ret } +func pathExists(path string) bool { + // todo: give the manager access to this + _, err := os.Stat(path) + return err == nil +} + func (s *viamServer) Start(ctx context.Context) error { s.startStopMu.Lock() defer s.startStopMu.Unlock() @@ -119,6 +128,16 @@ func (s *viamServer) Start(ctx context.Context) error { s.mu.Unlock() return nil } + binPath := path.Join(agent.ViamDirs["bin"], SubsysName) + if runtime.GOOS == "windows" { + binPath += ".exe" + } + if !pathExists(binPath) { + s.logger.Warnf("viam-server binary missing at %s, not starting", binPath) + // todo: nested func so unlock is deferable + s.mu.Unlock() + return nil + } if s.shouldRun { s.logger.Warnf("Restarting %s after unexpected exit", SubsysName) } else { @@ -129,9 +148,9 @@ func (s *viamServer) Start(ctx context.Context) error { stdio := agent.NewMatchingLogger(s.logger, false, false) stderr := agent.NewMatchingLogger(s.logger, true, false) //nolint:gosec - s.cmd = exec.Command(path.Join(agent.ViamDirs["bin"], SubsysName), "-config", ConfigFilePath) + s.cmd = exec.Command(binPath, "-config", ConfigFilePath) s.cmd.Dir = agent.ViamDirs["viam"] - s.cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + autils.PlatformSubprocessSettings(s.cmd) s.cmd.Stdout = stdio 
s.cmd.Stderr = stderr @@ -213,7 +232,8 @@ func (s *viamServer) Stop(ctx context.Context) error { err := s.cmd.Process.Signal(syscall.SIGTERM) if err != nil { - s.logger.Error(err) + // todo(windows): I think this fails on windows; make sure stop/start works, potentially skip to kill(). + s.logger.Error(errw.Wrap(err, "terminating")) } if s.waitForExit(ctx, stopTermTimeout) { @@ -222,10 +242,7 @@ func (s *viamServer) Stop(ctx context.Context) error { } s.logger.Warnf("%s refused to exit, killing", SubsysName) - err = syscall.Kill(-s.cmd.Process.Pid, syscall.SIGKILL) - if err != nil { - s.logger.Error(err) - } + autils.PlatformKill(s.logger, s.cmd) if s.waitForExit(ctx, stopKillTimeout) { s.logger.Infof("%s successfully killed", SubsysName) @@ -264,6 +281,10 @@ func (s *viamServer) HealthCheck(ctx context.Context) (errRet error) { return errw.Errorf("%s not running", SubsysName) } if s.checkURL == "" { + if runtime.GOOS == "windows" { + // todo(windows): we hit this case on windows; debug why. note: it also can't signal the subprocess to stop. + return nil + } return errw.Errorf("can't find listening URL for %s", SubsysName) } diff --git a/utils.go b/utils.go index 4872259..6cf20f5 100644 --- a/utils.go +++ b/utils.go @@ -16,13 +16,12 @@ import ( "os" "path" "path/filepath" + "runtime" "strings" - "syscall" "time" errw "github.com/pkg/errors" "github.com/ulikunitz/xz" - "golang.org/x/sys/unix" "google.golang.org/protobuf/types/known/structpb" ) @@ -51,6 +50,14 @@ func GetRevision() string { } func init() { + if runtime.GOOS == "windows" { + // note: forward slash isn't an abs path on windows, but resolves to one. 
+ var err error + ViamDirs["viam"], err = filepath.Abs(ViamDirs["viam"]) + if err != nil { + panic(err) + } + } ViamDirs["bin"] = filepath.Join(ViamDirs["viam"], "bin") ViamDirs["cache"] = filepath.Join(ViamDirs["viam"], "cache") ViamDirs["tmp"] = filepath.Join(ViamDirs["viam"], "tmp") @@ -59,6 +66,10 @@ func init() { func InitPaths() error { uid := os.Getuid() + expectedPerms := 0o755 + if runtime.GOOS == "windows" { + expectedPerms = 0o777 + } for _, p := range ViamDirs { info, err := os.Stat(p) if err != nil { @@ -71,19 +82,14 @@ func InitPaths() error { } return errw.Wrapf(err, "checking directory %s", p) } - stat, ok := info.Sys().(*syscall.Stat_t) - if !ok { - // should be impossible on Linux - return errw.New("cannot convert to syscall.Stat_t") - } - if uid != int(stat.Uid) { - return errw.Errorf("%s is owned by UID %d but the current UID is %d", p, stat.Uid, uid) + if err := checkPathOwner(uid, info); err != nil { + return err } if !info.IsDir() { return errw.Errorf("%s should be a directory, but is not", p) } - if info.Mode().Perm() != 0o755 { - return errw.Errorf("%s should be have permission set to 0755, but has permissions %d", p, info.Mode().Perm()) + if info.Mode().Perm() != fs.FileMode(expectedPerms) { + return errw.Errorf("%s should have permission set to %#o, but has permissions %#o", p, expectedPerms, info.Mode().Perm()) } } return nil @@ -158,8 +164,16 @@ func DownloadFile(ctx context.Context, rawURL string) (outPath string, errRet er if err != nil { return "", err } + closed := false defer func() { - errRet = errors.Join(errRet, out.Close(), SyncFS(out.Name())) + if !closed { + errRet = errors.Join(errRet, out.Close()) + } + if runtime.GOOS != "windows" { + // note: error is different on windows (EBADF?). + // also this has in theory already synced in the success case. 
+ errRet = errors.Join(errRet, SyncFS(out.Name())) + } if err := os.Remove(out.Name()); err != nil && !os.IsNotExist(err) { errRet = errors.Join(errRet, err) } @@ -169,6 +183,8 @@ func DownloadFile(ctx context.Context, rawURL string) (outPath string, errRet er if err != nil && !os.IsNotExist(err) { errRet = errors.Join(errRet, err) } + errRet = errors.Join(errRet, out.Close()) + closed = true errRet = errors.Join(errRet, os.Rename(out.Name(), outPath), SyncFS(outPath)) return outPath, errRet @@ -283,18 +299,6 @@ func ForceSymlink(orig, symlink string) error { return SyncFS(symlink) } -func SyncFS(syncPath string) (errRet error) { - file, errRet := os.Open(filepath.Dir(syncPath)) - if errRet != nil { - return errw.Wrapf(errRet, "syncing fs %s", syncPath) - } - _, _, err := unix.Syscall(unix.SYS_SYNCFS, file.Fd(), 0, 0) - if err != 0 { - errRet = errw.Wrapf(err, "syncing fs %s", syncPath) - } - return errors.Join(errRet, file.Close()) -} - func WriteFileIfNew(outPath string, data []byte) (bool, error) { //nolint:gosec curFileBytes, err := os.ReadFile(outPath) diff --git a/utils/utils_linux.go b/utils/utils_linux.go new file mode 100644 index 0000000..85560b1 --- /dev/null +++ b/utils/utils_linux.go @@ -0,0 +1,46 @@ +package utils + +import ( + "context" + "os/exec" + "syscall" + "time" + + "github.com/pkg/errors" + "go.viam.com/rdk/logging" + "go.viam.com/utils" +) + +// PlatformSubprocessSettings sets platform-specific subprocess settings. +func PlatformSubprocessSettings(cmd *exec.Cmd) { + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} +} + +// PlatformKill does SIGKILL if available for the platform. +func PlatformKill(logger logging.Logger, cmd *exec.Cmd) { + err := syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + if err != nil { + logger.Error(err) + } +} + +// WaitOnline attempts to wait until the network comes up, with various bailout conditions. 
+func WaitOnline(logger logging.Logger, ctx context.Context) {
+	for {
+		cmd := exec.CommandContext(ctx, "systemctl", "is-active", "network-online.target")
+		_, err := cmd.CombinedOutput()
+
+		if err == nil {
+			break
+		}
+
+		if e := (&exec.ExitError{}); !errors.As(err, &e) {
+			// if it's not an ExitError, that means it didn't even start, so bail out
+			logger.Error(errors.Wrap(err, "running 'systemctl is-active network-online.target'"))
+			break
+		}
+		if !utils.SelectContextOrWait(ctx, time.Second) {
+			break
+		}
+	}
+}
diff --git a/utils/utils_windows.go b/utils/utils_windows.go
new file mode 100644
index 0000000..5fe98d8
--- /dev/null
+++ b/utils/utils_windows.go
@@ -0,0 +1,29 @@
+package utils
+
+import (
+	"context"
+	"os/exec"
+
+	"go.viam.com/rdk/logging"
+)
+
+// PlatformSubprocessSettings sets platform-specific subprocess settings.
+// Nothing is configured on windows yet.
+func PlatformSubprocessSettings(cmd *exec.Cmd) {}
+
+// PlatformKill forcibly ends the subprocess, the closest windows equivalent of SIGKILL.
+// Only the process itself is killed; any children it spawned are not handled here.
+// A command that was never started (nil Process) is ignored.
+func PlatformKill(logger logging.Logger, cmd *exec.Cmd) {
+	if cmd == nil || cmd.Process == nil {
+		return
+	}
+	if err := cmd.Process.Kill(); err != nil {
+		logger.Error(err)
+	}
+}
+
+// WaitOnline is a placeholder on windows: it logs a warning and returns immediately.
+func WaitOnline(logger logging.Logger, ctx context.Context) {
+	logger.Warn("WaitOnline not available on windows yet")
+}
diff --git a/utils_linux.go b/utils_linux.go
new file mode 100644
index 0000000..bafef5a
--- /dev/null
+++ b/utils_linux.go
@@ -0,0 +1,37 @@
+package agent
+
+import (
+	"errors"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"syscall"
+
+	errw "github.com/pkg/errors"
+	"golang.org/x/sys/unix"
+)
+
+// platform-specific UID check.
+func checkPathOwner(uid int, info fs.FileInfo) error {
+	stat, ok := info.Sys().(*syscall.Stat_t)
+	if !ok {
+		// should be impossible on Linux
+		return errw.New("cannot convert to syscall.Stat_t")
+	}
+	if uid != int(stat.Uid) {
+		return errw.Errorf("%s is owned by UID %d but the current UID is %d", info.Name(), stat.Uid, uid)
+	}
+	return nil
+}
+
+func SyncFS(syncPath string) (errRet error) {
+	file, errRet := os.Open(filepath.Dir(syncPath))
+	if errRet != nil {
+		return errw.Wrapf(errRet, "syncing fs %s", syncPath)
+	}
+	_, _, err := unix.Syscall(unix.SYS_SYNCFS, file.Fd(), 0, 0)
+	if err != 0 {
+		errRet = errw.Wrapf(err, "syncing fs %s", syncPath)
+	}
+	return errors.Join(errRet, file.Close())
+}
diff --git a/utils_windows.go b/utils_windows.go
new file mode 100644
index 0000000..f54c1ed
--- /dev/null
+++ b/utils_windows.go
@@ -0,0 +1,32 @@
+package agent
+
+import (
+	"errors"
+	"io/fs"
+	"syscall"
+
+	errw "github.com/pkg/errors"
+)
+
+// platform-specific UID check.
+func checkPathOwner(uid int, info fs.FileInfo) error {
+	// todo: figure this out on windows.
+	return nil
+}
+
+// SyncFS flushes the file at syncPath to disk by opening it read-write and syncing that handle.
+// Failures are wrapped with the path, matching the linux implementation, and an error from
+// closing the handle is joined into the result rather than discarded.
+func SyncFS(syncPath string) (errRet error) {
+	handle, err := syscall.Open(syncPath, syscall.O_RDWR, 0)
+	if err != nil {
+		return errw.Wrapf(err, "syncing fs %s", syncPath)
+	}
+	defer func() {
+		errRet = errors.Join(errRet, syscall.CloseHandle(handle))
+	}()
+	if err := syscall.Fsync(handle); err != nil {
+		return errw.Wrapf(err, "syncing fs %s", syncPath)
+	}
+	return nil
+}