Skip to content

Commit

Permalink
keep status value after completion (#275)
Browse files Browse the repository at this point in the history
Deleting the status value that a controller uses to track its work can
lead to that controller continuing to work after the orchestrator's
reconciler goroutine has marked the condition as failed. In a situation
where the controller attempts a status update and the orchestrator has
already failed the condition, the only acceptable response is for the
controller to stop work.
  • Loading branch information
DoctorVin authored Nov 4, 2024
1 parent dd89460 commit d03f704
Showing 1 changed file with 3 additions and 23 deletions.
26 changes: 3 additions & 23 deletions internal/orchestrator/updates.go
Original file line number Diff line number Diff line change
Expand Up @@ -425,10 +425,9 @@ func (o *Orchestrator) eventUpdate(ctx context.Context, evt *v1types.ConditionUp
}

func (o *Orchestrator) finalizeCondition(ctx context.Context, cond *rctypes.Condition) error {
// if we fail to update the event history or to delete this event from the KV,
// the reconciler will catch it later and walk this code, so return early. It
// is kosher to replay event history iff the contents of that history (id,
// condition kind, target, parameters, state and status) are identical.
// if we fail to update the event history the reconciler will catch it later and walk
// this code, so return early. It is kosher to replay event history iff the contents of
// that history (id, condition kind, target, parameters, state and status) are identical.
if err := o.db.WriteEventHistory(ctx, cond); err != nil {
o.logger.WithError(err).WithFields(logrus.Fields{
"condition.id": cond.ID.String(),
Expand All @@ -441,19 +440,6 @@ func (o *Orchestrator) finalizeCondition(ctx context.Context, cond *rctypes.Cond
return errors.Wrap(errCompleteEvent, err.Error())
}

delErr := status.DeleteCondition(cond.Kind, o.facility, cond.ID.String())
if delErr != nil {
o.logger.WithError(delErr).WithFields(logrus.Fields{
"condition.id": cond.ID.String(),
"server.id": cond.Target.String(),
"condition.kind": cond.Kind,
}).Warn("removing completed condition data")

metrics.DependencyError("nats", "remove completed condition condition")

return errors.Wrap(errCompleteEvent, delErr.Error())
}

metrics.ConditionCompleted.With(
prometheus.Labels{
"conditionKind": string(cond.Kind),
Expand Down Expand Up @@ -609,12 +595,6 @@ func (o *Orchestrator) reconcileStatusKVEntries(ctx context.Context) {
// arguably dependencies are in a weird state, maybe return?
continue
}
// if we're here there has been a terminal error trying to reconcile this
// status value. Get rid of it now.
if err = status.DeleteCondition(evt.Kind, o.facility, evt.ConditionID.String()); err != nil {
le.WithError(err).Warn("deleting condition on reconciliation")
continue
}
}

le.Info("condition reconciled")
Expand Down

0 comments on commit d03f704

Please sign in to comment.