From 5120ce468889011d1033298b06272d5f3bb19895 Mon Sep 17 00:00:00 2001 From: Siyuan Zhang Date: Tue, 23 Jul 2024 16:19:52 -0700 Subject: [PATCH] migrate experimental-stop-grpc-service-on-defrag flag to feature gate. Signed-off-by: Siyuan Zhang --- server/config/config.go | 3 - server/embed/config.go | 30 +++++++++ server/embed/config_test.go | 87 ++++++++++++++++++++++++++ server/embed/etcd.go | 1 - server/etcdmain/config.go | 24 +++++++ server/etcdmain/config_test.go | 50 ++++++++++++++- server/etcdmain/help.go | 2 +- server/etcdserver/api/v3rpc/health.go | 3 +- server/features/etcd_features.go | 5 ++ tests/e2e/failover_test.go | 38 +++++++++++ tests/framework/e2e/cluster.go | 9 +++ tests/framework/integration/cluster.go | 60 +++++++++--------- 12 files changed, 272 insertions(+), 40 deletions(-) diff --git a/server/config/config.go b/server/config/config.go index f1cd47bffa5c..5f3a0d8e9f51 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -193,9 +193,6 @@ type ServerConfig struct { // a shared buffer in its readonly check operations. ExperimentalTxnModeWriteWithSharedBuffer bool `json:"experimental-txn-mode-write-with-shared-buffer"` - // ExperimentalStopGRPCServiceOnDefrag enables etcd gRPC service to stop serving client requests on defragmentation. - ExperimentalStopGRPCServiceOnDefrag bool `json:"experimental-stop-grpc-service-on-defrag"` - // ExperimentalBootstrapDefragThresholdMegabytes is the minimum number of megabytes needed to be freed for etcd server to // consider running defrag during bootstrap. Needs to be set to non-zero value to take effect. ExperimentalBootstrapDefragThresholdMegabytes uint `json:"experimental-bootstrap-defrag-threshold-megabytes"` diff --git a/server/embed/config.go b/server/embed/config.go index 12be43d4d6bb..6c4931be32ac 100644 --- a/server/embed/config.go +++ b/server/embed/config.go @@ -888,6 +888,36 @@ func (cfg *configYAML) configFromFile(path string) error { return cfg.Validate() } +// SetFeatureGatesFromExperimentalFlags sets the feature gate values if the feature gate is not explicitly set +// while their corresponding experimental flags are explicitly set. +// TODO: remove after all experimental flags are deprecated. +func SetFeatureGatesFromExperimentalFlags(fg featuregate.FeatureGate, getExperimentalFlagVal func(string) (bool, bool), featureGatesFlagName, featureGatesVal string) error { + m := make(map[featuregate.Feature]bool) + // verify that the feature gate and its experimental flag are not both set at the same time. + for expFlagName, featureName := range features.ExperimentalFlagToFeatureMap { + flagVal, explicitlySet := getExperimentalFlagVal(expFlagName) + if !explicitlySet { + continue + } + if strings.Contains(featureGatesVal, string(featureName)) { + return fmt.Errorf("cannot specify both flags: --%s=(true|false) and --%s=%s=(true|false) at the same time, please just use --%s=%s=(true|false)", + expFlagName, featureGatesFlagName, featureName, featureGatesFlagName, featureName) + } + m[featureName] = flagVal + } + + // filter out unknown features for fg, because we could use SetFeatureGatesFromExperimentalFlags both for + // server and cluster level feature gates. + allFeatures := fg.(featuregate.MutableFeatureGate).GetAll() + mFiltered := make(map[string]bool) + for k, v := range m { + if _, ok := allFeatures[k]; ok { + mFiltered[string(k)] = v + } + } + return fg.(featuregate.MutableFeatureGate).SetFromMap(mFiltered) +} + func updateCipherSuites(tls *transport.TLSInfo, ss []string) error { if len(tls.CipherSuites) > 0 && len(ss) > 0 { return fmt.Errorf("TLSInfo.CipherSuites is already specified (given %v)", ss) diff --git a/server/embed/config_test.go b/server/embed/config_test.go index 9153feb06e68..961c86ce2b2f 100644 --- a/server/embed/config_test.go +++ b/server/embed/config_test.go @@ -21,6 +21,7 @@ import ( "net" "net/url" "os" + "strconv" "testing" "time" @@ -31,6 +32,8 @@ import ( "go.etcd.io/etcd/client/pkg/v3/srv" "go.etcd.io/etcd/client/pkg/v3/transport" "go.etcd.io/etcd/client/pkg/v3/types" + "go.etcd.io/etcd/pkg/v3/featuregate" + "go.etcd.io/etcd/server/v3/features" ) func notFoundErr(service, domain string) error { @@ -669,3 +672,87 @@ func TestUndefinedAutoCompactionModeValidate(t *testing.T) { err := cfg.Validate() require.Error(t, err) } + +func TestSetFeatureGatesFromExperimentalFlags(t *testing.T) { + testCases := []struct { + name string + featureGatesFlag string + experimentalStopGRPCServiceOnDefrag string + expectErr bool + expectedFeatures map[featuregate.Feature]bool + }{ + { + name: "default", + expectedFeatures: map[featuregate.Feature]bool{ + features.DistributedTracing: false, + features.StopGRPCServiceOnDefrag: false, + }, + }, + { + name: "cannot set experimental flag and feature gate to true at the same time", + featureGatesFlag: "StopGRPCServiceOnDefrag=true", + experimentalStopGRPCServiceOnDefrag: "true", + expectErr: true, + }, + { + name: "cannot set experimental flag and feature gate to false at the same time", + featureGatesFlag: "StopGRPCServiceOnDefrag=false", + experimentalStopGRPCServiceOnDefrag: "false", + expectErr: true, + }, + { + name: "cannot set experimental flag and feature gate to different values at the same time", + featureGatesFlag: "StopGRPCServiceOnDefrag=true", + experimentalStopGRPCServiceOnDefrag: "false", + expectErr: true, + }, + { + name: "can set experimental flag", + featureGatesFlag: "DistributedTracing=true", + experimentalStopGRPCServiceOnDefrag: "true", + expectedFeatures: map[featuregate.Feature]bool{ + features.DistributedTracing: true, + features.StopGRPCServiceOnDefrag: true, + }, + }, + { + name: "can set feature gate", + featureGatesFlag: "DistributedTracing=true,StopGRPCServiceOnDefrag=true", + expectedFeatures: map[featuregate.Feature]bool{ + features.DistributedTracing: true, + features.StopGRPCServiceOnDefrag: true, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + fg := features.NewDefaultServerFeatureGate("test", nil) + fg.(featuregate.MutableFeatureGate).Set(tc.featureGatesFlag) + getExperimentalFlagVal := func(flagName string) (bool, bool) { + if flagName != "experimental-stop-grpc-service-on-defrag" || tc.experimentalStopGRPCServiceOnDefrag == "" { + return false, false + } + flagVal, err := strconv.ParseBool(tc.experimentalStopGRPCServiceOnDefrag) + if err != nil { + t.Fatal(err) + } + return flagVal, true + } + err := SetFeatureGatesFromExperimentalFlags(fg, getExperimentalFlagVal, "feature-gates", tc.featureGatesFlag) + if tc.expectErr { + if err == nil { + t.Fatal("expect error") + } + return + } + if err != nil { + t.Fatal(err) + } + for k, v := range tc.expectedFeatures { + if fg.Enabled(k) != v { + t.Errorf("expected feature gate %s=%v, got %v", k, v, fg.Enabled(k)) + } + } + }) + } +} diff --git a/server/embed/etcd.go b/server/embed/etcd.go index a82e21c79d8b..2970a804d110 100644 --- a/server/embed/etcd.go +++ b/server/embed/etcd.go @@ -223,7 +223,6 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) { WarningUnaryRequestDuration: cfg.WarningUnaryRequestDuration, ExperimentalMemoryMlock: cfg.ExperimentalMemoryMlock, ExperimentalTxnModeWriteWithSharedBuffer: cfg.ExperimentalTxnModeWriteWithSharedBuffer, - ExperimentalStopGRPCServiceOnDefrag: cfg.ExperimentalStopGRPCServiceOnDefrag, ExperimentalBootstrapDefragThresholdMegabytes: cfg.ExperimentalBootstrapDefragThresholdMegabytes, ExperimentalMaxLearners: cfg.ExperimentalMaxLearners, V2Deprecation: cfg.V2DeprecationEffective(), diff --git a/server/etcdmain/config.go b/server/etcdmain/config.go index 4a46192cf0ff..c70e4bb33fdb 100644 --- a/server/etcdmain/config.go +++ b/server/etcdmain/config.go @@ -22,6 +22,7 @@ import ( "fmt" "os" "runtime" + "strconv" "time" "go.uber.org/zap" @@ -238,6 +239,29 @@ func (cfg *config) configFromCmdLine() error { cfg.ec.InitialCluster = "" } + // getBoolFlagVal checks if the a given flag is explicitly set in the cmd line arguments. + getBoolFlagVal := func(flagName string) (flagVal, explicitlySet bool) { + cfg.cf.flagSet.Visit(func(f *flag.Flag) { + if f.Name == flagName { + explicitlySet = true + } + }) + if explicitlySet { + flagVal, parseErr := strconv.ParseBool(cfg.cf.flagSet.Lookup(flagName).Value.String()) + if parseErr != nil { + panic(parseErr) + } + return flagVal, explicitlySet + } + return flagVal, explicitlySet + } + // SetFeatureGatesFromExperimentalFlags validates that cmd line flags for experimental feature and their feature gates are not explicitly set simultaneously, + // and passes the values of cmd line flags for experimental feature to the server feature gate. + err = embed.SetFeatureGatesFromExperimentalFlags(cfg.ec.ServerFeatureGate, getBoolFlagVal, embed.ServerFeatureGateFlagName, cfg.cf.flagSet.Lookup(embed.ServerFeatureGateFlagName).Value.String()) + if err != nil { + return err + } + return cfg.validate() } diff --git a/server/etcdmain/config_test.go b/server/etcdmain/config_test.go index b4aa39b2a6b5..ef25b5faa2c7 100644 --- a/server/etcdmain/config_test.go +++ b/server/etcdmain/config_test.go @@ -412,16 +412,60 @@ func TestParseFeatureGateFlags(t *testing.T) { }, }, { - name: "can set feature gate flag", + name: "cannot set both experimental flag and feature gate flag", + args: []string{ + "--experimental-stop-grpc-service-on-defrag=false", + "--feature-gates=StopGRPCServiceOnDefrag=true", + }, + expectErr: true, + }, + { + name: "ok to set different experimental flag and feature gate flag", + args: []string{ + "--experimental-stop-grpc-service-on-defrag=true", + "--feature-gates=DistributedTracing=false", + }, + expectedFeatures: map[featuregate.Feature]bool{ + features.DistributedTracing: false, + features.StopGRPCServiceOnDefrag: true, + }, + }, + { + name: "can set feature gate to true from experimental flag", + args: []string{ + "--experimental-stop-grpc-service-on-defrag=true", + }, + expectedFeatures: map[featuregate.Feature]bool{ + features.StopGRPCServiceOnDefrag: true, + }, + }, + { + name: "can set feature gate to false from experimental flag", args: []string{ "--experimental-stop-grpc-service-on-defrag=false", - fmt.Sprintf("--%s=DistributedTracing=true,StopGRPCServiceOnDefrag=true", embed.ServerFeatureGateFlagName), }, expectedFeatures: map[featuregate.Feature]bool{ - features.DistributedTracing: true, + features.StopGRPCServiceOnDefrag: false, + }, + }, + { + name: "can set feature gate to true from feature gate flag", + args: []string{ + "--feature-gates=StopGRPCServiceOnDefrag=true", + }, + expectedFeatures: map[featuregate.Feature]bool{ features.StopGRPCServiceOnDefrag: true, }, }, + { + name: "can set feature gate to false from feature gate flag", + args: []string{ + "--feature-gates=StopGRPCServiceOnDefrag=false", + }, + expectedFeatures: map[featuregate.Feature]bool{ + features.StopGRPCServiceOnDefrag: false, + }, + }, } for _, tc := range testCases { diff --git a/server/etcdmain/help.go b/server/etcdmain/help.go index a4fe80cea28e..668b1cde815e 100644 --- a/server/etcdmain/help.go +++ b/server/etcdmain/help.go @@ -312,7 +312,7 @@ Experimental feature: --experimental-snapshot-catchup-entries Number of entries for a slow follower to catch up after compacting the raft storage entries. --experimental-stop-grpc-service-on-defrag - Enable etcd gRPC service to stop serving client requests on defragmentation. + Enable etcd gRPC service to stop serving client requests on defragmentation. TO BE DEPRECATED, use '--feature-gates=StopGRPCServiceOnDefrag=true' instead. Unsafe feature: --force-new-cluster 'false' diff --git a/server/etcdserver/api/v3rpc/health.go b/server/etcdserver/api/v3rpc/health.go index e87140d17432..2861e11e6d3f 100644 --- a/server/etcdserver/api/v3rpc/health.go +++ b/server/etcdserver/api/v3rpc/health.go @@ -20,6 +20,7 @@ import ( healthpb "google.golang.org/grpc/health/grpc_health_v1" "go.etcd.io/etcd/server/v3/etcdserver" + "go.etcd.io/etcd/server/v3/features" ) const ( @@ -35,7 +36,7 @@ func newHealthNotifier(hs *health.Server, s *etcdserver.EtcdServer) notifier { if hs == nil { panic("unexpected nil gRPC health server") } - hc := &healthNotifier{hs: hs, lg: s.Logger(), stopGRPCServiceOnDefrag: s.Cfg.ExperimentalStopGRPCServiceOnDefrag} + hc := &healthNotifier{hs: hs, lg: s.Logger(), stopGRPCServiceOnDefrag: s.FeatureEnabled(features.StopGRPCServiceOnDefrag)} // set grpc health server as serving status blindly since // the grpc server will serve iff s.ReadyNotify() is closed. hc.startServe() diff --git a/server/features/etcd_features.go b/server/features/etcd_features.go index 28db2aaa2cb3..db3702ecbaa5 100644 --- a/server/features/etcd_features.go +++ b/server/features/etcd_features.go @@ -52,6 +52,11 @@ var ( DistributedTracing: {Default: false, PreRelease: featuregate.Alpha}, StopGRPCServiceOnDefrag: {Default: false, PreRelease: featuregate.Alpha}, } + // ExperimentalFlagToFeatureMap is the map from the cmd line flags of experimental features + // to their corresponding feature gates. + ExperimentalFlagToFeatureMap = map[string]featuregate.Feature{ + "experimental-stop-grpc-service-on-defrag": StopGRPCServiceOnDefrag, + } ) func NewDefaultServerFeatureGate(name string, lg *zap.Logger) featuregate.FeatureGate { diff --git a/tests/e2e/failover_test.go b/tests/e2e/failover_test.go index aec467fcc9cc..878603673485 100644 --- a/tests/e2e/failover_test.go +++ b/tests/e2e/failover_test.go @@ -93,6 +93,44 @@ func TestFailoverOnDefrag(t *testing.T) { expectedMinQPS: 20, expectedMinFailureRate: 0.25, }, + { + name: "defrag failover happy case with feature gate", + clusterOptions: []e2e.EPClusterOption{ + e2e.WithClusterSize(3), + e2e.WithServerFeatureGate("StopGRPCServiceOnDefrag", true), + e2e.WithGoFailEnabled(true), + }, + gRPCDialOptions: []grpc.DialOption{ + grpc.WithDisableServiceConfig(), + grpc.WithDefaultServiceConfig(`{"loadBalancingPolicy": "round_robin", "healthCheckConfig": {"serviceName": ""}}`), + }, + expectedMinQPS: 20, + expectedMaxFailureRate: 0.01, + }, + { + name: "defrag blocks one-third of requests with StopGRPCServiceOnDefrag feature gate set to false", + clusterOptions: []e2e.EPClusterOption{ + e2e.WithClusterSize(3), + e2e.WithServerFeatureGate("StopGRPCServiceOnDefrag", false), + e2e.WithGoFailEnabled(true), + }, + gRPCDialOptions: []grpc.DialOption{ + grpc.WithDisableServiceConfig(), + grpc.WithDefaultServiceConfig(`{"loadBalancingPolicy": "round_robin", "healthCheckConfig": {"serviceName": ""}}`), + }, + expectedMinQPS: 20, + expectedMinFailureRate: 0.25, + }, + { + name: "defrag blocks one-third of requests with StopGRPCServiceOnDefrag feature gate set to true and client health check disabled", + clusterOptions: []e2e.EPClusterOption{ + e2e.WithClusterSize(3), + e2e.WithServerFeatureGate("StopGRPCServiceOnDefrag", true), + e2e.WithGoFailEnabled(true), + }, + expectedMinQPS: 20, + expectedMinFailureRate: 0.25, + }, } for _, tc := range tcs { diff --git a/tests/framework/e2e/cluster.go b/tests/framework/e2e/cluster.go index cb8b35d7fd85..23d422aa3130 100644 --- a/tests/framework/e2e/cluster.go +++ b/tests/framework/e2e/cluster.go @@ -32,6 +32,7 @@ import ( "go.etcd.io/etcd/api/v3/etcdserverpb" clientv3 "go.etcd.io/etcd/client/v3" + "go.etcd.io/etcd/pkg/v3/featuregate" "go.etcd.io/etcd/pkg/v3/proxy" "go.etcd.io/etcd/server/v3/embed" "go.etcd.io/etcd/server/v3/etcdserver" @@ -358,6 +359,14 @@ func WithExperimentalStopGRPCServiceOnDefrag(stopGRPCServiceOnDefrag bool) EPClu } } +func WithServerFeatureGate(featureName string, val bool) EPClusterOption { + return func(c *EtcdProcessClusterConfig) { + if err := c.ServerConfig.ServerFeatureGate.(featuregate.MutableFeatureGate).Set(fmt.Sprintf("%s=%v", featureName, val)); err != nil { + panic(err) + } + } +} + func WithCompactionBatchLimit(limit int) EPClusterOption { return func(c *EtcdProcessClusterConfig) { c.ServerConfig.ExperimentalCompactionBatchLimit = limit } } diff --git a/tests/framework/integration/cluster.go b/tests/framework/integration/cluster.go index e3ef2a448ad3..19ba82570ff5 100644 --- a/tests/framework/integration/cluster.go +++ b/tests/framework/integration/cluster.go @@ -63,6 +63,7 @@ import ( "go.etcd.io/etcd/server/v3/etcdserver/api/v3lock" lockpb "go.etcd.io/etcd/server/v3/etcdserver/api/v3lock/v3lockpb" "go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc" + "go.etcd.io/etcd/server/v3/features" "go.etcd.io/etcd/server/v3/verify" framecfg "go.etcd.io/etcd/tests/v3/framework/config" "go.etcd.io/etcd/tests/v3/framework/testutils" @@ -266,34 +267,33 @@ func (c *Cluster) mustNewMember(t testutil.TB) *Member { m := MustNewMember(t, MemberConfig{ - Name: fmt.Sprintf("m%v", memberNumber), - MemberNumber: memberNumber, - AuthToken: c.Cfg.AuthToken, - PeerTLS: c.Cfg.PeerTLS, - ClientTLS: c.Cfg.ClientTLS, - QuotaBackendBytes: c.Cfg.QuotaBackendBytes, - BackendBatchInterval: c.Cfg.BackendBatchInterval, - MaxTxnOps: c.Cfg.MaxTxnOps, - MaxRequestBytes: c.Cfg.MaxRequestBytes, - SnapshotCount: c.Cfg.SnapshotCount, - SnapshotCatchUpEntries: c.Cfg.SnapshotCatchUpEntries, - GRPCKeepAliveMinTime: c.Cfg.GRPCKeepAliveMinTime, - GRPCKeepAliveInterval: c.Cfg.GRPCKeepAliveInterval, - GRPCKeepAliveTimeout: c.Cfg.GRPCKeepAliveTimeout, - GRPCAdditionalServerOptions: c.Cfg.GRPCAdditionalServerOptions, - ClientMaxCallSendMsgSize: c.Cfg.ClientMaxCallSendMsgSize, - ClientMaxCallRecvMsgSize: c.Cfg.ClientMaxCallRecvMsgSize, - UseIP: c.Cfg.UseIP, - UseBridge: c.Cfg.UseBridge, - UseTCP: c.Cfg.UseTCP, - EnableLeaseCheckpoint: c.Cfg.EnableLeaseCheckpoint, - LeaseCheckpointInterval: c.Cfg.LeaseCheckpointInterval, - LeaseCheckpointPersist: c.Cfg.LeaseCheckpointPersist, - WatchProgressNotifyInterval: c.Cfg.WatchProgressNotifyInterval, - ExperimentalMaxLearners: c.Cfg.ExperimentalMaxLearners, - DisableStrictReconfigCheck: c.Cfg.DisableStrictReconfigCheck, - CorruptCheckTime: c.Cfg.CorruptCheckTime, - ExperimentalStopGRPCServiceOnDefrag: c.Cfg.ExperimentalStopGRPCServiceOnDefrag, + Name: fmt.Sprintf("m%v", memberNumber), + MemberNumber: memberNumber, + AuthToken: c.Cfg.AuthToken, + PeerTLS: c.Cfg.PeerTLS, + ClientTLS: c.Cfg.ClientTLS, + QuotaBackendBytes: c.Cfg.QuotaBackendBytes, + BackendBatchInterval: c.Cfg.BackendBatchInterval, + MaxTxnOps: c.Cfg.MaxTxnOps, + MaxRequestBytes: c.Cfg.MaxRequestBytes, + SnapshotCount: c.Cfg.SnapshotCount, + SnapshotCatchUpEntries: c.Cfg.SnapshotCatchUpEntries, + GRPCKeepAliveMinTime: c.Cfg.GRPCKeepAliveMinTime, + GRPCKeepAliveInterval: c.Cfg.GRPCKeepAliveInterval, + GRPCKeepAliveTimeout: c.Cfg.GRPCKeepAliveTimeout, + GRPCAdditionalServerOptions: c.Cfg.GRPCAdditionalServerOptions, + ClientMaxCallSendMsgSize: c.Cfg.ClientMaxCallSendMsgSize, + ClientMaxCallRecvMsgSize: c.Cfg.ClientMaxCallRecvMsgSize, + UseIP: c.Cfg.UseIP, + UseBridge: c.Cfg.UseBridge, + UseTCP: c.Cfg.UseTCP, + EnableLeaseCheckpoint: c.Cfg.EnableLeaseCheckpoint, + LeaseCheckpointInterval: c.Cfg.LeaseCheckpointInterval, + LeaseCheckpointPersist: c.Cfg.LeaseCheckpointPersist, + WatchProgressNotifyInterval: c.Cfg.WatchProgressNotifyInterval, + ExperimentalMaxLearners: c.Cfg.ExperimentalMaxLearners, + DisableStrictReconfigCheck: c.Cfg.DisableStrictReconfigCheck, + CorruptCheckTime: c.Cfg.CorruptCheckTime, }) m.DiscoveryURL = c.Cfg.DiscoveryURL return m @@ -619,8 +619,6 @@ type MemberConfig struct { ExperimentalMaxLearners int DisableStrictReconfigCheck bool CorruptCheckTime time.Duration - - ExperimentalStopGRPCServiceOnDefrag bool } // MustNewMember return an inited member with the given name. If peerTLS is @@ -729,7 +727,6 @@ func MustNewMember(t testutil.TB, mcfg MemberConfig) *Member { if mcfg.CorruptCheckTime > time.Duration(0) { m.CorruptCheckTime = mcfg.CorruptCheckTime } - m.ExperimentalStopGRPCServiceOnDefrag = mcfg.ExperimentalStopGRPCServiceOnDefrag m.WarningApplyDuration = embed.DefaultWarningApplyDuration m.WarningUnaryRequestDuration = embed.DefaultWarningUnaryRequestDuration m.ExperimentalMaxLearners = membership.DefaultMaxLearners @@ -740,6 +737,7 @@ func MustNewMember(t testutil.TB, mcfg MemberConfig) *Member { m.GRPCServerRecorder = &grpctesting.GRPCRecorder{} m.Logger, m.LogObserver = memberLogger(t, mcfg.Name) + m.ServerFeatureGate = features.NewDefaultServerFeatureGate(m.Name, m.Logger) m.StrictReconfigCheck = !mcfg.DisableStrictReconfigCheck if err := m.listenGRPC(); err != nil {