From 501167623d1a9c537a4dfb508d480acc9b293b73 Mon Sep 17 00:00:00 2001 From: Yeray Borges Date: Wed, 24 Apr 2024 15:13:22 +0100 Subject: [PATCH] [#301] Replace leader-for-life leader election with leader-for-lease Fixes #301 --- config/manager/kustomization.yaml | 2 +- config/rbac/kustomization.yaml | 4 +- config/rbac/leader_election_role.yaml | 2 +- config/rbac/leader_election_role_binding.yaml | 6 +- main.go | 18 +- test/e2e/suite_test.go | 29 +- test/e2e/wildflyserver_test.go | 20 +- .../operator-framework/operator-lib/LICENSE | 201 ------------ .../operator-lib/internal/utils/utils.go | 44 --- .../operator-lib/leader/doc.go | 54 ---- .../operator-lib/leader/leader.go | 304 ------------------ vendor/modules.txt | 2 - 12 files changed, 47 insertions(+), 639 deletions(-) delete mode 100644 vendor/github.com/operator-framework/operator-lib/LICENSE delete mode 100644 vendor/github.com/operator-framework/operator-lib/internal/utils/utils.go delete mode 100644 vendor/github.com/operator-framework/operator-lib/leader/doc.go delete mode 100644 vendor/github.com/operator-framework/operator-lib/leader/leader.go diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 88fed04c0..1c2d9c0d1 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -12,5 +12,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: controller - newName: quay.io/wildfly/wildfly-operator + newName: localhost:5000/wildfly-operator newTag: latest diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml index b93e819f0..cdade3b17 100644 --- a/config/rbac/kustomization.yaml +++ b/config/rbac/kustomization.yaml @@ -2,8 +2,8 @@ resources: - service_account.yaml - role.yaml - role_binding.yaml -#- leader_election_role.yaml -#- leader_election_role_binding.yaml +- leader_election_role.yaml +- leader_election_role_binding.yaml # Comment the following 4 lines if you want to disable # the auth 
proxy (https://github.com/brancz/kube-rbac-proxy) # which protects your /metrics endpoint. diff --git a/config/rbac/leader_election_role.yaml b/config/rbac/leader_election_role.yaml index 6334cc51c..5d487455c 100644 --- a/config/rbac/leader_election_role.yaml +++ b/config/rbac/leader_election_role.yaml @@ -2,7 +2,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: - name: leader-election-role + name: wildfly-op-leader-election-role rules: - apiGroups: - "" diff --git a/config/rbac/leader_election_role_binding.yaml b/config/rbac/leader_election_role_binding.yaml index eed16906f..8a25327bc 100644 --- a/config/rbac/leader_election_role_binding.yaml +++ b/config/rbac/leader_election_role_binding.yaml @@ -5,8 +5,8 @@ metadata: roleRef: apiGroup: rbac.authorization.k8s.io kind: Role - name: leader-election-role + name: wildfly-op-leader-election-role subjects: - kind: ServiceAccount - name: default - namespace: system + name: wildfly-operator + diff --git a/main.go b/main.go index e55851f94..08c0414ef 100644 --- a/main.go +++ b/main.go @@ -17,19 +17,19 @@ limitations under the License. package main import ( - "context" "flag" "fmt" "github.com/RHsyseng/operator-utils/pkg/utils/openshift" "go.uber.org/zap/zapcore" "os" goruntime "runtime" + "time" + // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) // to ensure that exec-entrypoint and run can make use of them. 
_ "k8s.io/client-go/plugin/pkg/client/auth" route "github.com/openshift/api/route/v1" - "github.com/operator-framework/operator-lib/leader" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" wildflyv1alpha1 "github.com/wildfly/wildfly-operator/api/v1alpha1" "github.com/wildfly/wildfly-operator/controllers" @@ -90,13 +90,8 @@ func main() { } setupLog.Info("Watching namespace", "namespace", namespace) - if enableLeaderElection { - err = leader.Become(context.TODO(), "t3dv81741s.wildfly-operator-lock") - if err != nil { - setupLog.Error(err, "Failed to retry for leader lock") - os.Exit(1) - } - } + leaseDuration := 30 * time.Second + renewDeadline := 20 * time.Second setupLog.Info("Starting the Manager") mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ @@ -105,7 +100,10 @@ func main() { Port: 9443, Namespace: namespace, HealthProbeBindAddress: probeAddr, - LeaderElection: false, + LeaderElection: enableLeaderElection, + LeaderElectionID: "8kr4rta7hq.wildfly-operator-lock", + LeaseDuration: &leaseDuration, + RenewDeadline: &renewDeadline, Logger: ctrl.Log.WithName("manager"), }) if err != nil { diff --git a/test/e2e/suite_test.go b/test/e2e/suite_test.go index 1fb9eedb7..d49068bb7 100644 --- a/test/e2e/suite_test.go +++ b/test/e2e/suite_test.go @@ -27,7 +27,6 @@ import ( wildflyv1alpha1 "github.com/wildfly/wildfly-operator/api/v1alpha1" "github.com/wildfly/wildfly-operator/controllers" "io" - "io/ioutil" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" @@ -37,7 +36,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" - "k8s.io/utils/pointer" + "k8s.io/utils/ptr" "log" "os" "path/filepath" @@ -45,6 +44,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/yaml" "strings" "testing" "time" @@ -66,8 +66,8 @@ var ( testEnv 
*envtest.Environment operator *appsv1.Deployment serviceAccount *corev1.ServiceAccount - role *rbacv1.Role - roleBinding *rbacv1.RoleBinding + roles []*rbacv1.Role + roleBindings []*rbacv1.RoleBinding ) const ( @@ -94,7 +94,7 @@ var _ = BeforeSuite(func() { log.Printf("Start testing deploying the Operator") testEnv = &envtest.Environment{ - UseExistingCluster: pointer.BoolPtr(true), + UseExistingCluster: ptr.To(true), CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, CRDInstallOptions: envtest.CRDInstallOptions{CleanUpAfterUse: true}, ErrorIfCRDPathMissing: true, @@ -104,7 +104,7 @@ var _ = BeforeSuite(func() { cfg, k8sClient = initialSetup() // load resources for tests generated by kustomize - data, err := ioutil.ReadFile("../../dry-run/test-resources.yaml") + data, err := os.ReadFile("../../dry-run/test-resources.yaml") if err != nil { log.Fatal(err) } @@ -118,20 +118,29 @@ var _ = BeforeSuite(func() { case "ServiceAccount": serviceAccount = obj.(*corev1.ServiceAccount) case "Role": - role = obj.(*rbacv1.Role) + roles = append(roles, obj.(*rbacv1.Role)) case "RoleBinding": - roleBinding = obj.(*rbacv1.RoleBinding) + roleBindings = append(roleBindings, obj.(*rbacv1.RoleBinding)) default: // unexpected type, ignore } } + // Print the deployment YAML to the console to help debugging + deploymentYAML, err := yaml.Marshal(operator) + if err != nil { + fmt.Println("Error converting deployment to YAML:", err) + return + } + + // Print the deployment YAML + fmt.Println(string(deploymentYAML)) } else { // When we are running the test in local, we do not deploy the Operator // web run the manager directly from the test suite testEnv = &envtest.Environment{ - UseExistingCluster: pointer.BoolPtr(true), + UseExistingCluster: ptr.To(true), CRDDirectoryPaths: []string{ filepath.Join("..", "..", "config", "crd", "bases"), }, @@ -146,7 +155,7 @@ var _ = BeforeSuite(func() { os.Setenv("OPERATOR_NAME", "wildfly-operator") 
os.Setenv("JBOSS_BOOTABLE_HOME", "/opt/jboss/container/wildfly-bootable-jar-server") - // start the manager and reconciler + // start the manager and reconcile k8sManager, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ Scheme: scheme.Scheme, MetricsBindAddress: "0.0.0.0:8383", diff --git a/test/e2e/wildflyserver_test.go b/test/e2e/wildflyserver_test.go index 7273f1f14..dda8b4049 100644 --- a/test/e2e/wildflyserver_test.go +++ b/test/e2e/wildflyserver_test.go @@ -46,14 +46,19 @@ var _ = Describe("WildFly Server tests", func() { Expect(err).ToNot(HaveOccurred()) log.Printf("Creating the Role") - roleHolder := role.DeepCopy() - err = k8sClient.Create(ctx, roleHolder) - Expect(err).ToNot(HaveOccurred()) + for _, role := range roles { + log.Printf("Creating the Role %s", role.Name) + roleHolder := role.DeepCopy() + err = k8sClient.Create(ctx, roleHolder) + Expect(err).ToNot(HaveOccurred()) + } - log.Printf("Creating the Role Binding") - rbHolder := roleBinding.DeepCopy() - err = k8sClient.Create(ctx, rbHolder) - Expect(err).ToNot(HaveOccurred()) + for _, roleBindings := range roleBindings { + log.Printf("Creating the Role Binding %s", roleBindings.Name) + rbHolder := roleBindings.DeepCopy() + err = k8sClient.Create(ctx, rbHolder) + Expect(err).ToNot(HaveOccurred()) + } log.Printf("Creating the operator") opHolder := operator.DeepCopy() @@ -97,6 +102,7 @@ var _ = Describe("WildFly Server tests", func() { log.Printf("Creating %s resource", server.Name) Expect(k8sClient.Create(ctx, server)).Should(Succeed()) + WaitUntilReady(ctx, k8sClient, server) log.Printf("Scalling the server to 2 replicas") diff --git a/vendor/github.com/operator-framework/operator-lib/LICENSE b/vendor/github.com/operator-framework/operator-lib/LICENSE deleted file mode 100644 index 261eeb9e9..000000000 --- a/vendor/github.com/operator-framework/operator-lib/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND 
CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. 
For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/vendor/github.com/operator-framework/operator-lib/internal/utils/utils.go b/vendor/github.com/operator-framework/operator-lib/internal/utils/utils.go deleted file mode 100644 index ec1b4deb5..000000000 --- a/vendor/github.com/operator-framework/operator-lib/internal/utils/utils.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2020 The Operator-SDK Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package utils - -import ( - "fmt" - "io/ioutil" - "os" - "strings" -) - -// ErrNoNamespace indicates that a namespace could not be found for the current -// environment -var ErrNoNamespace = fmt.Errorf("namespace not found for current environment") - -var readSAFile = func() ([]byte, error) { - return ioutil.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/namespace") -} - -// GetOperatorNamespace returns the namespace the operator should be running in from -// the associated service account secret. -var GetOperatorNamespace = func() (string, error) { - nsBytes, err := readSAFile() - if err != nil { - if os.IsNotExist(err) { - return "", ErrNoNamespace - } - return "", err - } - ns := strings.TrimSpace(string(nsBytes)) - return ns, nil -} diff --git a/vendor/github.com/operator-framework/operator-lib/leader/doc.go b/vendor/github.com/operator-framework/operator-lib/leader/doc.go deleted file mode 100644 index 3eee3e353..000000000 --- a/vendor/github.com/operator-framework/operator-lib/leader/doc.go +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2020 The Operator-SDK Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -/* -Package leader implements Leader For Life, a simple alternative to lease-based -leader election. - -Both the Leader For Life and lease-based approaches to leader election are -built on the concept that each candidate will attempt to create a resource with -the same GVK, namespace, and name. Whichever candidate succeeds becomes the -leader. The rest receive "already exists" errors and wait for a new -opportunity. - -Leases provide a way to indirectly observe whether the leader still exists. The -leader must periodically renew its lease, usually by updating a timestamp in -its lock record. If it fails to do so, it is presumed dead, and a new election -takes place. If the leader is in fact still alive but unreachable, it is -expected to gracefully step down. A variety of factors can cause a leader to -fail at updating its lease, but continue acting as the leader before succeeding -at stepping down. - -In the "leader for life" approach, a specific Pod is the leader. Once -established (by creating a lock record), the Pod is the leader until it is -destroyed. There is no possibility for multiple pods to think they are the -leader at the same time. The leader does not need to renew a lease, consider -stepping down, or do anything related to election activity once it becomes the -leader. - -The lock record in this case is a ConfigMap whose OwnerReference is set to the -Pod that is the leader. When the leader is destroyed, the ConfigMap gets -garbage-collected, enabling a different candidate Pod to become the leader. - -Leader for Life requires that all candidate Pods be in the same Namespace. It -uses the downwards API to determine the pod name, as hostname is not reliable. 
-You should run it configured with: - -env: - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name -*/ -package leader diff --git a/vendor/github.com/operator-framework/operator-lib/leader/leader.go b/vendor/github.com/operator-framework/operator-lib/leader/leader.go deleted file mode 100644 index 5fdcfa3de..000000000 --- a/vendor/github.com/operator-framework/operator-lib/leader/leader.go +++ /dev/null @@ -1,304 +0,0 @@ -// Copyright 2020 The Operator-SDK Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package leader - -import ( - "context" - "fmt" - "os" - "time" - - "github.com/operator-framework/operator-lib/internal/utils" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/wait" - crclient "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/config" - logf "sigs.k8s.io/controller-runtime/pkg/log" -) - -// ErrNoNamespace indicates that a namespace could not be found for the current -// environment -var ErrNoNamespace = utils.ErrNoNamespace - -// podNameEnvVar is the constant for env variable POD_NAME -// which is the name of the current pod. -const podNameEnvVar = "POD_NAME" - -var readNamespace = utils.GetOperatorNamespace - -var log = logf.Log.WithName("leader") - -// maxBackoffInterval defines the maximum amount of time to wait between -// attempts to become the leader. 
-const maxBackoffInterval = time.Second * 16 - -// Option is a function that can modify Become's Config -type Option func(*Config) error - -// Config defines the configuration for Become -type Config struct { - Client crclient.Client -} - -func (c *Config) setDefaults() error { - if c.Client == nil { - config, err := config.GetConfig() - if err != nil { - return err - } - - client, err := crclient.New(config, crclient.Options{}) - if err != nil { - return err - } - c.Client = client - } - return nil -} - -// WithClient returns an Option that sets the Client used by Become -func WithClient(cl crclient.Client) Option { - return func(c *Config) error { - c.Client = cl - return nil - } -} - -// Become ensures that the current pod is the leader within its namespace. If -// run outside a cluster, it will skip leader election and return nil. It -// continuously tries to create a ConfigMap with the provided name and the -// current pod set as the owner reference. Only one can exist at a time with -// the same name, so the pod that successfully creates the ConfigMap is the -// leader. Upon termination of that pod, the garbage collector will delete the -// ConfigMap, enabling a different pod to become the leader. 
-func Become(ctx context.Context, lockName string, opts ...Option) error { - log.Info("Trying to become the leader.") - - config := Config{} - - for _, opt := range opts { - if err := opt(&config); err != nil { - return err - } - } - - if err := config.setDefaults(); err != nil { - return err - } - - ns, err := readNamespace() - if err != nil { - return err - } - - owner, err := myOwnerRef(ctx, config.Client, ns) - if err != nil { - return err - } - - // check for existing lock from this pod, in case we got restarted - existing := &corev1.ConfigMap{} - key := crclient.ObjectKey{Namespace: ns, Name: lockName} - err = config.Client.Get(ctx, key, existing) - - switch { - case err == nil: - for _, existingOwner := range existing.GetOwnerReferences() { - if existingOwner.Name == owner.Name { - log.Info("Found existing lock with my name. I was likely restarted.") - log.Info("Continuing as the leader.") - return nil - } - log.Info("Found existing lock", "LockOwner", existingOwner.Name) - } - case apierrors.IsNotFound(err): - log.Info("No pre-existing lock was found.") - default: - log.Error(err, "Unknown error trying to get ConfigMap") - return err - } - - cm := &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: lockName, - Namespace: ns, - OwnerReferences: []metav1.OwnerReference{*owner}, - }, - } - - // try to create a lock - backoff := time.Second - for { - err := config.Client.Create(ctx, cm) - switch { - case err == nil: - log.Info("Became the leader.") - return nil - case apierrors.IsAlreadyExists(err): - // refresh the lock so we use current leader - key := crclient.ObjectKey{Namespace: ns, Name: lockName} - if err := config.Client.Get(ctx, key, existing); err != nil { - log.Info("Leader lock configmap not found.") - continue // configmap got lost ... 
just wait a bit - } - - existingOwners := existing.GetOwnerReferences() - switch { - case len(existingOwners) != 1: - log.Info("Leader lock configmap must have exactly one owner reference.", "ConfigMap", existing) - case existingOwners[0].Kind != "Pod": - log.Info("Leader lock configmap owner reference must be a pod.", "OwnerReference", existingOwners[0]) - default: - leaderPod := &corev1.Pod{} - key = crclient.ObjectKey{Namespace: ns, Name: existingOwners[0].Name} - err = config.Client.Get(ctx, key, leaderPod) - switch { - case apierrors.IsNotFound(err): - log.Info("Leader pod has been deleted, waiting for garbage collection to remove the lock.") - case err != nil: - return err - case isPodEvicted(*leaderPod) && leaderPod.GetDeletionTimestamp() == nil: - log.Info("Operator pod with leader lock has been evicted.", "leader", leaderPod.Name) - log.Info("Deleting evicted leader.") - // Pod may not delete immediately, continue with backoff - err := config.Client.Delete(ctx, leaderPod) - if err != nil { - log.Error(err, "Leader pod could not be deleted.") - } - case isNotReadyNode(ctx, config.Client, leaderPod.Spec.NodeName): - log.Info("the status of the node where operator pod with leader lock was running has been 'notReady'") - log.Info("Deleting the leader.") - - //Mark the termainating status to the leaderPod and Delete the configmap lock - if err := deleteLeader(ctx, config.Client, leaderPod, existing); err != nil { - return err - } - - default: - log.Info("Not the leader. Waiting.") - } - } - - select { - case <-time.After(wait.Jitter(backoff, .2)): - if backoff < maxBackoffInterval { - backoff *= 2 - } - continue - case <-ctx.Done(): - return ctx.Err() - } - default: - log.Error(err, "Unknown error creating ConfigMap") - return err - } - } -} - -// myOwnerRef returns an OwnerReference that corresponds to the pod in which -// this code is currently running. 
-// It expects the environment variable POD_NAME to be set by the downwards API -func myOwnerRef(ctx context.Context, client crclient.Client, ns string) (*metav1.OwnerReference, error) { - myPod, err := getPod(ctx, client, ns) - if err != nil { - return nil, err - } - - owner := &metav1.OwnerReference{ - APIVersion: "v1", - Kind: "Pod", - Name: myPod.ObjectMeta.Name, - UID: myPod.ObjectMeta.UID, - } - return owner, nil -} - -func isPodEvicted(pod corev1.Pod) bool { - podFailed := pod.Status.Phase == corev1.PodFailed - podEvicted := pod.Status.Reason == "Evicted" - return podFailed && podEvicted -} - -// getPod returns a Pod object that corresponds to the pod in which the code -// is currently running. -// It expects the environment variable POD_NAME to be set by the downwards API. -func getPod(ctx context.Context, client crclient.Client, ns string) (*corev1.Pod, error) { - podName := os.Getenv(podNameEnvVar) - if podName == "" { - return nil, fmt.Errorf("required env %s not set, please configure downward API", podNameEnvVar) - } - - log.V(1).Info("Found podname", "Pod.Name", podName) - - pod := &corev1.Pod{} - key := crclient.ObjectKey{Namespace: ns, Name: podName} - err := client.Get(ctx, key, pod) - if err != nil { - log.Error(err, "Failed to get Pod", "Pod.Namespace", ns, "Pod.Name", podName) - return nil, err - } - - // .Get() clears the APIVersion and Kind, - // so we need to set them before returning the object. 
- pod.TypeMeta.APIVersion = "v1" - pod.TypeMeta.Kind = "Pod" - - log.V(1).Info("Found Pod", "Pod.Namespace", ns, "Pod.Name", pod.Name) - - return pod, nil -} - -func getNode(ctx context.Context, client crclient.Client, nodeName string, node *corev1.Node) error { - key := crclient.ObjectKey{Namespace: "", Name: nodeName} - err := client.Get(ctx, key, node) - if err != nil { - log.Error(err, "Failed to get Node", "Node.Name", nodeName) - return err - } - return nil -} - -func isNotReadyNode(ctx context.Context, client crclient.Client, nodeName string) bool { - leaderNode := &corev1.Node{} - if err := getNode(ctx, client, nodeName, leaderNode); err != nil { - return false - } - for _, condition := range leaderNode.Status.Conditions { - if condition.Type == corev1.NodeReady && condition.Status != corev1.ConditionTrue { - return true - } - } - return false - -} - -func deleteLeader(ctx context.Context, client crclient.Client, leaderPod *corev1.Pod, existing *corev1.ConfigMap) error { - err := client.Delete(ctx, leaderPod) - if err != nil { - log.Error(err, "Leader pod could not be deleted.") - return err - } - err = client.Delete(ctx, existing) - switch { - case apierrors.IsNotFound(err): - log.Info("ConfigMap has been deleted by prior operator.") - return err - case err != nil: - return err - } - return nil -} diff --git a/vendor/modules.txt b/vendor/modules.txt index eb34dd6b3..d2d44b9ae 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -165,8 +165,6 @@ github.com/openshift/api/console/v1 github.com/openshift/api/route/v1 # github.com/operator-framework/operator-lib v0.11.0 ## explicit; go 1.17 -github.com/operator-framework/operator-lib/internal/utils -github.com/operator-framework/operator-lib/leader # github.com/pkg/errors v0.9.1 ## explicit github.com/pkg/errors