
Commit a29a2a0

Fix couple multicluster flaky tests (#562)
# Summary

A couple of small fixes that we found together with @lsierant during a recent investigation. Details are provided in the PR comments.

## Proof of Work

Passing CI.

## Checklist

- [ ] Have you linked a jira ticket and/or is the ticket in the title?
- [ ] Have you checked whether your jira ticket required DOCSP changes?
- [x] Have you added a changelog file? Use the `skip-changelog` label if not needed; refer to the [Changelog files and Release Notes](https://github.com/mongodb/mongodb-kubernetes/blob/master/CONTRIBUTING.md#changelog-files-and-release-notes) section in CONTRIBUTING.md for more details.
1 parent: 8a156c9

File tree: 3 files changed, 14 additions & 31 deletions

.evergreen-tasks.yml

Lines changed: 0 additions & 1 deletion
```diff
@@ -1116,7 +1116,6 @@ tasks:
 
   - name: e2e_multi_cluster_validation
     tags: [ "patch-run" ]
-    exec_timeout_secs: 1000
     commands:
       - func: e2e_test
 
```

docker/mongodb-kubernetes-tests/tests/multicluster_appdb/multicluster_appdb_disaster_recovery.py

Lines changed: 2 additions & 9 deletions
```diff
@@ -152,11 +152,12 @@ def test_delete_om_and_appdb_statefulset_in_failed_cluster(
         # delete OM to simulate losing Ops Manager application
         # this is only for testing unavailability of the OM application, it's not testing losing OM cluster
         # we don't delete here any additional resources (secrets, configmaps) that are required for a proper OM recovery testing
+        # it will be immediately recreated by the operator, so we cannot check if it was deleted
         delete_statefulset(
             ops_manager.namespace,
             ops_manager.name,
             propagation_policy="Background",
-            api_client=central_cluster_client,
+            api_client=get_member_cluster_api_client(OM_MEMBER_CLUSTER_NAME),
         )
     except kubernetes.client.ApiException as e:
         if e.status != 404:
@@ -184,14 +185,6 @@ def statefulset_is_deleted(namespace: str, name: str, api_client=Optional[kubern
             else:
                 raise e
 
-    run_periodically(
-        lambda: statefulset_is_deleted(
-            ops_manager.namespace,
-            ops_manager.name,
-            api_client=get_member_cluster_api_client(OM_MEMBER_CLUSTER_NAME),
-        ),
-        timeout=120,
-    )
     run_periodically(
         lambda: statefulset_is_deleted(
             ops_manager.namespace,
```
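These two fixes address separate sources of flakiness: the OM StatefulSet lives in the OM member cluster, so deleting it through `central_cluster_client` targeted the wrong cluster, and the first `run_periodically` deletion check is removed because the operator recreates the StatefulSet immediately, so "is it deleted" can never be observed reliably. For context, here is a minimal sketch of how the two helpers referenced in the diff plausibly fit together; the real implementations live in this test file and the repo's test utilities, and the read call, default arguments, and timeout handling below are assumptions, not the repo's code:

```python
import time
from typing import Callable, Optional

import kubernetes.client


def statefulset_is_deleted(namespace: str, name: str, api_client: Optional[kubernetes.client.ApiClient] = None) -> bool:
    # Illustrative sketch: True once the StatefulSet is gone. The API server
    # answers 404 for a deleted object; anything else is re-raised.
    try:
        kubernetes.client.AppsV1Api(api_client=api_client).read_namespaced_stateful_set(name, namespace)
        return False
    except kubernetes.client.ApiException as e:
        if e.status == 404:
            return True
        raise e


def run_periodically(fn: Callable[[], bool], timeout: int, interval: float = 1.0) -> None:
    # Illustrative sketch: poll fn until it returns True or the timeout elapses.
    deadline = time.time() + timeout
    while time.time() < deadline:
        if fn():
            return
        time.sleep(interval)
    raise TimeoutError(f"condition not met within {timeout}s")
```

A caller then polls against the cluster that actually hosts the StatefulSet, e.g. `run_periodically(lambda: statefulset_is_deleted(ns, name, api_client=member_client), timeout=120)`.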

scripts/funcs/multicluster

Lines changed: 12 additions & 21 deletions
```diff
@@ -128,26 +128,6 @@ EOF
   sleep 1
 
   local service_account_name="operator-tests-multi-cluster-service-account"
-
-  local secret_name
-  secret_name="$(kubectl --context "${CENTRAL_CLUSTER}" get secret -n "${NAMESPACE}" | { grep "${service_account_name}" || test $? = 1; } | awk '{ print $1 }')"
-  if [[ "${secret_name}" == "" ]]; then
-    secret_name="${service_account_name}-token-secret"
-    create_service_account_token_secret "${CENTRAL_CLUSTER}" "${service_account_name}" "${secret_name}"
-  fi
-
-  local central_cluster_token
-  central_cluster_token="$(kubectl --context "${CENTRAL_CLUSTER}" get secret "${secret_name}" -o jsonpath='{ .data.token}' -n "${NAMESPACE}" | base64 -d)"
-  echo "Creating Multi Cluster configuration secret"
-
-  configuration_params=(
-    "--from-literal=central_cluster=${CENTRAL_CLUSTER}"
-  )
-
-  configuration_params+=(
-    "--from-literal=${CENTRAL_CLUSTER}=${central_cluster_token}"
-  )
-
   local secret_name
   secret_name="$(kubectl --context "${CENTRAL_CLUSTER}" get secret -n "${NAMESPACE}" | { grep "${service_account_name}" || test $? = 1; } | awk '{ print $1 }')"
   if [[ "${secret_name}" == "" ]]; then
@@ -175,7 +155,18 @@ EOF
      create_service_account_token_secret "${member_cluster}" "${service_account_name}" "${secret_name}"
    fi
 
-    member_cluster_token="$(kubectl --context "${member_cluster}" get secret "${secret_name}" -o jsonpath='{ .data.token}' -n "${NAMESPACE}" | base64 -d)"
+    # Retry up to 10 times if .data.token is not yet populated
+    for _ in {1..10}; do
+      member_cluster_token="$(kubectl --context "${member_cluster}" get secret "${secret_name}" -o jsonpath='{ .data.token }' -n "${NAMESPACE}" | base64 -d)"
+      if [[ -n "${member_cluster_token}" ]]; then
+        break
+      fi
+      sleep 1
+    done
+    if [[ -z "${member_cluster_token}" ]]; then
+      echo "Error: .data.token not populated for secret ${secret_name} in cluster ${member_cluster}"
+      exit 1
+    fi
     # for 2 cluster tests central cluster is the first member, so we cannot add this as it will result in duplicate key and error in create secret
     if [[ "${member_cluster}" != "${CENTRAL_CLUSTER}" ]]; then
       configuration_params+=(
```
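The first hunk drops a verbatim duplicate of the central-cluster token block; the second fixes the actual race. Kubernetes populates `.data.token` of a service-account token Secret asynchronously after the Secret object is created, so an immediate read can return an empty string. The shell fix polls until the token appears; the same wait-until-populated pattern, sketched in Python with the kubernetes client (the function name and signature below are illustrative, not from the repo):

```python
import base64
import time

import kubernetes.client


def wait_for_service_account_token(namespace: str, secret_name: str, api_client=None, attempts: int = 10) -> str:
    # Illustrative sketch: the token controller fills in .data.token
    # asynchronously after a service-account token Secret is created,
    # so poll until it appears instead of reading it once.
    core = kubernetes.client.CoreV1Api(api_client=api_client)
    for _ in range(attempts):
        secret = core.read_namespaced_secret(secret_name, namespace)
        token = (secret.data or {}).get("token")
        if token:
            return base64.b64decode(token).decode("utf-8")
        time.sleep(1)
    raise TimeoutError(f".data.token not populated for secret {secret_name}")
```

Bounding the retries and failing loudly, as the shell version does with `exit 1`, keeps a genuinely missing token from surfacing later as an unrelated secret-creation error.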
