From fe83015286d7258cb8b96379ad713febf1b89688 Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Thu, 2 Apr 2026 06:28:11 +0200 Subject: [PATCH 1/5] HDDS-14896. Use separate container for clients in rolling-upgrade suite --- .../dist/src/main/compose/upgrade/README.md | 1 + .../upgrade/compose/ha/docker-compose.yaml | 22 +++++++++++++ .../main/compose/upgrade/compose/ha/load.sh | 1 + .../dist/src/main/compose/upgrade/test.sh | 4 +-- .../dist/src/main/compose/upgrade/testlib.sh | 10 +++--- .../callbacks/common/callback.sh | 32 +++++++++---------- .../callbacks/common/callback.sh | 4 +-- .../upgrades/rolling-upgrade/driver.sh | 14 ++------ 8 files changed, 52 insertions(+), 36 deletions(-) diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/README.md b/hadoop-ozone/dist/src/main/compose/upgrade/README.md index 3565d0e067ea..6138a7b38305 100644 --- a/hadoop-ozone/dist/src/main/compose/upgrade/README.md +++ b/hadoop-ozone/dist/src/main/compose/upgrade/README.md @@ -100,6 +100,7 @@ Docker compose cluster definitions to be used in upgrade testing are defined in - `SCM`: The name of the SCM container to run robot tests from. - This can be passed as the first argument to `execute_robot_test`. - This allows the same tests to work with and without SCM HA. + - `CLIENT`: The dedicated client container used to run Robot tests in rolling-upgrade HA. ### Testing New Versions diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml b/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml index 20c93493a14d..7fef08f9b763 100644 --- a/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml +++ b/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml @@ -281,6 +281,28 @@ services: - *krb5conf - *ozone-dir - *transformation + upgrade_client: + command: ["sleep", "1000000"] + dns_search: . + env_file: + - docker-config + - ../../../common/security.conf + extra_hosts: + - "om1:10.9.0.11" + - "om2:10.9.0.12" + - "om3:10.9.0.13" + - "scm1.org:10.9.0.14" + - "scm2.org:10.9.0.15" + - "scm3.org:10.9.0.16" + image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} + hostname: upgrade-client + networks: + net: + volumes: + - *keytabs + - *krb5conf + - ../../../..:/opt/hadoop + - ../../../..:/opt/ozone networks: net: diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/load.sh b/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/load.sh index b1063db05440..39622ecd21db 100755 --- a/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/load.sh +++ b/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/load.sh @@ -24,6 +24,7 @@ set +u source "$TEST_DIR/testlib.sh" export COMPOSE_FILE="$TEST_DIR/compose/ha/docker-compose.yaml" +export CLIENT=upgrade_client export OM_SERVICE_ID=omservice export SECURITY_ENABLED="true" diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/test.sh b/hadoop-ozone/dist/src/main/compose/upgrade/test.sh index 014d1ba81e21..3c079cd884fd 100755 --- a/hadoop-ozone/dist/src/main/compose/upgrade/test.sh +++ b/hadoop-ozone/dist/src/main/compose/upgrade/test.sh @@ -33,7 +33,7 @@ RESULT_DIR="$ALL_RESULT_DIR" create_results_dir # This is the version of Ozone that should use the runner image to run the # code that was built. Other versions will pull images from docker hub. -run_test ha non-rolling-upgrade 2.1.0 "$OZONE_CURRENT_VERSION" +# run_test ha non-rolling-upgrade 2.1.0 "$OZONE_CURRENT_VERSION" # run_test ha non-rolling-upgrade 2.0.0 "$OZONE_CURRENT_VERSION" #run_test non-ha non-rolling-upgrade 1.4.1 "$OZONE_CURRENT_VERSION" #run_test ha non-rolling-upgrade 1.4.1 "$OZONE_CURRENT_VERSION" @@ -43,7 +43,7 @@ run_test ha non-rolling-upgrade 2.1.0 "$OZONE_CURRENT_VERSION" # run_test om-ha non-rolling-upgrade 1.1.0 "$OZONE_CURRENT_VERSION" # Rolling upgrade test, commented out for now -# run_test ha rolling-upgrade "$OZONE_CURRENT_VERSION" "$OZONE_CURRENT_VERSION" + run_test ha rolling-upgrade "$OZONE_CURRENT_VERSION" "$OZONE_CURRENT_VERSION" generate_report "upgrade" "$ALL_RESULT_DIR" diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/testlib.sh b/hadoop-ozone/dist/src/main/compose/upgrade/testlib.sh index a418a96423a2..c83fc2850144 100755 --- a/hadoop-ozone/dist/src/main/compose/upgrade/testlib.sh +++ b/hadoop-ozone/dist/src/main/compose/upgrade/testlib.sh @@ -117,16 +117,18 @@ run_test() { ## @description Generates data on the cluster. ## @param The prefix to use for data generated. -## @param All parameters after the first one are passed directly to the robot command, +## @param The container to run the robot test in. +## @param All remaining parameters are passed directly to the robot command, ## see https://robotframework.org/robotframework/latest/RobotFrameworkUserGuide.html#all-command-line-options generate() { - execute_robot_test "$SCM" -N "${OUTPUT_NAME}-generate-${1}" -v PREFIX:"$1" ${@:2} upgrade/generate.robot + execute_robot_test "${2}" -N "${OUTPUT_NAME}-generate-${1}" -v PREFIX:"$1" ${@:3} upgrade/generate.robot } ## @description Validates that data exists on the cluster. ## @param The prefix of the data to be validated. -## @param All parameters after the first one are passed directly to the robot command, +## @param The container to run the robot test in. +## @param All remaining parameters are passed directly to the robot command, ## see https://robotframework.org/robotframework/latest/RobotFrameworkUserGuide.html#all-command-line-options validate() { - execute_robot_test "$SCM" -N "${OUTPUT_NAME}-validate-${1}" -v PREFIX:"$1" ${@:2} upgrade/validate.robot + execute_robot_test "${2}" -N "${OUTPUT_NAME}-validate-${1}" -v PREFIX:"$1" ${@:3} upgrade/validate.robot } diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/non-rolling-upgrade/callbacks/common/callback.sh b/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/non-rolling-upgrade/callbacks/common/callback.sh index c2159ca0cd24..e4a419f00df1 100755 --- a/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/non-rolling-upgrade/callbacks/common/callback.sh +++ b/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/non-rolling-upgrade/callbacks/common/callback.sh @@ -21,40 +21,40 @@ source "$TEST_DIR"/testlib.sh with_old_version() { execute_robot_test "$SCM" -N "${OUTPUT_NAME}-check-finalization" --include finalized upgrade/check-finalization.robot - generate old1 - validate old1 + generate old1 "$SCM" + validate old1 "$SCM" } with_this_version_pre_finalized() { # No check for pre-finalized status here, because the release may not have # added layout features to OM or HDDS. - validate old1 + validate old1 "$SCM" # HDDS-6261: overwrite the same keys intentionally - generate old1 --exclude create-volume-and-bucket + generate old1 "$SCM" --exclude create-volume-and-bucket - generate new1 - validate new1 + generate new1 "$SCM" + validate new1 "$SCM" } with_old_version_downgraded() { execute_robot_test "$SCM" -N "${OUTPUT_NAME}-check-finalization" --include finalized upgrade/check-finalization.robot - validate old1 - validate new1 + validate old1 "$SCM" + validate new1 "$SCM" - generate old2 - validate old2 + generate old2 "$SCM" + validate old2 "$SCM" # HDDS-6261: overwrite the same keys again to trigger the precondition check # that exists <= 1.1.0 OM - generate old1 --exclude create-volume-and-bucket + generate old1 "$SCM" --exclude create-volume-and-bucket } with_this_version_finalized() { execute_robot_test "$SCM" -N "${OUTPUT_NAME}-check-finalization" --include finalized upgrade/check-finalization.robot - validate old1 - validate new1 - validate old2 + validate old1 "$SCM" + validate new1 "$SCM" + validate old2 "$SCM" - generate new2 - validate new2 + generate new2 "$SCM" + validate new2 "$SCM" } diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/callbacks/common/callback.sh b/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/callbacks/common/callback.sh index 5160b4ef384b..e5e693e41b51 100644 --- a/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/callbacks/common/callback.sh +++ b/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/callbacks/common/callback.sh @@ -20,9 +20,9 @@ source "$TEST_DIR"/testlib.sh ### CALLBACKS ### before_service_restart() { - generate "generate-${SERVICE}" + generate "generate-${SERVICE}" "$CLIENT" } after_service_restart() { - validate "generate-${SERVICE}" + validate "generate-${SERVICE}" "$CLIENT" } diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/driver.sh b/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/driver.sh index 73663dc53614..1621e2b42d90 100755 --- a/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/driver.sh +++ b/hadoop-ozone/dist/src/main/compose/upgrade/upgrades/rolling-upgrade/driver.sh @@ -27,6 +27,7 @@ set -u : "${OZONE_UPGRADE_TO}" : "${TEST_DIR}" : "${SCM}" +: "${CLIENT}" : "${OZONE_CURRENT_VERSION}" set +u @@ -43,17 +44,6 @@ rolling_restart_service() { # Stop service stop_containers "${SERVICE}" - # Check if this SCM container is running, as during a rolling upgrade it does stop-start one-by-one and - # we want to run write/read tests while one service is unavailable. Choose SCM (the container where the generate and - # validate robot tests are running) considering availability. - if [[ "$(docker inspect -f '{{.State.Running}}' "ha-${SCM}-1" 2>/dev/null)" != "true" ]]; then - local fallback_scm - fallback_scm="$(docker-compose --project-directory="$TEST_DIR/compose/ha" config --services | grep scm | grep -v "^${SCM}$" | head -n1)" - if [[ -n "$fallback_scm" ]]; then - export SCM="$fallback_scm" - fi - fi - # The data generation/validation is doing S3 API tests, so skip it in case the S3 gateway is updated # TODO find a better solution if [[ ${SERVICE} != "s3g" ]]; then @@ -129,4 +119,4 @@ OUTPUT_NAME="${OZONE_UPGRADE_FROM}-${OZONE_UPGRADE_TO}-3-finalized" # TODO Add validation for pre-finalized state # Sends commands to finalize OM and SCM. -execute_robot_test "$SCM" -N "${OUTPUT_NAME}-finalize" upgrade/finalize.robot +execute_robot_test "$CLIENT" -N "${OUTPUT_NAME}-finalize" upgrade/finalize.robot From a4db747031af57e4c77f38f58a328a3e5a32f281 Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Thu, 2 Apr 2026 16:29:20 +0200 Subject: [PATCH 2/5] Add address to client container --- .../dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml b/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml index 7fef08f9b763..02d5ec4351fe 100644 --- a/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml +++ b/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml @@ -298,6 +298,7 @@ services: hostname: upgrade-client networks: net: + ipv4_address: 10.9.0.24 volumes: - *keytabs - *krb5conf From d8e657cb0db3c85929afdf70b712b1a6272c4390 Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Thu, 2 Apr 2026 17:42:19 +0200 Subject: [PATCH 3/5] Change client hostname --- .../src/main/compose/upgrade/compose/ha/docker-compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml b/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml index 02d5ec4351fe..2259c2710345 100644 --- a/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml +++ b/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml @@ -295,7 +295,7 @@ services: - "scm2.org:10.9.0.15" - "scm3.org:10.9.0.16" image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} - hostname: upgrade-client + hostname: scm networks: net: ipv4_address: 10.9.0.24 From fe1b9dab7a0008cbe55895c5a61329f5b2568365 Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Thu, 2 Apr 2026 20:29:33 +0200 Subject: [PATCH 4/5] Fix upgrade_client command --- .../src/main/compose/upgrade/compose/ha/docker-compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml b/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml index 2259c2710345..426ea3bbadad 100644 --- a/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml +++ b/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-compose.yaml @@ -282,7 +282,7 @@ services: - *ozone-dir - *transformation upgrade_client: - command: ["sleep", "1000000"] + command: ["tail", "-f", "/dev/null"] dns_search: . env_file: - docker-config From ef4faac6814f0f5e3395f10bc5e6f47e59c0530d Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Fri, 3 Apr 2026 00:01:00 +0200 Subject: [PATCH 5/5] Comment out test run --- hadoop-ozone/dist/src/main/compose/upgrade/test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/test.sh b/hadoop-ozone/dist/src/main/compose/upgrade/test.sh index 3c079cd884fd..014d1ba81e21 100755 --- a/hadoop-ozone/dist/src/main/compose/upgrade/test.sh +++ b/hadoop-ozone/dist/src/main/compose/upgrade/test.sh @@ -33,7 +33,7 @@ RESULT_DIR="$ALL_RESULT_DIR" create_results_dir # This is the version of Ozone that should use the runner image to run the # code that was built. Other versions will pull images from docker hub. -# run_test ha non-rolling-upgrade 2.1.0 "$OZONE_CURRENT_VERSION" +run_test ha non-rolling-upgrade 2.1.0 "$OZONE_CURRENT_VERSION" # run_test ha non-rolling-upgrade 2.0.0 "$OZONE_CURRENT_VERSION" #run_test non-ha non-rolling-upgrade 1.4.1 "$OZONE_CURRENT_VERSION" #run_test ha non-rolling-upgrade 1.4.1 "$OZONE_CURRENT_VERSION" @@ -43,7 +43,7 @@ RESULT_DIR="$ALL_RESULT_DIR" create_results_dir # run_test om-ha non-rolling-upgrade 1.1.0 "$OZONE_CURRENT_VERSION" # Rolling upgrade test, commented out for now - run_test ha rolling-upgrade "$OZONE_CURRENT_VERSION" "$OZONE_CURRENT_VERSION" +# run_test ha rolling-upgrade "$OZONE_CURRENT_VERSION" "$OZONE_CURRENT_VERSION" generate_report "upgrade" "$ALL_RESULT_DIR"