diff --git a/RELEASE.md b/RELEASE.md index 71358ac633..95117e7ed3 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,29 @@ +# Version 1.17.3 + +## Major Features and Improvements + +* N/A + +## Breaking Changes + +* N/A + +### For Pipeline Authors + +* N/A + +### For Component Authors + +* N/A + +## Deprecations + +* N/A + +## Bug Fixes and Other Changes + +* Patched security vulnerabilities in the TFX docker image and fixed Vertex AI tests. + # Version 1.17.2 ## Major Features and Improvements diff --git a/patches/tfx.patch b/patches/tfx.patch index d29d0fc312..7e395a3047 100644 --- a/patches/tfx.patch +++ b/patches/tfx.patch @@ -57,11 +57,11 @@ index de61db74f..d45f53ae7 100644 time-machine==2.16.0 tinycss2==1.3.0 diff --git a/tfx/dependencies.py b/tfx/dependencies.py -index 19489315a..05b464767 100644 +index ba0ddab4c..d46a8b74e 100644 --- a/tfx/dependencies.py +++ b/tfx/dependencies.py @@ -108,31 +108,7 @@ def make_required_install_packages(): - "tensorflow" + select_constraint(">=2.17.0,<2.18"), + "tensorflow" + select_constraint(">=2.17.1,<2.18"), # pylint: enable=line-too-long "tensorflow-hub>=0.15.0,<0.16", - "tensorflow-data-validation" @@ -90,50 +90,21 @@ index 19489315a..05b464767 100644 - git_master="@git+https://github.com/tensorflow/tfx-bsl@master", - ), ] - - -@@ -173,7 +149,6 @@ def make_extra_packages_docker_image(): + + +@@ -172,8 +148,6 @@ def make_extra_packages_docker_image(): + "kfp-pipeline-spec>=0.3.0,<0.4.0", "mmh>=2.2,<3", "python-snappy>=0.7", - # Required for tfx/examples/penguin/penguin_utils_cloud_tuner.py +- # Required for tfx/examples/penguin/penguin_utils_cloud_tuner.py - "tensorflow-cloud>=0.1,<0.2", "tensorflow-io>=0.9.0, <=0.24.0", ] - -@@ -246,7 +221,6 @@ def make_extra_packages_examples(): + +@@ -245,6 +219,4 @@ def make_extra_packages_examples(): + "scikit-learn>=1.0,<2", # LINT.ThenChange( # examples/penguin/experimental/penguin_pipeline_sklearn_gcp.py) - # Required for tfx/examples/penguin/penguin_utils_cloud_tuner.py +- # 
Required for tfx/examples/penguin/penguin_utils_cloud_tuner.py - "tensorflow-cloud>=0.1,<0.2", ] - - -diff --git a/tfx/tools/docker/requirements.txt b/tfx/tools/docker/requirements.txt -index d7b3e960e..7e7862afc 100644 ---- a/tfx/tools/docker/requirements.txt -+++ b/tfx/tools/docker/requirements.txt -@@ -300,8 +300,6 @@ tenacity==9.0.0 - tensorboard==2.17.1 - tensorboard-data-server==0.7.2 - tensorflow==2.17.1 --tensorflow-cloud==0.1.16 --tensorflow-data-validation==1.17.0 - tensorflow-datasets==4.9.3 - tensorflow-decision-forests==1.10.1 - tensorflow-estimator==2.15.0 -@@ -312,7 +310,6 @@ tensorflow-metadata>=1.17.1 - # tensorflow-ranking==0.5.5 - tensorflow-serving-api==2.17.1 - tensorflow-text==2.17.0 --tensorflow-transform>=1.17.0 - tensorflow_model_analysis>=0.48.0 - tensorflowjs==4.17.0 - tensorstore==0.1.66 -@@ -320,7 +317,6 @@ termcolor==2.5.0 - terminado==0.18.1 - text-unidecode==1.3 - tflite-support==0.4.4 --tfx-bsl>=1.17.1 - threadpoolctl==3.5.0 - time-machine==2.16.0 - tinycss2==1.3.0 diff --git a/tfx/dependencies.py b/tfx/dependencies.py index 19489315ab..ba0ddab4c7 100644 --- a/tfx/dependencies.py +++ b/tfx/dependencies.py @@ -105,7 +105,7 @@ def make_required_install_packages(): # Pip might stuck in a TF 1.15 dependency although there is a working # dependency set with TF 2.x without the sync. # pylint: disable=line-too-long - "tensorflow" + select_constraint(">=2.17.0,<2.18"), + "tensorflow" + select_constraint(">=2.17.1,<2.18"), # pylint: enable=line-too-long "tensorflow-hub>=0.15.0,<0.16", "tensorflow-data-validation" diff --git a/tfx/tools/docker/Dockerfile b/tfx/tools/docker/Dockerfile index a5201f1e34..73c3d85fc1 100644 --- a/tfx/tools/docker/Dockerfile +++ b/tfx/tools/docker/Dockerfile @@ -17,61 +17,117 @@ ARG BASE_IMAGE # Apache beam version to get Beam binaries. ARG BEAM_VERSION -FROM gcr.io/tfx-oss-public/tfx_base:py310-20230921 as wheel-builder -# docker build command should be run under root directory of github checkout. 
-ENV TFX_DIR=/tfx -ADD . ${TFX_DIR}/src -WORKDIR ${TFX_DIR} +FROM ${BASE_IMAGE} AS base-with-gcc13 +RUN /opt/conda/bin/conda install -y --override-channels -c conda-forge \ + gcc_linux-64=13 \ + gxx_linux-64=13 \ + binutils_linux-64=2.40 \ + ld_impl_linux-64=2.40 + +ENV CC=/opt/conda/bin/x86_64-conda-linux-gnu-gcc +ENV CXX=/opt/conda/bin/x86_64-conda-linux-gnu-g++ +ENV LD=/opt/conda/bin/x86_64-conda-linux-gnu-ld +ENV AR=/opt/conda/bin/x86_64-conda-linux-gnu-ar +ENV NM=/opt/conda/bin/x86_64-conda-linux-gnu-nm +ENV OBJCOPY=/opt/conda/bin/x86_64-conda-linux-gnu-objcopy +ENV OBJDUMP=/opt/conda/bin/x86_64-conda-linux-gnu-objdump +ENV RANLIB=/opt/conda/bin/x86_64-conda-linux-gnu-ranlib +ENV STRIP=/opt/conda/bin/x86_64-conda-linux-gnu-strip + +ENV BAZEL_COMPILER=gcc + +FROM base-with-gcc13 AS wheel-builder +ARG BASE_IMAGE +ARG BEAM_VERSION +# Whether to use C++ wheels (tfdv, tfx_bsl) from the host cache (/tmp/wheels). +# If true, cached wheels are used if available (this saves time when debugging the docker build issues). +# If false, wheels are built from source even if they exist in the cache. +ARG USE_CPP_WHEELS_FROM_TEMP=false +# Whether to clear the persistent build caches (/tmp/wheels and bazel cache). +ARG CLEAN_CPP_TEMP_CACHE=false # Specify what version of dependent libraries will be used. See dependencies.py. 
ARG TFX_DEPENDENCY_SELECTOR ENV TFX_DEPENDENCY_SELECTOR=${TFX_DEPENDENCY_SELECTOR} + ENV USE_BAZEL_VERSION=6.5.0 -RUN python -m pip install --upgrade pip wheel setuptools==70.0.0 tomli +RUN apt-get update && apt-get install -y curl git && \ + (find /opt/conda/bin -name "python3-config" | head -n 1 | xargs -I {} ln -sf {} /usr/bin/python-config) && \ + (find /opt/conda/bin -name "python3-config" | head -n 1 | xargs -I {} ln -sf {} /opt/conda/bin/python-config) +RUN mkdir -p /usr/local/lib/bazel/bin && \ + curl -fLO https://github.com/bazelbuild/bazel/releases/download/${USE_BAZEL_VERSION}/bazel-${USE_BAZEL_VERSION}-linux-x86_64 && \ + mv bazel-${USE_BAZEL_VERSION}-linux-x86_64 /usr/local/lib/bazel/bin/bazel && \ + chmod +x /usr/local/lib/bazel/bin/bazel && \ + ln -sf /usr/local/lib/bazel/bin/bazel /usr/local/bin/bazel +ENV PATH="/usr/local/lib/bazel/bin:${PATH}" + + +COPY . /tfx/src/ +WORKDIR /tfx/src/ + +# 1. C++ Wheels (tfdv, tfx_bsl) - Normal production build path +RUN if [ "$USE_CPP_WHEELS_FROM_TEMP" = "false" ]; then \ + echo "Rebuild of C++ wheels (tfdv, tfx_bsl)..." && \ + cp tfx/tools/docker/build_tfdv_wheels.sh /tmp/ && \ + cp tfx/tools/docker/build_tfx_bsl_wheels.sh /tmp/ && \ + cp tfx/tools/docker/*.patch /tmp/ && \ + mkdir -p /tfx/src/dist_wheels && \ + bash /tmp/build_tfdv_wheels.sh /tfx/src/dist_wheels && \ + bash /tmp/build_tfx_bsl_wheels.sh /tfx/src/dist_wheels ; \ + fi + +# 2. C++ Wheels (tfdv, tfx_bsl) - Cached Path to avoid any CPP rebuilds +RUN --mount=type=cache,target=/tmp/wheels --mount=type=cache,target=/root/.cache/bazel \ + if [ "$USE_CPP_WHEELS_FROM_TEMP" = "true" ]; then \ + echo "Re-use cached build of C++ wheels (tfdv, tfx_bsl)..." && \ + cp tfx/tools/docker/build_tfdv_wheels.sh /tmp/ && \ + cp tfx/tools/docker/build_tfx_bsl_wheels.sh /tmp/ && \ + cp tfx/tools/docker/*.patch /tmp/ && \ + if [ ! -f /tmp/wheels/tensorflow_data_validation-*.whl ]; then bash /tmp/build_tfdv_wheels.sh /tmp/wheels; fi && \ + if [ ! 
-f /tmp/wheels/tfx_bsl-*.whl ]; then bash /tmp/build_tfx_bsl_wheels.sh /tmp/wheels; fi && \ + mkdir -p /tfx/src/dist_wheels && \ + cp /tmp/wheels/tensorflow_data_validation-*.whl /tfx/src/dist_wheels/ && \ + cp /tmp/wheels/tfx_bsl-*.whl /tfx/src/dist_wheels/ ; \ + fi # TODO(b/175089240): clean up conditional checks on whether ml-pipelines-sdk is # built after TFX versions <= 0.25 are no longer eligible for cherry-picks. -RUN cd ${TFX_DIR}/src; \ +RUN echo "Building TFX and SDK wheels from source..." && \ + mkdir -p /tfx/src/dist_wheels && \ if [ -e "package_build" ]; then \ - bash -x package_build/initialize.sh; \ - cd package_build/ml-pipelines-sdk; \ - CFLAGS=$(/usr/bin/python-config --cflags) \ - python setup.py bdist_wheel; \ - cd ../../package_build/tfx; \ - CFLAGS=$(/usr/bin/python-config --cflags) \ - python setup.py bdist_wheel; \ - cd ../..; \ - MLSDK_WHEEL=$(find dist -name "ml_pipelines_sdk-*.whl"); \ - TFX_WHEEL=$(find dist -name "tfx-*.whl"); \ + bash -x package_build/initialize.sh && \ + CFLAGS=$(python-config --cflags) python package_build/ml-pipelines-sdk/setup.py bdist_wheel && \ + CFLAGS=$(python-config --cflags) python package_build/tfx/setup.py bdist_wheel && \ + cp package_build/ml-pipelines-sdk/dist/*.whl /tfx/src/dist_wheels/ && \ + cp package_build/tfx/dist/*.whl /tfx/src/dist_wheels/ ; \ else \ - CFLAGS=$(/usr/bin/python-config --cflags) \ - python setup.py bdist_wheel; \ - MLSDK_WHEEL=; \ - TFX_WHEEL=$(find dist -name "tfx-*.whl"); \ - fi; \ - if [ "${TFX_DEPENDENCY_SELECTOR}" = "NIGHTLY" ]; then \ - CFLAGS=$(/usr/bin/python-config --cflags) \ - python -m pip install \ - --extra-index-url https://pypi-nightly.tensorflow.org/simple \ - ${MLSDK_WHEEL} ${TFX_WHEEL}[docker-image] -c tfx/tools/docker/requirements.txt -c tfx/tools/docker/build_constraints.txt; \ - else \ - CFLAGS=$(/usr/bin/python-config --cflags) \ - python -m pip install ${MLSDK_WHEEL} ${TFX_WHEEL}[docker-image] -c tfx/tools/docker/requirements.txt -c 
tfx/tools/docker/build_constraints.txt; \ - fi; - -# We need to name this step for the next COPY --from command. -FROM apache/beam_python3.10_sdk:${BEAM_VERSION} as beam-worker + CFLAGS=$(python-config --cflags) python setup.py bdist_wheel && \ + cp dist/*.whl /tfx/src/dist_wheels/ ; \ + fi -# Build stage to extend DLVM image. -FROM ${BASE_IMAGE} as install +# --- Stage 2: beam-worker --- +FROM apache/beam_python3.10_sdk:${BEAM_VERSION} AS beam-worker -# Additional Python packages which will be installed. +# --- Stage 3: install (Final) --- +FROM base-with-gcc13 AS install +ARG BASE_IMAGE ARG ADDITIONAL_PACKAGES - -# Specify what version of dependent libraries will be used. See dependencies.py. +ENV TF_CPP_MIN_LOG_LEVEL=3 +# Disables Legacy "Virtual" Devices +ENV TF_XLA_FLAGS="--tf_xla_enable_xla_devices=false" ARG TFX_DEPENDENCY_SELECTOR ENV TFX_DEPENDENCY_SELECTOR=${TFX_DEPENDENCY_SELECTOR} +ENV TF_USE_LEGACY_KERAS=1 + +# 1. Apply OS security updates and install required system libraries +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + libsnappy-dev \ + libcurl4-openssl-dev \ + && apt-get clean && \ + rm -rf /var/lib/apt/lists/* # Copy from image matching installed version of 'apache-beam'. 
COPY --from=beam-worker /opt/apache/beam /opt/apache/beam @@ -84,60 +140,48 @@ ENTRYPOINT ["/opt/apache/beam/boot"] LABEL maintainer="tensorflow-extended-dev@googlegroups.com" -RUN sed -i 's/python3/python/g' /usr/bin/pip - -RUN python -m pip install --upgrade pip setuptools==70.0.0 wheel - -RUN CURRENT_DIR="$(pwd)" && \ - mkdir -p /usr/local/lib/bazel/bin && \ - cd "/usr/local/lib/bazel/bin" && \ - curl -fLO https://releases.bazel.build/6.5.0/release/bazel-6.5.0-linux-x86_64 && \ - chmod +x bazel-6.5.0-linux-x86_64 && \ - ln -sf /usr/local/lib/bazel/bin/bazel-6.5.0-linux-x86_64 /usr/local/bin/bazel && \ - cd "$CURRENT_DIR" - -# Build tensorflow-data-validation wheels from source with patch -RUN /opt/conda/bin/conda install -y -c conda-forge \ - gcc_linux-64=13 \ - gxx_linux-64=13 \ - binutils_linux-64=2.40 \ - ld_impl_linux-64=2.40 - - ENV CC=/opt/conda/bin/x86_64-conda-linux-gnu-gcc - - ENV CXX=/opt/conda/bin/x86_64-conda-linux-gnu-g++ - - ENV LD=/opt/conda/bin/x86_64-conda-linux-gnu-ld - -COPY tfx/tools/docker/build_tfdv_wheels.sh /tmp/ -COPY tfx/tools/docker/tfdv.patch /tmp/ -RUN mkdir -p /tmp/wheels && \ - bash /tmp/build_tfdv_wheels.sh /tmp/wheels - -# Build tfx-bsl wheels from source with patch -COPY tfx/tools/docker/build_tfx_bsl_wheels.sh /tmp/ -COPY tfx/tools/docker/tfx_bsl.patch /tmp/ -RUN bash /tmp/build_tfx_bsl_wheels.sh /tmp/wheels - -# Copy constraint file and wheels -COPY tfx/tools/docker/build_constraints.txt /tmp/ -COPY tfx/tools/docker/wheels/*.whl /tmp/wheels/ - -# Install tensorflow-data-validation, tfx-bsl, tensorflow-model-analysis, tensorflow-transform, and tensorflow-cloud first -RUN PIP_CONSTRAINT=/tmp/build_constraints.txt python -m pip install /tmp/wheels/tensorflow_data_validation-*.whl /tmp/wheels/tfx_bsl-*.whl /tmp/wheels/tensorflow_model_analysis-*.whl /tmp/wheels/tensorflow_transform-*.whl /tmp/wheels/tensorflow_cloud-*.whl - -COPY --from=wheel-builder /tfx/src/dist/*.whl /tfx/src/dist/ -WORKDIR /tfx/src - -RUN MLSDK_WHEEL=$(find 
dist -name "ml_pipelines_sdk-*.whl"); \ - TFX_WHEEL=$(find dist -name "tfx-*.whl"); \ - if [ "${TFX_DEPENDENCY_SELECTOR}" = "NIGHTLY" ]; then \ - PIP_CONSTRAINT=/tmp/build_constraints.txt python -m pip install \ - --extra-index-url https://pypi-nightly.tensorflow.org/simple \ - ${MLSDK_WHEEL} ${TFX_WHEEL}[docker-image] ${ADDITIONAL_PACKAGES} ; \ +COPY --from=wheel-builder /tfx/src /tfx/src + +# 2. Upgrade core python build tools and remove unused vulnerable components +# setuptools==78.1.1 is required for the pkg_resources shim (needed by apache-beam), +# while providing the security fix for CVE-2025-47273. +RUN python -m pip install --upgrade pip setuptools==78.1.1 wheel \ + -c /tfx/src/tfx/tools/docker/requirements.txt \ + -c /tfx/src/tfx/tools/docker/build_constraints.txt + +# 3. Main installation: consolidated to a single RUN for single-pass resolution. +# We explicitly include setuptools==78.1.1 as a top-level requirement here +# to ensure the resolver doesn't downgrade it or use a broken version +# that lacks the pkg_resources shim (needed by apache-beam). 
+RUN if [ "${TFX_DEPENDENCY_SELECTOR}" = "NIGHTLY" ]; then \ + python -m pip install --no-cache-dir \ + --extra-index-url https://pypi-nightly.tensorflow.org/simple \ + -c /tfx/src/tfx/tools/docker/requirements.txt \ + -c /tfx/src/tfx/tools/docker/build_constraints.txt \ + /tfx/src/dist_wheels/*.whl \ + /tfx/src/tfx/tools/docker/wheels/*.whl \ + "$(find /tfx/src/dist_wheels/ \( -name 'tfx_dev-*.whl' -o -name 'tfx-*.whl' \) | head -n 1)[docker-image]" \ + tf_keras setuptools==78.1.1 ${ADDITIONAL_PACKAGES} ; \ else \ - PIP_CONSTRAINT=/tmp/build_constraints.txt python -m pip install ${MLSDK_WHEEL} ${TFX_WHEEL}[docker-image] \ - ${ADDITIONAL_PACKAGES} ; \ + python -m pip install --no-cache-dir \ + -c /tfx/src/tfx/tools/docker/requirements.txt \ + -c /tfx/src/tfx/tools/docker/build_constraints.txt \ + /tfx/src/dist_wheels/*.whl \ + /tfx/src/tfx/tools/docker/wheels/*.whl \ + "$(find /tfx/src/dist_wheels/ \( -name 'tfx_dev-*.whl' -o -name 'tfx-*.whl' \) | head -n 1)[docker-image]" \ + tf_keras setuptools==78.1.1 ${ADDITIONAL_PACKAGES} ; \ fi && \ - echo "Installed python packages:\n" && python -m pip list && \ + (python -m pip uninstall -y jupyter jupyter-server jupyterlab notebook nbconvert jaraco-context jaraco.context || true) + +# 4. Final OS cleanup: remove Go toolchain and other unused tools to fix Go-related CVEs +# Many High/Critical CVEs are in the Go stdlib/toolchain which we don't need at runtime. +# We use a loop to avoid build failures if a package name is not found in the repo. 
+RUN for pkg in golang-go golang git binutils wget policykit-1 packagekit gnupg2 gcc-12; do apt-get purge -y $pkg || echo "Package $pkg not found, skipping"; done && \ + rm -rf /usr/local/go && \ + rm -rf /opt/apache/beam && \ + find /opt/conda/lib/python3.10/site-packages/apache_beam -type f -name "boot" -delete || true && \ + apt-get autoremove -y && \ + apt-get clean + +RUN echo "Installed python packages:\n" && python -m pip list && \ echo "Setuptools version:" && python -c "import setuptools; print(setuptools.__version__)" diff --git a/tfx/tools/docker/Dockerfile.beam_discovery b/tfx/tools/docker/Dockerfile.beam_discovery new file mode 100644 index 0000000000..00024b982d --- /dev/null +++ b/tfx/tools/docker/Dockerfile.beam_discovery @@ -0,0 +1,19 @@ +FROM python:3.10-slim@sha256:c299e10e0070171113f9a1f109dd05e7e634fa94589b056e0e87bb22b2b382a2 + +WORKDIR /src + +COPY tfx/dependencies.py . +COPY tfx/tools/docker/discover_versions.py . +COPY tfx/tools/docker/requirements.txt tfx/tools/docker/ +COPY tfx/tools/docker/build_constraints.txt tfx/tools/docker/ + +ENV PIP_CONSTRAINT="/src/tfx/tools/docker/build_constraints.txt" + +RUN pip install --upgrade pip setuptools wheel tomli \ + -c tfx/tools/docker/requirements.txt \ + -c tfx/tools/docker/build_constraints.txt + +# Ensure dependencies.py can be imported +ENV PYTHONPATH="/src" + +ENTRYPOINT ["python3", "discover_versions.py"] diff --git a/tfx/tools/docker/build_constraints.txt b/tfx/tools/docker/build_constraints.txt index cc90fd32be..4984755fbe 100644 --- a/tfx/tools/docker/build_constraints.txt +++ b/tfx/tools/docker/build_constraints.txt @@ -1,2 +1,2 @@ -setuptools==70.0.0 -wheel==0.43.0 +setuptools==78.1.1 +wheel==0.45.1 diff --git a/tfx/tools/docker/build_docker_image.sh b/tfx/tools/docker/build_docker_image.sh index fe6f1ec258..1aec19e1e3 100755 --- a/tfx/tools/docker/build_docker_image.sh +++ b/tfx/tools/docker/build_docker_image.sh @@ -16,6 +16,32 @@ # Convenience script to build TFX docker image. 
set -ex +# Parse arguments for USE_CPP_WHEELS_FROM_TEMP and other custom flags +USE_CPP_WHEELS_FROM_TEMP=false +CLEAN_CPP_TEMP_CACHE=false +NEW_ARGS=() +while [[ $# -gt 0 ]]; do + case $1 in + --no-rebuild|--skip-rebuild) + USE_CPP_WHEELS_FROM_TEMP=true + shift + ;; + --clean-cache) + CLEAN_CPP_TEMP_CACHE=true + shift + ;; + *) + NEW_ARGS+=("$1") + shift + ;; + esac +done +set -- "${NEW_ARGS[@]}" + +export BEAM_VERSION=${BEAM_VERSION} +export BASE_IMAGE=${BASE_IMAGE} + + DOCKER_IMAGE_REPO=${DOCKER_IMAGE_REPO:-"tensorflow/tfx"} DOCKER_IMAGE_TAG=${DOCKER_IMAGE_TAG:-"latest"} DOCKER_FILE=${DOCKER_FILE:-"Dockerfile"} @@ -23,6 +49,7 @@ DOCKER_FILE=${DOCKER_FILE:-"Dockerfile"} TFX_DEPENDENCY_SELECTOR=${TFX_DEPENDENCY_SELECTOR:-""} echo "Env for TFX_DEPENDENCY_SELECTOR is set as ${TFX_DEPENDENCY_SELECTOR}" + # Apply the patch before building echo "Applying tfx.patch..." if [[ -f patches/tfx.patch ]]; then @@ -33,6 +60,19 @@ else patch_applied=false fi +# Programmatically remove pins for components built from source or downloaded as wheels +# This replicates the logic previously in tfx.patch for requirements.txt and constraints files +for f in nightly_test_constraints.txt test_constraints.txt tfx/tools/docker/requirements.txt; do + if [[ -f "$f" ]]; then + echo "Removing pins from $f..." 
+ # Remove exact version pins or range constraints for the following packages + sed -i '/tensorflow-cloud/d' "$f" + sed -i '/tensorflow-data-validation/d' "$f" + sed -i '/tensorflow-transform/d' "$f" + sed -i '/tfx-bsl/d' "$f" + fi +done + mkdir -p tfx/tools/docker/wheels # Download tensorflow-model-analysis wheel @@ -58,26 +98,40 @@ function _get_tf_version_of_image() { docker run --rm --entrypoint=python ${img} -c 'import tensorflow as tf; print(tf.__version__)' } +function _get_beam_version_of_image() { + local img="$1" + docker run --rm --entrypoint=python ${img} -c 'import apache_beam as beam; print(beam.version.__version__)' +} + # Base image to extend: This should be a deep learning image with a compatible # TensorFlow version. See # https://cloud.google.com/ai-platform/deep-learning-containers/docs/choosing-container # for possible images to use here. -# Use timestmap-rand for tag, to avoid collision of concurrent runs. -wheel_builder_tag="tfx-wheel-builder:$(date +%s)-$RANDOM" -# Run docker build command to build the wheel-builder first. We have to extract -# TF version from it. -docker build --target wheel-builder\ - -t ${wheel_builder_tag} \ - -f tfx/tools/docker/${DOCKER_FILE} \ - --build-arg TFX_DEPENDENCY_SELECTOR=${TFX_DEPENDENCY_SELECTOR} \ - . "$@" +if [ "$CLEAN_CPP_TEMP_CACHE" = "true" ]; then + echo "Pruning Docker builder cache..." + docker builder prune -a -f +fi -# TensorFlow current TFX code depends on here and use that instead. -if [[ -n "$BASE_IMAGE" ]]; then - echo "Using override base image $BASE_IMAGE" +# Use timestamp-rand for tag, to avoid collision of concurrent runs. +if [[ -z "$BASE_IMAGE" || -z "$BEAM_VERSION" ]]; then + echo "Discovering versions using lightweight container..." + discovery_tag="tfx-beam-discovery:$(date +%s)-$RANDOM" + docker build -t ${discovery_tag} -f tfx/tools/docker/Dockerfile.beam_discovery . 
+ discovery_output=$(docker run --rm ${discovery_tag}) + tf_version=$(echo "${discovery_output}" | cut -d'|' -f1) + beam_version_detected=$(echo "${discovery_output}" | cut -d'|' -f2) + docker rmi ${discovery_tag} + + if [[ -z "$BEAM_VERSION" ]]; then + BEAM_VERSION=${beam_version_detected} + fi + echo "Detected Beam version as ${BEAM_VERSION}" else - tf_version=$(_get_tf_version_of_image "${wheel_builder_tag}") + echo "Using override base image $BASE_IMAGE" +fi + +if [[ -z "$BASE_IMAGE" ]]; then arr_version=(${tf_version//./ }) echo "Detected TensorFlow version as ${tf_version}" DLVM_REPO=gcr.io/deeplearning-platform-release @@ -108,14 +162,15 @@ else echo "Using compatible tf2-gpu image $BASE_IMAGE as base" fi -beam_version=$(docker run --rm --entrypoint=python ${wheel_builder_tag} -c 'import apache_beam as beam; print(beam.version.__version__)') # Run docker build command. -docker build -t ${DOCKER_IMAGE_REPO}:${DOCKER_IMAGE_TAG} \ +docker build --progress=plain -t ${DOCKER_IMAGE_REPO}:${DOCKER_IMAGE_TAG} \ -f tfx/tools/docker/${DOCKER_FILE} \ - --build-arg "TFX_DEPENDENCY_SELECTOR=${TFX_DEPENDENCY_SELECTOR}" \ --build-arg "BASE_IMAGE=${BASE_IMAGE}" \ - --build-arg "BEAM_VERSION=${beam_version}" \ + --build-arg "BEAM_VERSION=${BEAM_VERSION}" \ --build-arg "ADDITIONAL_PACKAGES=${ADDITIONAL_PACKAGES}" \ + --build-arg USE_CPP_WHEELS_FROM_TEMP=${USE_CPP_WHEELS_FROM_TEMP} \ + --build-arg CLEAN_CPP_TEMP_CACHE=${CLEAN_CPP_TEMP_CACHE} \ + --build-arg "TFX_DEPENDENCY_SELECTOR=${TFX_DEPENDENCY_SELECTOR}" \ . "$@" if [[ -n "${installed_tf_version}" && ! "${installed_tf_version}" =~ rc ]]; then @@ -131,7 +186,6 @@ fi # Remove the temp image. 
-docker rmi ${wheel_builder_tag} # Cleanup: revert patch and remove downloaded wheel if [[ "${patch_applied}" == "true" ]]; then diff --git a/tfx/tools/docker/discover_versions.py b/tfx/tools/docker/discover_versions.py new file mode 100644 index 0000000000..51af3a9d16 --- /dev/null +++ b/tfx/tools/docker/discover_versions.py @@ -0,0 +1,58 @@ +import re +import subprocess +import json +import sys + +def discover(): + # In the discovery container, we expect dependencies.py to be in /src + sys.path.append('/src') + + try: + import dependencies + except ImportError: + # Fallback for different layouts + sys.path.append('/src/tfx') + import dependencies + + packages = dependencies.make_required_install_packages() + + # Find TensorFlow requirement + tf_req = [p for p in packages if p.startswith("tensorflow") and not p.startswith("tensorflow-")][0] + # Find Beam requirement + beam_req = [p for p in packages if "apache-beam" in p][0] + + # Detect TF version for base image selection + tf_version_match = re.findall(r"([0-9]+\.[0-9]+\.[0-9]+|[0-9]+\.[0-9]+)", tf_req) + tf_version = tf_version_match[0] if tf_version_match else "unknown" + + # Detect Beam version using pip dry-run report for reliability + beam_version = "unknown" + try: + # Use --report which returns JSON, much more reliable than parsing text + cmd = ["python3", "-m", "pip", "install", beam_req, "--dry-run", "--report", "-", "-c", "/src/tfx/tools/docker/requirements.txt", "-c", "/src/tfx/tools/docker/build_constraints.txt"] + output = subprocess.check_output(cmd, stderr=subprocess.PIPE).decode() + report = json.loads(output) + for pkg in report.get("install", []): + if pkg.get("metadata", {}).get("name") == "apache-beam": + beam_version = pkg.get("metadata", {}).get("version") + break + except Exception: + # Fallback to text parsing + try: + cmd = ["python3", "-m", "pip", "install", beam_req, "--dry-run", "-c", "/src/tfx/tools/docker/requirements.txt", "-c", "/src/tfx/tools/docker/build_constraints.txt"] + 
output = subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode() + match = re.search(r"apache-beam-([0-9]+\.[0-9]+\.[0-9]+)", output) + if match: + beam_version = match.group(1) + except Exception: + pass + + # Final fallback to regex on the requirement string itself if all else fails + if beam_version == "unknown": + beam_matches = re.findall(r"([0-9]+\.[0-9]+\.[0-9]+|[0-9]+\.[0-9]+)", beam_req) + beam_version = beam_matches[0] if beam_matches else "2.53.0" + + print(f"{tf_version}|{beam_version}") + +if __name__ == "__main__": + discover() diff --git a/tfx/tools/docker/requirements.txt b/tfx/tools/docker/requirements.txt index d7b3e960ec..3040b57a28 100644 --- a/tfx/tools/docker/requirements.txt +++ b/tfx/tools/docker/requirements.txt @@ -9,9 +9,10 @@ absl-py==1.4.0 aiohappyeyeballs==2.4.3 aiosignal==1.3.1 +aiohttp==3.13.5 alembic==1.13.3 annotated-types==0.7.0 -anyio==4.6.0 +anyio==4.13.0 apache-airflow==2.10.3 apache-beam==2.50.0 apispec==6.6.1 @@ -32,10 +33,10 @@ bleach==6.1.0 blinker==1.8.2 cachelib==0.9.0 cachetools==5.5.0 -certifi==2024.8.30 +certifi==2024.7.4 cffi==1.17.1 cfgv==3.4.0 -charset-normalizer==3.4.0 +charset-normalizer==3.3.2 chex==0.1.86 click==8.1.3 clickclick==20.10.2 @@ -49,7 +50,7 @@ cramjam==2.8.4 crcmod==1.7 cron-descriptor==1.4.5 croniter==3.0.3 -cryptography==44.0.1 +cryptography==45.0.7 Cython==3.0.11 debugpy==1.8.7 decorator==5.1.1 @@ -57,6 +58,7 @@ defusedxml==0.7.1 Deprecated==1.2.14 dill==0.3.1.1 distlib==0.3.9 +distro==1.9.0 dm-tree==0.1.8 dnspython==2.7.0 docker==7.1.0 @@ -85,13 +87,13 @@ fqdn==1.5.1 frozenlist==1.4.1 fsspec==2024.9.0 gast==0.6.0 -google-api-core==2.21.0 +google-api-core==2.23.0 google-api-python-client==1.12.11 google-apitools==0.5.31 -google-auth==2.35.0 -google-auth-httplib2==0.1.1 +google-auth==2.49.1 +google-auth-httplib2>=0.1.1 google-auth-oauthlib==1.2.1 -google-cloud-aiplatform==1.70.0 +google-cloud-aiplatform==1.144.0 google-cloud-bigquery==3.26.0 google-cloud-bigquery-storage==2.26.0 
google-cloud-bigtable==2.26.0 @@ -111,34 +113,33 @@ google-crc32c==1.6.0 google-pasta==0.2.0 google-re2==1.1.20240702 google-resumable-media==2.7.2 -googleapis-common-protos==1.65.0 +google-genai==1.68.0 +googleapis-common-protos==1.63.0 greenlet==3.1.1 grpc-google-iam-v1==0.13.1 grpc-interceptor==0.15.4 -grpcio==1.66.2 -grpcio-status==1.48.2 +grpcio==1.62.3 +grpcio-status==1.62.3 gunicorn==23.0.0 -h11==0.14.0 +h11==0.16.0 h5py==3.12.1 hdfs==2.7.3 -httpcore==1.0.6 +httpcore==1.0.9 httplib2==0.22.0 -httpx==0.27.2 +httpx==0.28.1 identify==2.6.1 -idna==3.10 +idna==3.7 importlib_metadata==8.4.0 importlib_resources==6.4.5 inflection==0.5.1 iniconfig==2.0.0 -ipykernel==6.29.5 -ipython-genutils==0.2.0 -ipywidgets==7.8.4 isoduration==20.11.0 itsdangerous==2.2.0 +immutabledict==4.2.0 jax==0.4.23 jaxlib==0.4.23 jedi==0.19.1 -Jinja2==3.1.4 +jinja2==3.1.6 jmespath==1.0.1 joblib==1.4.2 Js2Py==0.74 @@ -147,16 +148,6 @@ jsonpickle==3.3.0 jsonpointer==3.0.0 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 -jupyter-events==0.10.0 -jupyter-lsp==2.2.5 -jupyter_client==8.6.3 -jupyter_core==5.7.2 -jupyter_server==2.13.0 -jupyter_server_terminals==0.5.3 -jupyterlab==4.2.5 -jupyterlab_pygments==0.3.0 -jupyterlab_server==2.27.3 -jupyterlab_widgets==1.1.10 tf-keras==2.17.0 keras==3.6.0 keras-tuner==1.4.7 @@ -172,19 +163,20 @@ linkify-it-py==2.0.3 lockfile==0.12.2 lxml==5.3.0 Mako==1.3.5 -Markdown==3.7 +markdown>=3.6 markdown-it-py==3.0.0 MarkupSafe==3.0.1 marshmallow==3.22.0 marshmallow-oneofschema==3.1.1 marshmallow-sqlalchemy==0.28.2 matplotlib-inline==0.1.7 +matplotlib==3.7.1 mdit-py-plugins==0.4.2 mdurl==0.1.2 methodtools==0.4.7 mistune==3.0.2 ml-dtypes==0.3.2 -ml-metadata>=1.17.1 +ml-metadata==1.17.0 mmh==2.2 more-itertools==10.5.0 msgpack==1.1.0 @@ -192,13 +184,10 @@ multidict==6.1.0 mysql-connector-python==9.1.0 mysqlclient==2.2.4 nbclient==0.10.0 -nbconvert==7.16.4 nbformat==5.10.4 nest-asyncio==1.6.0 -nltk==3.9.1 +nltk>=3.9.4 nodeenv==1.9.1 -notebook==7.2.2 
-notebook_shim==0.2.4 numpy==1.24.4 oauth2client==4.1.3 oauthlib==3.2.2 @@ -215,7 +204,7 @@ opt_einsum==3.4.0 optax==0.2.2 orbax-checkpoint==0.5.16 ordered-set==4.1.0 -orjson==3.10.6 +orjson==3.11.8 overrides==7.7.0 packaging==23.2 pandas==1.5.3 @@ -225,7 +214,7 @@ pathspec==0.12.1 pendulum==3.0.0 pexpect==4.9.0 pickleshare==0.7.5 -pillow==10.4.0 +pillow>=10.3.0 platformdirs==4.3.6 pluggy==1.5.0 portalocker==2.10.1 @@ -241,18 +230,20 @@ proto-plus==1.24.0 protobuf==4.21.12 psutil==6.0.0 ptyprocess==0.7.0 +pyarrow==10.0.1 pyarrow-hotfix==0.6 -pyasn1==0.6.1 +pyasn1>=0.6.0 pyasn1_modules==0.4.1 +pyopenssl==26.0.0 pybind11==2.13.6 pycparser==2.22 pydantic==2.9.2 pydantic_core==2.23.4 pydot==1.4.2 pyfarmhash==0.3.2 -Pygments==2.18.0 +pygments==2.19.1 pyjsparser==2.7.1 -PyJWT==2.9.0 +pyjwt>=2.8.0 pymongo==4.10.1 pyparsing==3.1.4 pytest==8.0.0 @@ -262,7 +253,7 @@ python-dateutil==2.9.0.post0 python-json-logger==2.0.7 python-nvd3==0.16.0 python-slugify==8.0.4 -python-snappy==0.7.3 +python-snappy==0.7.1 pytz==2024.2 PyYAML==6.0.2 pyzmq==26.2.0 @@ -293,45 +284,46 @@ soupsieve==2.6 SQLAlchemy==1.4.54 SQLAlchemy-JSONField==1.0.2 SQLAlchemy-Utils==0.41.2 -sqlparse==0.5.1 +sqlparse>=0.5.0 struct2tensor>=0.48.1 tabulate==0.9.0 tenacity==9.0.0 +statsmodels==0.14.0 tensorboard==2.17.1 tensorboard-data-server==0.7.2 tensorflow==2.17.1 -tensorflow-cloud==0.1.16 -tensorflow-data-validation==1.17.0 tensorflow-datasets==4.9.3 tensorflow-decision-forests==1.10.1 tensorflow-estimator==2.15.0 tensorflow-hub==0.15.0 +tensorflow-cloud==0.1.16 tensorflow-io==0.24.0 tensorflow-io-gcs-filesystem==0.24.0 -tensorflow-metadata>=1.17.1 +tensorflow-metadata==1.17.1 # tensorflow-ranking==0.5.5 tensorflow-serving-api==2.17.1 tensorflow-text==2.17.0 -tensorflow-transform>=1.17.0 -tensorflow_model_analysis>=0.48.0 +tensorflow-revived-types==0.1.1 +tensorflow-model-analysis==0.48.0 +tensorflow-transform==1.17.0 tensorflowjs==4.17.0 tensorstore==0.1.66 termcolor==2.5.0 terminado==0.18.1 
text-unidecode==1.3 tflite-support==0.4.4 -tfx-bsl>=1.17.1 +tfx-bsl==1.17.1 threadpoolctl==3.5.0 time-machine==2.16.0 tinycss2==1.3.0 toml==0.10.2 tomli==2.0.2 toolz==1.0.0 -tornado==6.4.2 +tornado>=6.4.1 tqdm==4.66.5 traitlets==5.14.3 types-python-dateutil==2.9.0.20241003 -typing_extensions==4.12.2 +typing_extensions==4.15.0 tzdata==2024.2 tzlocal==5.2 uc-micro-py==1.0.3 @@ -339,12 +331,13 @@ unicodecsv==0.14.1 universal_pathlib==0.2.5 uri-template==1.3.0 uritemplate==3.0.1 -urllib3==1.26.20 +urllib3==1.26.19 virtualenv==20.26.6 wcwidth==0.2.13 webcolors==24.8.0 webencodings==0.5.1 websocket-client==0.59.0 +websockets==15.0.1 widgetsnbextension==3.6.9 wirerope==0.4.7 wrapt==1.14.1 @@ -353,3 +346,4 @@ wurlitzer==3.1.1 yarl==1.14.0 zipp==3.20.2 zstandard==0.23.0 +pip>=26.0.0 diff --git a/tfx/version.py b/tfx/version.py index 3b482b3eae..ab326d2c53 100644 --- a/tfx/version.py +++ b/tfx/version.py @@ -14,4 +14,4 @@ """Contains the version string of TFX.""" # Note that setup.py uses this version. -__version__ = '1.17.2' +__version__ = '1.17.3'