Skip to content
Open
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/api_eval.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
if: ${{github.event_name == 'schedule' || (!cancelled() && !inputs.offline_mode)}}
strategy:
matrix:
pyver: [py310]
pyver: [py312]
runs-on: ubuntu-latest
env:
PYTHON_VERSION: ${{ matrix.pyver }}
Expand Down Expand Up @@ -132,7 +132,7 @@ jobs:
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
uses: actions/download-artifact@v4
with:
name: my-artifact-${{ github.run_id }}-py310
name: my-artifact-${{ github.run_id }}-py312
- name: Copy Artifacts
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
Expand Down
34 changes: 29 additions & 5 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ on:
description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
type: boolean
default: false
docker_tag:
required: true
description: 'Docker tag'
type: string
default: 'nightly-test-cu12.8'
result_tag:
required: true
description: 'result_tag if is not none, benchmark results will be uploaded to feishu'
type: string
default: "default"

env:
HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
Expand All @@ -45,7 +55,7 @@ jobs:
if: ${{github.event_name == 'schedule' || (!cancelled() && !inputs.offline_mode)}}
strategy:
matrix:
pyver: [py310]
pyver: [py312]
runs-on: ubuntu-latest
env:
PYTHON_VERSION: ${{ matrix.pyver }}
Expand Down Expand Up @@ -93,7 +103,7 @@ jobs:
runs-on: [self-hosted, linux-a100]
timeout-minutes: 50
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: registry.h.pjlab.org.cn/ailab-puyu-puyu_gpu/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/qa_test_models:/nvme/qa_test_models
Expand All @@ -117,7 +127,7 @@ jobs:
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
uses: actions/download-artifact@v4
with:
name: my-artifact-${{ github.run_id }}-py310
name: my-artifact-${{ github.run_id }}-py312
- name: Copy Artifacts
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
Expand Down Expand Up @@ -153,7 +163,7 @@ jobs:
TEST_ENV: ${{ matrix.transformers }}
timeout-minutes: 480
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: registry.h.pjlab.org.cn/ailab-puyu-puyu_gpu/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -197,11 +207,25 @@ jobs:
if: contains(fromJson(github.event.inputs.backend), 'pytorch') && !contains(fromJson(github.event.inputs.backend), 'turbomind')
run: |
pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n ${{matrix.n}} -m '${{matrix.gpu_num}} and not pr_test and not function and pytorch' --alluredir=${{env.ALLURE_REPORT_DIR}}
- name: Generate result
if: always()
run: |
cd /nvme/qa_test_models/feishu_upload
python3 test_benchmark.py --root ${{env.REPORT_DIR}} --output ${{env.REPORT_DIR}}/${{inputs.result_tag}}.txt --hardware A100 --infer-version ${{inputs.result_tag}}
- name: Async result
if: always() && inputs.result_tag != 'default'
env:
FEISHU_APP_ID: ${{secrets.FEISHU_APP_ID}}
FEISHU_APP_SECRET: ${{secrets.FEISHU_APP_SECRET}}
FEISHU_TABLE_TOKEN: ${{secrets.FEISHU_TABLE_TOKEN}}
FEISHU_TABLE_ID: ${{secrets.BENCHMARK_FEISHU_TABLE_ID}}
run: |
cd /nvme/qa_test_models/feishu_upload
python3 main.py --skip-duplicates ${{env.REPORT_DIR}}/${{inputs.result_tag}}.txt --config config-benchmark.py
- name: Clear workfile
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 $REPORT_DIR
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand Down
45 changes: 23 additions & 22 deletions .github/workflows/daily_ete_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ on:
description: 'regression functions'
type: string
default: "['quant', 'tools','restful','pipeline','benchmark','evaluation']"
docker_tag:
required: true
description: 'Docker tag'
type: string
default: 'nightly-test-cu12.8'
schedule:
- cron: '00 14 * * 0-4'

Expand All @@ -60,7 +65,7 @@ jobs:
if: ${{!cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode)}}
strategy:
matrix:
pyver: [py310]
pyver: [py312]
runs-on: ubuntu-latest
env:
PYTHON_VERSION: ${{ matrix.pyver }}
Expand Down Expand Up @@ -109,7 +114,7 @@ jobs:
runs-on: [self-hosted, linux-a100]
timeout-minutes: 50
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: registry.h.pjlab.org.cn/ailab-puyu-puyu_gpu/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/qa_test_models:/nvme/qa_test_models
Expand All @@ -131,7 +136,7 @@ jobs:
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
uses: actions/download-artifact@v4
with:
name: my-artifact-${{ github.run_id }}-py310
name: my-artifact-${{ github.run_id }}-py312
- name: Copy Artifacts
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
Expand All @@ -140,9 +145,13 @@ jobs:
run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
- name: Mark as start
run: |
chmod -R 777 ${{env.TEST_CODE_PATH}}
mkdir ${{env.REPORT_DIR}} -p
echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
- name: Clear workfile
if: always()
run: |
chmod -R 777 ${{env.TEST_CODE_PATH}}
chmod -R 777 ${{env.REPORT_DIR}}

test_quantization:
needs: download_pkgs
Expand All @@ -158,7 +167,7 @@ jobs:
MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
TEST_ENV: ${{ matrix.transformers }}
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: registry.h.pjlab.org.cn/ailab-puyu-puyu_gpu/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand All @@ -177,7 +186,7 @@ jobs:
echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
- name: Install lmdeploy - dependency
run: |
python3 -m pip install auto_gptq matplotlib attrdict
python3 -m pip install matplotlib attrdict
python3 -m pip install -r requirements/lite.txt
- name: Install lmdeploy
run: |
Expand Down Expand Up @@ -210,7 +219,6 @@ jobs:
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand Down Expand Up @@ -246,7 +254,7 @@ jobs:
MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
TEST_ENV: ${{ matrix.transformers }}
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: registry.h.pjlab.org.cn/ailab-puyu-puyu_gpu/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -330,7 +338,6 @@ jobs:
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand Down Expand Up @@ -422,7 +429,7 @@ jobs:
extra: '--logprobs-mode raw_logprobs'
timeout-minutes: 60
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: registry.h.pjlab.org.cn/ailab-puyu-puyu_gpu/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -527,7 +534,6 @@ jobs:
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand All @@ -540,7 +546,7 @@ jobs:
needs: test_quantization
timeout-minutes: 240
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: registry.h.pjlab.org.cn/ailab-puyu-puyu_gpu/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -590,7 +596,6 @@ jobs:
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand All @@ -604,7 +609,7 @@ jobs:
needs: test_quantization
timeout-minutes: 120
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: registry.h.pjlab.org.cn/ailab-puyu-puyu_gpu/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -646,7 +651,6 @@ jobs:
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand All @@ -671,7 +675,7 @@ jobs:
generate_type: base
timeout-minutes: 60
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: registry.h.pjlab.org.cn/ailab-puyu-puyu_gpu/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -777,7 +781,6 @@ jobs:
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand All @@ -790,7 +793,7 @@ jobs:
needs: test_quantization
timeout-minutes: 240
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: registry.h.pjlab.org.cn/ailab-puyu-puyu_gpu/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -841,7 +844,6 @@ jobs:
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand All @@ -854,7 +856,7 @@ jobs:
needs: [test_tools, test_restful, test_pipeline, test_benchmark]
timeout-minutes: 5
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: registry.h.pjlab.org.cn/ailab-puyu-puyu_gpu/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand All @@ -866,7 +868,6 @@ jobs:
run: cp -r ${{env.TEST_CODE_PATH}}/. .
- name: Install lmdeploy
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Get coverage report
Expand All @@ -879,7 +880,7 @@ jobs:
- name: Clear workfile
if: always()
run: |
chmod -R 777 ${{env.ROOT_DIR}}
chmod -R 777 ${{env.REPORT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
Expand Down
21 changes: 13 additions & 8 deletions .github/workflows/daily_ete_test_3090.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ on:
description: 'regression functions'
type: string
default: "['quant', 'tools', 'restful']"
docker_tag:
required: true
description: 'Docker tag'
type: string
default: 'nightly-test-cu12.8'
schedule:
- cron: '00 14 * * 0-4'

Expand All @@ -59,7 +64,7 @@ jobs:
if: ${{!cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode)}}
strategy:
matrix:
pyver: [py310]
pyver: [py312]
runs-on: ubuntu-latest
env:
PYTHON_VERSION: ${{ matrix.pyver }}
Expand Down Expand Up @@ -108,7 +113,7 @@ jobs:
runs-on: [self-hosted, 3090-r1]
timeout-minutes: 50
container:
image: openmmlab/lmdeploy:latest-cu12
image: openmmlab/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/qa_test_models:/nvme/qa_test_models
Expand All @@ -131,7 +136,7 @@ jobs:
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
uses: actions/download-artifact@v4
with:
name: my-artifact-${{ github.run_id }}-py310
name: my-artifact-${{ github.run_id }}-py312
- name: Copy Artifacts
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
Expand All @@ -154,7 +159,7 @@ jobs:
MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
TEST_ENV: 3090_legacy
container:
image: openmmlab/lmdeploy:latest-cu12
image: openmmlab/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand All @@ -169,7 +174,7 @@ jobs:
echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
- name: Install lmdeploy - dependency
run: |
python3 -m pip install auto_gptq matplotlib
python3 -m pip install matplotlib
python3 -m pip install -r requirements/lite.txt
- name: Install lmdeploy
run: |
Expand Down Expand Up @@ -232,7 +237,7 @@ jobs:
MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
TEST_ENV: ${{matrix.transformers}}
container:
image: openmmlab/lmdeploy:latest-cu12
image: openmmlab/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -317,7 +322,7 @@ jobs:
generate_type: base
timeout-minutes: 60
container:
image: openmmlab/lmdeploy:latest-cu12
image: openmmlab/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -406,7 +411,7 @@ jobs:
needs: [test_tools, test_restful]
timeout-minutes: 5
container:
image: openmmlab/lmdeploy:latest-cu12
image: openmmlab/lmdeploy:${{ inputs.docker_tag || 'nightly-test-cu12.8' }}
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down
Loading
Loading