# _Auto-Parallel.yml (repository forked from PaddlePaddle/Paddle)
# Reusable workflow: it only runs when another workflow calls it through
# `workflow_call`.
name: Auto-Parallel

on:
  workflow_call:
    inputs:
      # Image the test container is started from (used by `docker run` in
      # the parallel-test job).
      docker_build_image:
        required: true
        type: string
      # Optional switch: the check-bypass job is skipped when this is the
      # string 'true'.
      can-skip:
        required: false
        type: string
# Workflow-level environment, visible to every job and step below.
env:
  # Identity of the pull request taken from the triggering event.
  PR_ID: ${{ github.event.pull_request.number }}
  COMMIT_ID: ${{ github.event.pull_request.head.sha }}
  BRANCH: ${{ github.event.pull_request.base.ref }}

  # Names identifying this CI task; TASK is the prefix of the container name.
  TASK: paddle-CI-${{ github.event.pull_request.number }}-Auto-Parallel
  CI_name: auto-parallel

  # Locations under /workspace, where the runner workspace is mounted in the
  # test container.
  work_dir: /workspace/Paddle
  PADDLE_ROOT: /workspace/Paddle
  ci_scripts: /workspace/Paddle/ci

  # Hosts that must be reached directly rather than through a proxy.
  no_proxy: "localhost,bj.bcebos.com,su.bcebos.com,bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn"
# Every `run` step in this workflow is executed with bash unless the step
# sets its own shell.
defaults:
  run:
    shell: bash
jobs:
  # Gate job: calls the reusable check-bypass workflow. Skipped entirely when
  # the caller passes can-skip == 'true'; since parallel-test `needs` this job,
  # it is skipped as well in that case.
  check-bypass:
    name: Check bypass
    if: ${{ inputs.can-skip != 'true' }}
    uses: ./.github/workflows/check-bypass.yml
    with:
      workflow-name: "auto-parallel"
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}

  # Runs the auto-parallel test suite inside a GPU container on the
  # Auto-Parallel runner group, unless check-bypass reports can-skip == 'true'.
  parallel-test:
    name: Parallel test
    needs: check-bypass
    if: ${{ needs.check-bypass.outputs.can-skip != 'true' }}
    runs-on:
      group: Auto-Parallel
    timeout-minutes: 120
    steps:
      # Starts a detached container and records its name in the job
      # environment so that later steps can `docker exec` into it.
      - name: Check docker image and run container
        env:
          FLAGS_dynamic_static_unified_comm: "True"
          python_version: "3.10"
          paddle_whl: https://paddle-github-action.bj.bcebos.com/PR/build/${{ github.event.pull_request.number }}/${{ github.event.pull_request.head.sha }}/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
        run: |
          # NOTE(review): core_index is not defined anywhere in this workflow;
          # presumably it is provided by the runner environment - confirm.
          container_name=${TASK}-${core_index}-$(date +%Y%m%d-%H%M%S)
          echo "container_name=${container_name}" >> ${{ github.env }}
          docker_image=${{ inputs.docker_build_image }}
          nvidia-docker run -d -t --name ${container_name} --net=host -v /dev/shm:/dev/shm --shm-size=32G \
            -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
            -v ${{ github.workspace }}:/workspace \
            -v /home/FleetX_CI:/gpt_data \
            -v /home/Llm_gpt_CI:/llm_gpt_data \
            -v /home/Llama_CI:/llama_data \
            -v /home/.cache/pip:/home/.cache/pip \
            -e BRANCH \
            -e PR_ID \
            -e COMMIT_ID \
            -e work_dir \
            -e PADDLE_ROOT \
            -e ci_scripts \
            -e no_proxy \
            -e CI_name \
            -e paddle_whl \
            -e FLAGS_dynamic_static_unified_comm \
            -e python_version \
            -w /workspace --runtime=nvidia ${docker_image}

      # Wipes /workspace, then fetches and unpacks the build archive for this
      # PR and commit into /workspace/Paddle.
      - name: Download Paddle
        env:
          work_dir: ${{ github.workspace }}
        timeout-minutes: 30
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          rm -rf * .[^.]*
          # Fail fast from here on: without this a failed download or extract
          # is masked by the trailing "rm -f" and the step reports success.
          set -e
          echo "Downloading build.tar.gz"
          wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/build/${{ github.event.pull_request.number }}/${{ github.event.pull_request.head.sha }}/build.tar.gz --no-check-certificate
          echo "Extracting build.tar.gz"
          tar --use-compress-program="pzstd" -xpf build.tar.gz
          mv paddle Paddle
          rm -f build.tar.gz
          '

      # Fetches a PaddleNLP snapshot, pins it to a fixed commit and applies
      # three cherry-picked commits on top of it.
      - name: Download PaddleTest
        env:
          work_dir: ${{ github.workspace }}
        timeout-minutes: 30
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          echo "Download and extract PaddleNLP.tar.gz"
          wget -q --tries=5 --no-proxy https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddleNLP.tar --no-check-certificate
          tar xf PaddleNLP.tar && rm -rf PaddleNLP.tar
          source ${{ github.workspace }}/../../../proxy
          cd PaddleNLP
          git config --global user.name "PaddleCI"
          git config --global user.email "paddle_ci@example.com"
          git pull
          git checkout 6ac04028757dfbcc089916997493611f62de81b2
          git switch -c 6ac04028757dfbcc089916997493611f62de81b2
          git cherry-pick bc08aeec91d2c992c3d8d39755bea7c6213b0e82
          git cherry-pick 7ab35ce94eca977bcf3b44bfb42deb0e0b5ef158
          git cherry-pick 2ac85997c2dafe3d67a4aac01d553ad76d6024bf
          git submodule update --init --recursive --force
          '

      # Points /usr/bin/python at the requested interpreter, installs the BOS
      # SDK and runs the auto-parallel CI script with an 80 minute limit.
      - name: Test
        env:
          work_dir: ${{ github.workspace }}
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          ldconfig
          set -e
          pip config set global.cache-dir "/home/.cache/pip"
          ln -sf $(which python${python_version}) /usr/bin/python
          python -c "import sys; print(sys.version_info[:])"
          source ${{ github.workspace }}/../../../proxy
          python -m pip install bce-python-sdk==0.8.74
          timeout 80m bash Paddle/ci/auto_parallel/ci_auto_parallel.sh ${paddle_whl}
          '

      # Always runs: uploads every file in /workspace/case_logs with the BOS
      # client and prints the resulting URL of each log.
      - name: Upload and display logs
        if: always()
        env:
          home_path: ${{ github.workspace }}/..
          bos_file: ${{ github.workspace }}/../bos_retry/BosClient.py
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          export AK=paddle
          export SK=paddle
          # Fetch the upload client once; -p tolerates a directory left behind
          # by an earlier partial download.
          if [ ! -f "${{ env.bos_file }}" ]; then
            wget -q --no-proxy -O ${{ env.home_path }}/bos_retry.tar.gz https://xly-devops.bj.bcebos.com/home/bos_retry.tar.gz --no-check-certificate
            mkdir -p ${{ env.home_path }}/bos_retry
            tar xf ${{ env.home_path }}/bos_retry.tar.gz -C ${{ env.home_path }}/bos_retry
          fi
          # Earlier steps may have failed before any log was written.
          if [ ! -d /workspace/case_logs ]; then
            echo "No case_logs directory found; nothing to upload"
            exit 0
          fi
          cd /workspace/case_logs
          for FILE in /workspace/case_logs/*; do
            # An empty directory leaves the glob unexpanded; skip that entry.
            [ -e "$FILE" ] || continue
            file=$(basename "$FILE")
            python ${{ env.bos_file }} "$file" paddle-github-action/PR/Auto-Parallel/${PR_ID}/${COMMIT_ID}/logs
            echo "$file: https://paddle-github-action.bj.bcebos.com/PR/Auto-Parallel/${PR_ID}/${COMMIT_ID}/logs/$file"
          done
          '

      # Always runs: empties the mounted workspace and removes the container.
      - name: Terminate and delete the container
        if: always()
        run: |
          # The wipe is best-effort. The step shell is bash with -e, so a
          # failing exec (container stopped or never started) would otherwise
          # abort the step before the container is removed.
          docker exec -t ${{ env.container_name }} /bin/bash -c 'rm -rf * .[^.]*' || true
          docker rm -f ${{ env.container_name }}