Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/test_usage_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

```bash
mkdir build && cd build
cmake -DBUILD_TEST=ON -DUSE_CUDA=ON ..
cmake -DBUILD_TEST=ON -DUSE_CUDA=ON -DUSE_NCCL=ON ..
make -j$(nproc)
```

Expand Down
18 changes: 9 additions & 9 deletions infini_train/src/kernels/cuda/matmul.cu
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ std::shared_ptr<Tensor> MatmulForward(const std::shared_ptr<Tensor> &input, cons
.alpha = 1.0f,
.beta = 0.0f,
.batch_count = static_cast<int>(bs),
.stride_a = n * k,
.stride_b = k * m,
.stride_c = m * n,
.stride_a = bs > 1 ? n * k : 0,
.stride_b = bs > 1 ? k * m : 0,
.stride_c = bs > 1 ? m * n : 0,
.input_dtype = dtype,
.output_dtype = dtype,
});
Expand Down Expand Up @@ -133,9 +133,9 @@ std::shared_ptr<Tensor> MatmulBackwardInput(const std::shared_ptr<Tensor> &other
.alpha = 1.0f,
.beta = 0.0f,
.batch_count = static_cast<int>(bs),
.stride_a = k * n,
.stride_b = n * m,
.stride_c = m * k,
.stride_a = bs > 1 ? k * n : 0,
.stride_b = bs > 1 ? n * m : 0,
.stride_c = bs > 1 ? m * k : 0,
.input_dtype = compute_dtype,
.output_dtype = output_dtype,
});
Expand Down Expand Up @@ -202,9 +202,9 @@ std::shared_ptr<Tensor> MatmulBackwardOther(const std::shared_ptr<Tensor> &input
.alpha = 1.0f,
.beta = 0.0f,
.batch_count = static_cast<int>(bs),
.stride_a = n * m,
.stride_b = k * m,
.stride_c = n * k,
.stride_a = bs > 1 ? n * m : 0,
.stride_b = bs > 1 ? k * m : 0,
.stride_c = bs > 1 ? n * k : 0,
.input_dtype = compute_dtype,
.output_dtype = output_dtype,
});
Expand Down
5 changes: 5 additions & 0 deletions scripts/run_models_and_profile.bash
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ BUILD_DIR="$(read_var BUILD_DIR)"; : "${BUILD_DIR:=../build}"
LOG_DIR="$(read_var LOG_DIR)"; : "${LOG_DIR:=logs}"
PROFILE_LOG_DIR="$(read_var PROFILE_LOG_DIR)"; : "${PROFILE_LOG_DIR:=./profile_logs}"
COMPARE_LOG_DIR="$(read_var COMPARE_LOG_DIR)"; : "${COMPARE_LOG_DIR:=}"
RUN_CTEST="$(read_var RUN_CTEST)"; : "${RUN_CTEST:=true}"
CTEST_CMD="$(read_var CTEST_CMD)"; : "${CTEST_CMD:=ctest --output-on-failure -LE cuda -j$(nproc) && ctest --output-on-failure -L cuda -j1}"

mkdir -p "$BUILD_DIR" "$LOG_DIR" "$PROFILE_LOG_DIR"

Expand Down Expand Up @@ -244,6 +246,9 @@ for ((id=0; id<num_builds; ++id)); do
# always clean before another build
clean_build_dir
run_and_log "$LAST_CMAKE_CMD" "${build_id}" "no" "build"
if [[ "$RUN_CTEST" == "true" ]]; then
run_and_log "$CTEST_CMD" "ctest_${build_id}" "no" "ctest"
fi

# profile flag for runs
profile_flag="no"
Expand Down
4 changes: 3 additions & 1 deletion scripts/test_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
"LLAMA3_LLMC_FILEPATH": "/data/shared/InfiniTrain-dev/data/llmc/llama3/llama3.2_1B_fp32.bin",
"PROFILE_LOG_DIR": "./profile_logs",
"LOG_DIR": "./logs",
"COMPARE_LOG_DIR": ""
"COMPARE_LOG_DIR": "",
"RUN_CTEST": "true",
"CTEST_CMD": "ctest --output-on-failure -LE cuda -j$(nproc) && ctest --output-on-failure -L cuda -j1"
},
"builds": [
{
Expand Down
1 change: 1 addition & 0 deletions third_party/googletest
Submodule googletest added at f8d7d7
Loading