From a2ec8f6fe9fbd52a8d4ae24e51936a09001faf4a Mon Sep 17 00:00:00 2001
From: zzy <zzy>
Date: Thu, 28 May 2026 21:28:47 +0800
Subject: [PATCH] Add B200 and SGLang test results

---
 .../accuracy/accuracy.json                    |    8 -
 .../env_info.json                             |   48 -
 .../online/result.json                        |  163 --
 .../result.json                               |  215 ---
 .../accuracy/accuracy.json                    |    8 -
 .../env_info.json                             |   48 -
 .../interactive/result.json                   |  131 --
 .../online/result.json                        |  151 --
 .../result.json                               |  215 ---
 .../result.json                               |  652 -------
 .../env_info.json                             |   25 -
 .../result.json                               |  963 -----------
 .../result.json                               |  583 -------
 .../result.json                               |  389 -----
 .../accuracy/accuracy.json                    |    8 +
 .../burst/result.json                         |  229 +++
 .../env_info.json                             |  118 ++
 .../interactive/result.json                   |  201 +++
 .../offline/result.json                       |  231 +++
 .../online/result.json                        |  245 +++
 .../result.json                               |  650 +++++++
 .../sustained/result.json                     |  493 ++++++
 .../1x/offline/result.json                    |  231 +++
 .../1x/result.json                            |  236 +++
 .../2x/offline/result.json                    |  231 +++
 .../2x/result.json                            |  236 +++
 .../4x/offline/result.json                    |  231 +++
 .../4x/result.json                            |  236 +++
 .../8x/offline/result.json                    |  231 +++
 .../8x/result.json                            |  236 +++
 .../accuracy/accuracy.json                    |    0
 .../env_info.json                             |  118 ++
 .../result.json                               |  273 +++
 .../accuracy/accuracy.json                    |    8 +
 .../env_info.json                             |  118 ++
 .../interactive/result.json                   |  201 +++
 .../offline/result.json                       |  231 +++
 .../online/result.json                        |  233 +++
 .../result.json                               |  598 +++++++
 .../sustained/result.json                     |  493 ++++++
 .../accuracy/accuracy.json                    |    8 +
 .../burst/result.json                         |  164 ++
 .../env_info.json                             |   12 +-
 .../interactive/result.json                   |  136 ++
 .../offline/result.json                       |  166 ++
 .../online/result.json                        |  168 ++
 .../result.json                               |  615 +++++++
 .../speculative/result.json                   |  166 ++
 .../sustained/result.json                     |  428 +++++
 .../bf16/accuracy/accuracy.json               |    8 +
 .../bf16/offline/result.json                  |  178 ++
 .../bf16/online/result.json                   |  180 ++
 .../bf16/result.json                          |  395 +++++
 .../bf16/sustained/result.json                |  278 +++
 .../env_info.json                             |   53 +
 .../result.json                               |  963 +++++++++++
 .../w4a16/accuracy/accuracy.json              |    8 +
 .../w4a16/offline/result.json                 |  178 ++
 .../w4a16/online/result.json                  |  180 ++
 .../w4a16/result.json                         |  395 +++++
 .../w4a16/sustained/result.json               |  278 +++
 .../w8a16/accuracy/accuracy.json              |    8 +
 .../w8a16/offline/result.json                 |  178 ++
 .../w8a16/online/result.json                  |  180 ++
 .../w8a16/result.json                         |  395 +++++
 .../w8a16/sustained/result.json               |  278 +++
 .../accuracy/accuracy.json                    |    8 +
 .../env_info.json                             |   53 +
 .../interactive/result.json                   |  136 ++
 .../offline/result.json                       |  154 ++
 .../online/result.json                        |  168 ++
 .../result.json                               |  551 ++++++
 .../speculative/result.json                   |  154 ++
 .../sustained/result.json                     |  428 +++++
 .../accuracy/accuracy.json                    |    0
 .../env_info.json                             |   53 +
 .../interactive/result.json                   |  136 ++
 .../offline/result.json                       |  166 ++
 .../online/result.json                        |  156 ++
 .../result.json                               |  371 ++++
 .../sustained/result.json                     |  278 +++
 .../accuracy/accuracy.json                    |    8 +
 .../burst/result.json                         |  164 ++
 .../env_info.json                             |   53 +
 .../interactive/result.json                   |  136 ++
 .../offline/result.json                       |  166 ++
 .../online/result.json                        |  168 ++
 .../result.json                               |  615 +++++++
 .../speculative/result.json                   |  166 ++
 .../sustained/result.json                     |  428 +++++
 .../bf16/accuracy/accuracy.json               |    8 +
 .../bf16/offline/result.json                  |  178 ++
 .../bf16/online/result.json                   |  180 ++
 .../bf16/result.json                          |  395 +++++
 .../bf16/sustained/result.json                |  278 +++
 .../env_info.json                             |   53 +
 .../result.json                               |  963 +++++++++++
 .../w4a16/accuracy/accuracy.json              |    8 +
 .../w4a16/offline/result.json                 |  178 ++
 .../w4a16/online/result.json                  |  180 ++
 .../w4a16/result.json                         |  395 +++++
 .../w4a16/sustained/result.json               |  278 +++
 .../w8a16/accuracy/accuracy.json              |    8 +
 .../w8a16/offline/result.json                 |  178 ++
 .../w8a16/online/result.json                  |  180 ++
 .../w8a16/result.json                         |  395 +++++
 .../w8a16/sustained/result.json               |  278 +++
 .../accuracy/accuracy.json                    |    8 +
 .../env_info.json                             |   53 +
 .../interactive/result.json                   |  136 ++
 .../offline/result.json                       |  154 ++
 .../online/result.json                        |  168 ++
 .../result.json                               |  551 ++++++
 .../speculative/result.json                   |  154 ++
 .../sustained/result.json                     |  428 +++++
 .../accuracy/accuracy.json                    |    8 +
 .../env_info.json                             |   53 +
 .../interactive/result.json                   |  136 ++
 .../offline/result.json                       |  166 ++
 .../online/result.json                        |  156 ++
 .../result.json                               |  371 ++++
 .../sustained/result.json                     |  278 +++
 .../accuracy/accuracy.json                    |    8 +
 .../burst/result.json                         |  161 ++
 .../env_info.json                             |   44 +
 .../interactive/result.json                   |  139 ++
 .../offline/result.json                       |  196 +++
 .../online/result.json                        |  195 +++
 .../result.json                               |  694 ++++++++
 .../sustained/result.json                     |  456 +++++
 .../bf16/accuracy/accuracy.json               |    8 +
 .../bf16/offline/result.json                  |  221 +++
 .../bf16/result.json                          |  228 +++
 .../env_info.json                             |   44 +
 .../fp8/accuracy/accuracy.json                |    8 +
 .../fp8/offline/result.json                   |  221 +++
 .../fp8/result.json                           |  228 +++
 .../result.json                               |  603 +++++++
 .../w4a16/accuracy/accuracy.json              |    8 +
 .../w4a16/offline/result.json                 |  221 +++
 .../w4a16/result.json                         |  228 +++
 .../w8a16/accuracy/accuracy.json              |    8 +
 .../w8a16/offline/result.json                 |  221 +++
 .../w8a16/result.json                         |  228 +++
 .../accuracy/accuracy.json                    |    8 +
 .../env_info.json                             |   44 +
 .../interactive/result.json                   |  138 ++
 .../offline/result.json                       |  169 ++
 .../online/result.json                        |  192 +++
 .../result.json                               |  617 +++++++
 .../sustained/result.json                     |  456 +++++
 .../accuracy/accuracy.json                    |    8 +
 .../env_info.json                             |   44 +
 .../interactive/result.json                   |  139 ++
 .../offline/result.json                       |  196 +++
 .../online/result.json                        |  171 ++
 .../result.json                               |  460 +++++
 .../sustained/result.json                     |  292 ++++
 .../accuracy/accuracy.json                    |    8 +
 .../burst/result.json                         |  143 ++
 .../env_info.json                             |   32 +
 .../interactive/result.json                   |  115 ++
 .../offline/result.json                       |  111 +-
 .../online/result.json                        |  147 ++
 .../result.json                               |  594 +++++++
 .../speculative}/result.json                  |  108 +-
 .../sustained/result.json                     |  407 +++++
 .../bf16/accuracy/accuracy.json               |    8 +
 .../bf16/offline/result.json                  |  157 ++
 .../bf16/online/result.json                   |  159 ++
 .../bf16/result.json                          |  374 ++++
 .../bf16/sustained/result.json                |  257 +++
 .../env_info.json                             |   32 +
 .../fp8/accuracy/accuracy.json                |    8 +
 .../fp8/offline/result.json                   |  157 ++
 .../fp8/online/result.json                    |  159 ++
 .../fp8/result.json                           |  374 ++++
 .../fp8/sustained/result.json                 |  257 +++
 .../result.json                               | 1499 +++++++++++++++++
 .../w4a16/accuracy/accuracy.json              |    8 +
 .../w4a16/offline/result.json                 |  157 ++
 .../w4a16/online/result.json                  |  159 ++
 .../w4a16/result.json                         |  374 ++++
 .../w4a16/sustained/result.json               |  257 +++
 .../w8a16/accuracy/accuracy.json              |    8 +
 .../w8a16/offline/result.json                 |  157 ++
 .../w8a16/online/result.json                  |  159 ++
 .../w8a16/result.json                         |  374 ++++
 .../w8a16/sustained/result.json               |  257 +++
 .../w8a8/accuracy/accuracy.json               |    8 +
 .../w8a8/offline/result.json                  |  157 ++
 .../w8a8/online/result.json                   |  159 ++
 .../w8a8/result.json                          |  374 ++++
 .../w8a8/sustained/result.json                |  257 +++
 .../accuracy/accuracy.json                    |    8 +
 .../env_info.json                             |   32 +
 .../interactive/result.json                   |  115 ++
 .../offline/result.json                       |  133 ++
 .../online/result.json                        |  147 ++
 .../result.json                               |  500 ++++++
 .../sustained/result.json                     |  407 +++++
 .../accuracy/accuracy.json                    |    8 +
 .../env_info.json                             |   32 +
 .../interactive/result.json                   |  115 ++
 .../offline/result.json                       |  107 +-
 .../online/result.json                        |  135 ++
 .../result.json                               |  353 ++++
 .../sustained/result.json                     |  257 +++
 .../accuracy/accuracy.json                    |    8 +
 .../burst/result.json                         |  164 ++
 .../env_info.json                             |   25 +-
 .../interactive/result.json                   |  136 ++
 .../offline/result.json                       |  166 ++
 .../online/result.json                        |  168 ++
 .../result.json                               |  615 +++++++
 .../speculative/result.json                   |  166 ++
 .../sustained/result.json                     |  428 +++++
 .../bf16}/accuracy/accuracy.json              |    0
 .../bf16/offline/result.json                  |  178 ++
 .../bf16/online/result.json                   |  180 ++
 .../bf16/result.json                          |  395 +++++
 .../bf16/sustained/result.json                |  278 +++
 .../env_info.json                             |   25 +-
 .../result.json                               |  963 +++++++++++
 .../w4a16/accuracy/accuracy.json              |    8 +
 .../w4a16/offline/result.json                 |  178 ++
 .../w4a16/online/result.json                  |  180 ++
 .../w4a16/result.json                         |  395 +++++
 .../w4a16/sustained/result.json               |  278 +++
 .../w8a16/accuracy/accuracy.json              |    8 +
 .../w8a16/offline/result.json                 |  178 ++
 .../w8a16/online/result.json                  |  180 ++
 .../w8a16/result.json                         |  395 +++++
 .../w8a16/sustained/result.json               |  278 +++
 .../accuracy/accuracy.json                    |    8 +
 .../env_info.json                             |   53 +
 .../interactive/result.json                   |  136 ++
 .../offline/result.json                       |  154 ++
 .../online/result.json                        |  168 ++
 .../result.json                               |  551 ++++++
 .../speculative/result.json                   |  154 ++
 .../sustained/result.json                     |  428 +++++
 .../accuracy/accuracy.json                    |    8 +
 .../env_info.json                             |   53 +
 .../interactive/result.json                   |  136 ++
 .../offline/result.json                       |   98 +-
 .../online/result.json                        |  156 ++
 .../result.json                               |  371 ++++
 .../sustained/result.json                     |  278 +++
 .../accuracy/accuracy.json                    |    8 -
 .../env_info.json                             |   33 -
 .../online/result.json                        |  158 --
 .../result.json                               |  210 ---
 .../accuracy/accuracy.json                    |    8 -
 .../env_info.json                             |   33 -
 .../interactive/result.json                   |  126 --
 .../online/result.json                        |  146 --
 .../result.json                               |  210 ---
 258 files changed, 50069 insertions(+), 4790 deletions(-)
 delete mode 100644 results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/accuracy/accuracy.json
 delete mode 100644 results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/env_info.json
 delete mode 100644 results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/online/result.json
 delete mode 100644 results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/result.json
 delete mode 100644 results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/accuracy/accuracy.json
 delete mode 100644 results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/env_info.json
 delete mode 100644 results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/interactive/result.json
 delete mode 100644 results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/online/result.json
 delete mode 100644 results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/result.json
 delete mode 100644 results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/result.json
 delete mode 100644 results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/env_info.json
 delete mode 100644 results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/result.json
 delete mode 100644 results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/result.json
 delete mode 100644 results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/burst/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/env_info.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/interactive/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/offline/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/online/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/sustained/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/1x/offline/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/1x/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/2x/offline/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/2x/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/4x/offline/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/4x/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/8x/offline/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/8x/result.json
 rename results/community/{nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd => nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413}/accuracy/accuracy.json (100%)
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/env_info.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/env_info.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/interactive/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/offline/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/online/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/sustained/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/burst/result.json
 rename results/community/{nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd => nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc}/env_info.json (50%)
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/interactive/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/offline/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/online/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/speculative/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/sustained/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/offline/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/online/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/sustained/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/env_info.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/offline/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/online/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/sustained/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/offline/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/online/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/sustained/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/env_info.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/interactive/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/offline/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/online/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/speculative/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/sustained/result.json
 rename results/community/{nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8 => nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2}/accuracy/accuracy.json (100%)
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/env_info.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/interactive/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/offline/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/online/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/result.json
 create mode 100644 results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/sustained/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/burst/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/env_info.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/interactive/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/offline/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/online/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/speculative/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/sustained/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/offline/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/online/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/sustained/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/env_info.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/offline/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/online/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/sustained/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/offline/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/online/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/sustained/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/env_info.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/interactive/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/offline/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/online/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/speculative/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/sustained/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/env_info.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/interactive/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/offline/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/online/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/result.json
 create mode 100644 results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/sustained/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/burst/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/env_info.json
 create mode 100644 results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/interactive/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/offline/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/online/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/sustained/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/bf16/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/bf16/offline/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/bf16/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/env_info.json
 create mode 100644 results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/fp8/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/fp8/offline/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/fp8/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w4a16/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w4a16/offline/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w4a16/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a16/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a16/offline/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a16/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/env_info.json
 create mode 100644 results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/interactive/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/offline/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/online/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/sustained/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/env_info.json
 create mode 100644 results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/interactive/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/offline/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/online/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/result.json
 create mode 100644 results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/sustained/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/burst/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/env_info.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/interactive/result.json
 rename results/community/{mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0 => nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd}/offline/result.json (54%)
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/online/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/result.json
 rename results/community/{tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/offline => nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/speculative}/result.json (50%)
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/sustained/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/offline/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/online/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/sustained/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/env_info.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/offline/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/online/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/sustained/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/offline/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/online/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/sustained/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/offline/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/online/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/sustained/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/offline/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/online/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/sustained/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/env_info.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/interactive/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/offline/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/online/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/sustained/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/env_info.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/interactive/result.json
 rename results/community/{tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c => nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697}/offline/result.json (51%)
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/online/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/result.json
 create mode 100644 results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/sustained/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/burst/result.json
 rename results/community/{nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8 => nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5}/env_info.json (63%)
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/interactive/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/offline/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/online/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/speculative/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/sustained/result.json
 rename results/community/{nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97 => nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16}/accuracy/accuracy.json (100%)
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/offline/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/online/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/sustained/result.json
 rename results/community/{nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97 => nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb}/env_info.json (63%)
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/offline/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/online/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/sustained/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/offline/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/online/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/sustained/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/env_info.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/interactive/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/offline/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/online/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/speculative/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/sustained/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/accuracy/accuracy.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/env_info.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/interactive/result.json
 rename results/community/{mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d => nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50}/offline/result.json (53%)
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/online/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/result.json
 create mode 100644 results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/sustained/result.json
 delete mode 100644 results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/accuracy/accuracy.json
 delete mode 100644 results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/env_info.json
 delete mode 100644 results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/online/result.json
 delete mode 100644 results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/result.json
 delete mode 100644 results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/accuracy/accuracy.json
 delete mode 100644 results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/env_info.json
 delete mode 100644 results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/interactive/result.json
 delete mode 100644 results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/online/result.json
 delete mode 100644 results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/result.json
diff --git a/results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/accuracy/accuracy.json b/results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/accuracy/accuracy.json
deleted file mode 100644
index 72422349..00000000
--- a/results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/accuracy/accuracy.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "subset_score": 0.07,
-  "baseline_delta": -0.53,
-  "valid": false,
-  "framework": "vllm-musa",
-  "precision": "BF16",
-  "notes": "Integrated accuracy check \u2014 used same vllm-musa instance as benchmark."
-}
\ No newline at end of file
diff --git a/results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/env_info.json b/results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/env_info.json
deleted file mode 100644
index 4244ef73..00000000
--- a/results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/env_info.json
+++ /dev/null
@@ -1,48 +0,0 @@
-{
-  "collected_at": "2026-05-18T09:21:31.092840+00:00",
-  "accelerators": [
-    {
-      "index": 0,
-      "name": "MTT S4000",
-      "vendor": "Moore Threads",
-      "memory_gb": 48.0,
-      "driver_version": "2.7.0",
-      "firmware_version": null,
-      "supports_bf16": true
-    }
-  ],
-  "accelerator_platform": "moorethreads",
-  "accelerator_topology": null,
-  "intra_node_interconnect": null,
-  "cpu": {
-    "model": "Intel(R) Xeon(R) Gold 6430",
-    "physical_cores": 64,
-    "logical_cores": 128,
-    "numa_nodes": 2
-  },
-  "system_memory_gb": 1007.5,
-  "pcie_generation": "PCIe 16x/16x",
-  "cpu_accelerator_bandwidth_gbs": null,
-  "network_interfaces": [
-    {
-      "name": "mlx5_0",
-      "type": "InfiniBand/RoCE",
-      "bandwidth_gbps": null
-    },
-    {
-      "name": "mlx5_1",
-      "type": "InfiniBand/RoCE",
-      "bandwidth_gbps": null
-    },
-    {
-      "name": "mlx5_bond_0",
-      "type": "InfiniBand/RoCE",
-      "bandwidth_gbps": null
-    }
-  ],
-  "os": "Ubuntu Jammy Jellyfish (development branch)",
-  "python_version": "3.10.8",
-  "kernel_version": "5.15.0-105-generic",
-  "runtime_version": "Moore Threads Driver 2.7.0",
-  "pytorch_version": "2.2.0"
-}
\ No newline at end of file
diff --git a/results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/online/result.json b/results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/online/result.json
deleted file mode 100644
index 064d6b8a..00000000
--- a/results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/online/result.json
+++ /dev/null
@@ -1,163 +0,0 @@
-{
-  "schema_version": "1.0",
-  "suite_id": "suite_A",
-  "implementation_id": "moorethreads_vllm_musa_f2f6f965",
-  "chip": {
-    "name": "MTT S4000",
-    "vendor": "Moore Threads",
-    "count": 1,
-    "memory_gb": 48.0,
-    "interconnect_intra_node": null,
-    "interconnect_inter_node": null
-  },
-  "environment": {
-    "collected_at": "2026-05-18T09:21:31.092840+00:00",
-    "accelerators": [
-      {
-        "index": 0,
-        "name": "MTT S4000",
-        "vendor": "Moore Threads",
-        "memory_gb": 48.0,
-        "driver_version": "2.7.0",
-        "firmware_version": null,
-        "supports_bf16": true
-      }
-    ],
-    "accelerator_platform": "moorethreads",
-    "accelerator_topology": null,
-    "intra_node_interconnect": null,
-    "cpu": {
-      "model": "Intel(R) Xeon(R) Gold 6430",
-      "physical_cores": 64,
-      "logical_cores": 128,
-      "numa_nodes": 2
-    },
-    "system_memory_gb": 1007.5,
-    "pcie_generation": "PCIe 16x/16x",
-    "cpu_accelerator_bandwidth_gbs": null,
-    "network_interfaces": [
-      {
-        "name": "mlx5_0",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_1",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_bond_0",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      }
-    ],
-    "os": "Ubuntu Jammy Jellyfish (development branch)",
-    "python_version": "3.10.8",
-    "kernel_version": "5.15.0-105-generic",
-    "runtime_version": "Moore Threads Driver 2.7.0",
-    "pytorch_version": "2.2.0"
-  },
-  "software": {
-    "framework": "vllm-musa",
-    "framework_version": "0.4.2",
-    "driver_version": "2.7.0",
-    "runtime_version": "Moore Threads Driver 2.7.0",
-    "os": "Ubuntu Jammy Jellyfish (development branch)",
-    "python_version": "3.10.8"
-  },
-  "model": {
-    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
-    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
-    "model_name": null,
-    "model_note": null,
-    "model_source": "local",
-    "architecture": "dense",
-    "parameter_count_b": 8.0,
-    "precision": "BF16",
-    "effective_dtype": "float16",
-    "quantization_method": null,
-    "model_format": "HuggingFace original"
-  },
-  "task": {
-    "scenario": "online",
-    "num_runs": 3,
-    "warmup_runs": 1,
-    "parallelism": {
-      "tensor_parallel_size": 1,
-      "pipeline_parallel_size": 1,
-      "expert_parallel_size": 1,
-      "data_parallel_size": 1
-    },
-    "extra_config": null,
-    "runtime_metrics": null
-  },
-  "metrics": {
-    "online": {
-      "sla_ttft_ms": 500,
-      "max_valid_qps": 5,
-      "results_by_qps": [
-        {
-          "target_qps": 5,
-          "achieved_qps": 5.0,
-          "ttft_ms_p50": 194.45,
-          "ttft_ms_p90": 315.05,
-          "ttft_ms_p99": 424.55,
-          "tpot_ms_p50": 201.93,
-          "tpot_ms_p90": 253.8,
-          "tpot_ms_p99": 471.28,
-          "elapsed_seconds_median": 137.6,
-          "sla_met": true
-        },
-        {
-          "target_qps": 25,
-          "achieved_qps": 25.0,
-          "ttft_ms_p50": 4796.14,
-          "ttft_ms_p90": 8459.18,
-          "ttft_ms_p99": 9348.86,
-          "tpot_ms_p50": 355.01,
-          "tpot_ms_p90": 6430.04,
-          "tpot_ms_p99": 15579.83,
-          "elapsed_seconds_median": 93.0,
-          "sla_met": false
-        },
-        {
-          "target_qps": 100,
-          "achieved_qps": 100.0,
-          "ttft_ms_p50": 10354.27,
-          "ttft_ms_p90": 17651.16,
-          "ttft_ms_p99": 19078.89,
-          "tpot_ms_p50": 849.82,
-          "tpot_ms_p90": 8677.79,
-          "tpot_ms_p99": 14281.03,
-          "elapsed_seconds_median": 90.0,
-          "sla_met": false
-        }
-      ]
-    }
-  },
-  "accuracy": {
-    "subset_score": null,
-    "baseline_delta": null,
-    "valid": false,
-    "notes": "Run --scenario accuracy to check model accuracy."
-  },
-  "meta": {
-    "submitted_by": "JuhaoLiang1997",
-    "submission_type": "individual",
-    "date": "2026-05-18",
-    "time": "17:53:38",
-    "run_id": "cabb7bd0",
-    "run_name": "mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0",
-    "flagged": null,
-    "reproduce_script": "runners/moorethreads_vllm_musa_f2f6f965/runner.py",
-    "env_info_file": "../env_info.json",
-    "log_file": "run.log",
-    "samples_file": "samples.jsonl",
-    "notes": null,
-    "benchmark_start_time": "2026-05-18T09:37:13.745117+00:00",
-    "benchmark_end_time": "2026-05-18T09:53:38.865501+00:00",
-    "benchmark_elapsed_minutes": 16.4,
-    "model_load_seconds": 122.7
-  }
-}
\ No newline at end of file
diff --git a/results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/result.json b/results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/result.json
deleted file mode 100644
index e4b1093e..00000000
--- a/results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/result.json
+++ /dev/null
@@ -1,215 +0,0 @@
-{
-  "schema_version": "1.0",
-  "suite_id": "suite_A",
-  "implementation_id": "moorethreads_vllm_musa_f2f6f965",
-  "chip": {
-    "name": "MTT S4000",
-    "vendor": "Moore Threads",
-    "count": 1,
-    "memory_gb": 48.0,
-    "interconnect_intra_node": null,
-    "interconnect_inter_node": null
-  },
-  "environment": {
-    "collected_at": "2026-05-18T09:21:31.092840+00:00",
-    "accelerators": [
-      {
-        "index": 0,
-        "name": "MTT S4000",
-        "vendor": "Moore Threads",
-        "memory_gb": 48.0,
-        "driver_version": "2.7.0",
-        "firmware_version": null,
-        "supports_bf16": true
-      }
-    ],
-    "accelerator_platform": "moorethreads",
-    "accelerator_topology": null,
-    "intra_node_interconnect": null,
-    "cpu": {
-      "model": "Intel(R) Xeon(R) Gold 6430",
-      "physical_cores": 64,
-      "logical_cores": 128,
-      "numa_nodes": 2
-    },
-    "system_memory_gb": 1007.5,
-    "pcie_generation": "PCIe 16x/16x",
-    "cpu_accelerator_bandwidth_gbs": null,
-    "network_interfaces": [
-      {
-        "name": "mlx5_0",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_1",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_bond_0",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      }
-    ],
-    "os": "Ubuntu Jammy Jellyfish (development branch)",
-    "python_version": "3.10.8",
-    "kernel_version": "5.15.0-105-generic",
-    "runtime_version": "Moore Threads Driver 2.7.0",
-    "pytorch_version": "2.2.0"
-  },
-  "software": {
-    "framework": "vllm-musa",
-    "framework_version": "0.4.2",
-    "driver_version": "2.7.0",
-    "runtime_version": "Moore Threads Driver 2.7.0",
-    "os": "Ubuntu Jammy Jellyfish (development branch)",
-    "python_version": "3.10.8"
-  },
-  "model": {
-    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
-    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
-    "model_name": null,
-    "model_note": null,
-    "model_source": "local",
-    "architecture": "dense",
-    "parameter_count_b": 8.0,
-    "precision": "BF16",
-    "effective_dtype": "float16",
-    "quantization_method": null,
-    "model_format": "HuggingFace original"
-  },
-  "task": {
-    "scenarios_run": [
-      "offline",
-      "online"
-    ],
-    "parallelism": {
-      "tensor_parallel_size": 1,
-      "pipeline_parallel_size": 1,
-      "expert_parallel_size": 1,
-      "data_parallel_size": 1
-    },
-    "num_runs": 3,
-    "extra_config": null
-  },
-  "metrics": {
-    "derived": {},
-    "offline": {
-      "results_by_concurrency": [
-        {
-          "client_concurrency": 8,
-          "throughput_tokens_per_sec": 332.62,
-          "throughput_tokens_per_sec_per_chip": 332.62,
-          "throughput_tokens_per_sec_total": 922.83,
-          "elapsed_seconds_median": 43.4,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        },
-        {
-          "client_concurrency": 32,
-          "throughput_tokens_per_sec": 331.64,
-          "throughput_tokens_per_sec_per_chip": 331.64,
-          "throughput_tokens_per_sec_total": 920.1,
-          "elapsed_seconds_median": 43.6,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        },
-        {
-          "client_concurrency": 128,
-          "throughput_tokens_per_sec": 331.76,
-          "throughput_tokens_per_sec_per_chip": 331.76,
-          "throughput_tokens_per_sec_total": 920.46,
-          "elapsed_seconds_median": 43.6,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        }
-      ]
-    },
-    "online": {
-      "sla_ttft_ms": 500,
-      "max_valid_qps": 5,
-      "results_by_qps": [
-        {
-          "target_qps": 5,
-          "achieved_qps": 5.0,
-          "ttft_ms_p50": 194.45,
-          "ttft_ms_p90": 315.05,
-          "ttft_ms_p99": 424.55,
-          "tpot_ms_p50": 201.93,
-          "tpot_ms_p90": 253.8,
-          "tpot_ms_p99": 471.28,
-          "elapsed_seconds_median": 137.6,
-          "sla_met": true
-        },
-        {
-          "target_qps": 25,
-          "achieved_qps": 25.0,
-          "ttft_ms_p50": 4796.14,
-          "ttft_ms_p90": 8459.18,
-          "ttft_ms_p99": 9348.86,
-          "tpot_ms_p50": 355.01,
-          "tpot_ms_p90": 6430.04,
-          "tpot_ms_p99": 15579.83,
-          "elapsed_seconds_median": 93.0,
-          "sla_met": false
-        },
-        {
-          "target_qps": 100,
-          "achieved_qps": 100.0,
-          "ttft_ms_p50": 10354.27,
-          "ttft_ms_p90": 17651.16,
-          "ttft_ms_p99": 19078.89,
-          "tpot_ms_p50": 849.82,
-          "tpot_ms_p90": 8677.79,
-          "tpot_ms_p99": 14281.03,
-          "elapsed_seconds_median": 90.0,
-          "sla_met": false
-        }
-      ]
-    }
-  },
-  "accuracy": {
-    "subset_score": 0.07,
-    "baseline_delta": -0.53,
-    "valid": false,
-    "framework": "vllm-musa",
-    "precision": "BF16",
-    "notes": "Integrated accuracy check \u2014 used same vllm-musa instance as benchmark."
-  },
-  "meta": {
-    "submitted_by": "JuhaoLiang1997",
-    "submission_type": "individual",
-    "date": "2026-05-18",
-    "time": "17:34:52",
-    "run_id": "cabb7bd0",
-    "run_name": "mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0",
-    "flagged": null,
-    "reproduce_script": "runners/moorethreads_vllm_musa_f2f6f965/runner.py",
-    "env_info_file": "../env_info.json",
-    "log_file": "run.log",
-    "samples_file": "samples.jsonl",
-    "notes": "Partial run: ['offline', 'online'] succeeded, ['accuracy'] failed.",
-    "benchmark_start_time": "2026-05-18T09:26:10.676960+00:00",
-    "benchmark_end_time": "2026-05-18T09:34:52.667112+00:00",
-    "benchmark_elapsed_minutes": 25.1,
-    "model_load_seconds": 116.8,
-    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online'] scenarios.",
-    "scenario_dirs": {
-      "offline": "results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/offline",
-      "online": "results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/online"
-    }
-  }
-}
\ No newline at end of file
diff --git a/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/accuracy/accuracy.json b/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/accuracy/accuracy.json
deleted file mode 100644
index 63c6e929..00000000
--- a/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/accuracy/accuracy.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "subset_score": 0.07,
-  "baseline_delta": -0.31,
-  "valid": false,
-  "framework": "vllm-musa",
-  "precision": "BF16",
-  "notes": "Integrated accuracy check \u2014 used same vllm-musa instance as benchmark."
-}
\ No newline at end of file
diff --git a/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/env_info.json b/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/env_info.json
deleted file mode 100644
index 31f501be..00000000
--- a/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/env_info.json
+++ /dev/null
@@ -1,48 +0,0 @@
-{
-  "collected_at": "2026-05-18T08:40:55.208034+00:00",
-  "accelerators": [
-    {
-      "index": 0,
-      "name": "MTT S4000",
-      "vendor": "Moore Threads",
-      "memory_gb": 48.0,
-      "driver_version": "2.7.0",
-      "firmware_version": null,
-      "supports_bf16": true
-    }
-  ],
-  "accelerator_platform": "moorethreads",
-  "accelerator_topology": null,
-  "intra_node_interconnect": null,
-  "cpu": {
-    "model": "Intel(R) Xeon(R) Gold 6430",
-    "physical_cores": 64,
-    "logical_cores": 128,
-    "numa_nodes": 2
-  },
-  "system_memory_gb": 1007.5,
-  "pcie_generation": "PCIe 16x/16x",
-  "cpu_accelerator_bandwidth_gbs": null,
-  "network_interfaces": [
-    {
-      "name": "mlx5_0",
-      "type": "InfiniBand/RoCE",
-      "bandwidth_gbps": null
-    },
-    {
-      "name": "mlx5_1",
-      "type": "InfiniBand/RoCE",
-      "bandwidth_gbps": null
-    },
-    {
-      "name": "mlx5_bond_0",
-      "type": "InfiniBand/RoCE",
-      "bandwidth_gbps": null
-    }
-  ],
-  "os": "Ubuntu Jammy Jellyfish (development branch)",
-  "python_version": "3.10.8",
-  "kernel_version": "5.15.0-105-generic",
-  "runtime_version": "Moore Threads Driver 2.7.0",
-  "pytorch_version": "2.2.0"
-}
\ No newline at end of file
diff --git a/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/interactive/result.json b/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/interactive/result.json
deleted file mode 100644
index 4f5ff811..00000000
--- a/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/interactive/result.json
+++ /dev/null
@@ -1,131 +0,0 @@
-{
-  "schema_version": "1.0",
-  "suite_id": "suite_F",
-  "implementation_id": "moorethreads_vllm_musa_f2f6f965",
-  "chip": {
-    "name": "MTT S4000",
-    "vendor": "Moore Threads",
-    "count": 1,
-    "memory_gb": 48.0,
-    "interconnect_intra_node": null,
-    "interconnect_inter_node": null
-  },
-  "environment": {
-    "collected_at": "2026-05-18T08:40:55.208034+00:00",
-    "accelerators": [
-      {
-        "index": 0,
-        "name": "MTT S4000",
-        "vendor": "Moore Threads",
-        "memory_gb": 48.0,
-        "driver_version": "2.7.0",
-        "firmware_version": null,
-        "supports_bf16": true
-      }
-    ],
-    "accelerator_platform": "moorethreads",
-    "accelerator_topology": null,
-    "intra_node_interconnect": null,
-    "cpu": {
-      "model": "Intel(R) Xeon(R) Gold 6430",
-      "physical_cores": 64,
-      "logical_cores": 128,
-      "numa_nodes": 2
-    },
-    "system_memory_gb": 1007.5,
-    "pcie_generation": "PCIe 16x/16x",
-    "cpu_accelerator_bandwidth_gbs": null,
-    "network_interfaces": [
-      {
-        "name": "mlx5_0",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_1",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_bond_0",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      }
-    ],
-    "os": "Ubuntu Jammy Jellyfish (development branch)",
-    "python_version": "3.10.8",
-    "kernel_version": "5.15.0-105-generic",
-    "runtime_version": "Moore Threads Driver 2.7.0",
-    "pytorch_version": "2.2.0"
-  },
-  "software": {
-    "framework": "vllm-musa",
-    "framework_version": "0.4.2",
-    "driver_version": "2.7.0",
-    "runtime_version": "Moore Threads Driver 2.7.0",
-    "os": "Ubuntu Jammy Jellyfish (development branch)",
-    "python_version": "3.10.8"
-  },
-  "model": {
-    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
-    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
-    "model_name": null,
-    "model_note": null,
-    "model_source": "local",
-    "architecture": "dense",
-    "parameter_count_b": 0.5,
-    "precision": "BF16",
-    "effective_dtype": "float16",
-    "quantization_method": null,
-    "model_format": "HuggingFace original"
-  },
-  "task": {
-    "scenario": "interactive",
-    "num_runs": 3,
-    "warmup_runs": 1,
-    "parallelism": {
-      "tensor_parallel_size": 1,
-      "pipeline_parallel_size": 1,
-      "expert_parallel_size": 1,
-      "data_parallel_size": 1
-    },
-    "extra_config": null,
-    "runtime_metrics": null
-  },
-  "metrics": {
-    "interactive": {
-      "ttft_ms_p50": 25.89,
-      "ttft_ms_p90": 27.18,
-      "ttft_ms_p99": 28.51,
-      "tpot_ms_p50": 14.85,
-      "tpot_ms_p90": 15.17,
-      "tpot_ms_p99": 15.5,
-      "peak_memory_gb": null,
-      "elapsed_seconds_median": 481.4
-    }
-  },
-  "accuracy": {
-    "subset_score": null,
-    "baseline_delta": null,
-    "valid": false,
-    "notes": "Run --scenario accuracy to check model accuracy."
-  },
-  "meta": {
-    "submitted_by": "JuhaoLiang1997",
-    "submission_type": "individual",
-    "date": "2026-05-18",
-    "time": "17:21:09",
-    "run_id": "4f66d29d",
-    "run_name": "mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d",
-    "flagged": null,
-    "reproduce_script": "runners/moorethreads_vllm_musa_f2f6f965/runner.py",
-    "env_info_file": "../env_info.json",
-    "log_file": "run.log",
-    "samples_file": "samples.jsonl",
-    "notes": null,
-    "benchmark_start_time": "2026-05-18T08:56:46.686185+00:00",
-    "benchmark_end_time": "2026-05-18T09:21:09.800661+00:00",
-    "benchmark_elapsed_minutes": 24.4,
-    "model_load_seconds": 151.2
-  }
-}
\ No newline at end of file
diff --git a/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/online/result.json b/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/online/result.json
deleted file mode 100644
index eb13372d..00000000
--- a/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/online/result.json
+++ /dev/null
@@ -1,151 +0,0 @@
-{
-  "schema_version": "1.0",
-  "suite_id": "suite_F",
-  "implementation_id": "moorethreads_vllm_musa_f2f6f965",
-  "chip": {
-    "name": "MTT S4000",
-    "vendor": "Moore Threads",
-    "count": 1,
-    "memory_gb": 48.0,
-    "interconnect_intra_node": null,
-    "interconnect_inter_node": null
-  },
-  "environment": {
-    "collected_at": "2026-05-18T08:40:55.208034+00:00",
-    "accelerators": [
-      {
-        "index": 0,
-        "name": "MTT S4000",
-        "vendor": "Moore Threads",
-        "memory_gb": 48.0,
-        "driver_version": "2.7.0",
-        "firmware_version": null,
-        "supports_bf16": true
-      }
-    ],
-    "accelerator_platform": "moorethreads",
-    "accelerator_topology": null,
-    "intra_node_interconnect": null,
-    "cpu": {
-      "model": "Intel(R) Xeon(R) Gold 6430",
-      "physical_cores": 64,
-      "logical_cores": 128,
-      "numa_nodes": 2
-    },
-    "system_memory_gb": 1007.5,
-    "pcie_generation": "PCIe 16x/16x",
-    "cpu_accelerator_bandwidth_gbs": null,
-    "network_interfaces": [
-      {
-        "name": "mlx5_0",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_1",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_bond_0",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      }
-    ],
-    "os": "Ubuntu Jammy Jellyfish (development branch)",
-    "python_version": "3.10.8",
-    "kernel_version": "5.15.0-105-generic",
-    "runtime_version": "Moore Threads Driver 2.7.0",
-    "pytorch_version": "2.2.0"
-  },
-  "software": {
-    "framework": "vllm-musa",
-    "framework_version": "0.4.2",
-    "driver_version": "2.7.0",
-    "runtime_version": "Moore Threads Driver 2.7.0",
-    "os": "Ubuntu Jammy Jellyfish (development branch)",
-    "python_version": "3.10.8"
-  },
-  "model": {
-    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
-    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
-    "model_name": null,
-    "model_note": null,
-    "model_source": "local",
-    "architecture": "dense",
-    "parameter_count_b": 0.5,
-    "precision": "BF16",
-    "effective_dtype": "float16",
-    "quantization_method": null,
-    "model_format": "HuggingFace original"
-  },
-  "task": {
-    "scenario": "online",
-    "num_runs": 3,
-    "warmup_runs": 1,
-    "parallelism": {
-      "tensor_parallel_size": 1,
-      "pipeline_parallel_size": 1,
-      "expert_parallel_size": 1,
-      "data_parallel_size": 1
-    },
-    "extra_config": null,
-    "runtime_metrics": null
-  },
-  "metrics": {
-    "online": {
-      "sla_ttft_ms": 500,
-      "max_valid_qps": 40,
-      "results_by_qps": [
-        {
-          "target_qps": 10,
-          "achieved_qps": 10.0,
-          "ttft_ms_p50": 47.68,
-          "ttft_ms_p90": 96.31,
-          "ttft_ms_p99": 956.22,
-          "tpot_ms_p50": 47.25,
-          "tpot_ms_p90": 80.82,
-          "tpot_ms_p99": 131.63,
-          "elapsed_seconds_median": 37.8,
-          "sla_met": false
-        },
-        {
-          "target_qps": 40,
-          "achieved_qps": 40.0,
-          "ttft_ms_p50": 94.5,
-          "ttft_ms_p90": 194.64,
-          "ttft_ms_p99": 331.88,
-          "tpot_ms_p50": 74.76,
-          "tpot_ms_p90": 287.01,
-          "tpot_ms_p99": 444.19,
-          "elapsed_seconds_median": 19.0,
-          "sla_met": true
-        }
-      ]
-    }
-  },
-  "accuracy": {
-    "subset_score": null,
-    "baseline_delta": null,
-    "valid": false,
-    "notes": "Run --scenario accuracy to check model accuracy."
-  },
-  "meta": {
-    "submitted_by": "JuhaoLiang1997",
-    "submission_type": "individual",
-    "date": "2026-05-18",
-    "time": "16:53:54",
-    "run_id": "4f66d29d",
-    "run_name": "mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d",
-    "flagged": null,
-    "reproduce_script": "runners/moorethreads_vllm_musa_f2f6f965/runner.py",
-    "env_info_file": "../env_info.json",
-    "log_file": "run.log",
-    "samples_file": "samples.jsonl",
-    "notes": null,
-    "benchmark_start_time": "2026-05-18T08:51:01.188901+00:00",
-    "benchmark_end_time": "2026-05-18T08:53:54.250762+00:00",
-    "benchmark_elapsed_minutes": 2.9,
-    "model_load_seconds": 132.6
-  }
-}
\ No newline at end of file
diff --git a/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/result.json b/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/result.json
deleted file mode 100644
index a1c073de..00000000
--- a/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/result.json
+++ /dev/null
@@ -1,215 +0,0 @@
-{
-  "schema_version": "1.0",
-  "suite_id": "suite_F",
-  "implementation_id": "moorethreads_vllm_musa_f2f6f965",
-  "chip": {
-    "name": "MTT S4000",
-    "vendor": "Moore Threads",
-    "count": 1,
-    "memory_gb": 48.0,
-    "interconnect_intra_node": null,
-    "interconnect_inter_node": null
-  },
-  "environment": {
-    "collected_at": "2026-05-18T08:40:55.208034+00:00",
-    "accelerators": [
-      {
-        "index": 0,
-        "name": "MTT S4000",
-        "vendor": "Moore Threads",
-        "memory_gb": 48.0,
-        "driver_version": "2.7.0",
-        "firmware_version": null,
-        "supports_bf16": true
-      }
-    ],
-    "accelerator_platform": "moorethreads",
-    "accelerator_topology": null,
-    "intra_node_interconnect": null,
-    "cpu": {
-      "model": "Intel(R) Xeon(R) Gold 6430",
-      "physical_cores": 64,
-      "logical_cores": 128,
-      "numa_nodes": 2
-    },
-    "system_memory_gb": 1007.5,
-    "pcie_generation": "PCIe 16x/16x",
-    "cpu_accelerator_bandwidth_gbs": null,
-    "network_interfaces": [
-      {
-        "name": "mlx5_0",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_1",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_bond_0",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      }
-    ],
-    "os": "Ubuntu Jammy Jellyfish (development branch)",
-    "python_version": "3.10.8",
-    "kernel_version": "5.15.0-105-generic",
-    "runtime_version": "Moore Threads Driver 2.7.0",
-    "pytorch_version": "2.2.0"
-  },
-  "software": {
-    "framework": "vllm-musa",
-    "framework_version": "0.4.2",
-    "driver_version": "2.7.0",
-    "runtime_version": "Moore Threads Driver 2.7.0",
-    "os": "Ubuntu Jammy Jellyfish (development branch)",
-    "python_version": "3.10.8"
-  },
-  "model": {
-    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
-    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
-    "model_name": null,
-    "model_note": null,
-    "model_source": "local",
-    "architecture": "dense",
-    "parameter_count_b": 0.5,
-    "precision": "BF16",
-    "effective_dtype": "float16",
-    "quantization_method": null,
-    "model_format": "HuggingFace original"
-  },
-  "task": {
-    "scenarios_run": [
-      "offline",
-      "online",
-      "interactive"
-    ],
-    "parallelism": {
-      "tensor_parallel_size": 1,
-      "pipeline_parallel_size": 1,
-      "expert_parallel_size": 1,
-      "data_parallel_size": 1
-    },
-    "num_runs": 3,
-    "extra_config": null
-  },
-  "metrics": {
-    "derived": {},
-    "offline": {
-      "results_by_concurrency": [
-        {
-          "client_concurrency": 4,
-          "throughput_tokens_per_sec": 1994.51,
-          "throughput_tokens_per_sec_per_chip": 1994.51,
-          "throughput_tokens_per_sec_total": 3642.41,
-          "elapsed_seconds_median": 12.5,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        },
-        {
-          "client_concurrency": 16,
-          "throughput_tokens_per_sec": 1998.44,
-          "throughput_tokens_per_sec_per_chip": 1998.44,
-          "throughput_tokens_per_sec_total": 3649.59,
-          "elapsed_seconds_median": 12.5,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        },
-        {
-          "client_concurrency": 64,
-          "throughput_tokens_per_sec": 2004.02,
-          "throughput_tokens_per_sec_per_chip": 2004.02,
-          "throughput_tokens_per_sec_total": 3659.77,
-          "elapsed_seconds_median": 12.5,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        }
-      ]
-    },
-    "online": {
-      "sla_ttft_ms": 500,
-      "max_valid_qps": 40,
-      "results_by_qps": [
-        {
-          "target_qps": 10,
-          "achieved_qps": 10.0,
-          "ttft_ms_p50": 47.68,
-          "ttft_ms_p90": 96.31,
-          "ttft_ms_p99": 956.22,
-          "tpot_ms_p50": 47.25,
-          "tpot_ms_p90": 80.82,
-          "tpot_ms_p99": 131.63,
-          "elapsed_seconds_median": 37.8,
-          "sla_met": false
-        },
-        {
-          "target_qps": 40,
-          "achieved_qps": 40.0,
-          "ttft_ms_p50": 94.5,
-          "ttft_ms_p90": 194.64,
-          "ttft_ms_p99": 331.88,
-          "tpot_ms_p50": 74.76,
-          "tpot_ms_p90": 287.01,
-          "tpot_ms_p99": 444.19,
-          "elapsed_seconds_median": 19.0,
-          "sla_met": true
-        }
-      ]
-    },
-    "interactive": {
-      "ttft_ms_p50": 25.89,
-      "ttft_ms_p90": 27.18,
-      "ttft_ms_p99": 28.51,
-      "tpot_ms_p50": 14.85,
-      "tpot_ms_p90": 15.17,
-      "tpot_ms_p99": 15.5,
-      "peak_memory_gb": null,
-      "elapsed_seconds_median": 481.4
-    }
-  },
-  "accuracy": {
-    "subset_score": 0.07,
-    "baseline_delta": -0.31,
-    "valid": false,
-    "framework": "vllm-musa",
-    "precision": "BF16",
-    "notes": "Integrated accuracy check \u2014 used same vllm-musa instance as benchmark."
-  },
-  "meta": {
-    "submitted_by": "JuhaoLiang1997",
-    "submission_type": "individual",
-    "date": "2026-05-18",
-    "time": "16:48:27",
-    "run_id": "4f66d29d",
-    "run_name": "mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d",
-    "flagged": null,
-    "reproduce_script": "runners/moorethreads_vllm_musa_f2f6f965/runner.py",
-    "env_info_file": "../env_info.json",
-    "log_file": "run.log",
-    "samples_file": "samples.jsonl",
-    "notes": "Partial run: ['offline', 'online', 'interactive'] succeeded, ['accuracy'] failed.",
-    "benchmark_start_time": "2026-05-18T08:45:57.373367+00:00",
-    "benchmark_end_time": "2026-05-18T08:48:27.423209+00:00",
-    "benchmark_elapsed_minutes": 29.8,
-    "model_load_seconds": 146.8,
-    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'interactive'] scenarios.",
-    "scenario_dirs": {
-      "offline": "results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/offline",
-      "online": "results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/online",
-      "interactive": "results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/interactive"
-    }
-  }
-}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/result.json b/results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/result.json
deleted file mode 100644
index adcbafe3..00000000
--- a/results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/result.json
+++ /dev/null
@@ -1,652 +0,0 @@
-{
-  "schema_version": "1.0",
-  "suite_id": "suite_A",
-  "implementation_id": "nvidia_sglang_c43a8309",
-  "chip": {
-    "name": "NVIDIA A100-SXM4-40GB",
-    "vendor": "NVIDIA",
-    "count": 1,
-    "memory_gb": 40,
-    "interconnect_intra_node": null,
-    "interconnect_inter_node": null
-  },
-  "environment": {
-    "collected_at": "2026-05-06T11:15:11.081772+00:00",
-    "accelerators": [
-      {
-        "index": 0,
-        "name": "NVIDIA A100-SXM4-40GB",
-        "vendor": "NVIDIA",
-        "memory_gb": 40,
-        "driver_version": "565.57.01",
-        "firmware_version": null,
-        "compute_capability": "8.0",
-        "supports_bf16": true
-      }
-    ],
-    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tPXB\tNODE\tSYS\t0-31,64-95\t0\t\tN/A\nNIC0\tPXB\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tPXB\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tNODE\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
-    "intra_node_interconnect": null,
-    "cpu": {
-      "model": "AMD EPYC 7532 32-Core Processor",
-      "physical_cores": 64,
-      "logical_cores": 128,
-      "numa_nodes": 2
-    },
-    "system_memory_gb": 1007.7,
-    "pcie_generation": "PCIe Gen 4",
-    "cpu_accelerator_bandwidth_gbs": null,
-    "network_interfaces": [
-      {
-        "name": "mlx5_0",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_1",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_2",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_3",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      }
-    ],
-    "os": "Ubuntu 22.04.4 LTS",
-    "python_version": "3.10.20",
-    "kernel_version": "5.15.0-60-generic",
-    "runtime_version": "CUDA 12.8",
-    "pytorch_version": "2.9.1+cu128"
-  },
-  "software": {
-    "framework": "SGLang",
-    "framework_version": "0.5.6",
-    "driver_version": "565.57.01",
-    "runtime_version": "CUDA 12.8",
-    "os": "Ubuntu 22.04.4 LTS",
-    "python_version": "3.10.20"
-  },
-  "model": {
-    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
-    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
-    "model_name": null,
-    "model_note": null,
-    "model_source": "local",
-    "architecture": "dense",
-    "parameter_count_b": 8,
-    "precision": "BF16",
-    "effective_dtype": "bfloat16",
-    "quantization_method": null,
-    "model_format": "HuggingFace original"
-  },
-  "task": {
-    "scenarios_run": [
-      "offline",
-      "online",
-      "interactive",
-      "sustained",
-      "speculative",
-      "burst"
-    ],
-    "parallelism": {
-      "tensor_parallel_size": 1,
-      "pipeline_parallel_size": 1,
-      "expert_parallel_size": 1,
-      "data_parallel_size": 1
-    },
-    "num_runs": 3,
-    "extra_config": null
-  },
-  "metrics": {
-    "derived": {},
-    "offline": {
-      "results_by_concurrency": [
-        {
-          "client_concurrency": 8,
-          "throughput_tokens_per_sec": 3144.73,
-          "throughput_tokens_per_sec_per_chip": 3144.73,
-          "elapsed_seconds_median": 11.2,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        },
-        {
-          "client_concurrency": 32,
-          "throughput_tokens_per_sec": 3146.66,
-          "throughput_tokens_per_sec_per_chip": 3146.66,
-          "elapsed_seconds_median": 11.2,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        },
-        {
-          "client_concurrency": 128,
-          "throughput_tokens_per_sec": 3146.09,
-          "throughput_tokens_per_sec_per_chip": 3146.09,
-          "elapsed_seconds_median": 11.2,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        }
-      ]
-    },
-    "online": {
-      "sla_ttft_ms": 500,
-      "max_valid_qps": 100,
-      "results_by_qps": [
-        {
-          "target_qps": 5,
-          "achieved_qps": 5,
-          "ttft_ms_p50": 43.91,
-          "ttft_ms_p90": 62.26,
-          "ttft_ms_p99": 972.47,
-          "tpot_ms_p50": 15.63,
-          "tpot_ms_p90": 17.36,
-          "tpot_ms_p99": 18.58,
-          "elapsed_seconds_median": 66.1,
-          "sla_met": false
-        },
-        {
-          "target_qps": 25,
-          "achieved_qps": 25,
-          "ttft_ms_p50": 52.85,
-          "ttft_ms_p90": 67.65,
-          "ttft_ms_p99": 80.71,
-          "tpot_ms_p50": 36.16,
-          "tpot_ms_p90": 41.45,
-          "tpot_ms_p99": 57.42,
-          "elapsed_seconds_median": 17.1,
-          "sla_met": true
-        },
-        {
-          "target_qps": 100,
-          "achieved_qps": 100,
-          "ttft_ms_p50": 50.85,
-          "ttft_ms_p90": 62.88,
-          "ttft_ms_p99": 245.1,
-          "tpot_ms_p50": 41.47,
-          "tpot_ms_p90": 53.07,
-          "tpot_ms_p99": 177.42,
-          "elapsed_seconds_median": 10.8,
-          "sla_met": true
-        }
-      ]
-    },
-    "interactive": {
-      "ttft_ms_p50": 32.52,
-      "ttft_ms_p90": 44.71,
-      "ttft_ms_p99": 61.84,
-      "tpot_ms_p50": 12.93,
-      "tpot_ms_p90": 12.98,
-      "tpot_ms_p99": 13.03,
-      "peak_memory_gb": null,
-      "elapsed_seconds_median": 381.4
-    },
-    "sustained": {
-      "sustained_concurrency": 8,
-      "duration_minutes": 30,
-      "warmup_minutes": 2,
-      "sample_interval_seconds": 60,
-      "samples": [
-        {
-          "minute": 1,
-          "is_warmup": true,
-          "throughput_tokens_per_sec": 477,
-          "tokens_out": 28638,
-          "tokens_in": 0,
-          "requests_completed": 154,
-          "ttft_ms_p50": 49.6,
-          "ttft_ms_p99": 6750.3
-        },
-        {
-          "minute": 2,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 566.2,
-          "tokens_out": 33972,
-          "tokens_in": 0,
-          "requests_completed": 180,
-          "ttft_ms_p50": 44.6,
-          "ttft_ms_p99": 61.2
-        },
-        {
-          "minute": 3,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 560.8,
-          "tokens_out": 33639,
-          "tokens_in": 0,
-          "requests_completed": 177,
-          "ttft_ms_p50": 44.5,
-          "ttft_ms_p99": 61.3
-        },
-        {
-          "minute": 4,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 565.3,
-          "tokens_out": 33929,
-          "tokens_in": 0,
-          "requests_completed": 181,
-          "ttft_ms_p50": 44.3,
-          "ttft_ms_p99": 60.8
-        },
-        {
-          "minute": 5,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 561.4,
-          "tokens_out": 33685,
-          "tokens_in": 0,
-          "requests_completed": 179,
-          "ttft_ms_p50": 44.2,
-          "ttft_ms_p99": 61.6
-        },
-        {
-          "minute": 6,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 561.9,
-          "tokens_out": 33707,
-          "tokens_in": 0,
-          "requests_completed": 180,
-          "ttft_ms_p50": 44.3,
-          "ttft_ms_p99": 60.8
-        },
-        {
-          "minute": 7,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 570,
-          "tokens_out": 34190,
-          "tokens_in": 0,
-          "requests_completed": 179,
-          "ttft_ms_p50": 44.3,
-          "ttft_ms_p99": 61.4
-        },
-        {
-          "minute": 8,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 558.3,
-          "tokens_out": 33498,
-          "tokens_in": 0,
-          "requests_completed": 177,
-          "ttft_ms_p50": 44.6,
-          "ttft_ms_p99": 62.6
-        },
-        {
-          "minute": 9,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 563.3,
-          "tokens_out": 33801,
-          "tokens_in": 0,
-          "requests_completed": 180,
-          "ttft_ms_p50": 44.3,
-          "ttft_ms_p99": 61.8
-        },
-        {
-          "minute": 10,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 552.7,
-          "tokens_out": 33163,
-          "tokens_in": 0,
-          "requests_completed": 176,
-          "ttft_ms_p50": 44.5,
-          "ttft_ms_p99": 52.4
-        },
-        {
-          "minute": 11,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 569.3,
-          "tokens_out": 34157,
-          "tokens_in": 0,
-          "requests_completed": 181,
-          "ttft_ms_p50": 44.1,
-          "ttft_ms_p99": 60.4
-        },
-        {
-          "minute": 12,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 558.9,
-          "tokens_out": 33526,
-          "tokens_in": 0,
-          "requests_completed": 177,
-          "ttft_ms_p50": 44.2,
-          "ttft_ms_p99": 47.9
-        },
-        {
-          "minute": 13,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 568.4,
-          "tokens_out": 34113,
-          "tokens_in": 0,
-          "requests_completed": 180,
-          "ttft_ms_p50": 44.5,
-          "ttft_ms_p99": 139.2
-        },
-        {
-          "minute": 14,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 557.2,
-          "tokens_out": 33424,
-          "tokens_in": 0,
-          "requests_completed": 178,
-          "ttft_ms_p50": 44.5,
-          "ttft_ms_p99": 61.5
-        },
-        {
-          "minute": 15,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 565.5,
-          "tokens_out": 33942,
-          "tokens_in": 0,
-          "requests_completed": 181,
-          "ttft_ms_p50": 44.3,
-          "ttft_ms_p99": 61.7
-        },
-        {
-          "minute": 16,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 554.1,
-          "tokens_out": 33238,
-          "tokens_in": 0,
-          "requests_completed": 175,
-          "ttft_ms_p50": 44.3,
-          "ttft_ms_p99": 61.2
-        },
-        {
-          "minute": 17,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 563.7,
-          "tokens_out": 33832,
-          "tokens_in": 0,
-          "requests_completed": 178,
-          "ttft_ms_p50": 44.4,
-          "ttft_ms_p99": 61.7
-        },
-        {
-          "minute": 18,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 563.3,
-          "tokens_out": 33783,
-          "tokens_in": 0,
-          "requests_completed": 180,
-          "ttft_ms_p50": 44.5,
-          "ttft_ms_p99": 62.1
-        },
-        {
-          "minute": 19,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 565,
-          "tokens_out": 33912,
-          "tokens_in": 0,
-          "requests_completed": 180,
-          "ttft_ms_p50": 44.6,
-          "ttft_ms_p99": 62.2
-        },
-        {
-          "minute": 20,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 563.3,
-          "tokens_out": 33773,
-          "tokens_in": 0,
-          "requests_completed": 179,
-          "ttft_ms_p50": 44.6,
-          "ttft_ms_p99": 61.6
-        },
-        {
-          "minute": 21,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 564.7,
-          "tokens_out": 33889,
-          "tokens_in": 0,
-          "requests_completed": 178,
-          "ttft_ms_p50": 44.3,
-          "ttft_ms_p99": 61.3
-        },
-        {
-          "minute": 22,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 564.3,
-          "tokens_out": 33853,
-          "tokens_in": 0,
-          "requests_completed": 179,
-          "ttft_ms_p50": 44.6,
-          "ttft_ms_p99": 61.1
-        },
-        {
-          "minute": 23,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 562.3,
-          "tokens_out": 33744,
-          "tokens_in": 0,
-          "requests_completed": 180,
-          "ttft_ms_p50": 44.5,
-          "ttft_ms_p99": 61.2
-        },
-        {
-          "minute": 24,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 569.5,
-          "tokens_out": 34180,
-          "tokens_in": 0,
-          "requests_completed": 180,
-          "ttft_ms_p50": 44.3,
-          "ttft_ms_p99": 61.3
-        },
-        {
-          "minute": 25,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 550.8,
-          "tokens_out": 33047,
-          "tokens_in": 0,
-          "requests_completed": 176,
-          "ttft_ms_p50": 44.5,
-          "ttft_ms_p99": 61.5
-        },
-        {
-          "minute": 26,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 562.5,
-          "tokens_out": 33749,
-          "tokens_in": 0,
-          "requests_completed": 178,
-          "ttft_ms_p50": 44.5,
-          "ttft_ms_p99": 63.9
-        },
-        {
-          "minute": 27,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 561.7,
-          "tokens_out": 33689,
-          "tokens_in": 0,
-          "requests_completed": 179,
-          "ttft_ms_p50": 44.7,
-          "ttft_ms_p99": 61.6
-        },
-        {
-          "minute": 28,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 566.5,
-          "tokens_out": 34010,
-          "tokens_in": 0,
-          "requests_completed": 181,
-          "ttft_ms_p50": 44.7,
-          "ttft_ms_p99": 141.7
-        },
-        {
-          "minute": 29,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 558.2,
-          "tokens_out": 33464,
-          "tokens_in": 0,
-          "requests_completed": 178,
-          "ttft_ms_p50": 44.3,
-          "ttft_ms_p99": 60.9
-        }
-      ],
-      "sustained_throughput_tokens_per_sec": 562.5,
-      "throttle_ratio": 0.966,
-      "throttle_onset_minute": null,
-      "ttft_p99_drift_ms": -0.3,
-      "throughput_post_warmup_reliability": {
-        "n": 28,
-        "mean": 562.5,
-        "std": 4.9,
-        "cv_pct": 0.86,
-        "stability": "stable",
-        "runs": [
-          566.2,
-          560.8,
-          565.3,
-          561.4,
-          561.9,
-          570.0,
-          558.3,
-          563.3,
-          552.7,
-          569.3,
-          558.9,
-          568.4,
-          557.2,
-          565.5,
-          554.1,
-          563.7,
-          563.3,
-          565.0,
-          563.3,
-          564.7,
-          564.3,
-          562.3,
-          569.5,
-          550.8,
-          562.5,
-          561.7,
-          566.5,
-          558.2
-        ]
-      }
-    },
-    "speculative": {
-      "results_by_concurrency": [
-        {
-          "client_concurrency": 8,
-          "throughput_tokens_per_sec": 705.16,
-          "throughput_tokens_per_sec_per_chip": 705.16,
-          "elapsed_seconds_median": 49.7,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        },
-        {
-          "client_concurrency": 32,
-          "throughput_tokens_per_sec": 703.58,
-          "throughput_tokens_per_sec_per_chip": 703.58,
-          "elapsed_seconds_median": 49.8,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        },
-        {
-          "client_concurrency": 128,
-          "throughput_tokens_per_sec": 704.29,
-          "throughput_tokens_per_sec_per_chip": 704.29,
-          "elapsed_seconds_median": 49.7,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        }
-      ]
-    },
-    "burst": {
-      "sla_ttft_ms": 500,
-      "burst_steady_qps": 5,
-      "burst_peak_qps": 25,
-      "burst_duration_seconds": 30,
-      "burst_interval_seconds": 120,
-      "steady_requests_total": 1812,
-      "burst_requests_total": 2245,
-      "steady_ttft_p50_ms": 43.06,
-      "steady_ttft_p99_ms": 3985.36,
-      "burst_ttft_p50_ms": 57.82,
-      "burst_ttft_p99_ms": 99.11,
-      "sla_met_during_burst": true,
-      "burst_degradation_ratio": 0.025,
-      "results_by_cycle": [
-        {
-          "cycle": 1,
-          "steady_requests": 581,
-          "burst_requests": 760,
-          "steady_ttft_p99_ms": 5093.98,
-          "burst_ttft_p99_ms": 103.16
-        },
-        {
-          "cycle": 2,
-          "steady_requests": 595,
-          "burst_requests": 734,
-          "steady_ttft_p99_ms": 63.56,
-          "burst_ttft_p99_ms": 90.45
-        },
-        {
-          "cycle": 3,
-          "steady_requests": 636,
-          "burst_requests": 751,
-          "steady_ttft_p99_ms": 65.44,
-          "burst_ttft_p99_ms": 85.05
-        }
-      ]
-    }
-  },
-  "accuracy": {
-    "subset_score": 0.61,
-    "baseline_delta": 0.01,
-    "valid": true,
-    "framework": "SGLang",
-    "precision": "BF16",
-    "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
-  },
-  "meta": {
-    "submitted_by": "Gong-K",
-    "submission_type": "individual",
-    "date": "2026-05-06",
-    "time": "11:21:34",
-    "run_id": "958afbbd",
-    "run_name": "nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd",
-    "flagged": null,
-    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
-    "env_info_file": "../env_info.json",
-    "log_file": null,
-    "samples_file": "samples.jsonl",
-    "notes": null,
-    "benchmark_start_time": "2026-05-06T11:19:15.947406+00:00",
-    "benchmark_end_time": "2026-05-06T11:21:34.758403+00:00",
-    "benchmark_elapsed_minutes": 74.2,
-    "model_load_seconds": 50,
-    "benchmark_elapsed_minutes_note": "Total across [offline, online, interactive, sustained, speculative, burst] scenarios.",
-    "scenario_dirs": {
-      "offline": "results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/offline",
-      "online": "results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/online",
-      "interactive": "results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/interactive",
-      "sustained": "results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/sustained",
-      "speculative": "results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/speculative",
-      "burst": "results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/burst"
-    }
-  }
-}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/env_info.json b/results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/env_info.json
deleted file mode 100644
index 967b71e8..00000000
--- a/results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/env_info.json
+++ /dev/null
@@ -1,25 +0,0 @@
-{
-  "collected_at": "2026-04-30T00:00:00Z",
-  "accelerators": [
-    {
-      "index": 0,
-      "name": "NVIDIA A100-SXM4-40GB",
-      "memory_gb": 40,
-      "driver_version": "565.57.01",
-      "firmware_version": null
-    }
-  ],
-  "accelerator_topology": null,
-  "cpu": {
-    "model": "unknown",
-    "physical_cores": 1,
-    "logical_cores": 1,
-    "numa_nodes": 1
-  },
-  "system_memory_gb": 0,
-  "pcie_generation": "unknown",
-  "cpu_accelerator_bandwidth_gbs": null,
-  "network_interfaces": null,
-  "kernel_version": "unknown",
-  "runtime_version": "CUDA 12.8"
-}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/result.json b/results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/result.json
deleted file mode 100644
index 5105bc28..00000000
--- a/results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/result.json
+++ /dev/null
@@ -1,963 +0,0 @@
-{
-  "schema_version": "1.0",
-  "suite_id": "suite_C",
-  "implementation_id": "nvidia_sglang_c43a8309",
-  "chip": {
-    "name": "NVIDIA A100-SXM4-40GB",
-    "vendor": "NVIDIA",
-    "count": 1,
-    "memory_gb": 40,
-    "interconnect_intra_node": null,
-    "interconnect_inter_node": null
-  },
-  "software": {
-    "framework": "SGLang",
-    "framework_version": "0.5.6",
-    "driver_version": "565.57.01",
-    "runtime_version": "CUDA 12.8",
-    "os": "Ubuntu 22.04.4 LTS",
-    "python_version": "3.10.20"
-  },
-  "model": {
-    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
-    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
-    "model_name": null,
-    "model_note": null,
-    "model_source": "local",
-    "architecture": "dense",
-    "parameter_count_b": 8,
-    "precision": "BF16",
-    "effective_dtype": "bfloat16",
-    "quantization_method": null,
-    "model_format": "HuggingFace original",
-    "_note": "suite model_id. Each precision level uses its own quantized checkpoint."
-  },
-  "task": {
-    "scenarios_run": [
-      "accuracy",
-      "offline",
-      "online",
-      "sustained"
-    ],
-    "precision_levels_run": [
-      "BF16",
-      "FP8",
-      "W8A8",
-      "W8A16",
-      "W4A16"
-    ],
-    "precision_levels_skipped": [
-      "FP16"
-    ],
-    "parallelism": {
-      "tensor_parallel_size": 1,
-      "pipeline_parallel_size": 1,
-      "expert_parallel_size": 1,
-      "data_parallel_size": 1
-    },
-    "num_runs": 3,
-    "extra_config": null
-  },
-  "metrics": {
-    "quantization": {
-      "results_by_precision": [
-        {
-          "precision": "BF16",
-          "model_id": "meta-llama/Llama-3.1-8B-Instruct",
-          "best_throughput_tokens_per_sec": 3160.74,
-          "accuracy_score": 0.57,
-          "accuracy_baseline_delta": 0.01,
-          "accuracy_valid": true,
-          "quality_efficiency": 1801.6,
-          "speedup_vs_bf16": 1,
-          "results_by_concurrency": [
-            {
-              "client_concurrency": 1,
-              "throughput_tokens_per_sec": 3149.6,
-              "throughput_tokens_per_sec_per_chip": 3149.6,
-              "elapsed_seconds_median": 11.4,
-              "peak_memory_gb": null,
-              "power_watts_avg": null,
-              "power_watts_peak": null,
-              "oom": false,
-              "_throughput_note": "output_only",
-              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-            },
-            {
-              "client_concurrency": 4,
-              "throughput_tokens_per_sec": 3160.74,
-              "throughput_tokens_per_sec_per_chip": 3160.74,
-              "elapsed_seconds_median": 11.3,
-              "peak_memory_gb": null,
-              "power_watts_avg": null,
-              "power_watts_peak": null,
-              "oom": false,
-              "_throughput_note": "output_only",
-              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-            },
-            {
-              "client_concurrency": 16,
-              "throughput_tokens_per_sec": 3148.17,
-              "throughput_tokens_per_sec_per_chip": 3148.17,
-              "elapsed_seconds_median": 11.3,
-              "peak_memory_gb": null,
-              "power_watts_avg": null,
-              "power_watts_peak": null,
-              "oom": false,
-              "_throughput_note": "output_only",
-              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-            },
-            {
-              "client_concurrency": 64,
-              "throughput_tokens_per_sec": 3156.58,
-              "throughput_tokens_per_sec_per_chip": 3156.58,
-              "elapsed_seconds_median": 11.3,
-              "peak_memory_gb": null,
-              "power_watts_avg": null,
-              "power_watts_peak": null,
-              "oom": false,
-              "_throughput_note": "output_only",
-              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-            }
-          ],
-          "result_dir": "bf16",
-          "effective_dtype": "bfloat16",
-          "quantization_method": null
-        },
-        {
-          "precision": "W8A16",
-          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
-          "best_throughput_tokens_per_sec": 3396.91,
-          "accuracy_score": 0.58,
-          "accuracy_baseline_delta": -0.01,
-          "accuracy_valid": true,
-          "quality_efficiency": 1970.2,
-          "speedup_vs_bf16": 1.075,
-          "results_by_concurrency": [
-            {
-              "client_concurrency": 1,
-              "throughput_tokens_per_sec": 3396.91,
-              "throughput_tokens_per_sec_per_chip": 3396.91,
-              "elapsed_seconds_median": 10.6,
-              "peak_memory_gb": null,
-              "power_watts_avg": null,
-              "power_watts_peak": null,
-              "oom": false,
-              "_throughput_note": "output_only",
-              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-            },
-            {
-              "client_concurrency": 4,
-              "throughput_tokens_per_sec": 3316.93,
-              "throughput_tokens_per_sec_per_chip": 3316.93,
-              "elapsed_seconds_median": 10.8,
-              "peak_memory_gb": null,
-              "power_watts_avg": null,
-              "power_watts_peak": null,
-              "oom": false,
-              "_throughput_note": "output_only",
-              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-            },
-            {
-              "client_concurrency": 16,
-              "throughput_tokens_per_sec": 3387.33,
-              "throughput_tokens_per_sec_per_chip": 3387.33,
-              "elapsed_seconds_median": 10.6,
-              "peak_memory_gb": null,
-              "power_watts_avg": null,
-              "power_watts_peak": null,
-              "oom": false,
-              "_throughput_note": "output_only",
-              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-            },
-            {
-              "client_concurrency": 64,
-              "throughput_tokens_per_sec": 3395.75,
-              "throughput_tokens_per_sec_per_chip": 3395.75,
-              "elapsed_seconds_median": 10.6,
-              "peak_memory_gb": null,
-              "power_watts_avg": null,
-              "power_watts_peak": null,
-              "oom": false,
-              "_throughput_note": "output_only",
-              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-            }
-          ],
-          "result_dir": "w8a16",
-          "effective_dtype": "auto",
-          "quantization_method": "compressed-tensors"
-        },
-        {
-          "precision": "W4A16",
-          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
-          "best_throughput_tokens_per_sec": 1817.91,
-          "accuracy_score": 0.56,
-          "accuracy_baseline_delta": -0.01,
-          "accuracy_valid": true,
-          "quality_efficiency": 1018,
-          "speedup_vs_bf16": 0.575,
-          "results_by_concurrency": [
-            {
-              "client_concurrency": 1,
-              "throughput_tokens_per_sec": 1808.4,
-              "throughput_tokens_per_sec_per_chip": 1808.4,
-              "elapsed_seconds_median": 19,
-              "peak_memory_gb": null,
-              "power_watts_avg": null,
-              "power_watts_peak": null,
-              "oom": false,
-              "_throughput_note": "output_only",
-              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-            },
-            {
-              "client_concurrency": 4,
-              "throughput_tokens_per_sec": 1810.14,
-              "throughput_tokens_per_sec_per_chip": 1810.14,
-              "elapsed_seconds_median": 19,
-              "peak_memory_gb": null,
-              "power_watts_avg": null,
-              "power_watts_peak": null,
-              "oom": false,
-              "_throughput_note": "output_only",
-              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-            },
-            {
-              "client_concurrency": 16,
-              "throughput_tokens_per_sec": 1810.03,
-              "throughput_tokens_per_sec_per_chip": 1810.03,
-              "elapsed_seconds_median": 19,
-              "peak_memory_gb": null,
-              "power_watts_avg": null,
-              "power_watts_peak": null,
-              "oom": false,
-              "_throughput_note": "output_only",
-              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-            },
-            {
-              "client_concurrency": 64,
-              "throughput_tokens_per_sec": 1817.91,
-              "throughput_tokens_per_sec_per_chip": 1817.91,
-              "elapsed_seconds_median": 19,
-              "peak_memory_gb": null,
-              "power_watts_avg": null,
-              "power_watts_peak": null,
-              "oom": false,
-              "_throughput_note": "output_only",
-              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-            }
-          ],
-          "result_dir": "w4a16",
-          "effective_dtype": "auto",
-          "quantization_method": "gptq"
-        }
-      ]
-    },
-    "derived": {},
-    "quantization_online": {
-      "results_by_precision": [
-        {
-          "precision": "BF16",
-          "max_valid_qps": 50,
-          "results_by_qps": [
-            {
-              "target_qps": 5,
-              "achieved_qps": 5,
-              "ttft_ms_p50": 44.84,
-              "ttft_ms_p90": 63.85,
-              "ttft_ms_p99": 1627.45,
-              "tpot_ms_p50": 15.7,
-              "tpot_ms_p90": 17.63,
-              "tpot_ms_p99": 19.03,
-              "elapsed_seconds_median": 66.2,
-              "sla_met": false
-            },
-            {
-              "target_qps": 10,
-              "achieved_qps": 10,
-              "ttft_ms_p50": 47,
-              "ttft_ms_p90": 56.65,
-              "ttft_ms_p99": 65.26,
-              "tpot_ms_p50": 21.61,
-              "tpot_ms_p90": 23.39,
-              "tpot_ms_p99": 26.07,
-              "elapsed_seconds_median": 32.9,
-              "sla_met": true
-            },
-            {
-              "target_qps": 25,
-              "achieved_qps": 25,
-              "ttft_ms_p50": 52.95,
-              "ttft_ms_p90": 66.65,
-              "ttft_ms_p99": 78.1,
-              "tpot_ms_p50": 35.4,
-              "tpot_ms_p90": 40.44,
-              "tpot_ms_p99": 49.08,
-              "elapsed_seconds_median": 17.1,
-              "sla_met": true
-            },
-            {
-              "target_qps": 50,
-              "achieved_qps": 50,
-              "ttft_ms_p50": 51.84,
-              "ttft_ms_p90": 67.78,
-              "ttft_ms_p99": 87.9,
-              "tpot_ms_p50": 41.22,
-              "tpot_ms_p90": 49.79,
-              "tpot_ms_p99": 75.08,
-              "elapsed_seconds_median": 12.9,
-              "sla_met": true
-            }
-          ]
-        },
-        {
-          "precision": "W8A16",
-          "max_valid_qps": 50,
-          "results_by_qps": [
-            {
-              "target_qps": 5,
-              "achieved_qps": 5,
-              "ttft_ms_p50": 36.98,
-              "ttft_ms_p90": 65.92,
-              "ttft_ms_p99": 1707.78,
-              "tpot_ms_p50": 9.64,
-              "tpot_ms_p90": 10.99,
-              "tpot_ms_p99": 14.19,
-              "elapsed_seconds_median": 64.8,
-              "sla_met": false
-            },
-            {
-              "target_qps": 10,
-              "achieved_qps": 10,
-              "ttft_ms_p50": 42.23,
-              "ttft_ms_p90": 54.06,
-              "ttft_ms_p99": 61.13,
-              "tpot_ms_p50": 15.34,
-              "tpot_ms_p90": 19.73,
-              "tpot_ms_p99": 21.49,
-              "elapsed_seconds_median": 31.8,
-              "sla_met": true
-            },
-            {
-              "target_qps": 25,
-              "achieved_qps": 25,
-              "ttft_ms_p50": 54.16,
-              "ttft_ms_p90": 70.81,
-              "ttft_ms_p99": 86.6,
-              "tpot_ms_p50": 38.87,
-              "tpot_ms_p90": 45.19,
-              "tpot_ms_p99": 56.31,
-              "elapsed_seconds_median": 17.6,
-              "sla_met": true
-            },
-            {
-              "target_qps": 50,
-              "achieved_qps": 50,
-              "ttft_ms_p50": 55.28,
-              "ttft_ms_p90": 74.41,
-              "ttft_ms_p99": 101.43,
-              "tpot_ms_p50": 47.2,
-              "tpot_ms_p90": 55.92,
-              "tpot_ms_p99": 82.54,
-              "elapsed_seconds_median": 14.1,
-              "sla_met": true
-            }
-          ]
-        },
-        {
-          "precision": "W4A16",
-          "max_valid_qps": 50,
-          "results_by_qps": [
-            {
-              "target_qps": 5,
-              "achieved_qps": 5,
-              "ttft_ms_p50": 57.96,
-              "ttft_ms_p90": 100.93,
-              "ttft_ms_p99": 1674.78,
-              "tpot_ms_p50": 23.16,
-              "tpot_ms_p90": 36.67,
-              "tpot_ms_p99": 42.7,
-              "elapsed_seconds_median": 66.6,
-              "sla_met": false
-            },
-            {
-              "target_qps": 10,
-              "achieved_qps": 10,
-              "ttft_ms_p50": 65.68,
-              "ttft_ms_p90": 85.76,
-              "ttft_ms_p99": 92.43,
-              "tpot_ms_p50": 42.17,
-              "tpot_ms_p90": 43.43,
-              "tpot_ms_p99": 46.09,
-              "elapsed_seconds_median": 35.7,
-              "sla_met": true
-            },
-            {
-              "target_qps": 25,
-              "achieved_qps": 25,
-              "ttft_ms_p50": 64.12,
-              "ttft_ms_p90": 88.09,
-              "ttft_ms_p99": 113.73,
-              "tpot_ms_p50": 53.25,
-              "tpot_ms_p90": 59.64,
-              "tpot_ms_p99": 73.73,
-              "elapsed_seconds_median": 20.9,
-              "sla_met": true
-            },
-            {
-              "target_qps": 50,
-              "achieved_qps": 50,
-              "ttft_ms_p50": 57.15,
-              "ttft_ms_p90": 81.87,
-              "ttft_ms_p99": 103.31,
-              "tpot_ms_p50": 55.67,
-              "tpot_ms_p90": 67.41,
-              "tpot_ms_p99": 86.73,
-              "elapsed_seconds_median": 16.4,
-              "sla_met": true
-            }
-          ]
-        }
-      ]
-    },
-    "quantization_sustained": {
-      "results_by_precision": [
-        {
-          "precision": "BF16",
-          "sustained_throughput_tokens_per_sec": 558.6,
-          "throttle_ratio": 0.889,
-          "throttle_onset_minute": 1,
-          "ttft_p99_drift_ms": -2930,
-          "sustained_concurrency": 8,
-          "duration_minutes": 15,
-          "samples": [
-            {
-              "minute": 1,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 510.3,
-              "tokens_out": 30617,
-              "tokens_in": 0,
-              "requests_completed": 168,
-              "ttft_ms_p50": 47,
-              "ttft_ms_p99": 2980.6
-            },
-            {
-              "minute": 2,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 566.2,
-              "tokens_out": 33989,
-              "tokens_in": 0,
-              "requests_completed": 185,
-              "ttft_ms_p50": 43.1,
-              "ttft_ms_p99": 59.5
-            },
-            {
-              "minute": 3,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 555.8,
-              "tokens_out": 33345,
-              "tokens_in": 0,
-              "requests_completed": 183,
-              "ttft_ms_p50": 43.1,
-              "ttft_ms_p99": 50.9
-            },
-            {
-              "minute": 4,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 574.1,
-              "tokens_out": 34447,
-              "tokens_in": 0,
-              "requests_completed": 183,
-              "ttft_ms_p50": 42.7,
-              "ttft_ms_p99": 59.1
-            },
-            {
-              "minute": 5,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 564.4,
-              "tokens_out": 33852,
-              "tokens_in": 0,
-              "requests_completed": 182,
-              "ttft_ms_p50": 43,
-              "ttft_ms_p99": 45.9
-            },
-            {
-              "minute": 6,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 552.2,
-              "tokens_out": 33145,
-              "tokens_in": 0,
-              "requests_completed": 180,
-              "ttft_ms_p50": 43.3,
-              "ttft_ms_p99": 59.3
-            },
-            {
-              "minute": 7,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 562.1,
-              "tokens_out": 33715,
-              "tokens_in": 0,
-              "requests_completed": 184,
-              "ttft_ms_p50": 43.1,
-              "ttft_ms_p99": 59.1
-            },
-            {
-              "minute": 8,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 562.6,
-              "tokens_out": 33751,
-              "tokens_in": 0,
-              "requests_completed": 183,
-              "ttft_ms_p50": 43,
-              "ttft_ms_p99": 58.6
-            },
-            {
-              "minute": 9,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 565.5,
-              "tokens_out": 33923,
-              "tokens_in": 0,
-              "requests_completed": 183,
-              "ttft_ms_p50": 43.4,
-              "ttft_ms_p99": 46.6
-            },
-            {
-              "minute": 10,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 559.5,
-              "tokens_out": 33594,
-              "tokens_in": 0,
-              "requests_completed": 180,
-              "ttft_ms_p50": 43.5,
-              "ttft_ms_p99": 59.5
-            },
-            {
-              "minute": 11,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 555.9,
-              "tokens_out": 33329,
-              "tokens_in": 0,
-              "requests_completed": 181,
-              "ttft_ms_p50": 43.2,
-              "ttft_ms_p99": 58.6
-            },
-            {
-              "minute": 12,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 561.2,
-              "tokens_out": 33679,
-              "tokens_in": 0,
-              "requests_completed": 183,
-              "ttft_ms_p50": 43.2,
-              "ttft_ms_p99": 59.9
-            },
-            {
-              "minute": 13,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 568,
-              "tokens_out": 34091,
-              "tokens_in": 0,
-              "requests_completed": 186,
-              "ttft_ms_p50": 43.4,
-              "ttft_ms_p99": 57.6
-            },
-            {
-              "minute": 14,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 562.5,
-              "tokens_out": 33735,
-              "tokens_in": 0,
-              "requests_completed": 183,
-              "ttft_ms_p50": 43.4,
-              "ttft_ms_p99": 50.6
-            }
-          ]
-        },
-        {
-          "precision": "W8A16",
-          "sustained_throughput_tokens_per_sec": 841.8,
-          "throttle_ratio": 0.902,
-          "throttle_onset_minute": null,
-          "ttft_p99_drift_ms": -3044.7,
-          "sustained_concurrency": 8,
-          "duration_minutes": 15,
-          "samples": [
-            {
-              "minute": 1,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 770,
-              "tokens_out": 46214,
-              "tokens_in": 0,
-              "requests_completed": 254,
-              "ttft_ms_p50": 35.2,
-              "ttft_ms_p99": 3097.4
-            },
-            {
-              "minute": 2,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 851.3,
-              "tokens_out": 51089,
-              "tokens_in": 0,
-              "requests_completed": 281,
-              "ttft_ms_p50": 34.7,
-              "ttft_ms_p99": 45.2
-            },
-            {
-              "minute": 3,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 851.8,
-              "tokens_out": 51090,
-              "tokens_in": 0,
-              "requests_completed": 275,
-              "ttft_ms_p50": 34.8,
-              "ttft_ms_p99": 52.2
-            },
-            {
-              "minute": 4,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 839.2,
-              "tokens_out": 50347,
-              "tokens_in": 0,
-              "requests_completed": 277,
-              "ttft_ms_p50": 34.8,
-              "ttft_ms_p99": 49.7
-            },
-            {
-              "minute": 5,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 849.4,
-              "tokens_out": 50977,
-              "tokens_in": 0,
-              "requests_completed": 278,
-              "ttft_ms_p50": 35,
-              "ttft_ms_p99": 53
-            },
-            {
-              "minute": 6,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 850.5,
-              "tokens_out": 51013,
-              "tokens_in": 0,
-              "requests_completed": 279,
-              "ttft_ms_p50": 34.8,
-              "ttft_ms_p99": 47.5
-            },
-            {
-              "minute": 7,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 850.2,
-              "tokens_out": 51029,
-              "tokens_in": 0,
-              "requests_completed": 275,
-              "ttft_ms_p50": 35.2,
-              "ttft_ms_p99": 52.9
-            },
-            {
-              "minute": 8,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 833.1,
-              "tokens_out": 49975,
-              "tokens_in": 0,
-              "requests_completed": 273,
-              "ttft_ms_p50": 35.1,
-              "ttft_ms_p99": 52.4
-            },
-            {
-              "minute": 9,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 853.8,
-              "tokens_out": 51245,
-              "tokens_in": 0,
-              "requests_completed": 281,
-              "ttft_ms_p50": 34.9,
-              "ttft_ms_p99": 47.7
-            },
-            {
-              "minute": 10,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 852.4,
-              "tokens_out": 51154,
-              "tokens_in": 0,
-              "requests_completed": 280,
-              "ttft_ms_p50": 35,
-              "ttft_ms_p99": 39.7
-            },
-            {
-              "minute": 11,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 849.4,
-              "tokens_out": 50955,
-              "tokens_in": 0,
-              "requests_completed": 277,
-              "ttft_ms_p50": 35,
-              "ttft_ms_p99": 51.7
-            },
-            {
-              "minute": 12,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 840.2,
-              "tokens_out": 50400,
-              "tokens_in": 0,
-              "requests_completed": 275,
-              "ttft_ms_p50": 34.9,
-              "ttft_ms_p99": 52.7
-            },
-            {
-              "minute": 13,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 850.7,
-              "tokens_out": 51044,
-              "tokens_in": 0,
-              "requests_completed": 281,
-              "ttft_ms_p50": 35,
-              "ttft_ms_p99": 47.5
-            },
-            {
-              "minute": 14,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 843.5,
-              "tokens_out": 50629,
-              "tokens_in": 0,
-              "requests_completed": 277,
-              "ttft_ms_p50": 35.2,
-              "ttft_ms_p99": 52.7
-            }
-          ]
-        },
-        {
-          "precision": "W4A16",
-          "sustained_throughput_tokens_per_sec": 760.9,
-          "throttle_ratio": 0.887,
-          "throttle_onset_minute": 1,
-          "ttft_p99_drift_ms": -2750.7,
-          "sustained_concurrency": 8,
-          "duration_minutes": 15,
-          "samples": [
-            {
-              "minute": 1,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 687.5,
-              "tokens_out": 41259,
-              "tokens_in": 0,
-              "requests_completed": 236,
-              "ttft_ms_p50": 36.8,
-              "ttft_ms_p99": 2802.1
-            },
-            {
-              "minute": 2,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 770.2,
-              "tokens_out": 46209,
-              "tokens_in": 0,
-              "requests_completed": 256,
-              "ttft_ms_p50": 35.4,
-              "ttft_ms_p99": 51.3
-            },
-            {
-              "minute": 3,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 764,
-              "tokens_out": 45832,
-              "tokens_in": 0,
-              "requests_completed": 258,
-              "ttft_ms_p50": 35.3,
-              "ttft_ms_p99": 51.2
-            },
-            {
-              "minute": 4,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 768.7,
-              "tokens_out": 46151,
-              "tokens_in": 0,
-              "requests_completed": 257,
-              "ttft_ms_p50": 35.5,
-              "ttft_ms_p99": 51.9
-            },
-            {
-              "minute": 5,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 766.7,
-              "tokens_out": 45997,
-              "tokens_in": 0,
-              "requests_completed": 258,
-              "ttft_ms_p50": 35.3,
-              "ttft_ms_p99": 51.5
-            },
-            {
-              "minute": 6,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 768.2,
-              "tokens_out": 46086,
-              "tokens_in": 0,
-              "requests_completed": 257,
-              "ttft_ms_p50": 35.4,
-              "ttft_ms_p99": 47.7
-            },
-            {
-              "minute": 7,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 764.3,
-              "tokens_out": 45881,
-              "tokens_in": 0,
-              "requests_completed": 258,
-              "ttft_ms_p50": 35.4,
-              "ttft_ms_p99": 51.4
-            },
-            {
-              "minute": 8,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 768.5,
-              "tokens_out": 46105,
-              "tokens_in": 0,
-              "requests_completed": 260,
-              "ttft_ms_p50": 35.3,
-              "ttft_ms_p99": 51.9
-            },
-            {
-              "minute": 9,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 762.5,
-              "tokens_out": 45749,
-              "tokens_in": 0,
-              "requests_completed": 253,
-              "ttft_ms_p50": 35.2,
-              "ttft_ms_p99": 52.1
-            },
-            {
-              "minute": 10,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 773,
-              "tokens_out": 46367,
-              "tokens_in": 0,
-              "requests_completed": 260,
-              "ttft_ms_p50": 35.5,
-              "ttft_ms_p99": 51.4
-            },
-            {
-              "minute": 11,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 761.1,
-              "tokens_out": 45663,
-              "tokens_in": 0,
-              "requests_completed": 254,
-              "ttft_ms_p50": 35.4,
-              "ttft_ms_p99": 49.6
-            },
-            {
-              "minute": 12,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 760.9,
-              "tokens_out": 45671,
-              "tokens_in": 0,
-              "requests_completed": 256,
-              "ttft_ms_p50": 35.4,
-              "ttft_ms_p99": 45.8
-            },
-            {
-              "minute": 13,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 775.3,
-              "tokens_out": 46524,
-              "tokens_in": 0,
-              "requests_completed": 259,
-              "ttft_ms_p50": 35.4,
-              "ttft_ms_p99": 51.6
-            },
-            {
-              "minute": 14,
-              "is_warmup": false,
-              "throughput_tokens_per_sec": 762.3,
-              "tokens_out": 45727,
-              "tokens_in": 0,
-              "requests_completed": 257,
-              "ttft_ms_p50": 35.3,
-              "ttft_ms_p99": 51.4
-            }
-          ]
-        }
-      ]
-    }
-  },
-  "accuracy": null,
-  "meta": {
-    "submitted_by": "Gong-K",
-    "submission_type": "individual",
-    "date": "2026-04-30",
-    "time": "08:29:45",
-    "run_id": "651fefa6",
-    "run_name": "nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6",
-    "flagged": null,
-    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
-    "env_info_file": "../../env_info.json",
-    "log_file": null,
-    "samples_file": "samples.jsonl",
-    "notes": null,
-    "benchmark_start_time": "2026-04-30T08:26:37.946702+00:00",
-    "benchmark_end_time": "2026-04-30T08:29:45.379126+00:00",
-    "benchmark_elapsed_minutes": 76.2,
-    "model_load_seconds": 65.9,
-    "benchmark_elapsed_minutes_note": "Sum of per-precision benchmark_elapsed_minutes (excludes sleep gaps and orchestrator overhead).",
-    "scenario_dirs": {
-      "bf16/offline": "results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/bf16/offline",
-      "bf16/online": "results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/bf16/online",
-      "bf16/sustained": "results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/bf16/sustained",
-      "fp8/offline": "results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/fp8/offline",
-      "fp8/online": "results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/fp8/online",
-      "fp8/sustained": "results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/fp8/sustained",
-      "w8a8/offline": "results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/w8a8/offline",
-      "w8a8/online": "results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/w8a8/online",
-      "w8a8/sustained": "results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/w8a8/sustained",
-      "w8a16/offline": "results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/w8a16/offline",
-      "w8a16/online": "results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/w8a16/online",
-      "w8a16/sustained": "results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/w8a16/sustained",
-      "w4a16/offline": "results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/w4a16/offline",
-      "w4a16/online": "results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/w4a16/online",
-      "w4a16/sustained": "results/community/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_sglang_c43a8309_651fefa6/w4a16/sustained"
-    },
-    "precision_dirs": {
-      "BF16": "bf16",
-      "FP8": "fp8",
-      "W8A8": "w8a8",
-      "W8A16": "w8a16",
-      "W4A16": "w4a16"
-    },
-    "precision_model_map": {
-      "BF16": {
-        "model_id": "meta-llama/Llama-3.1-8B-Instruct",
-        "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
-        "dtype_override": "bfloat16"
-      },
-      "FP8": {
-        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
-        "model_revision": "12fd6884d2585dd4d020373e7f39f74507b31866",
-        "engine_kwargs": {
-          "quantization": "compressed-tensors"
-        },
-        "_note": "Static per-tensor FP8 (weights + activations). Requires Ampere+ (A100, A800, H20). Skipped automatically on FP16-only hardware."
-      },
-      "W8A8": {
-        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
-        "model_revision": "e2bfb7d92784ad7d1b606c2f9644d3cefb2ec708",
-        "engine_kwargs": {
-          "quantization": "compressed-tensors"
-        },
-        "_note": "INT8 weights + INT8 activations via compressed-tensors. Exercises native int8 tensor cores."
-      },
-      "W8A16": {
-        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
-        "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
-        "engine_kwargs": {
-          "quantization": "compressed-tensors"
-        },
-        "_note": "INT8 weights, FP16 activations. Weight-only quantization \u2014 reduces memory bandwidth, not compute dtype."
-      },
-      "W4A16": {
-        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
-        "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
-        "engine_kwargs": {
-          "quantization": "gptq"
-        },
-        "_note": "INT4 weights, FP16 activations via GPTQ Marlin kernels. Weight-only quantization \u2014 larger memory saving than W8A16."
-      }
-    }
-  }
-}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/result.json b/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/result.json
deleted file mode 100644
index e110ab18..00000000
--- a/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/result.json
+++ /dev/null
@@ -1,583 +0,0 @@
-{
-  "schema_version": "1.0",
-  "suite_id": "suite_D",
-  "implementation_id": "nvidia_sglang_c43a8309",
-  "chip": {
-    "name": "NVIDIA A100-SXM4-40GB",
-    "vendor": "NVIDIA",
-    "count": 1,
-    "memory_gb": 40,
-    "interconnect_intra_node": null,
-    "interconnect_inter_node": null
-  },
-  "environment": {
-    "collected_at": "2026-05-07T06:55:48.459765+00:00",
-    "accelerators": [
-      {
-        "index": 0,
-        "name": "NVIDIA A100-SXM4-40GB",
-        "vendor": "NVIDIA",
-        "memory_gb": 40,
-        "driver_version": "565.57.01",
-        "firmware_version": null,
-        "compute_capability": "8.0",
-        "supports_bf16": true
-      }
-    ],
-    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
-    "intra_node_interconnect": null,
-    "cpu": {
-      "model": "AMD EPYC 7532 32-Core Processor",
-      "physical_cores": 64,
-      "logical_cores": 128,
-      "numa_nodes": 2
-    },
-    "system_memory_gb": 1007.7,
-    "pcie_generation": "PCIe Gen 4",
-    "cpu_accelerator_bandwidth_gbs": null,
-    "network_interfaces": [
-      {
-        "name": "mlx5_0",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_1",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_2",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      }
-    ],
-    "os": "Ubuntu 22.04.4 LTS",
-    "python_version": "3.10.20",
-    "kernel_version": "5.15.0-60-generic",
-    "runtime_version": "CUDA 12.8",
-    "pytorch_version": "2.9.1+cu128"
-  },
-  "software": {
-    "framework": "SGLang",
-    "framework_version": "0.5.6",
-    "driver_version": "565.57.01",
-    "runtime_version": "CUDA 12.8",
-    "os": "Ubuntu 22.04.4 LTS",
-    "python_version": "3.10.20"
-  },
-  "model": {
-    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
-    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
-    "model_name": null,
-    "model_note": null,
-    "model_source": "local",
-    "architecture": "dense",
-    "parameter_count_b": 8,
-    "precision": "BF16",
-    "effective_dtype": "bfloat16",
-    "quantization_method": null,
-    "model_format": "HuggingFace original"
-  },
-  "task": {
-    "scenarios_run": [
-      "offline",
-      "interactive",
-      "sustained",
-      "online",
-      "speculative"
-    ],
-    "parallelism": {
-      "tensor_parallel_size": 1,
-      "pipeline_parallel_size": 1,
-      "expert_parallel_size": 1,
-      "data_parallel_size": 1
-    },
-    "num_runs": 2,
-    "extra_config": null
-  },
-  "metrics": {
-    "derived": {},
-    "offline": {
-      "results_by_concurrency": [
-        {
-          "client_concurrency": 1,
-          "throughput_tokens_per_sec": 59.89,
-          "throughput_tokens_per_sec_per_chip": 59.89,
-          "elapsed_seconds_median": 214.6,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        },
-        {
-          "client_concurrency": 4,
-          "throughput_tokens_per_sec": 59.82,
-          "throughput_tokens_per_sec_per_chip": 59.82,
-          "elapsed_seconds_median": 214.8,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        }
-      ]
-    },
-    "interactive": {
-      "ttft_ms_p50": 2987.93,
-      "ttft_ms_p90": 3151.31,
-      "ttft_ms_p99": 3185.98,
-      "tpot_ms_p50": 15.65,
-      "tpot_ms_p90": 15.72,
-      "tpot_ms_p99": 15.76,
-      "peak_memory_gb": null,
-      "elapsed_seconds_median": 650.6
-    },
-    "sustained": {
-      "sustained_concurrency": 8,
-      "duration_minutes": 30,
-      "warmup_minutes": 2,
-      "sample_interval_seconds": 60,
-      "samples": [
-        {
-          "minute": 1,
-          "is_warmup": true,
-          "throughput_tokens_per_sec": 37.5,
-          "tokens_out": 2250,
-          "tokens_in": 0,
-          "requests_completed": 10,
-          "ttft_ms_p50": 14034.2,
-          "ttft_ms_p99": 30569.8
-        },
-        {
-          "minute": 2,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.2,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23679.8,
-          "ttft_ms_p99": 29684.9
-        },
-        {
-          "minute": 3,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.3,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 22756.1,
-          "ttft_ms_p99": 29093.4
-        },
-        {
-          "minute": 4,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.2,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23284,
-          "ttft_ms_p99": 29407.4
-        },
-        {
-          "minute": 5,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.3,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23627,
-          "ttft_ms_p99": 29689.4
-        },
-        {
-          "minute": 6,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.3,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23576.2,
-          "ttft_ms_p99": 29714.6
-        },
-        {
-          "minute": 7,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.2,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23169.5,
-          "ttft_ms_p99": 29430
-        },
-        {
-          "minute": 8,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.2,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23525.8,
-          "ttft_ms_p99": 29430.1
-        },
-        {
-          "minute": 9,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.2,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23823.6,
-          "ttft_ms_p99": 29827.2
-        },
-        {
-          "minute": 10,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.3,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 22851.5,
-          "ttft_ms_p99": 29426.4
-        },
-        {
-          "minute": 11,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 37.5,
-          "tokens_out": 2250,
-          "tokens_in": 0,
-          "requests_completed": 10,
-          "ttft_ms_p50": 23268.9,
-          "ttft_ms_p99": 29167.7
-        },
-        {
-          "minute": 12,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.3,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23677.8,
-          "ttft_ms_p99": 29717.6
-        },
-        {
-          "minute": 13,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.2,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23598.2,
-          "ttft_ms_p99": 29748
-        },
-        {
-          "minute": 14,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.3,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23189.8,
-          "ttft_ms_p99": 29437.1
-        },
-        {
-          "minute": 15,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.2,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23568.1,
-          "ttft_ms_p99": 29461.6
-        },
-        {
-          "minute": 16,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.3,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23841.9,
-          "ttft_ms_p99": 29818.9
-        },
-        {
-          "minute": 17,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.2,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 22839,
-          "ttft_ms_p99": 29428.4
-        },
-        {
-          "minute": 18,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.3,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23356.8,
-          "ttft_ms_p99": 29448.1
-        },
-        {
-          "minute": 19,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.2,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23860,
-          "ttft_ms_p99": 29836.8
-        },
-        {
-          "minute": 20,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.3,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 22877.8,
-          "ttft_ms_p99": 29251.6
-        },
-        {
-          "minute": 21,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.2,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23360.2,
-          "ttft_ms_p99": 29503.3
-        },
-        {
-          "minute": 22,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 37.5,
-          "tokens_out": 2250,
-          "tokens_in": 0,
-          "requests_completed": 10,
-          "ttft_ms_p50": 23633.9,
-          "ttft_ms_p99": 29457.5
-        },
-        {
-          "minute": 23,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.3,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23851.7,
-          "ttft_ms_p99": 29866.9
-        },
-        {
-          "minute": 24,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.2,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 22862.3,
-          "ttft_ms_p99": 29426.1
-        },
-        {
-          "minute": 25,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.2,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23381.4,
-          "ttft_ms_p99": 29497.2
-        },
-        {
-          "minute": 26,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.3,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23862.4,
-          "ttft_ms_p99": 29847.2
-        },
-        {
-          "minute": 27,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.3,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 22872.5,
-          "ttft_ms_p99": 29246.9
-        },
-        {
-          "minute": 28,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.2,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23368.3,
-          "ttft_ms_p99": 29473.1
-        },
-        {
-          "minute": 29,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 56.2,
-          "tokens_out": 3375,
-          "tokens_in": 0,
-          "requests_completed": 15,
-          "ttft_ms_p50": 23691.2,
-          "ttft_ms_p99": 29750.9
-        }
-      ],
-      "sustained_throughput_tokens_per_sec": 54.9,
-      "throttle_ratio": 0.666,
-      "throttle_onset_minute": 11,
-      "ttft_p99_drift_ms": 66,
-      "throughput_post_warmup_reliability": {
-        "n": 28,
-        "mean": 54.9,
-        "std": 4.9,
-        "cv_pct": 8.95,
-        "stability": "high-variance",
-        "runs": [
-          56.2,
-          56.3,
-          56.2,
-          56.3,
-          56.3,
-          56.2,
-          56.2,
-          56.2,
-          56.3,
-          37.5,
-          56.3,
-          56.2,
-          56.3,
-          56.2,
-          56.3,
-          56.2,
-          56.3,
-          56.2,
-          56.3,
-          56.2,
-          37.5,
-          56.3,
-          56.2,
-          56.2,
-          56.3,
-          56.3,
-          56.2,
-          56.2
-        ]
-      }
-    },
-    "online": {
-      "sla_ttft_ms": 5000,
-      "max_valid_qps": 0,
-      "results_by_qps": [
-        {
-          "target_qps": 0.5,
-          "achieved_qps": 0.5,
-          "ttft_ms_p50": 112272.07,
-          "ttft_ms_p90": 202401.64,
-          "ttft_ms_p99": 216182.98,
-          "tpot_ms_p50": 52.34,
-          "tpot_ms_p90": 78.65,
-          "tpot_ms_p99": 80.48,
-          "elapsed_seconds_median": 413.5,
-          "sla_met": false
-        },
-        {
-          "target_qps": 1,
-          "achieved_qps": 1,
-          "ttft_ms_p50": 145998.66,
-          "ttft_ms_p90": 264672.22,
-          "ttft_ms_p99": 294893.64,
-          "tpot_ms_p50": 52.5,
-          "tpot_ms_p90": 78.93,
-          "tpot_ms_p99": 80.57,
-          "elapsed_seconds_median": 414.8,
-          "sla_met": false
-        },
-        {
-          "target_qps": 2,
-          "achieved_qps": 2,
-          "ttft_ms_p50": 179802.9,
-          "ttft_ms_p90": 322496.7,
-          "ttft_ms_p99": 356490.83,
-          "tpot_ms_p50": 52.59,
-          "tpot_ms_p90": 79.01,
-          "tpot_ms_p99": 80.69,
-          "elapsed_seconds_median": 412.7,
-          "sla_met": false
-        }
-      ]
-    },
-    "speculative": {
-      "results_by_concurrency": [
-        {
-          "client_concurrency": 1,
-          "throughput_tokens_per_sec": 36.86,
-          "throughput_tokens_per_sec_per_chip": 36.86,
-          "elapsed_seconds_median": 348.6,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        },
-        {
-          "client_concurrency": 4,
-          "throughput_tokens_per_sec": 36.85,
-          "throughput_tokens_per_sec_per_chip": 36.85,
-          "elapsed_seconds_median": 348.7,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        }
-      ]
-    }
-  },
-  "accuracy": {
-    "subset_score": 0.57,
-    "baseline_delta": 0.01,
-    "valid": true,
-    "framework": "SGLang",
-    "precision": "BF16",
-    "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
-  },
-  "meta": {
-    "submitted_by": "Gong-K",
-    "submission_type": "individual",
-    "date": "2026-05-07",
-    "time": "07:22:09",
-    "run_id": "99c43b97",
-    "run_name": "nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97",
-    "flagged": null,
-    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
-    "env_info_file": "../env_info.json",
-    "log_file": null,
-    "samples_file": "samples.jsonl",
-    "notes": null,
-    "benchmark_start_time": "2026-05-07T07:00:40.025406+00:00",
-    "benchmark_end_time": "2026-05-07T07:22:09.476338+00:00",
-    "benchmark_elapsed_minutes": 150.5,
-    "model_load_seconds": 52.9,
-    "benchmark_elapsed_minutes_note": "Total across [offline, interactive, sustained, online, speculative] scenarios.",
-    "scenario_dirs": {
-      "offline": "results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/offline",
-      "interactive": "results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/interactive",
-      "sustained": "results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/sustained",
-      "online": "results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/online",
-      "speculative": "results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/speculative"
-    }
-  }
-}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/result.json b/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/result.json
deleted file mode 100644
index 54c9f403..00000000
--- a/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/result.json
+++ /dev/null
@@ -1,389 +0,0 @@
-{
-  "schema_version": "1.0",
-  "suite_id": "suite_F",
-  "implementation_id": "nvidia_sglang_c43a8309",
-  "chip": {
-    "name": "NVIDIA A100-SXM4-40GB",
-    "vendor": "NVIDIA",
-    "count": 1,
-    "memory_gb": 40,
-    "interconnect_intra_node": null,
-    "interconnect_inter_node": null
-  },
-  "environment": {
-    "collected_at": "2026-05-07T10:52:35.700123+00:00",
-    "accelerators": [
-      {
-        "index": 0,
-        "name": "NVIDIA A100-SXM4-40GB",
-        "vendor": "NVIDIA",
-        "memory_gb": 40,
-        "driver_version": "565.57.01",
-        "firmware_version": null,
-        "compute_capability": "8.0",
-        "supports_bf16": true
-      }
-    ],
-    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
-    "intra_node_interconnect": null,
-    "cpu": {
-      "model": "AMD EPYC 7532 32-Core Processor",
-      "physical_cores": 64,
-      "logical_cores": 128,
-      "numa_nodes": 2
-    },
-    "system_memory_gb": 1007.7,
-    "pcie_generation": "PCIe Gen 4",
-    "cpu_accelerator_bandwidth_gbs": null,
-    "network_interfaces": [
-      {
-        "name": "mlx5_0",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_1",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_2",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      }
-    ],
-    "os": "Ubuntu 22.04.4 LTS",
-    "python_version": "3.10.20",
-    "kernel_version": "5.15.0-60-generic",
-    "runtime_version": "CUDA 12.8",
-    "pytorch_version": "2.9.1+cu128"
-  },
-  "software": {
-    "framework": "SGLang",
-    "framework_version": "0.5.6",
-    "driver_version": "565.57.01",
-    "runtime_version": "CUDA 12.8",
-    "os": "Ubuntu 22.04.4 LTS",
-    "python_version": "3.10.20"
-  },
-  "model": {
-    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
-    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
-    "model_name": null,
-    "model_note": null,
-    "model_source": "local",
-    "architecture": "dense",
-    "parameter_count_b": 0.5,
-    "precision": "BF16",
-    "effective_dtype": "bfloat16",
-    "quantization_method": null,
-    "model_format": "HuggingFace original"
-  },
-  "task": {
-    "scenarios_run": [
-      "offline",
-      "online",
-      "interactive",
-      "sustained"
-    ],
-    "parallelism": {
-      "tensor_parallel_size": 1,
-      "pipeline_parallel_size": 1,
-      "expert_parallel_size": 1,
-      "data_parallel_size": 1
-    },
-    "num_runs": 3,
-    "extra_config": null
-  },
-  "metrics": {
-    "derived": {},
-    "offline": {
-      "results_by_concurrency": [
-        {
-          "client_concurrency": 4,
-          "throughput_tokens_per_sec": 11447.71,
-          "throughput_tokens_per_sec_per_chip": 11447.71,
-          "elapsed_seconds_median": 3.7,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        },
-        {
-          "client_concurrency": 16,
-          "throughput_tokens_per_sec": 11507.48,
-          "throughput_tokens_per_sec_per_chip": 11507.48,
-          "elapsed_seconds_median": 3.6,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        },
-        {
-          "client_concurrency": 64,
-          "throughput_tokens_per_sec": 11509.2,
-          "throughput_tokens_per_sec_per_chip": 11509.2,
-          "elapsed_seconds_median": 3.6,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        }
-      ]
-    },
-    "online": {
-      "sla_ttft_ms": 500,
-      "max_valid_qps": 40,
-      "results_by_qps": [
-        {
-          "target_qps": 10,
-          "achieved_qps": 10,
-          "ttft_ms_p50": 18.63,
-          "ttft_ms_p90": 31.07,
-          "ttft_ms_p99": 1226.53,
-          "tpot_ms_p50": 2.56,
-          "tpot_ms_p90": 3.01,
-          "tpot_ms_p99": 4.21,
-          "elapsed_seconds_median": 31.9,
-          "sla_met": false
-        },
-        {
-          "target_qps": 40,
-          "achieved_qps": 40,
-          "ttft_ms_p50": 34.28,
-          "ttft_ms_p90": 41.32,
-          "ttft_ms_p99": 44.41,
-          "tpot_ms_p50": 20.08,
-          "tpot_ms_p90": 24.83,
-          "tpot_ms_p99": 31.82,
-          "elapsed_seconds_median": 10,
-          "sla_met": true
-        }
-      ]
-    },
-    "interactive": {
-      "ttft_ms_p50": 16.46,
-      "ttft_ms_p90": 17.26,
-      "ttft_ms_p99": 18.42,
-      "tpot_ms_p50": 1.89,
-      "tpot_ms_p90": 1.91,
-      "tpot_ms_p99": 1.99,
-      "peak_memory_gb": null,
-      "elapsed_seconds_median": 56.5
-    },
-    "sustained": {
-      "sustained_concurrency": 32,
-      "duration_minutes": 15,
-      "warmup_minutes": 1,
-      "sample_interval_seconds": 60,
-      "samples": [
-        {
-          "minute": 1,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 6616.4,
-          "tokens_out": 397010,
-          "tokens_in": 0,
-          "requests_completed": 2131,
-          "ttft_ms_p50": 19.6,
-          "ttft_ms_p99": 4749.5
-        },
-        {
-          "minute": 2,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 7181.7,
-          "tokens_out": 430976,
-          "tokens_in": 0,
-          "requests_completed": 2317,
-          "ttft_ms_p50": 19.3,
-          "ttft_ms_p99": 41.7
-        },
-        {
-          "minute": 3,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 7188.9,
-          "tokens_out": 431427,
-          "tokens_in": 0,
-          "requests_completed": 2312,
-          "ttft_ms_p50": 19.1,
-          "ttft_ms_p99": 41
-        },
-        {
-          "minute": 4,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 7110.2,
-          "tokens_out": 426673,
-          "tokens_in": 0,
-          "requests_completed": 2292,
-          "ttft_ms_p50": 19.4,
-          "ttft_ms_p99": 34
-        },
-        {
-          "minute": 5,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 7106.5,
-          "tokens_out": 426157,
-          "tokens_in": 0,
-          "requests_completed": 2287,
-          "ttft_ms_p50": 19.2,
-          "ttft_ms_p99": 40.6
-        },
-        {
-          "minute": 6,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 7144.4,
-          "tokens_out": 428781,
-          "tokens_in": 0,
-          "requests_completed": 2311,
-          "ttft_ms_p50": 19.2,
-          "ttft_ms_p99": 33.6
-        },
-        {
-          "minute": 7,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 7158.3,
-          "tokens_out": 429690,
-          "tokens_in": 0,
-          "requests_completed": 2306,
-          "ttft_ms_p50": 19.3,
-          "ttft_ms_p99": 40.3
-        },
-        {
-          "minute": 8,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 7020,
-          "tokens_out": 421197,
-          "tokens_in": 0,
-          "requests_completed": 2265,
-          "ttft_ms_p50": 19.3,
-          "ttft_ms_p99": 41.4
-        },
-        {
-          "minute": 9,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 7160.7,
-          "tokens_out": 429349,
-          "tokens_in": 0,
-          "requests_completed": 2303,
-          "ttft_ms_p50": 19.4,
-          "ttft_ms_p99": 41.1
-        },
-        {
-          "minute": 10,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 7183.9,
-          "tokens_out": 431115,
-          "tokens_in": 0,
-          "requests_completed": 2319,
-          "ttft_ms_p50": 19.4,
-          "ttft_ms_p99": 38
-        },
-        {
-          "minute": 11,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 7180.8,
-          "tokens_out": 431065,
-          "tokens_in": 0,
-          "requests_completed": 2308,
-          "ttft_ms_p50": 19.2,
-          "ttft_ms_p99": 34.1
-        },
-        {
-          "minute": 12,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 7127.9,
-          "tokens_out": 427694,
-          "tokens_in": 0,
-          "requests_completed": 2301,
-          "ttft_ms_p50": 19.3,
-          "ttft_ms_p99": 34.3
-        },
-        {
-          "minute": 13,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 7111.1,
-          "tokens_out": 426396,
-          "tokens_in": 0,
-          "requests_completed": 2282,
-          "ttft_ms_p50": 19.2,
-          "ttft_ms_p99": 41
-        },
-        {
-          "minute": 14,
-          "is_warmup": false,
-          "throughput_tokens_per_sec": 7044.6,
-          "tokens_out": 422801,
-          "tokens_in": 0,
-          "requests_completed": 2269,
-          "ttft_ms_p50": 19.4,
-          "ttft_ms_p99": 41.8
-        }
-      ],
-      "sustained_throughput_tokens_per_sec": 7095.4,
-      "throttle_ratio": 0.92,
-      "throttle_onset_minute": null,
-      "ttft_p99_drift_ms": -4707.7,
-      "throughput_post_warmup_reliability": {
-        "n": 14,
-        "mean": 7095.4,
-        "std": 147.1,
-        "cv_pct": 2.07,
-        "stability": "stable",
-        "runs": [
-          6616.4,
-          7181.7,
-          7188.9,
-          7110.2,
-          7106.5,
-          7144.4,
-          7158.3,
-          7020.0,
-          7160.7,
-          7183.9,
-          7180.8,
-          7127.9,
-          7111.1,
-          7044.6
-        ]
-      }
-    }
-  },
-  "accuracy": {
-    "subset_score": 0.41,
-    "baseline_delta": 0.03,
-    "valid": true,
-    "framework": "SGLang",
-    "precision": "BF16",
-    "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
-  },
-  "meta": {
-    "submitted_by": "Gong-K",
-    "submission_type": "individual",
-    "date": "2026-05-07",
-    "time": "10:56:30",
-    "run_id": "435424a8",
-    "run_name": "nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8",
-    "flagged": null,
-    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
-    "env_info_file": "../env_info.json",
-    "log_file": null,
-    "samples_file": "samples.jsonl",
-    "notes": null,
-    "benchmark_start_time": "2026-05-07T10:55:44.424768+00:00",
-    "benchmark_end_time": "2026-05-07T10:56:30.330070+00:00",
-    "benchmark_elapsed_minutes": 20.7,
-    "model_load_seconds": 33.4,
-    "benchmark_elapsed_minutes_note": "Total across [offline, online, interactive, sustained] scenarios.",
-    "scenario_dirs": {
-      "offline": "results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/offline",
-      "online": "results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/online",
-      "interactive": "results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/interactive",
-      "sustained": "results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/sustained"
-    }
-  }
-}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/accuracy/accuracy.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/accuracy/accuracy.json
new file mode 100644
index 00000000..9930c9e2
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.77,
+  "baseline_delta": 0.0,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/burst/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/burst/result.json
new file mode 100644
index 00000000..c39a27f2
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/burst/result.json
@@ -0,0 +1,229 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_B",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 8,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T22:51:04.801985+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "model_revision": "50fd307e57011801c7833c87efa1984ddf2db42f",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 70.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "burst",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 8,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "burst": {
+      "sla_ttft_ms": 1000,
+      "burst_steady_qps": 5,
+      "burst_peak_qps": 25,
+      "burst_duration_seconds": 30,
+      "burst_interval_seconds": 120,
+      "steady_requests_total": 1812,
+      "burst_requests_total": 2245,
+      "steady_ttft_p50_ms": 98.54,
+      "steady_ttft_p99_ms": 201.51,
+      "burst_ttft_p50_ms": 140.87,
+      "burst_ttft_p99_ms": 384.05,
+      "sla_met_during_burst": true,
+      "burst_degradation_ratio": 1.906,
+      "results_by_cycle": [
+        {
+          "cycle": 1,
+          "steady_requests": 581,
+          "burst_requests": 760,
+          "steady_ttft_p99_ms": 364.09,
+          "burst_ttft_p99_ms": 404.43
+        },
+        {
+          "cycle": 2,
+          "steady_requests": 595,
+          "burst_requests": 734,
+          "steady_ttft_p99_ms": 173.75,
+          "burst_ttft_p99_ms": 391.83
+        },
+        {
+          "cycle": 3,
+          "steady_requests": 636,
+          "burst_requests": 751,
+          "steady_ttft_p99_ms": 174.59,
+          "burst_ttft_p99_ms": 340.29
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-08",
+    "time": "00:44:05",
+    "run_id": "40a62dd1",
+    "run_name": "nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-08T00:35:27.385471+00:00",
+    "benchmark_end_time": "2026-05-08T00:44:05.823975+00:00",
+    "benchmark_elapsed_minutes": 8.6,
+    "model_load_seconds": 444.0
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/env_info.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/env_info.json
new file mode 100644
index 00000000..4eeed5c5
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/env_info.json
@@ -0,0 +1,118 @@
+{
+  "collected_at": "2026-05-07T22:51:04.801985+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 1,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 2,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 3,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 4,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 5,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 6,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 7,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+  "intra_node_interconnect": "NVLink",
+  "cpu": {
+    "model": "AMD EPYC 7532 32-Core Processor",
+    "physical_cores": 64,
+    "logical_cores": 128,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.7,
+  "pcie_generation": "PCIe Gen 4",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_2",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.10.20",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 12.8",
+  "pytorch_version": "2.9.1+cu128"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/interactive/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/interactive/result.json
new file mode 100644
index 00000000..4f2ece9d
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/interactive/result.json
@@ -0,0 +1,201 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_B",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 8,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T22:51:04.801985+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "model_revision": "50fd307e57011801c7833c87efa1984ddf2db42f",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 70.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 8,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 72.21,
+      "ttft_ms_p90": 78.15,
+      "ttft_ms_p99": 308.29,
+      "tpot_ms_p50": 18.52,
+      "tpot_ms_p90": 18.58,
+      "tpot_ms_p99": 18.6,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 176.9
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-08",
+    "time": "00:26:55",
+    "run_id": "40a62dd1",
+    "run_name": "nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-08T00:18:03.615707+00:00",
+    "benchmark_end_time": "2026-05-08T00:26:55.443402+00:00",
+    "benchmark_elapsed_minutes": 8.9,
+    "model_load_seconds": 446.4
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/offline/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/offline/result.json
new file mode 100644
index 00000000..76d2c725
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/offline/result.json
@@ -0,0 +1,231 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_B",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 8,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T22:51:04.801985+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "model_revision": "50fd307e57011801c7833c87efa1984ddf2db42f",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 70.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 8,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 2234.48,
+          "throughput_tokens_per_sec_per_chip": 279.31,
+          "elapsed_seconds_median": 15.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 2233.83,
+          "throughput_tokens_per_sec_per_chip": 279.23,
+          "elapsed_seconds_median": 15.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 2236.02,
+          "throughput_tokens_per_sec_per_chip": 279.5,
+          "elapsed_seconds_median": 15.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "23:12:06",
+    "run_id": "40a62dd1",
+    "run_name": "nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T23:08:55.136710+00:00",
+    "benchmark_end_time": "2026-05-07T23:12:06.765021+00:00",
+    "benchmark_elapsed_minutes": 3.2,
+    "model_load_seconds": 480.4
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/online/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/online/result.json
new file mode 100644
index 00000000..4de35ed5
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/online/result.json
@@ -0,0 +1,245 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_B",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 8,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T22:51:04.801985+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "model_revision": "50fd307e57011801c7833c87efa1984ddf2db42f",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 70.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 8,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 1000,
+      "max_valid_qps": 25,
+      "results_by_qps": [
+        {
+          "target_qps": 2,
+          "achieved_qps": 2.0,
+          "ttft_ms_p50": 92.29,
+          "ttft_ms_p90": 136.26,
+          "ttft_ms_p99": 184.14,
+          "tpot_ms_p50": 25.88,
+          "tpot_ms_p90": 27.81,
+          "tpot_ms_p99": 30.42,
+          "elapsed_seconds_median": 104.6,
+          "sla_met": true
+        },
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 96.6,
+          "ttft_ms_p90": 143.81,
+          "ttft_ms_p99": 166.6,
+          "tpot_ms_p50": 32.04,
+          "tpot_ms_p90": 34.57,
+          "tpot_ms_p99": 37.5,
+          "elapsed_seconds_median": 47.5,
+          "sla_met": true
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 115.36,
+          "ttft_ms_p90": 157.84,
+          "ttft_ms_p99": 185.54,
+          "tpot_ms_p50": 47.27,
+          "tpot_ms_p90": 54.71,
+          "tpot_ms_p99": 57.26,
+          "elapsed_seconds_median": 26.2,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 135.31,
+          "ttft_ms_p90": 174.62,
+          "ttft_ms_p99": 789.97,
+          "tpot_ms_p50": 56.94,
+          "tpot_ms_p90": 71.74,
+          "tpot_ms_p99": 146.67,
+          "elapsed_seconds_median": 17.6,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "23:30:30",
+    "run_id": "40a62dd1",
+    "run_name": "nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T23:20:37.906520+00:00",
+    "benchmark_end_time": "2026-05-07T23:30:30.833319+00:00",
+    "benchmark_elapsed_minutes": 9.9,
+    "model_load_seconds": 461.7
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/result.json
new file mode 100644
index 00000000..d9b1803e
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/result.json
@@ -0,0 +1,650 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_B",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 8,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T22:51:04.801985+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "model_revision": "50fd307e57011801c7833c87efa1984ddf2db42f",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 70.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained",
+      "interactive",
+      "burst"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 8,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 2234.48,
+          "throughput_tokens_per_sec_per_chip": 279.31,
+          "elapsed_seconds_median": 15.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 2233.83,
+          "throughput_tokens_per_sec_per_chip": 279.23,
+          "elapsed_seconds_median": 15.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 2236.02,
+          "throughput_tokens_per_sec_per_chip": 279.5,
+          "elapsed_seconds_median": 15.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 1000,
+      "max_valid_qps": 25,
+      "results_by_qps": [
+        {
+          "target_qps": 2,
+          "achieved_qps": 2.0,
+          "ttft_ms_p50": 92.29,
+          "ttft_ms_p90": 136.26,
+          "ttft_ms_p99": 184.14,
+          "tpot_ms_p50": 25.88,
+          "tpot_ms_p90": 27.81,
+          "tpot_ms_p99": 30.42,
+          "elapsed_seconds_median": 104.6,
+          "sla_met": true
+        },
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 96.6,
+          "ttft_ms_p90": 143.81,
+          "ttft_ms_p99": 166.6,
+          "tpot_ms_p50": 32.04,
+          "tpot_ms_p90": 34.57,
+          "tpot_ms_p99": 37.5,
+          "elapsed_seconds_median": 47.5,
+          "sla_met": true
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 115.36,
+          "ttft_ms_p90": 157.84,
+          "ttft_ms_p99": 185.54,
+          "tpot_ms_p50": 47.27,
+          "tpot_ms_p90": 54.71,
+          "tpot_ms_p99": 57.26,
+          "elapsed_seconds_median": 26.2,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 135.31,
+          "ttft_ms_p90": 174.62,
+          "ttft_ms_p99": 789.97,
+          "tpot_ms_p50": 56.94,
+          "tpot_ms_p90": 71.74,
+          "tpot_ms_p99": 146.67,
+          "elapsed_seconds_median": 17.6,
+          "sla_met": true
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 4,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 166.1,
+          "tokens_out": 9973,
+          "tokens_in": 0,
+          "requests_completed": 54,
+          "ttft_ms_p50": 97.1,
+          "ttft_ms_p99": 745.3
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 181.2,
+          "tokens_out": 10872,
+          "tokens_in": 0,
+          "requests_completed": 57,
+          "ttft_ms_p50": 95.8,
+          "ttft_ms_p99": 150.8
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 186.1,
+          "tokens_out": 11167,
+          "tokens_in": 0,
+          "requests_completed": 59,
+          "ttft_ms_p50": 93.3,
+          "ttft_ms_p99": 138.2
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 183.3,
+          "tokens_out": 10997,
+          "tokens_in": 0,
+          "requests_completed": 57,
+          "ttft_ms_p50": 93.8,
+          "ttft_ms_p99": 143.4
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 181.9,
+          "tokens_out": 10911,
+          "tokens_in": 0,
+          "requests_completed": 59,
+          "ttft_ms_p50": 93.6,
+          "ttft_ms_p99": 114.2
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 179.2,
+          "tokens_out": 10755,
+          "tokens_in": 0,
+          "requests_completed": 57,
+          "ttft_ms_p50": 93.0,
+          "ttft_ms_p99": 131.9
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 183.2,
+          "tokens_out": 10992,
+          "tokens_in": 0,
+          "requests_completed": 58,
+          "ttft_ms_p50": 93.2,
+          "ttft_ms_p99": 142.5
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 180.1,
+          "tokens_out": 10805,
+          "tokens_in": 0,
+          "requests_completed": 58,
+          "ttft_ms_p50": 93.4,
+          "ttft_ms_p99": 116.0
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 180.9,
+          "tokens_out": 10855,
+          "tokens_in": 0,
+          "requests_completed": 56,
+          "ttft_ms_p50": 93.3,
+          "ttft_ms_p99": 138.2
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 185.0,
+          "tokens_out": 11100,
+          "tokens_in": 0,
+          "requests_completed": 59,
+          "ttft_ms_p50": 93.2,
+          "ttft_ms_p99": 112.9
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 180.6,
+          "tokens_out": 10833,
+          "tokens_in": 0,
+          "requests_completed": 57,
+          "ttft_ms_p50": 93.3,
+          "ttft_ms_p99": 132.8
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 185.3,
+          "tokens_out": 11128,
+          "tokens_in": 0,
+          "requests_completed": 60,
+          "ttft_ms_p50": 93.7,
+          "ttft_ms_p99": 143.0
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 181.0,
+          "tokens_out": 10859,
+          "tokens_in": 0,
+          "requests_completed": 57,
+          "ttft_ms_p50": 93.3,
+          "ttft_ms_p99": 147.0
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 182.3,
+          "tokens_out": 10933,
+          "tokens_in": 0,
+          "requests_completed": 58,
+          "ttft_ms_p50": 93.4,
+          "ttft_ms_p99": 143.0
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 179.6,
+          "tokens_out": 10776,
+          "tokens_in": 0,
+          "requests_completed": 58,
+          "ttft_ms_p50": 93.2,
+          "ttft_ms_p99": 96.8
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 178.8,
+          "tokens_out": 10736,
+          "tokens_in": 0,
+          "requests_completed": 55,
+          "ttft_ms_p50": 93.0,
+          "ttft_ms_p99": 158.0
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 187.3,
+          "tokens_out": 11233,
+          "tokens_in": 0,
+          "requests_completed": 60,
+          "ttft_ms_p50": 92.9,
+          "ttft_ms_p99": 111.2
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 181.4,
+          "tokens_out": 10885,
+          "tokens_in": 0,
+          "requests_completed": 58,
+          "ttft_ms_p50": 92.8,
+          "ttft_ms_p99": 155.3
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 183.1,
+          "tokens_out": 10985,
+          "tokens_in": 0,
+          "requests_completed": 58,
+          "ttft_ms_p50": 92.7,
+          "ttft_ms_p99": 142.4
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 183.8,
+          "tokens_out": 11032,
+          "tokens_in": 0,
+          "requests_completed": 59,
+          "ttft_ms_p50": 93.2,
+          "ttft_ms_p99": 115.7
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 177.4,
+          "tokens_out": 10641,
+          "tokens_in": 0,
+          "requests_completed": 56,
+          "ttft_ms_p50": 93.3,
+          "ttft_ms_p99": 159.1
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 182.3,
+          "tokens_out": 10937,
+          "tokens_in": 0,
+          "requests_completed": 58,
+          "ttft_ms_p50": 93.0,
+          "ttft_ms_p99": 95.0
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 178.8,
+          "tokens_out": 10723,
+          "tokens_in": 0,
+          "requests_completed": 55,
+          "ttft_ms_p50": 93.2,
+          "ttft_ms_p99": 137.9
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 180.9,
+          "tokens_out": 10856,
+          "tokens_in": 0,
+          "requests_completed": 59,
+          "ttft_ms_p50": 93.1,
+          "ttft_ms_p99": 138.4
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 184.6,
+          "tokens_out": 11084,
+          "tokens_in": 0,
+          "requests_completed": 59,
+          "ttft_ms_p50": 93.2,
+          "ttft_ms_p99": 131.4
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 186.9,
+          "tokens_out": 11212,
+          "tokens_in": 0,
+          "requests_completed": 58,
+          "ttft_ms_p50": 93.2,
+          "ttft_ms_p99": 137.1
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 182.4,
+          "tokens_out": 10943,
+          "tokens_in": 0,
+          "requests_completed": 59,
+          "ttft_ms_p50": 93.3,
+          "ttft_ms_p99": 96.1
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 178.6,
+          "tokens_out": 10722,
+          "tokens_in": 0,
+          "requests_completed": 56,
+          "ttft_ms_p50": 93.5,
+          "ttft_ms_p99": 142.5
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 178.7,
+          "tokens_out": 10717,
+          "tokens_in": 0,
+          "requests_completed": 57,
+          "ttft_ms_p50": 93.6,
+          "ttft_ms_p99": 138.1
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 182.0,
+      "throttle_ratio": 0.947,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -12.7
+    },
+    "interactive": {
+      "ttft_ms_p50": 72.21,
+      "ttft_ms_p90": 78.15,
+      "ttft_ms_p99": 308.29,
+      "tpot_ms_p50": 18.52,
+      "tpot_ms_p90": 18.58,
+      "tpot_ms_p99": 18.6,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 176.9
+    },
+    "burst": {
+      "sla_ttft_ms": 1000,
+      "burst_steady_qps": 5,
+      "burst_peak_qps": 25,
+      "burst_duration_seconds": 30,
+      "burst_interval_seconds": 120,
+      "steady_requests_total": 1812,
+      "burst_requests_total": 2245,
+      "steady_ttft_p50_ms": 98.54,
+      "steady_ttft_p99_ms": 201.51,
+      "burst_ttft_p50_ms": 140.87,
+      "burst_ttft_p99_ms": 384.05,
+      "sla_met_during_burst": true,
+      "burst_degradation_ratio": 1.906,
+      "results_by_cycle": [
+        {
+          "cycle": 1,
+          "steady_requests": 581,
+          "burst_requests": 760,
+          "steady_ttft_p99_ms": 364.09,
+          "burst_ttft_p99_ms": 404.43
+        },
+        {
+          "cycle": 2,
+          "steady_requests": 595,
+          "burst_requests": 734,
+          "steady_ttft_p99_ms": 173.75,
+          "burst_ttft_p99_ms": 391.83
+        },
+        {
+          "cycle": 3,
+          "steady_requests": 636,
+          "burst_requests": 751,
+          "steady_ttft_p99_ms": 174.59,
+          "burst_ttft_p99_ms": 340.29
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.77,
+    "baseline_delta": 0.0,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "23:12:06",
+    "run_id": "40a62dd1",
+    "run_name": "nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T23:08:55.136710+00:00",
+    "benchmark_end_time": "2026-05-07T23:12:06.765021+00:00",
+    "benchmark_elapsed_minutes": 60.7,
+    "model_load_seconds": 480.4,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained', 'interactive', 'burst'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/offline",
+      "online": "results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/online",
+      "sustained": "results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/sustained",
+      "interactive": "results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/interactive",
+      "burst": "results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/burst"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/sustained/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/sustained/result.json
new file mode 100644
index 00000000..0cc90236
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1/sustained/result.json
@@ -0,0 +1,493 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_B",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 8,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T22:51:04.801985+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "model_revision": "50fd307e57011801c7833c87efa1984ddf2db42f",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 70.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 8,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 4,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 166.1,
+          "tokens_out": 9973,
+          "tokens_in": 0,
+          "requests_completed": 54,
+          "ttft_ms_p50": 97.1,
+          "ttft_ms_p99": 745.3
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 181.2,
+          "tokens_out": 10872,
+          "tokens_in": 0,
+          "requests_completed": 57,
+          "ttft_ms_p50": 95.8,
+          "ttft_ms_p99": 150.8
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 186.1,
+          "tokens_out": 11167,
+          "tokens_in": 0,
+          "requests_completed": 59,
+          "ttft_ms_p50": 93.3,
+          "ttft_ms_p99": 138.2
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 183.3,
+          "tokens_out": 10997,
+          "tokens_in": 0,
+          "requests_completed": 57,
+          "ttft_ms_p50": 93.8,
+          "ttft_ms_p99": 143.4
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 181.9,
+          "tokens_out": 10911,
+          "tokens_in": 0,
+          "requests_completed": 59,
+          "ttft_ms_p50": 93.6,
+          "ttft_ms_p99": 114.2
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 179.2,
+          "tokens_out": 10755,
+          "tokens_in": 0,
+          "requests_completed": 57,
+          "ttft_ms_p50": 93.0,
+          "ttft_ms_p99": 131.9
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 183.2,
+          "tokens_out": 10992,
+          "tokens_in": 0,
+          "requests_completed": 58,
+          "ttft_ms_p50": 93.2,
+          "ttft_ms_p99": 142.5
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 180.1,
+          "tokens_out": 10805,
+          "tokens_in": 0,
+          "requests_completed": 58,
+          "ttft_ms_p50": 93.4,
+          "ttft_ms_p99": 116.0
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 180.9,
+          "tokens_out": 10855,
+          "tokens_in": 0,
+          "requests_completed": 56,
+          "ttft_ms_p50": 93.3,
+          "ttft_ms_p99": 138.2
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 185.0,
+          "tokens_out": 11100,
+          "tokens_in": 0,
+          "requests_completed": 59,
+          "ttft_ms_p50": 93.2,
+          "ttft_ms_p99": 112.9
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 180.6,
+          "tokens_out": 10833,
+          "tokens_in": 0,
+          "requests_completed": 57,
+          "ttft_ms_p50": 93.3,
+          "ttft_ms_p99": 132.8
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 185.3,
+          "tokens_out": 11128,
+          "tokens_in": 0,
+          "requests_completed": 60,
+          "ttft_ms_p50": 93.7,
+          "ttft_ms_p99": 143.0
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 181.0,
+          "tokens_out": 10859,
+          "tokens_in": 0,
+          "requests_completed": 57,
+          "ttft_ms_p50": 93.3,
+          "ttft_ms_p99": 147.0
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 182.3,
+          "tokens_out": 10933,
+          "tokens_in": 0,
+          "requests_completed": 58,
+          "ttft_ms_p50": 93.4,
+          "ttft_ms_p99": 143.0
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 179.6,
+          "tokens_out": 10776,
+          "tokens_in": 0,
+          "requests_completed": 58,
+          "ttft_ms_p50": 93.2,
+          "ttft_ms_p99": 96.8
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 178.8,
+          "tokens_out": 10736,
+          "tokens_in": 0,
+          "requests_completed": 55,
+          "ttft_ms_p50": 93.0,
+          "ttft_ms_p99": 158.0
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 187.3,
+          "tokens_out": 11233,
+          "tokens_in": 0,
+          "requests_completed": 60,
+          "ttft_ms_p50": 92.9,
+          "ttft_ms_p99": 111.2
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 181.4,
+          "tokens_out": 10885,
+          "tokens_in": 0,
+          "requests_completed": 58,
+          "ttft_ms_p50": 92.8,
+          "ttft_ms_p99": 155.3
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 183.1,
+          "tokens_out": 10985,
+          "tokens_in": 0,
+          "requests_completed": 58,
+          "ttft_ms_p50": 92.7,
+          "ttft_ms_p99": 142.4
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 183.8,
+          "tokens_out": 11032,
+          "tokens_in": 0,
+          "requests_completed": 59,
+          "ttft_ms_p50": 93.2,
+          "ttft_ms_p99": 115.7
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 177.4,
+          "tokens_out": 10641,
+          "tokens_in": 0,
+          "requests_completed": 56,
+          "ttft_ms_p50": 93.3,
+          "ttft_ms_p99": 159.1
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 182.3,
+          "tokens_out": 10937,
+          "tokens_in": 0,
+          "requests_completed": 58,
+          "ttft_ms_p50": 93.0,
+          "ttft_ms_p99": 95.0
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 178.8,
+          "tokens_out": 10723,
+          "tokens_in": 0,
+          "requests_completed": 55,
+          "ttft_ms_p50": 93.2,
+          "ttft_ms_p99": 137.9
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 180.9,
+          "tokens_out": 10856,
+          "tokens_in": 0,
+          "requests_completed": 59,
+          "ttft_ms_p50": 93.1,
+          "ttft_ms_p99": 138.4
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 184.6,
+          "tokens_out": 11084,
+          "tokens_in": 0,
+          "requests_completed": 59,
+          "ttft_ms_p50": 93.2,
+          "ttft_ms_p99": 131.4
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 186.9,
+          "tokens_out": 11212,
+          "tokens_in": 0,
+          "requests_completed": 58,
+          "ttft_ms_p50": 93.2,
+          "ttft_ms_p99": 137.1
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 182.4,
+          "tokens_out": 10943,
+          "tokens_in": 0,
+          "requests_completed": 59,
+          "ttft_ms_p50": 93.3,
+          "ttft_ms_p99": 96.1
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 178.6,
+          "tokens_out": 10722,
+          "tokens_in": 0,
+          "requests_completed": 56,
+          "ttft_ms_p50": 93.5,
+          "ttft_ms_p99": 142.5
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 178.7,
+          "tokens_out": 10717,
+          "tokens_in": 0,
+          "requests_completed": 57,
+          "ttft_ms_p50": 93.6,
+          "ttft_ms_p99": 138.1
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 182.0,
+      "throttle_ratio": 0.947,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -12.7
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-08",
+    "time": "00:09:25",
+    "run_id": "40a62dd1",
+    "run_name": "nvidia_a100_sxm4_40gbx8_suite_B_nvidia_sglang_c43a8309_40a62dd1",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T23:39:17.027251+00:00",
+    "benchmark_end_time": "2026-05-08T00:09:25.500189+00:00",
+    "benchmark_elapsed_minutes": 30.1,
+    "model_load_seconds": 456.3
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/1x/offline/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/1x/offline/result.json
new file mode 100644
index 00000000..a6a37cf6
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/1x/offline/result.json
@@ -0,0 +1,231 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_E",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T01:37:37.031654+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 4199.41,
+          "throughput_tokens_per_sec_per_chip": 4199.41,
+          "elapsed_seconds_median": 12.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 4196.73,
+          "throughput_tokens_per_sec_per_chip": 4196.73,
+          "elapsed_seconds_median": 12.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 4200.78,
+          "throughput_tokens_per_sec_per_chip": 4200.78,
+          "elapsed_seconds_median": 12.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "01:54:23",
+    "run_id": "e61755d3",
+    "run_name": "nvidia_a100_sxm4_40gbx1_suite_E_nvidia_sglang_c43a8309_e61755d3",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T01:51:48.053693+00:00",
+    "benchmark_end_time": "2026-05-09T01:54:23.461144+00:00",
+    "benchmark_elapsed_minutes": 2.6,
+    "model_load_seconds": 55.5
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/1x/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/1x/result.json
new file mode 100644
index 00000000..aa566c0d
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/1x/result.json
@@ -0,0 +1,236 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_E",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T01:37:37.031654+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 4199.41,
+          "throughput_tokens_per_sec_per_chip": 4199.41,
+          "elapsed_seconds_median": 12.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 4196.73,
+          "throughput_tokens_per_sec_per_chip": 4196.73,
+          "elapsed_seconds_median": 12.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 4200.78,
+          "throughput_tokens_per_sec_per_chip": 4200.78,
+          "elapsed_seconds_median": 12.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to populate."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "01:54:23",
+    "run_id": "e61755d3",
+    "run_name": "nvidia_a100_sxm4_40gbx1_suite_E_nvidia_sglang_c43a8309_e61755d3",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T01:51:48.053693+00:00",
+    "benchmark_end_time": "2026-05-09T01:54:23.461144+00:00",
+    "benchmark_elapsed_minutes": 2.6,
+    "model_load_seconds": 55.5,
+    "benchmark_elapsed_minutes_note": "Total across ['offline'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/1x/offline"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/2x/offline/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/2x/offline/result.json
new file mode 100644
index 00000000..8db270ea
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/2x/offline/result.json
@@ -0,0 +1,231 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_E",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 2,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T01:37:37.031654+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 2,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 5095.96,
+          "throughput_tokens_per_sec_per_chip": 2547.98,
+          "elapsed_seconds_median": 10.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 5098.68,
+          "throughput_tokens_per_sec_per_chip": 2549.34,
+          "elapsed_seconds_median": 10.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 5107.25,
+          "throughput_tokens_per_sec_per_chip": 2553.63,
+          "elapsed_seconds_median": 10.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "01:50:22",
+    "run_id": "b52e1fb8",
+    "run_name": "nvidia_a100_sxm4_40gbx2_suite_E_nvidia_sglang_c43a8309_b52e1fb8",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T01:48:13.765095+00:00",
+    "benchmark_end_time": "2026-05-09T01:50:22.725663+00:00",
+    "benchmark_elapsed_minutes": 2.1,
+    "model_load_seconds": 61.7
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/2x/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/2x/result.json
new file mode 100644
index 00000000..d7858378
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/2x/result.json
@@ -0,0 +1,236 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_E",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 2,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T01:37:37.031654+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 2,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 5095.96,
+          "throughput_tokens_per_sec_per_chip": 2547.98,
+          "elapsed_seconds_median": 10.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 5098.68,
+          "throughput_tokens_per_sec_per_chip": 2549.34,
+          "elapsed_seconds_median": 10.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 5107.25,
+          "throughput_tokens_per_sec_per_chip": 2553.63,
+          "elapsed_seconds_median": 10.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to populate."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "01:50:22",
+    "run_id": "b52e1fb8",
+    "run_name": "nvidia_a100_sxm4_40gbx2_suite_E_nvidia_sglang_c43a8309_b52e1fb8",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T01:48:13.765095+00:00",
+    "benchmark_end_time": "2026-05-09T01:50:22.725663+00:00",
+    "benchmark_elapsed_minutes": 2.1,
+    "model_load_seconds": 61.7,
+    "benchmark_elapsed_minutes_note": "Total across ['offline'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/2x/offline"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/4x/offline/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/4x/offline/result.json
new file mode 100644
index 00000000..3dd2ffd2
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/4x/offline/result.json
@@ -0,0 +1,231 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_E",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 4,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T01:37:37.031654+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 4,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 10106.45,
+          "throughput_tokens_per_sec_per_chip": 2526.61,
+          "elapsed_seconds_median": 5.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 10486.01,
+          "throughput_tokens_per_sec_per_chip": 2621.5,
+          "elapsed_seconds_median": 5.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 10488.02,
+          "throughput_tokens_per_sec_per_chip": 2622.0,
+          "elapsed_seconds_median": 5.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "01:46:44",
+    "run_id": "cf842629",
+    "run_name": "nvidia_a100_sxm4_40gbx4_suite_E_nvidia_sglang_c43a8309_cf842629",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T01:45:38.186874+00:00",
+    "benchmark_end_time": "2026-05-09T01:46:44.010738+00:00",
+    "benchmark_elapsed_minutes": 1.1,
+    "model_load_seconds": 75.6
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/4x/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/4x/result.json
new file mode 100644
index 00000000..e7ed4ae1
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/4x/result.json
@@ -0,0 +1,236 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_E",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 4,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T01:37:37.031654+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 4,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 10106.45,
+          "throughput_tokens_per_sec_per_chip": 2526.61,
+          "elapsed_seconds_median": 5.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 10486.01,
+          "throughput_tokens_per_sec_per_chip": 2621.5,
+          "elapsed_seconds_median": 5.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 10488.02,
+          "throughput_tokens_per_sec_per_chip": 2622.0,
+          "elapsed_seconds_median": 5.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to populate."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "01:46:44",
+    "run_id": "cf842629",
+    "run_name": "nvidia_a100_sxm4_40gbx4_suite_E_nvidia_sglang_c43a8309_cf842629",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T01:45:38.186874+00:00",
+    "benchmark_end_time": "2026-05-09T01:46:44.010738+00:00",
+    "benchmark_elapsed_minutes": 1.1,
+    "model_load_seconds": 75.6,
+    "benchmark_elapsed_minutes_note": "Total across ['offline'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/4x/offline"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/8x/offline/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/8x/offline/result.json
new file mode 100644
index 00000000..5f9467a7
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/8x/offline/result.json
@@ -0,0 +1,231 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_E",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 8,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T01:37:37.031654+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 8,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 13319.56,
+          "throughput_tokens_per_sec_per_chip": 1664.95,
+          "elapsed_seconds_median": 4.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 13335.41,
+          "throughput_tokens_per_sec_per_chip": 1666.93,
+          "elapsed_seconds_median": 4.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 13332.3,
+          "throughput_tokens_per_sec_per_chip": 1666.54,
+          "elapsed_seconds_median": 4.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "01:43:53",
+    "run_id": "67683413",
+    "run_name": "nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T01:42:59.242491+00:00",
+    "benchmark_end_time": "2026-05-09T01:43:53.069174+00:00",
+    "benchmark_elapsed_minutes": 0.9,
+    "model_load_seconds": 99.9
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/8x/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/8x/result.json
new file mode 100644
index 00000000..14c1dd52
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/8x/result.json
@@ -0,0 +1,236 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_E",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 8,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T01:37:37.031654+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 8,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 13319.56,
+          "throughput_tokens_per_sec_per_chip": 1664.95,
+          "elapsed_seconds_median": 4.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 13335.41,
+          "throughput_tokens_per_sec_per_chip": 1666.93,
+          "elapsed_seconds_median": 4.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 13332.3,
+          "throughput_tokens_per_sec_per_chip": 1666.54,
+          "elapsed_seconds_median": 4.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to populate."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "01:43:53",
+    "run_id": "67683413",
+    "run_name": "nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T01:42:59.242491+00:00",
+    "benchmark_end_time": "2026-05-09T01:43:53.069174+00:00",
+    "benchmark_elapsed_minutes": 0.9,
+    "model_load_seconds": 99.9,
+    "benchmark_elapsed_minutes_note": "Total across ['offline'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/8x/offline"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/accuracy/accuracy.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/accuracy/accuracy.json
similarity index 100%
rename from results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/accuracy/accuracy.json
rename to results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/accuracy/accuracy.json
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/env_info.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/env_info.json
new file mode 100644
index 00000000..18bde9fe
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/env_info.json
@@ -0,0 +1,118 @@
+{
+  "collected_at": "2026-05-09T01:37:37.031654+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 1,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 2,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 3,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 4,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 5,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 6,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 7,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+  "intra_node_interconnect": "NVLink",
+  "cpu": {
+    "model": "AMD EPYC 7532 32-Core Processor",
+    "physical_cores": 64,
+    "logical_cores": 128,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.7,
+  "pcie_generation": "PCIe Gen 4",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_2",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.10.20",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 12.8",
+  "pytorch_version": "2.9.1+cu128"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/result.json
new file mode 100644
index 00000000..f4fb58a0
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/result.json
@@ -0,0 +1,273 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_E",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 8,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null,
+    "_count_note": "Maximum chip count used in this suite. See task.chip_counts_run for all counts tested."
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline"
+    ],
+    "chip_counts_run": [
+      1,
+      2,
+      4,
+      8
+    ],
+    "parallelism_note": "Each chip_count uses tensor_parallel_size=N",
+    "num_runs": 3
+  },
+  "metrics": {
+    "scaling": {
+      "base_chip_count": 1,
+      "base_throughput_tokens_per_sec": 4200.78,
+      "results_by_chip_count": [
+        {
+          "chip_count": 1,
+          "best_throughput_tokens_per_sec": 4200.78,
+          "throughput_tokens_per_sec_per_chip": 4200.78,
+          "scaling_efficiency": 1.0,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 8,
+              "throughput_tokens_per_sec": 4199.41,
+              "throughput_tokens_per_sec_per_chip": 4199.41,
+              "elapsed_seconds_median": 12.6,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 32,
+              "throughput_tokens_per_sec": 4196.73,
+              "throughput_tokens_per_sec_per_chip": 4196.73,
+              "elapsed_seconds_median": 12.5,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 128,
+              "throughput_tokens_per_sec": 4200.78,
+              "throughput_tokens_per_sec_per_chip": 4200.78,
+              "elapsed_seconds_median": 12.5,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "1x"
+        },
+        {
+          "chip_count": 2,
+          "best_throughput_tokens_per_sec": 5107.25,
+          "throughput_tokens_per_sec_per_chip": 2553.62,
+          "scaling_efficiency": 0.608,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 8,
+              "throughput_tokens_per_sec": 5095.96,
+              "throughput_tokens_per_sec_per_chip": 2547.98,
+              "elapsed_seconds_median": 10.5,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 32,
+              "throughput_tokens_per_sec": 5098.68,
+              "throughput_tokens_per_sec_per_chip": 2549.34,
+              "elapsed_seconds_median": 10.5,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 128,
+              "throughput_tokens_per_sec": 5107.25,
+              "throughput_tokens_per_sec_per_chip": 2553.62,
+              "elapsed_seconds_median": 10.5,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "2x"
+        },
+        {
+          "chip_count": 4,
+          "best_throughput_tokens_per_sec": 10488.02,
+          "throughput_tokens_per_sec_per_chip": 2622.01,
+          "scaling_efficiency": 0.624,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 8,
+              "throughput_tokens_per_sec": 10106.45,
+              "throughput_tokens_per_sec_per_chip": 2526.61,
+              "elapsed_seconds_median": 5.2,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 32,
+              "throughput_tokens_per_sec": 10486.01,
+              "throughput_tokens_per_sec_per_chip": 2621.5,
+              "elapsed_seconds_median": 5.1,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 128,
+              "throughput_tokens_per_sec": 10488.02,
+              "throughput_tokens_per_sec_per_chip": 2622.01,
+              "elapsed_seconds_median": 5.1,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "4x"
+        },
+        {
+          "chip_count": 8,
+          "best_throughput_tokens_per_sec": 13335.41,
+          "throughput_tokens_per_sec_per_chip": 1666.93,
+          "scaling_efficiency": 0.397,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 8,
+              "throughput_tokens_per_sec": 13319.56,
+              "throughput_tokens_per_sec_per_chip": 1664.94,
+              "elapsed_seconds_median": 4.0,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 32,
+              "throughput_tokens_per_sec": 13335.41,
+              "throughput_tokens_per_sec_per_chip": 1666.93,
+              "elapsed_seconds_median": 4.0,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 128,
+              "throughput_tokens_per_sec": 13332.3,
+              "throughput_tokens_per_sec_per_chip": 1666.54,
+              "elapsed_seconds_median": 4.0,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "8x"
+        }
+      ]
+    },
+    "derived": {}
+  },
+  "accuracy": {
+    "subset_score": 0.61,
+    "baseline_delta": 0.01,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "01:54:23",
+    "run_id": "67683413",
+    "run_name": "nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T01:51:48.053693+00:00",
+    "benchmark_end_time": "2026-05-09T01:54:23.461144+00:00",
+    "benchmark_elapsed_minutes": 6.7,
+    "model_load_seconds": 55.5,
+    "benchmark_elapsed_minutes_note": "Sum of per-chip-count benchmark_elapsed_minutes (excludes sleep gaps, orchestrator overhead, and skipped counts).",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a100_sxm4_40gbx8_suite_E_nvidia_sglang_c43a8309_67683413/1x/offline"
+    },
+    "chip_count_dirs": {
+      "1": "1x",
+      "2": "2x",
+      "4": "4x",
+      "8": "8x"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/accuracy/accuracy.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/accuracy/accuracy.json
new file mode 100644
index 00000000..25ffb30c
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.66,
+  "baseline_delta": 0.04,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/env_info.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/env_info.json
new file mode 100644
index 00000000..d278d605
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/env_info.json
@@ -0,0 +1,118 @@
+{
+  "collected_at": "2026-05-09T19:16:00.016713+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 1,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 2,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 3,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 4,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 5,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 6,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    },
+    {
+      "index": 7,
+      "name": "NVIDIA A100-SXM4-40GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 40.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+  "intra_node_interconnect": "NVLink",
+  "cpu": {
+    "model": "AMD EPYC 7532 32-Core Processor",
+    "physical_cores": 64,
+    "logical_cores": 128,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.7,
+  "pcie_generation": "PCIe Gen 4",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_2",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.10.20",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 12.8",
+  "pytorch_version": "2.9.1+cu128"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/interactive/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/interactive/result.json
new file mode 100644
index 00000000..324baf20
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/interactive/result.json
@@ -0,0 +1,201 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_G",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 8,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T19:16:00.016713+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "model_revision": "eba92302a2861cdc0098cc54bc9f17cb2c47eb61",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "moe",
+    "parameter_count_b": 7.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 8,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 42.97,
+      "ttft_ms_p90": 45.86,
+      "ttft_ms_p99": 942.53,
+      "tpot_ms_p50": 4.94,
+      "tpot_ms_p90": 5.0,
+      "tpot_ms_p99": 5.75,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 50.9
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "19:59:26",
+    "run_id": "9e9c88dd",
+    "run_name": "nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T19:56:50.783467+00:00",
+    "benchmark_end_time": "2026-05-09T19:59:26.812585+00:00",
+    "benchmark_elapsed_minutes": 2.6,
+    "model_load_seconds": 353.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/offline/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/offline/result.json
new file mode 100644
index 00000000..6e88b068
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/offline/result.json
@@ -0,0 +1,231 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_G",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 8,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T19:16:00.016713+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "model_revision": "eba92302a2861cdc0098cc54bc9f17cb2c47eb61",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "moe",
+    "parameter_count_b": 7.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 8,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 4096.26,
+          "throughput_tokens_per_sec_per_chip": 512.03,
+          "elapsed_seconds_median": 8.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 4330.45,
+          "throughput_tokens_per_sec_per_chip": 541.31,
+          "elapsed_seconds_median": 7.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 4346.6,
+          "throughput_tokens_per_sec_per_chip": 543.32,
+          "elapsed_seconds_median": 7.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "19:32:15",
+    "run_id": "9e9c88dd",
+    "run_name": "nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T19:30:32.944483+00:00",
+    "benchmark_end_time": "2026-05-09T19:32:15.237392+00:00",
+    "benchmark_elapsed_minutes": 1.7,
+    "model_load_seconds": 358.7
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/online/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/online/result.json
new file mode 100644
index 00000000..e99cb8e0
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/online/result.json
@@ -0,0 +1,233 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_G",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 8,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T19:16:00.016713+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "model_revision": "eba92302a2861cdc0098cc54bc9f17cb2c47eb61",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "moe",
+    "parameter_count_b": 7.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 8,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 40,
+      "results_by_qps": [
+        {
+          "target_qps": 2,
+          "achieved_qps": 2.0,
+          "ttft_ms_p50": 52.25,
+          "ttft_ms_p90": 78.57,
+          "ttft_ms_p99": 553.67,
+          "tpot_ms_p50": 11.24,
+          "tpot_ms_p90": 13.21,
+          "tpot_ms_p99": 18.14,
+          "elapsed_seconds_median": 159.1,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 63.44,
+          "ttft_ms_p90": 97.44,
+          "ttft_ms_p99": 196.44,
+          "tpot_ms_p50": 20.84,
+          "tpot_ms_p90": 23.14,
+          "tpot_ms_p99": 26.92,
+          "elapsed_seconds_median": 32.3,
+          "sla_met": true
+        },
+        {
+          "target_qps": 40,
+          "achieved_qps": 40.0,
+          "ttft_ms_p50": 92.92,
+          "ttft_ms_p90": 110.13,
+          "ttft_ms_p99": 161.4,
+          "tpot_ms_p50": 49.96,
+          "tpot_ms_p90": 66.04,
+          "tpot_ms_p99": 127.08,
+          "elapsed_seconds_median": 14.9,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "19:49:44",
+    "run_id": "9e9c88dd",
+    "run_name": "nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T19:39:29.143471+00:00",
+    "benchmark_end_time": "2026-05-09T19:49:44.926315+00:00",
+    "benchmark_elapsed_minutes": 10.3,
+    "model_load_seconds": 380.8
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/result.json
new file mode 100644
index 00000000..3c975318
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/result.json
@@ -0,0 +1,598 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_G",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 8,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T19:16:00.016713+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "model_revision": "eba92302a2861cdc0098cc54bc9f17cb2c47eb61",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "moe",
+    "parameter_count_b": 7.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "interactive",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 8,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 4096.26,
+          "throughput_tokens_per_sec_per_chip": 512.03,
+          "elapsed_seconds_median": 8.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 4330.45,
+          "throughput_tokens_per_sec_per_chip": 541.31,
+          "elapsed_seconds_median": 7.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 4346.6,
+          "throughput_tokens_per_sec_per_chip": 543.32,
+          "elapsed_seconds_median": 7.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 40,
+      "results_by_qps": [
+        {
+          "target_qps": 2,
+          "achieved_qps": 2.0,
+          "ttft_ms_p50": 52.25,
+          "ttft_ms_p90": 78.57,
+          "ttft_ms_p99": 553.67,
+          "tpot_ms_p50": 11.24,
+          "tpot_ms_p90": 13.21,
+          "tpot_ms_p99": 18.14,
+          "elapsed_seconds_median": 159.1,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 63.44,
+          "ttft_ms_p90": 97.44,
+          "ttft_ms_p99": 196.44,
+          "tpot_ms_p50": 20.84,
+          "tpot_ms_p90": 23.14,
+          "tpot_ms_p99": 26.92,
+          "elapsed_seconds_median": 32.3,
+          "sla_met": true
+        },
+        {
+          "target_qps": 40,
+          "achieved_qps": 40.0,
+          "ttft_ms_p50": 92.92,
+          "ttft_ms_p90": 110.13,
+          "ttft_ms_p99": 161.4,
+          "tpot_ms_p50": 49.96,
+          "tpot_ms_p90": 66.04,
+          "tpot_ms_p99": 127.08,
+          "elapsed_seconds_median": 14.9,
+          "sla_met": true
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 42.97,
+      "ttft_ms_p90": 45.86,
+      "ttft_ms_p99": 942.53,
+      "tpot_ms_p50": 4.94,
+      "tpot_ms_p90": 5.0,
+      "tpot_ms_p99": 5.75,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 50.9
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 599.8,
+          "tokens_out": 35995,
+          "tokens_in": 0,
+          "requests_completed": 187,
+          "ttft_ms_p50": 58.7,
+          "ttft_ms_p99": 1593.9
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 629.0,
+          "tokens_out": 37760,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.7,
+          "ttft_ms_p99": 90.4
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 632.3,
+          "tokens_out": 37933,
+          "tokens_in": 0,
+          "requests_completed": 197,
+          "ttft_ms_p50": 57.3,
+          "ttft_ms_p99": 91.0
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 636.0,
+          "tokens_out": 38148,
+          "tokens_in": 0,
+          "requests_completed": 198,
+          "ttft_ms_p50": 57.5,
+          "ttft_ms_p99": 90.9
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 627.8,
+          "tokens_out": 37654,
+          "tokens_in": 0,
+          "requests_completed": 195,
+          "ttft_ms_p50": 57.1,
+          "ttft_ms_p99": 90.6
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 631.7,
+          "tokens_out": 37903,
+          "tokens_in": 0,
+          "requests_completed": 197,
+          "ttft_ms_p50": 57.2,
+          "ttft_ms_p99": 90.5
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 640.5,
+          "tokens_out": 38458,
+          "tokens_in": 0,
+          "requests_completed": 200,
+          "ttft_ms_p50": 57.4,
+          "ttft_ms_p99": 91.7
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 627.2,
+          "tokens_out": 37625,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.2,
+          "ttft_ms_p99": 90.8
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 627.3,
+          "tokens_out": 37630,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.4,
+          "ttft_ms_p99": 93.1
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 639.5,
+          "tokens_out": 38365,
+          "tokens_in": 0,
+          "requests_completed": 200,
+          "ttft_ms_p50": 57.0,
+          "ttft_ms_p99": 90.3
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 620.4,
+          "tokens_out": 37220,
+          "tokens_in": 0,
+          "requests_completed": 193,
+          "ttft_ms_p50": 57.2,
+          "ttft_ms_p99": 91.2
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 638.6,
+          "tokens_out": 38323,
+          "tokens_in": 0,
+          "requests_completed": 198,
+          "ttft_ms_p50": 57.1,
+          "ttft_ms_p99": 91.8
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 627.5,
+          "tokens_out": 37637,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.2,
+          "ttft_ms_p99": 90.3
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 632.4,
+          "tokens_out": 37975,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.3,
+          "ttft_ms_p99": 92.2
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 637.7,
+          "tokens_out": 38230,
+          "tokens_in": 0,
+          "requests_completed": 198,
+          "ttft_ms_p50": 57.2,
+          "ttft_ms_p99": 92.4
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 641.6,
+          "tokens_out": 38514,
+          "tokens_in": 0,
+          "requests_completed": 200,
+          "ttft_ms_p50": 57.2,
+          "ttft_ms_p99": 90.5
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 629.3,
+          "tokens_out": 37765,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.3,
+          "ttft_ms_p99": 90.3
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 635.1,
+          "tokens_out": 38103,
+          "tokens_in": 0,
+          "requests_completed": 197,
+          "ttft_ms_p50": 57.3,
+          "ttft_ms_p99": 91.8
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 631.9,
+          "tokens_out": 37909,
+          "tokens_in": 0,
+          "requests_completed": 195,
+          "ttft_ms_p50": 57.3,
+          "ttft_ms_p99": 91.1
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 631.6,
+          "tokens_out": 37900,
+          "tokens_in": 0,
+          "requests_completed": 197,
+          "ttft_ms_p50": 57.4,
+          "ttft_ms_p99": 92.5
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 636.3,
+          "tokens_out": 38166,
+          "tokens_in": 0,
+          "requests_completed": 199,
+          "ttft_ms_p50": 57.3,
+          "ttft_ms_p99": 92.8
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 629.9,
+          "tokens_out": 37790,
+          "tokens_in": 0,
+          "requests_completed": 197,
+          "ttft_ms_p50": 57.4,
+          "ttft_ms_p99": 90.9
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 634.3,
+          "tokens_out": 38083,
+          "tokens_in": 0,
+          "requests_completed": 198,
+          "ttft_ms_p50": 57.5,
+          "ttft_ms_p99": 83.4
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 630.9,
+          "tokens_out": 37851,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.4,
+          "ttft_ms_p99": 91.4
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 636.2,
+          "tokens_out": 38151,
+          "tokens_in": 0,
+          "requests_completed": 198,
+          "ttft_ms_p50": 57.2,
+          "ttft_ms_p99": 92.2
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 625.1,
+          "tokens_out": 37526,
+          "tokens_in": 0,
+          "requests_completed": 195,
+          "ttft_ms_p50": 57.1,
+          "ttft_ms_p99": 85.4
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 626.9,
+          "tokens_out": 37605,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.4,
+          "ttft_ms_p99": 90.2
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 634.1,
+          "tokens_out": 38047,
+          "tokens_in": 0,
+          "requests_completed": 198,
+          "ttft_ms_p50": 57.5,
+          "ttft_ms_p99": 92.4
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 628.0,
+          "tokens_out": 37674,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.3,
+          "ttft_ms_p99": 94.6
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 632.1,
+      "throttle_ratio": 0.967,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": 4.2
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.66,
+    "baseline_delta": 0.04,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "19:32:15",
+    "run_id": "9e9c88dd",
+    "run_name": "nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T19:30:32.944483+00:00",
+    "benchmark_end_time": "2026-05-09T19:32:15.237392+00:00",
+    "benchmark_elapsed_minutes": 44.7,
+    "model_load_seconds": 358.7,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'interactive', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/offline",
+      "online": "results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/online",
+      "interactive": "results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/interactive",
+      "sustained": "results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/sustained/result.json b/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/sustained/result.json
new file mode 100644
index 00000000..3b191050
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd/sustained/result.json
@@ -0,0 +1,493 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_G",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-40GB",
+    "vendor": "NVIDIA",
+    "count": 8,
+    "memory_gb": 40.0,
+    "interconnect_intra_node": "NVLink",
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T19:16:00.016713+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 1,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 2,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 3,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 4,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 5,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 6,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      },
+      {
+        "index": 7,
+        "name": "NVIDIA A100-SXM4-40GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 40.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tPXB\tPXB\tNODE\t0-31,64-95\t0\t\tN/A\nGPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNODE\tNODE\tPXB\t0-31,64-95\t0\t\tN/A\nGPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nGPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tPXB\tPXB\tNODE\tNODE\tSYS\tSYS\tSYS\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tNODE\tNODE\tPXB\tPXB\tSYS\tSYS\tSYS\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": "NVLink",
+    "cpu": {
+      "model": "AMD EPYC 7532 32-Core Processor",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "model_revision": "eba92302a2861cdc0098cc54bc9f17cb2c47eb61",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "moe",
+    "parameter_count_b": 7.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 8,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 599.8,
+          "tokens_out": 35995,
+          "tokens_in": 0,
+          "requests_completed": 187,
+          "ttft_ms_p50": 58.7,
+          "ttft_ms_p99": 1593.9
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 629.0,
+          "tokens_out": 37760,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.7,
+          "ttft_ms_p99": 90.4
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 632.3,
+          "tokens_out": 37933,
+          "tokens_in": 0,
+          "requests_completed": 197,
+          "ttft_ms_p50": 57.3,
+          "ttft_ms_p99": 91.0
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 636.0,
+          "tokens_out": 38148,
+          "tokens_in": 0,
+          "requests_completed": 198,
+          "ttft_ms_p50": 57.5,
+          "ttft_ms_p99": 90.9
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 627.8,
+          "tokens_out": 37654,
+          "tokens_in": 0,
+          "requests_completed": 195,
+          "ttft_ms_p50": 57.1,
+          "ttft_ms_p99": 90.6
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 631.7,
+          "tokens_out": 37903,
+          "tokens_in": 0,
+          "requests_completed": 197,
+          "ttft_ms_p50": 57.2,
+          "ttft_ms_p99": 90.5
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 640.5,
+          "tokens_out": 38458,
+          "tokens_in": 0,
+          "requests_completed": 200,
+          "ttft_ms_p50": 57.4,
+          "ttft_ms_p99": 91.7
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 627.2,
+          "tokens_out": 37625,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.2,
+          "ttft_ms_p99": 90.8
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 627.3,
+          "tokens_out": 37630,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.4,
+          "ttft_ms_p99": 93.1
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 639.5,
+          "tokens_out": 38365,
+          "tokens_in": 0,
+          "requests_completed": 200,
+          "ttft_ms_p50": 57.0,
+          "ttft_ms_p99": 90.3
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 620.4,
+          "tokens_out": 37220,
+          "tokens_in": 0,
+          "requests_completed": 193,
+          "ttft_ms_p50": 57.2,
+          "ttft_ms_p99": 91.2
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 638.6,
+          "tokens_out": 38323,
+          "tokens_in": 0,
+          "requests_completed": 198,
+          "ttft_ms_p50": 57.1,
+          "ttft_ms_p99": 91.8
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 627.5,
+          "tokens_out": 37637,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.2,
+          "ttft_ms_p99": 90.3
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 632.4,
+          "tokens_out": 37975,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.3,
+          "ttft_ms_p99": 92.2
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 637.7,
+          "tokens_out": 38230,
+          "tokens_in": 0,
+          "requests_completed": 198,
+          "ttft_ms_p50": 57.2,
+          "ttft_ms_p99": 92.4
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 641.6,
+          "tokens_out": 38514,
+          "tokens_in": 0,
+          "requests_completed": 200,
+          "ttft_ms_p50": 57.2,
+          "ttft_ms_p99": 90.5
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 629.3,
+          "tokens_out": 37765,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.3,
+          "ttft_ms_p99": 90.3
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 635.1,
+          "tokens_out": 38103,
+          "tokens_in": 0,
+          "requests_completed": 197,
+          "ttft_ms_p50": 57.3,
+          "ttft_ms_p99": 91.8
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 631.9,
+          "tokens_out": 37909,
+          "tokens_in": 0,
+          "requests_completed": 195,
+          "ttft_ms_p50": 57.3,
+          "ttft_ms_p99": 91.1
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 631.6,
+          "tokens_out": 37900,
+          "tokens_in": 0,
+          "requests_completed": 197,
+          "ttft_ms_p50": 57.4,
+          "ttft_ms_p99": 92.5
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 636.3,
+          "tokens_out": 38166,
+          "tokens_in": 0,
+          "requests_completed": 199,
+          "ttft_ms_p50": 57.3,
+          "ttft_ms_p99": 92.8
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 629.9,
+          "tokens_out": 37790,
+          "tokens_in": 0,
+          "requests_completed": 197,
+          "ttft_ms_p50": 57.4,
+          "ttft_ms_p99": 90.9
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 634.3,
+          "tokens_out": 38083,
+          "tokens_in": 0,
+          "requests_completed": 198,
+          "ttft_ms_p50": 57.5,
+          "ttft_ms_p99": 83.4
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 630.9,
+          "tokens_out": 37851,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.4,
+          "ttft_ms_p99": 91.4
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 636.2,
+          "tokens_out": 38151,
+          "tokens_in": 0,
+          "requests_completed": 198,
+          "ttft_ms_p50": 57.2,
+          "ttft_ms_p99": 92.2
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 625.1,
+          "tokens_out": 37526,
+          "tokens_in": 0,
+          "requests_completed": 195,
+          "ttft_ms_p50": 57.1,
+          "ttft_ms_p99": 85.4
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 626.9,
+          "tokens_out": 37605,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.4,
+          "ttft_ms_p99": 90.2
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 634.1,
+          "tokens_out": 38047,
+          "tokens_in": 0,
+          "requests_completed": 198,
+          "ttft_ms_p50": 57.5,
+          "ttft_ms_p99": 92.4
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 628.0,
+          "tokens_out": 37674,
+          "tokens_in": 0,
+          "requests_completed": 196,
+          "ttft_ms_p50": 57.3,
+          "ttft_ms_p99": 94.6
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 632.1,
+      "throttle_ratio": 0.967,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": 4.2
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "20:36:20",
+    "run_id": "9e9c88dd",
+    "run_name": "nvidia_a100_sxm4_40gbx8_suite_G_nvidia_sglang_c43a8309_9e9c88dd",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T20:06:15.402590+00:00",
+    "benchmark_end_time": "2026-05-09T20:36:20.103726+00:00",
+    "benchmark_elapsed_minutes": 30.1,
+    "model_load_seconds": 365.8
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/accuracy/accuracy.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/accuracy/accuracy.json
new file mode 100644
index 00000000..5b260195
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.61,
+  "baseline_delta": 0.01,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/burst/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/burst/result.json
new file mode 100644
index 00000000..28a3fd9f
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/burst/result.json
@@ -0,0 +1,164 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T05:48:17.734271+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "burst",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "burst": {
+      "sla_ttft_ms": 500,
+      "burst_steady_qps": 5,
+      "burst_peak_qps": 25,
+      "burst_duration_seconds": 30,
+      "burst_interval_seconds": 120,
+      "steady_requests_total": 1812,
+      "burst_requests_total": 2245,
+      "steady_ttft_p50_ms": 40.23,
+      "steady_ttft_p99_ms": 7616.24,
+      "burst_ttft_p50_ms": 58.43,
+      "burst_ttft_p99_ms": 92.44,
+      "sla_met_during_burst": true,
+      "burst_degradation_ratio": 0.012,
+      "results_by_cycle": [
+        {
+          "cycle": 1,
+          "steady_requests": 581,
+          "burst_requests": 760,
+          "steady_ttft_p99_ms": 9005.58,
+          "burst_ttft_p99_ms": 93.71
+        },
+        {
+          "cycle": 2,
+          "steady_requests": 595,
+          "burst_requests": 734,
+          "steady_ttft_p99_ms": 60.59,
+          "burst_ttft_p99_ms": 90.98
+        },
+        {
+          "cycle": 3,
+          "steady_requests": 636,
+          "burst_requests": 751,
+          "steady_ttft_p99_ms": 61.04,
+          "burst_ttft_p99_ms": 90.39
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "07:22:26",
+    "run_id": "b14c1ebc",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T07:14:36.321733+00:00",
+    "benchmark_end_time": "2026-05-09T07:22:26.557125+00:00",
+    "benchmark_elapsed_minutes": 7.8,
+    "model_load_seconds": 148.3
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/env_info.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/env_info.json
similarity index 50%
rename from results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/env_info.json
rename to results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/env_info.json
index 6f03653e..e13ead7e 100644
--- a/results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/env_info.json
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/env_info.json
@@ -1,22 +1,22 @@
 {
-  "collected_at": "2026-05-06T11:15:11.081772+00:00",
+  "collected_at": "2026-05-09T05:48:17.734271+00:00",
   "accelerators": [
     {
       "index": 0,
-      "name": "NVIDIA A100-SXM4-40GB",
+      "name": "NVIDIA A100-SXM4-80GB",
       "vendor": "NVIDIA",
-      "memory_gb": 40,
+      "memory_gb": 80.0,
       "driver_version": "565.57.01",
       "firmware_version": null,
       "compute_capability": "8.0",
       "supports_bf16": true
     }
   ],
-  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tPXB\tNODE\tSYS\t0-31,64-95\t0\t\tN/A\nNIC0\tPXB\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tPXB\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tNODE\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
   "intra_node_interconnect": null,
   "cpu": {
-    "model": "AMD EPYC 7532 32-Core Processor",
-    "physical_cores": 64,
+    "model": "AMD EPYC 7742 64-Core Processor",
+    "physical_cores": 128,
     "logical_cores": 128,
     "numa_nodes": 2
   },
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/interactive/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/interactive/result.json
new file mode 100644
index 00000000..97cf6bd4
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/interactive/result.json
@@ -0,0 +1,136 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T05:48:17.734271+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 29.64,
+      "ttft_ms_p90": 42.16,
+      "ttft_ms_p99": 60.21,
+      "tpot_ms_p50": 11.01,
+      "tpot_ms_p90": 11.05,
+      "tpot_ms_p99": 11.1,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 325.6
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "06:19:40",
+    "run_id": "b14c1ebc",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T06:03:20.785501+00:00",
+    "benchmark_end_time": "2026-05-09T06:19:40.172258+00:00",
+    "benchmark_elapsed_minutes": 16.3,
+    "model_load_seconds": 74.5
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/offline/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/offline/result.json
new file mode 100644
index 00000000..f8b644a7
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/offline/result.json
@@ -0,0 +1,166 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T05:48:17.734271+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 3826.14,
+          "throughput_tokens_per_sec_per_chip": 3826.14,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 3825.94,
+          "throughput_tokens_per_sec_per_chip": 3825.94,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 3825.6,
+          "throughput_tokens_per_sec_per_chip": 3825.6,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "05:55:09",
+    "run_id": "b14c1ebc",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T05:53:13.837994+00:00",
+    "benchmark_end_time": "2026-05-09T05:55:09.368859+00:00",
+    "benchmark_elapsed_minutes": 1.9,
+    "model_load_seconds": 76.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/online/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/online/result.json
new file mode 100644
index 00000000..2720c2c6
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/online/result.json
@@ -0,0 +1,168 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T05:48:17.734271+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 100,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 41.3,
+          "ttft_ms_p90": 61.57,
+          "ttft_ms_p99": 2755.55,
+          "tpot_ms_p50": 12.84,
+          "tpot_ms_p90": 14.51,
+          "tpot_ms_p99": 18.23,
+          "elapsed_seconds_median": 65.5,
+          "sla_met": false
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 53.35,
+          "ttft_ms_p90": 76.61,
+          "ttft_ms_p99": 88.02,
+          "tpot_ms_p50": 30.58,
+          "tpot_ms_p90": 36.23,
+          "tpot_ms_p99": 44.23,
+          "elapsed_seconds_median": 16.2,
+          "sla_met": true
+        },
+        {
+          "target_qps": 100,
+          "achieved_qps": 100.0,
+          "ttft_ms_p50": 51.72,
+          "ttft_ms_p90": 73.43,
+          "ttft_ms_p99": 279.13,
+          "tpot_ms_p50": 38.98,
+          "tpot_ms_p90": 50.77,
+          "tpot_ms_p99": 137.32,
+          "elapsed_seconds_median": 10.3,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "06:01:30",
+    "run_id": "b14c1ebc",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T05:56:56.038603+00:00",
+    "benchmark_end_time": "2026-05-09T06:01:30.135883+00:00",
+    "benchmark_elapsed_minutes": 4.6,
+    "model_load_seconds": 72.4
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/result.json
new file mode 100644
index 00000000..62fe8bc2
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/result.json
@@ -0,0 +1,615 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T05:48:17.734271+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "interactive",
+      "sustained",
+      "speculative",
+      "burst"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 3826.14,
+          "throughput_tokens_per_sec_per_chip": 3826.14,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 3825.94,
+          "throughput_tokens_per_sec_per_chip": 3825.94,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 3825.6,
+          "throughput_tokens_per_sec_per_chip": 3825.6,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 100,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 41.3,
+          "ttft_ms_p90": 61.57,
+          "ttft_ms_p99": 2755.55,
+          "tpot_ms_p50": 12.84,
+          "tpot_ms_p90": 14.51,
+          "tpot_ms_p99": 18.23,
+          "elapsed_seconds_median": 65.5,
+          "sla_met": false
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 53.35,
+          "ttft_ms_p90": 76.61,
+          "ttft_ms_p99": 88.02,
+          "tpot_ms_p50": 30.58,
+          "tpot_ms_p90": 36.23,
+          "tpot_ms_p99": 44.23,
+          "elapsed_seconds_median": 16.2,
+          "sla_met": true
+        },
+        {
+          "target_qps": 100,
+          "achieved_qps": 100.0,
+          "ttft_ms_p50": 51.72,
+          "ttft_ms_p90": 73.43,
+          "ttft_ms_p99": 279.13,
+          "tpot_ms_p50": 38.98,
+          "tpot_ms_p90": 50.77,
+          "tpot_ms_p99": 137.32,
+          "elapsed_seconds_median": 10.3,
+          "sla_met": true
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 29.64,
+      "ttft_ms_p90": 42.16,
+      "ttft_ms_p99": 60.21,
+      "tpot_ms_p50": 11.01,
+      "tpot_ms_p90": 11.05,
+      "tpot_ms_p99": 11.1,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 325.6
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 461.8,
+          "tokens_out": 27707,
+          "tokens_in": 0,
+          "requests_completed": 149,
+          "ttft_ms_p50": 43.5,
+          "ttft_ms_p99": 16624.9
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.3,
+          "tokens_out": 39864,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 38.8,
+          "ttft_ms_p99": 54.3
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 658.9,
+          "tokens_out": 39547,
+          "tokens_in": 0,
+          "requests_completed": 209,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 54.8
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 668.0,
+          "tokens_out": 40062,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 56.0
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 661.5,
+          "tokens_out": 39684,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 39.1,
+          "ttft_ms_p99": 54.9
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 661.0,
+          "tokens_out": 39666,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 39.1,
+          "ttft_ms_p99": 54.9
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 665.3,
+          "tokens_out": 39923,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 39.0,
+          "ttft_ms_p99": 42.4
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 668.0,
+          "tokens_out": 40095,
+          "tokens_in": 0,
+          "requests_completed": 214,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 56.2
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 672.3,
+          "tokens_out": 40337,
+          "tokens_in": 0,
+          "requests_completed": 214,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 44.2
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.9,
+          "tokens_out": 39632,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 46.8
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 658.6,
+          "tokens_out": 39525,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 46.5
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 663.2,
+          "tokens_out": 39809,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 54.6
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 665.2,
+          "tokens_out": 39897,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 54.1
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 658.4,
+          "tokens_out": 39497,
+          "tokens_in": 0,
+          "requests_completed": 209,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 55.8
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 666.5,
+          "tokens_out": 40005,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 57.0
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 668.9,
+          "tokens_out": 40133,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 55.6
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 668.8,
+          "tokens_out": 40122,
+          "tokens_in": 0,
+          "requests_completed": 214,
+          "ttft_ms_p50": 39.5,
+          "ttft_ms_p99": 47.1
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 658.7,
+          "tokens_out": 39530,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 43.2
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.7,
+          "tokens_out": 39630,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 39.4,
+          "ttft_ms_p99": 46.4
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.3,
+          "tokens_out": 39871,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 54.2
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.0,
+          "tokens_out": 39810,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 39.1,
+          "ttft_ms_p99": 55.4
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 668.9,
+          "tokens_out": 40137,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.4,
+          "ttft_ms_p99": 47.5
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 665.6,
+          "tokens_out": 39960,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.1,
+          "ttft_ms_p99": 59.2
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.1,
+          "tokens_out": 39597,
+          "tokens_in": 0,
+          "requests_completed": 209,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 43.0
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 666.1,
+          "tokens_out": 39972,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 56.0
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.6,
+          "tokens_out": 39744,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 54.2
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 663.4,
+          "tokens_out": 39800,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 55.6
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.1,
+          "tokens_out": 39830,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 56.1
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 663.1,
+          "tokens_out": 39812,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 39.1,
+          "ttft_ms_p99": 54.5
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 664.0,
+      "throttle_ratio": 0.979,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": 0.2
+    },
+    "speculative": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 732.93,
+          "throughput_tokens_per_sec_per_chip": 732.93,
+          "elapsed_seconds_median": 47.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 732.58,
+          "throughput_tokens_per_sec_per_chip": 732.58,
+          "elapsed_seconds_median": 47.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 731.36,
+          "throughput_tokens_per_sec_per_chip": 731.36,
+          "elapsed_seconds_median": 47.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "burst": {
+      "sla_ttft_ms": 500,
+      "burst_steady_qps": 5,
+      "burst_peak_qps": 25,
+      "burst_duration_seconds": 30,
+      "burst_interval_seconds": 120,
+      "steady_requests_total": 1812,
+      "burst_requests_total": 2245,
+      "steady_ttft_p50_ms": 40.23,
+      "steady_ttft_p99_ms": 7616.24,
+      "burst_ttft_p50_ms": 58.43,
+      "burst_ttft_p99_ms": 92.44,
+      "sla_met_during_burst": true,
+      "burst_degradation_ratio": 0.012,
+      "results_by_cycle": [
+        {
+          "cycle": 1,
+          "steady_requests": 581,
+          "burst_requests": 760,
+          "steady_ttft_p99_ms": 9005.58,
+          "burst_ttft_p99_ms": 93.71
+        },
+        {
+          "cycle": 2,
+          "steady_requests": 595,
+          "burst_requests": 734,
+          "steady_ttft_p99_ms": 60.59,
+          "burst_ttft_p99_ms": 90.98
+        },
+        {
+          "cycle": 3,
+          "steady_requests": 636,
+          "burst_requests": 751,
+          "steady_ttft_p99_ms": 61.04,
+          "burst_ttft_p99_ms": 90.39
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.61,
+    "baseline_delta": 0.01,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "05:55:09",
+    "run_id": "b14c1ebc",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T05:53:13.837994+00:00",
+    "benchmark_end_time": "2026-05-09T05:55:09.368859+00:00",
+    "benchmark_elapsed_minutes": 70.2,
+    "model_load_seconds": 76.2,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'interactive', 'sustained', 'speculative', 'burst'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/offline",
+      "online": "results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/online",
+      "interactive": "results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/interactive",
+      "sustained": "results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/sustained",
+      "speculative": "results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/speculative",
+      "burst": "results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/burst"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/speculative/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/speculative/result.json
new file mode 100644
index 00000000..45e13030
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/speculative/result.json
@@ -0,0 +1,166 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T05:48:17.734271+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "speculative",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "speculative": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 732.93,
+          "throughput_tokens_per_sec_per_chip": 732.93,
+          "elapsed_seconds_median": 47.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 732.58,
+          "throughput_tokens_per_sec_per_chip": 732.58,
+          "elapsed_seconds_median": 47.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 731.36,
+          "throughput_tokens_per_sec_per_chip": 731.36,
+          "elapsed_seconds_median": 47.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "07:10:47",
+    "run_id": "b14c1ebc",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T07:01:16.371264+00:00",
+    "benchmark_end_time": "2026-05-09T07:10:47.930065+00:00",
+    "benchmark_elapsed_minutes": 9.5,
+    "model_load_seconds": 224.5
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/sustained/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/sustained/result.json
new file mode 100644
index 00000000..428e2715
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc/sustained/result.json
@@ -0,0 +1,428 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T05:48:17.734271+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 461.8,
+          "tokens_out": 27707,
+          "tokens_in": 0,
+          "requests_completed": 149,
+          "ttft_ms_p50": 43.5,
+          "ttft_ms_p99": 16624.9
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.3,
+          "tokens_out": 39864,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 38.8,
+          "ttft_ms_p99": 54.3
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 658.9,
+          "tokens_out": 39547,
+          "tokens_in": 0,
+          "requests_completed": 209,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 54.8
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 668.0,
+          "tokens_out": 40062,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 56.0
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 661.5,
+          "tokens_out": 39684,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 39.1,
+          "ttft_ms_p99": 54.9
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 661.0,
+          "tokens_out": 39666,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 39.1,
+          "ttft_ms_p99": 54.9
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 665.3,
+          "tokens_out": 39923,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 39.0,
+          "ttft_ms_p99": 42.4
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 668.0,
+          "tokens_out": 40095,
+          "tokens_in": 0,
+          "requests_completed": 214,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 56.2
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 672.3,
+          "tokens_out": 40337,
+          "tokens_in": 0,
+          "requests_completed": 214,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 44.2
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.9,
+          "tokens_out": 39632,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 46.8
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 658.6,
+          "tokens_out": 39525,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 46.5
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 663.2,
+          "tokens_out": 39809,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 54.6
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 665.2,
+          "tokens_out": 39897,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 54.1
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 658.4,
+          "tokens_out": 39497,
+          "tokens_in": 0,
+          "requests_completed": 209,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 55.8
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 666.5,
+          "tokens_out": 40005,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 57.0
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 668.9,
+          "tokens_out": 40133,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 55.6
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 668.8,
+          "tokens_out": 40122,
+          "tokens_in": 0,
+          "requests_completed": 214,
+          "ttft_ms_p50": 39.5,
+          "ttft_ms_p99": 47.1
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 658.7,
+          "tokens_out": 39530,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 43.2
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.7,
+          "tokens_out": 39630,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 39.4,
+          "ttft_ms_p99": 46.4
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.3,
+          "tokens_out": 39871,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 54.2
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.0,
+          "tokens_out": 39810,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 39.1,
+          "ttft_ms_p99": 55.4
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 668.9,
+          "tokens_out": 40137,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.4,
+          "ttft_ms_p99": 47.5
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 665.6,
+          "tokens_out": 39960,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.1,
+          "ttft_ms_p99": 59.2
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.1,
+          "tokens_out": 39597,
+          "tokens_in": 0,
+          "requests_completed": 209,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 43.0
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 666.1,
+          "tokens_out": 39972,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 56.0
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.6,
+          "tokens_out": 39744,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 54.2
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 663.4,
+          "tokens_out": 39800,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 55.6
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.1,
+          "tokens_out": 39830,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 56.1
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 663.1,
+          "tokens_out": 39812,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 39.1,
+          "ttft_ms_p99": 54.5
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 664.0,
+      "throttle_ratio": 0.979,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": 0.2
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "06:55:50",
+    "run_id": "b14c1ebc",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_b14c1ebc",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T06:25:45.065449+00:00",
+    "benchmark_end_time": "2026-05-09T06:55:49.949999+00:00",
+    "benchmark_elapsed_minutes": 30.1,
+    "model_load_seconds": 252.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/accuracy/accuracy.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/accuracy/accuracy.json
new file mode 100644
index 00000000..37f4d824
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.55,
+  "baseline_delta": -0.01,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/offline/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/offline/result.json
new file mode 100644
index 00000000..0f55961e
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/offline/result.json
@@ -0,0 +1,178 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T08:03:04.688337+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tPXB\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tPXB\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 3811.78,
+          "throughput_tokens_per_sec_per_chip": 3811.78,
+          "elapsed_seconds_median": 9.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 3812.36,
+          "throughput_tokens_per_sec_per_chip": 3812.36,
+          "elapsed_seconds_median": 9.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 3814.22,
+          "throughput_tokens_per_sec_per_chip": 3814.22,
+          "elapsed_seconds_median": 9.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 3806.77,
+          "throughput_tokens_per_sec_per_chip": 3806.77,
+          "elapsed_seconds_median": 9.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "08:10:25",
+    "run_id": "29a32aea",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T08:07:50.935068+00:00",
+    "benchmark_end_time": "2026-04-30T08:10:25.900871+00:00",
+    "benchmark_elapsed_minutes": 2.6,
+    "model_load_seconds": 70.3
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/online/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/online/result.json
new file mode 100644
index 00000000..a809fb6e
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/online/result.json
@@ -0,0 +1,180 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T08:03:04.688337+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tPXB\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tPXB\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 41.09,
+          "ttft_ms_p90": 61.91,
+          "ttft_ms_p99": 3163.27,
+          "tpot_ms_p50": 12.83,
+          "tpot_ms_p90": 14.44,
+          "tpot_ms_p99": 17.5,
+          "elapsed_seconds_median": 65.7,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 42.35,
+          "ttft_ms_p90": 57.39,
+          "ttft_ms_p99": 64.91,
+          "tpot_ms_p50": 15.94,
+          "tpot_ms_p90": 17.81,
+          "tpot_ms_p99": 19.09,
+          "elapsed_seconds_median": 32.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 53.45,
+          "ttft_ms_p90": 75.19,
+          "ttft_ms_p99": 86.75,
+          "tpot_ms_p50": 29.88,
+          "tpot_ms_p90": 35.11,
+          "tpot_ms_p99": 39.97,
+          "elapsed_seconds_median": 16.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 51.39,
+          "ttft_ms_p90": 75.87,
+          "ttft_ms_p99": 94.44,
+          "tpot_ms_p50": 37.98,
+          "tpot_ms_p90": 44.01,
+          "tpot_ms_p99": 60.2,
+          "elapsed_seconds_median": 12.1,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "08:18:27",
+    "run_id": "29a32aea",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T08:12:09.187300+00:00",
+    "benchmark_end_time": "2026-04-30T08:18:27.314631+00:00",
+    "benchmark_elapsed_minutes": 6.3,
+    "model_load_seconds": 70.3
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/result.json
new file mode 100644
index 00000000..46c68a3a
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/result.json
@@ -0,0 +1,395 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T08:03:04.688337+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tPXB\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tPXB\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 3811.78,
+          "throughput_tokens_per_sec_per_chip": 3811.78,
+          "elapsed_seconds_median": 9.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 3812.36,
+          "throughput_tokens_per_sec_per_chip": 3812.36,
+          "elapsed_seconds_median": 9.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 3814.22,
+          "throughput_tokens_per_sec_per_chip": 3814.22,
+          "elapsed_seconds_median": 9.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 3806.77,
+          "throughput_tokens_per_sec_per_chip": 3806.77,
+          "elapsed_seconds_median": 9.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 41.09,
+          "ttft_ms_p90": 61.91,
+          "ttft_ms_p99": 3163.27,
+          "tpot_ms_p50": 12.83,
+          "tpot_ms_p90": 14.44,
+          "tpot_ms_p99": 17.5,
+          "elapsed_seconds_median": 65.7,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 42.35,
+          "ttft_ms_p90": 57.39,
+          "ttft_ms_p99": 64.91,
+          "tpot_ms_p50": 15.94,
+          "tpot_ms_p90": 17.81,
+          "tpot_ms_p99": 19.09,
+          "elapsed_seconds_median": 32.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 53.45,
+          "ttft_ms_p90": 75.19,
+          "ttft_ms_p99": 86.75,
+          "tpot_ms_p50": 29.88,
+          "tpot_ms_p90": 35.11,
+          "tpot_ms_p99": 39.97,
+          "elapsed_seconds_median": 16.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 51.39,
+          "ttft_ms_p90": 75.87,
+          "ttft_ms_p99": 94.44,
+          "tpot_ms_p50": 37.98,
+          "tpot_ms_p90": 44.01,
+          "tpot_ms_p99": 60.2,
+          "elapsed_seconds_median": 12.1,
+          "sla_met": true
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 587.4,
+          "tokens_out": 35271,
+          "tokens_in": 0,
+          "requests_completed": 193,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 4162.3
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 673.7,
+          "tokens_out": 40390,
+          "tokens_in": 0,
+          "requests_completed": 218,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 56.2
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 657.0,
+          "tokens_out": 39444,
+          "tokens_in": 0,
+          "requests_completed": 216,
+          "ttft_ms_p50": 39.0,
+          "ttft_ms_p99": 55.0
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.4,
+          "tokens_out": 39848,
+          "tokens_in": 0,
+          "requests_completed": 217,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 55.6
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 665.9,
+          "tokens_out": 39968,
+          "tokens_in": 0,
+          "requests_completed": 217,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 54.4
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 659.9,
+          "tokens_out": 39577,
+          "tokens_in": 0,
+          "requests_completed": 213,
+          "ttft_ms_p50": 39.6,
+          "ttft_ms_p99": 60.7
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.8,
+          "tokens_out": 39897,
+          "tokens_in": 0,
+          "requests_completed": 217,
+          "ttft_ms_p50": 39.4,
+          "ttft_ms_p99": 57.2
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 657.4,
+          "tokens_out": 39449,
+          "tokens_in": 0,
+          "requests_completed": 213,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 54.0
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 668.6,
+          "tokens_out": 40108,
+          "tokens_in": 0,
+          "requests_completed": 218,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 53.4
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.6,
+          "tokens_out": 39662,
+          "tokens_in": 0,
+          "requests_completed": 218,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 54.1
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 658.3,
+          "tokens_out": 39497,
+          "tokens_in": 0,
+          "requests_completed": 213,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 55.9
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 661.7,
+          "tokens_out": 39679,
+          "tokens_in": 0,
+          "requests_completed": 214,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 47.6
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.9,
+          "tokens_out": 39798,
+          "tokens_in": 0,
+          "requests_completed": 217,
+          "ttft_ms_p50": 39.4,
+          "ttft_ms_p99": 55.8
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 671.5,
+          "tokens_out": 40279,
+          "tokens_in": 0,
+          "requests_completed": 215,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 56.4
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 658.1,
+      "throttle_ratio": 0.872,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -4105.9
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.55,
+    "baseline_delta": -0.01,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "08:10:25",
+    "run_id": "29a32aea",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T08:07:50.935068+00:00",
+    "benchmark_end_time": "2026-04-30T08:10:25.900871+00:00",
+    "benchmark_elapsed_minutes": 24.0,
+    "model_load_seconds": 70.3,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/offline",
+      "online": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/online",
+      "sustained": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/sustained/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/sustained/result.json
new file mode 100644
index 00000000..3d008d8f
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/sustained/result.json
@@ -0,0 +1,278 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T08:03:04.688337+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tPXB\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tPXB\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 587.4,
+          "tokens_out": 35271,
+          "tokens_in": 0,
+          "requests_completed": 193,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 4162.3
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 673.7,
+          "tokens_out": 40390,
+          "tokens_in": 0,
+          "requests_completed": 218,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 56.2
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 657.0,
+          "tokens_out": 39444,
+          "tokens_in": 0,
+          "requests_completed": 216,
+          "ttft_ms_p50": 39.0,
+          "ttft_ms_p99": 55.0
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.4,
+          "tokens_out": 39848,
+          "tokens_in": 0,
+          "requests_completed": 217,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 55.6
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 665.9,
+          "tokens_out": 39968,
+          "tokens_in": 0,
+          "requests_completed": 217,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 54.4
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 659.9,
+          "tokens_out": 39577,
+          "tokens_in": 0,
+          "requests_completed": 213,
+          "ttft_ms_p50": 39.6,
+          "ttft_ms_p99": 60.7
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.8,
+          "tokens_out": 39897,
+          "tokens_in": 0,
+          "requests_completed": 217,
+          "ttft_ms_p50": 39.4,
+          "ttft_ms_p99": 57.2
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 657.4,
+          "tokens_out": 39449,
+          "tokens_in": 0,
+          "requests_completed": 213,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 54.0
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 668.6,
+          "tokens_out": 40108,
+          "tokens_in": 0,
+          "requests_completed": 218,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 53.4
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.6,
+          "tokens_out": 39662,
+          "tokens_in": 0,
+          "requests_completed": 218,
+          "ttft_ms_p50": 39.2,
+          "ttft_ms_p99": 54.1
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 658.3,
+          "tokens_out": 39497,
+          "tokens_in": 0,
+          "requests_completed": 213,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 55.9
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 661.7,
+          "tokens_out": 39679,
+          "tokens_in": 0,
+          "requests_completed": 214,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 47.6
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.9,
+          "tokens_out": 39798,
+          "tokens_in": 0,
+          "requests_completed": 217,
+          "ttft_ms_p50": 39.4,
+          "ttft_ms_p99": 55.8
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 671.5,
+          "tokens_out": 40279,
+          "tokens_in": 0,
+          "requests_completed": 215,
+          "ttft_ms_p50": 39.3,
+          "ttft_ms_p99": 56.4
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 658.1,
+      "throttle_ratio": 0.872,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -4105.9
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "08:35:24",
+    "run_id": "29a32aea",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T08:20:20.395609+00:00",
+    "benchmark_end_time": "2026-04-30T08:35:24.178367+00:00",
+    "benchmark_elapsed_minutes": 15.1,
+    "model_load_seconds": 76.6
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/env_info.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/env_info.json
new file mode 100644
index 00000000..b1f9df26
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/env_info.json
@@ -0,0 +1,53 @@
+{
+  "collected_at": "2026-04-30T08:03:04.688337+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA A100-SXM4-80GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 80.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tPXB\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tPXB\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "AMD EPYC 7742 64-Core Processor",
+    "physical_cores": 128,
+    "logical_cores": 128,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.7,
+  "pcie_generation": "PCIe Gen 4",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_2",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_3",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.10.20",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 12.8",
+  "pytorch_version": "2.9.1+cu128"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/result.json
new file mode 100644
index 00000000..0d242ee4
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/result.json
@@ -0,0 +1,963 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original",
+    "_note": "suite model_id. Each precision level uses its own quantized checkpoint."
+  },
+  "task": {
+    "scenarios_run": [
+      "accuracy",
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "precision_levels_run": [
+      "BF16",
+      "FP8",
+      "W8A8",
+      "W8A16",
+      "W4A16"
+    ],
+    "precision_levels_skipped": [
+      "FP16"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "quantization": {
+      "results_by_precision": [
+        {
+          "precision": "BF16",
+          "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+          "best_throughput_tokens_per_sec": 3814.22,
+          "accuracy_score": 0.55,
+          "accuracy_baseline_delta": -0.01,
+          "accuracy_valid": true,
+          "quality_efficiency": 2097.8,
+          "speedup_vs_bf16": 1.0,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 3811.78,
+              "throughput_tokens_per_sec_per_chip": 3811.78,
+              "elapsed_seconds_median": 9.3,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 3812.36,
+              "throughput_tokens_per_sec_per_chip": 3812.36,
+              "elapsed_seconds_median": 9.3,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 3814.22,
+              "throughput_tokens_per_sec_per_chip": 3814.22,
+              "elapsed_seconds_median": 9.3,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 3806.77,
+              "throughput_tokens_per_sec_per_chip": 3806.77,
+              "elapsed_seconds_median": 9.3,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "bf16",
+          "effective_dtype": "bfloat16",
+          "quantization_method": null
+        },
+        {
+          "precision": "W8A16",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+          "best_throughput_tokens_per_sec": 3995.48,
+          "accuracy_score": 0.59,
+          "accuracy_baseline_delta": 0.0,
+          "accuracy_valid": true,
+          "quality_efficiency": 2357.3,
+          "speedup_vs_bf16": 1.048,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 3994.04,
+              "throughput_tokens_per_sec_per_chip": 3994.04,
+              "elapsed_seconds_median": 8.9,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 3995.3,
+              "throughput_tokens_per_sec_per_chip": 3995.3,
+              "elapsed_seconds_median": 8.9,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 3993.64,
+              "throughput_tokens_per_sec_per_chip": 3993.64,
+              "elapsed_seconds_median": 8.9,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 3995.48,
+              "throughput_tokens_per_sec_per_chip": 3995.48,
+              "elapsed_seconds_median": 8.9,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "w8a16",
+          "effective_dtype": "auto",
+          "quantization_method": "compressed-tensors"
+        },
+        {
+          "precision": "W4A16",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+          "best_throughput_tokens_per_sec": 2208.93,
+          "accuracy_score": 0.57,
+          "accuracy_baseline_delta": 0.0,
+          "accuracy_valid": true,
+          "quality_efficiency": 1259.1,
+          "speedup_vs_bf16": 0.579,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 2151.78,
+              "throughput_tokens_per_sec_per_chip": 2151.78,
+              "elapsed_seconds_median": 16.1,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 2107.59,
+              "throughput_tokens_per_sec_per_chip": 2107.59,
+              "elapsed_seconds_median": 16.2,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 2144.09,
+              "throughput_tokens_per_sec_per_chip": 2144.09,
+              "elapsed_seconds_median": 16.1,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 2208.93,
+              "throughput_tokens_per_sec_per_chip": 2208.93,
+              "elapsed_seconds_median": 15.7,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "w4a16",
+          "effective_dtype": "auto",
+          "quantization_method": "gptq"
+        }
+      ]
+    },
+    "derived": {},
+    "quantization_online": {
+      "results_by_precision": [
+        {
+          "precision": "BF16",
+          "max_valid_qps": 50,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 41.09,
+              "ttft_ms_p90": 61.91,
+              "ttft_ms_p99": 3163.27,
+              "tpot_ms_p50": 12.83,
+              "tpot_ms_p90": 14.44,
+              "tpot_ms_p99": 17.5,
+              "elapsed_seconds_median": 65.7,
+              "sla_met": false
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 42.35,
+              "ttft_ms_p90": 57.39,
+              "ttft_ms_p99": 64.91,
+              "tpot_ms_p50": 15.94,
+              "tpot_ms_p90": 17.81,
+              "tpot_ms_p99": 19.09,
+              "elapsed_seconds_median": 32.1,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 53.45,
+              "ttft_ms_p90": 75.19,
+              "ttft_ms_p99": 86.75,
+              "tpot_ms_p50": 29.88,
+              "tpot_ms_p90": 35.11,
+              "tpot_ms_p99": 39.97,
+              "elapsed_seconds_median": 16.1,
+              "sla_met": true
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 51.39,
+              "ttft_ms_p90": 75.87,
+              "ttft_ms_p99": 94.44,
+              "tpot_ms_p50": 37.98,
+              "tpot_ms_p90": 44.01,
+              "tpot_ms_p99": 60.2,
+              "elapsed_seconds_median": 12.1,
+              "sla_met": true
+            }
+          ]
+        },
+        {
+          "precision": "W8A16",
+          "max_valid_qps": 50,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 36.12,
+              "ttft_ms_p90": 69.07,
+              "ttft_ms_p99": 3268.43,
+              "tpot_ms_p50": 8.37,
+              "tpot_ms_p90": 9.71,
+              "tpot_ms_p99": 14.4,
+              "elapsed_seconds_median": 64.6,
+              "sla_met": false
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 37.67,
+              "ttft_ms_p90": 55.4,
+              "ttft_ms_p99": 66.01,
+              "tpot_ms_p50": 11.2,
+              "tpot_ms_p90": 12.6,
+              "tpot_ms_p99": 14.08,
+              "elapsed_seconds_median": 31.2,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 54.14,
+              "ttft_ms_p90": 78.87,
+              "ttft_ms_p99": 95.15,
+              "tpot_ms_p50": 30.91,
+              "tpot_ms_p90": 37.44,
+              "tpot_ms_p99": 44.41,
+              "elapsed_seconds_median": 16.1,
+              "sla_met": true
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 56.58,
+              "ttft_ms_p90": 83.52,
+              "ttft_ms_p99": 102.84,
+              "tpot_ms_p50": 41.53,
+              "tpot_ms_p90": 50.68,
+              "tpot_ms_p99": 75.53,
+              "elapsed_seconds_median": 12.9,
+              "sla_met": true
+            }
+          ]
+        },
+        {
+          "precision": "W4A16",
+          "max_valid_qps": 50,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 57.25,
+              "ttft_ms_p90": 96.93,
+              "ttft_ms_p99": 3210.47,
+              "tpot_ms_p50": 21.98,
+              "tpot_ms_p90": 36.21,
+              "tpot_ms_p99": 40.53,
+              "elapsed_seconds_median": 66.2,
+              "sla_met": false
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 65.61,
+              "ttft_ms_p90": 87.43,
+              "ttft_ms_p99": 98.67,
+              "tpot_ms_p50": 35.19,
+              "tpot_ms_p90": 36.47,
+              "tpot_ms_p99": 39.7,
+              "elapsed_seconds_median": 34.8,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 63.45,
+              "ttft_ms_p90": 96.95,
+              "ttft_ms_p99": 116.42,
+              "tpot_ms_p50": 45.94,
+              "tpot_ms_p90": 50.69,
+              "tpot_ms_p99": 57.79,
+              "elapsed_seconds_median": 19.3,
+              "sla_met": true
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 60.0,
+              "ttft_ms_p90": 97.76,
+              "ttft_ms_p99": 121.5,
+              "tpot_ms_p50": 49.66,
+              "tpot_ms_p90": 59.83,
+              "tpot_ms_p99": 75.38,
+              "elapsed_seconds_median": 14.9,
+              "sla_met": true
+            }
+          ]
+        }
+      ]
+    },
+    "quantization_sustained": {
+      "results_by_precision": [
+        {
+          "precision": "BF16",
+          "sustained_throughput_tokens_per_sec": 658.1,
+          "throttle_ratio": 0.872,
+          "throttle_onset_minute": 1.0,
+          "ttft_p99_drift_ms": -4105.9,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 587.4,
+              "tokens_out": 35271,
+              "tokens_in": 0,
+              "requests_completed": 193,
+              "ttft_ms_p50": 39.9,
+              "ttft_ms_p99": 4162.3
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 673.7,
+              "tokens_out": 40390,
+              "tokens_in": 0,
+              "requests_completed": 218,
+              "ttft_ms_p50": 39.2,
+              "ttft_ms_p99": 56.2
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 657.0,
+              "tokens_out": 39444,
+              "tokens_in": 0,
+              "requests_completed": 216,
+              "ttft_ms_p50": 39.0,
+              "ttft_ms_p99": 55.0
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 664.4,
+              "tokens_out": 39848,
+              "tokens_in": 0,
+              "requests_completed": 217,
+              "ttft_ms_p50": 39.2,
+              "ttft_ms_p99": 55.6
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 665.9,
+              "tokens_out": 39968,
+              "tokens_in": 0,
+              "requests_completed": 217,
+              "ttft_ms_p50": 39.2,
+              "ttft_ms_p99": 54.4
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 659.9,
+              "tokens_out": 39577,
+              "tokens_in": 0,
+              "requests_completed": 213,
+              "ttft_ms_p50": 39.6,
+              "ttft_ms_p99": 60.7
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 664.8,
+              "tokens_out": 39897,
+              "tokens_in": 0,
+              "requests_completed": 217,
+              "ttft_ms_p50": 39.4,
+              "ttft_ms_p99": 57.2
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 657.4,
+              "tokens_out": 39449,
+              "tokens_in": 0,
+              "requests_completed": 213,
+              "ttft_ms_p50": 39.2,
+              "ttft_ms_p99": 54.0
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 668.6,
+              "tokens_out": 40108,
+              "tokens_in": 0,
+              "requests_completed": 218,
+              "ttft_ms_p50": 39.2,
+              "ttft_ms_p99": 53.4
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 660.6,
+              "tokens_out": 39662,
+              "tokens_in": 0,
+              "requests_completed": 218,
+              "ttft_ms_p50": 39.2,
+              "ttft_ms_p99": 54.1
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 658.3,
+              "tokens_out": 39497,
+              "tokens_in": 0,
+              "requests_completed": 213,
+              "ttft_ms_p50": 39.3,
+              "ttft_ms_p99": 55.9
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 661.7,
+              "tokens_out": 39679,
+              "tokens_in": 0,
+              "requests_completed": 214,
+              "ttft_ms_p50": 39.3,
+              "ttft_ms_p99": 47.6
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 662.9,
+              "tokens_out": 39798,
+              "tokens_in": 0,
+              "requests_completed": 217,
+              "ttft_ms_p50": 39.4,
+              "ttft_ms_p99": 55.8
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 671.5,
+              "tokens_out": 40279,
+              "tokens_in": 0,
+              "requests_completed": 215,
+              "ttft_ms_p50": 39.3,
+              "ttft_ms_p99": 56.4
+            }
+          ]
+        },
+        {
+          "precision": "W8A16",
+          "sustained_throughput_tokens_per_sec": 949.7,
+          "throttle_ratio": 0.895,
+          "throttle_onset_minute": 1.0,
+          "ttft_p99_drift_ms": -3559.4,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 862.6,
+              "tokens_out": 51782,
+              "tokens_in": 0,
+              "requests_completed": 285,
+              "ttft_ms_p50": 34.2,
+              "ttft_ms_p99": 3609.1
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 956.6,
+              "tokens_out": 57403,
+              "tokens_in": 0,
+              "requests_completed": 311,
+              "ttft_ms_p50": 33.0,
+              "ttft_ms_p99": 55.5
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 964.1,
+              "tokens_out": 57850,
+              "tokens_in": 0,
+              "requests_completed": 315,
+              "ttft_ms_p50": 32.9,
+              "ttft_ms_p99": 49.4
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 949.6,
+              "tokens_out": 56948,
+              "tokens_in": 0,
+              "requests_completed": 311,
+              "ttft_ms_p50": 33.2,
+              "ttft_ms_p99": 52.8
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 956.8,
+              "tokens_out": 57415,
+              "tokens_in": 0,
+              "requests_completed": 316,
+              "ttft_ms_p50": 33.1,
+              "ttft_ms_p99": 51.3
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 959.2,
+              "tokens_out": 57545,
+              "tokens_in": 0,
+              "requests_completed": 312,
+              "ttft_ms_p50": 32.7,
+              "ttft_ms_p99": 50.3
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 956.0,
+              "tokens_out": 57358,
+              "tokens_in": 0,
+              "requests_completed": 312,
+              "ttft_ms_p50": 33.0,
+              "ttft_ms_p99": 44.9
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 951.3,
+              "tokens_out": 57077,
+              "tokens_in": 0,
+              "requests_completed": 310,
+              "ttft_ms_p50": 33.2,
+              "ttft_ms_p99": 50.9
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 956.1,
+              "tokens_out": 57391,
+              "tokens_in": 0,
+              "requests_completed": 314,
+              "ttft_ms_p50": 33.2,
+              "ttft_ms_p99": 51.0
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 960.7,
+              "tokens_out": 57646,
+              "tokens_in": 0,
+              "requests_completed": 312,
+              "ttft_ms_p50": 33.1,
+              "ttft_ms_p99": 51.1
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 958.1,
+              "tokens_out": 57460,
+              "tokens_in": 0,
+              "requests_completed": 313,
+              "ttft_ms_p50": 33.1,
+              "ttft_ms_p99": 51.8
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 954.6,
+              "tokens_out": 57271,
+              "tokens_in": 0,
+              "requests_completed": 314,
+              "ttft_ms_p50": 33.5,
+              "ttft_ms_p99": 52.4
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 954.0,
+              "tokens_out": 57231,
+              "tokens_in": 0,
+              "requests_completed": 313,
+              "ttft_ms_p50": 33.4,
+              "ttft_ms_p99": 56.3
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 956.1,
+              "tokens_out": 57378,
+              "tokens_in": 0,
+              "requests_completed": 312,
+              "ttft_ms_p50": 33.2,
+              "ttft_ms_p99": 49.7
+            }
+          ]
+        },
+        {
+          "precision": "W4A16",
+          "sustained_throughput_tokens_per_sec": 777.5,
+          "throttle_ratio": 0.886,
+          "throttle_onset_minute": 1.0,
+          "ttft_p99_drift_ms": -3160.0,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 702.0,
+              "tokens_out": 42136,
+              "tokens_in": 0,
+              "requests_completed": 240,
+              "ttft_ms_p50": 36.4,
+              "ttft_ms_p99": 3215.0
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 788.1,
+              "tokens_out": 47275,
+              "tokens_in": 0,
+              "requests_completed": 262,
+              "ttft_ms_p50": 35.4,
+              "ttft_ms_p99": 52.1
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 776.1,
+              "tokens_out": 46574,
+              "tokens_in": 0,
+              "requests_completed": 261,
+              "ttft_ms_p50": 35.2,
+              "ttft_ms_p99": 51.6
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 792.4,
+              "tokens_out": 47547,
+              "tokens_in": 0,
+              "requests_completed": 266,
+              "ttft_ms_p50": 35.2,
+              "ttft_ms_p99": 50.7
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 780.6,
+              "tokens_out": 46845,
+              "tokens_in": 0,
+              "requests_completed": 262,
+              "ttft_ms_p50": 35.2,
+              "ttft_ms_p99": 53.1
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 783.4,
+              "tokens_out": 47022,
+              "tokens_in": 0,
+              "requests_completed": 263,
+              "ttft_ms_p50": 35.2,
+              "ttft_ms_p99": 52.3
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 778.0,
+              "tokens_out": 46682,
+              "tokens_in": 0,
+              "requests_completed": 259,
+              "ttft_ms_p50": 35.2,
+              "ttft_ms_p99": 50.6
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 790.4,
+              "tokens_out": 47403,
+              "tokens_in": 0,
+              "requests_completed": 268,
+              "ttft_ms_p50": 35.4,
+              "ttft_ms_p99": 52.3
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 780.9,
+              "tokens_out": 46849,
+              "tokens_in": 0,
+              "requests_completed": 264,
+              "ttft_ms_p50": 35.6,
+              "ttft_ms_p99": 52.0
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 774.9,
+              "tokens_out": 46503,
+              "tokens_in": 0,
+              "requests_completed": 258,
+              "ttft_ms_p50": 35.7,
+              "ttft_ms_p99": 51.7
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 789.4,
+              "tokens_out": 47353,
+              "tokens_in": 0,
+              "requests_completed": 265,
+              "ttft_ms_p50": 35.5,
+              "ttft_ms_p99": 55.6
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 780.1,
+              "tokens_out": 46810,
+              "tokens_in": 0,
+              "requests_completed": 266,
+              "ttft_ms_p50": 35.5,
+              "ttft_ms_p99": 52.2
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 779.7,
+              "tokens_out": 46798,
+              "tokens_in": 0,
+              "requests_completed": 259,
+              "ttft_ms_p50": 35.5,
+              "ttft_ms_p99": 53.6
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 789.6,
+              "tokens_out": 47370,
+              "tokens_in": 0,
+              "requests_completed": 267,
+              "ttft_ms_p50": 35.4,
+              "ttft_ms_p99": 55.0
+            }
+          ]
+        }
+      ]
+    }
+  },
+  "accuracy": null,
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "08:10:25",
+    "run_id": "29a32aea",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T08:07:50.935068+00:00",
+    "benchmark_end_time": "2026-04-30T08:10:25.900871+00:00",
+    "benchmark_elapsed_minutes": 73.8,
+    "model_load_seconds": 70.3,
+    "benchmark_elapsed_minutes_note": "Sum of per-precision benchmark_elapsed_minutes (excludes sleep gaps and orchestrator overhead).",
+    "scenario_dirs": {
+      "bf16/offline": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/offline",
+      "bf16/online": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/online",
+      "bf16/sustained": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/bf16/sustained",
+      "fp8/offline": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/fp8/offline",
+      "fp8/online": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/fp8/online",
+      "fp8/sustained": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/fp8/sustained",
+      "w8a8/offline": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a8/offline",
+      "w8a8/online": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a8/online",
+      "w8a8/sustained": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a8/sustained",
+      "w8a16/offline": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/offline",
+      "w8a16/online": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/online",
+      "w8a16/sustained": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/sustained",
+      "w4a16/offline": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/offline",
+      "w4a16/online": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/online",
+      "w4a16/sustained": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/sustained"
+    },
+    "precision_dirs": {
+      "BF16": "bf16",
+      "FP8": "fp8",
+      "W8A8": "w8a8",
+      "W8A16": "w8a16",
+      "W4A16": "w4a16"
+    },
+    "precision_model_map": {
+      "BF16": {
+        "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+        "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+        "dtype_override": "bfloat16"
+      },
+      "FP8": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
+        "model_revision": "12fd6884d2585dd4d020373e7f39f74507b31866",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "Static per-tensor FP8 (weights + activations). Requires Ampere+ (A100, A800, H20). Skipped automatically on FP16-only hardware."
+      },
+      "W8A8": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+        "model_revision": "e2bfb7d92784ad7d1b606c2f9644d3cefb2ec708",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "INT8 weights + INT8 activations via compressed-tensors. Exercises native int8 tensor cores."
+      },
+      "W8A16": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+        "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "INT8 weights, FP16 activations. Weight-only quantization — reduces memory bandwidth, not compute dtype."
+      },
+      "W4A16": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+        "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+        "engine_kwargs": {
+          "quantization": "gptq"
+        },
+        "_note": "INT4 weights, FP16 activations via GPTQ Marlin kernels. Weight-only quantization — larger memory saving than W8A16."
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/accuracy/accuracy.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/accuracy/accuracy.json
new file mode 100644
index 00000000..e2c86fd4
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.57,
+  "baseline_delta": 0.0,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "W4A16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/offline/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/offline/result.json
new file mode 100644
index 00000000..2beadb1b
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/offline/result.json
@@ -0,0 +1,178 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T08:03:04.688337+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tPXB\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tPXB\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using GPTQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "auto",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 2151.78,
+          "throughput_tokens_per_sec_per_chip": 2151.78,
+          "elapsed_seconds_median": 16.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 2107.59,
+          "throughput_tokens_per_sec_per_chip": 2107.59,
+          "elapsed_seconds_median": 16.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 2144.09,
+          "throughput_tokens_per_sec_per_chip": 2144.09,
+          "elapsed_seconds_median": 16.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 2208.93,
+          "throughput_tokens_per_sec_per_chip": 2208.93,
+          "elapsed_seconds_median": 15.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "09:27:50",
+    "run_id": "ce081f96",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_ce081f96",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T09:23:29.519979+00:00",
+    "benchmark_end_time": "2026-04-30T09:27:50.159108+00:00",
+    "benchmark_elapsed_minutes": 4.3,
+    "model_load_seconds": 71.1
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/online/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/online/result.json
new file mode 100644
index 00000000..0bc84f2d
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/online/result.json
@@ -0,0 +1,180 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T08:03:04.688337+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tPXB\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tPXB\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using GPTQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "auto",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 57.25,
+          "ttft_ms_p90": 96.93,
+          "ttft_ms_p99": 3210.47,
+          "tpot_ms_p50": 21.98,
+          "tpot_ms_p90": 36.21,
+          "tpot_ms_p99": 40.53,
+          "elapsed_seconds_median": 66.2,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 65.61,
+          "ttft_ms_p90": 87.43,
+          "ttft_ms_p99": 98.67,
+          "tpot_ms_p50": 35.19,
+          "tpot_ms_p90": 36.47,
+          "tpot_ms_p99": 39.7,
+          "elapsed_seconds_median": 34.8,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 63.45,
+          "ttft_ms_p90": 96.95,
+          "ttft_ms_p99": 116.42,
+          "tpot_ms_p50": 45.94,
+          "tpot_ms_p90": 50.69,
+          "tpot_ms_p99": 57.79,
+          "elapsed_seconds_median": 19.3,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 60.0,
+          "ttft_ms_p90": 97.76,
+          "ttft_ms_p99": 121.5,
+          "tpot_ms_p50": 49.66,
+          "tpot_ms_p90": 59.83,
+          "tpot_ms_p99": 75.38,
+          "elapsed_seconds_median": 14.9,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "09:36:16",
+    "run_id": "ce081f96",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_ce081f96",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T09:29:31.834008+00:00",
+    "benchmark_end_time": "2026-04-30T09:36:16.456822+00:00",
+    "benchmark_elapsed_minutes": 6.7,
+    "model_load_seconds": 69.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/result.json
new file mode 100644
index 00000000..074a24c8
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/result.json
@@ -0,0 +1,395 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T08:03:04.688337+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tPXB\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tPXB\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using GPTQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "auto",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 2151.78,
+          "throughput_tokens_per_sec_per_chip": 2151.78,
+          "elapsed_seconds_median": 16.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 2107.59,
+          "throughput_tokens_per_sec_per_chip": 2107.59,
+          "elapsed_seconds_median": 16.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 2144.09,
+          "throughput_tokens_per_sec_per_chip": 2144.09,
+          "elapsed_seconds_median": 16.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 2208.93,
+          "throughput_tokens_per_sec_per_chip": 2208.93,
+          "elapsed_seconds_median": 15.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 57.25,
+          "ttft_ms_p90": 96.93,
+          "ttft_ms_p99": 3210.47,
+          "tpot_ms_p50": 21.98,
+          "tpot_ms_p90": 36.21,
+          "tpot_ms_p99": 40.53,
+          "elapsed_seconds_median": 66.2,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 65.61,
+          "ttft_ms_p90": 87.43,
+          "ttft_ms_p99": 98.67,
+          "tpot_ms_p50": 35.19,
+          "tpot_ms_p90": 36.47,
+          "tpot_ms_p99": 39.7,
+          "elapsed_seconds_median": 34.8,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 63.45,
+          "ttft_ms_p90": 96.95,
+          "ttft_ms_p99": 116.42,
+          "tpot_ms_p50": 45.94,
+          "tpot_ms_p90": 50.69,
+          "tpot_ms_p99": 57.79,
+          "elapsed_seconds_median": 19.3,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 60.0,
+          "ttft_ms_p90": 97.76,
+          "ttft_ms_p99": 121.5,
+          "tpot_ms_p50": 49.66,
+          "tpot_ms_p90": 59.83,
+          "tpot_ms_p99": 75.38,
+          "elapsed_seconds_median": 14.9,
+          "sla_met": true
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 702.0,
+          "tokens_out": 42136,
+          "tokens_in": 0,
+          "requests_completed": 240,
+          "ttft_ms_p50": 36.4,
+          "ttft_ms_p99": 3215.0
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 788.1,
+          "tokens_out": 47275,
+          "tokens_in": 0,
+          "requests_completed": 262,
+          "ttft_ms_p50": 35.4,
+          "ttft_ms_p99": 52.1
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 776.1,
+          "tokens_out": 46574,
+          "tokens_in": 0,
+          "requests_completed": 261,
+          "ttft_ms_p50": 35.2,
+          "ttft_ms_p99": 51.6
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 792.4,
+          "tokens_out": 47547,
+          "tokens_in": 0,
+          "requests_completed": 266,
+          "ttft_ms_p50": 35.2,
+          "ttft_ms_p99": 50.7
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 780.6,
+          "tokens_out": 46845,
+          "tokens_in": 0,
+          "requests_completed": 262,
+          "ttft_ms_p50": 35.2,
+          "ttft_ms_p99": 53.1
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 783.4,
+          "tokens_out": 47022,
+          "tokens_in": 0,
+          "requests_completed": 263,
+          "ttft_ms_p50": 35.2,
+          "ttft_ms_p99": 52.3
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 778.0,
+          "tokens_out": 46682,
+          "tokens_in": 0,
+          "requests_completed": 259,
+          "ttft_ms_p50": 35.2,
+          "ttft_ms_p99": 50.6
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 790.4,
+          "tokens_out": 47403,
+          "tokens_in": 0,
+          "requests_completed": 268,
+          "ttft_ms_p50": 35.4,
+          "ttft_ms_p99": 52.3
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 780.9,
+          "tokens_out": 46849,
+          "tokens_in": 0,
+          "requests_completed": 264,
+          "ttft_ms_p50": 35.6,
+          "ttft_ms_p99": 52.0
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 774.9,
+          "tokens_out": 46503,
+          "tokens_in": 0,
+          "requests_completed": 258,
+          "ttft_ms_p50": 35.7,
+          "ttft_ms_p99": 51.7
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 789.4,
+          "tokens_out": 47353,
+          "tokens_in": 0,
+          "requests_completed": 265,
+          "ttft_ms_p50": 35.5,
+          "ttft_ms_p99": 55.6
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 780.1,
+          "tokens_out": 46810,
+          "tokens_in": 0,
+          "requests_completed": 266,
+          "ttft_ms_p50": 35.5,
+          "ttft_ms_p99": 52.2
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 779.7,
+          "tokens_out": 46798,
+          "tokens_in": 0,
+          "requests_completed": 259,
+          "ttft_ms_p50": 35.5,
+          "ttft_ms_p99": 53.6
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 789.6,
+          "tokens_out": 47370,
+          "tokens_in": 0,
+          "requests_completed": 267,
+          "ttft_ms_p50": 35.4,
+          "ttft_ms_p99": 55.0
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 777.5,
+      "throttle_ratio": 0.886,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -3160.0
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.57,
+    "baseline_delta": 0.0,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "W4A16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "09:27:50",
+    "run_id": "ce081f96",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_ce081f96",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T09:23:29.519979+00:00",
+    "benchmark_end_time": "2026-04-30T09:27:50.159108+00:00",
+    "benchmark_elapsed_minutes": 26.1,
+    "model_load_seconds": 71.1,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/offline",
+      "online": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/online",
+      "sustained": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/sustained/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/sustained/result.json
new file mode 100644
index 00000000..0e3ed7e4
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w4a16/sustained/result.json
@@ -0,0 +1,278 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T08:03:04.688337+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tPXB\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tPXB\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using GPTQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "auto",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 702.0,
+          "tokens_out": 42136,
+          "tokens_in": 0,
+          "requests_completed": 240,
+          "ttft_ms_p50": 36.4,
+          "ttft_ms_p99": 3215.0
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 788.1,
+          "tokens_out": 47275,
+          "tokens_in": 0,
+          "requests_completed": 262,
+          "ttft_ms_p50": 35.4,
+          "ttft_ms_p99": 52.1
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 776.1,
+          "tokens_out": 46574,
+          "tokens_in": 0,
+          "requests_completed": 261,
+          "ttft_ms_p50": 35.2,
+          "ttft_ms_p99": 51.6
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 792.4,
+          "tokens_out": 47547,
+          "tokens_in": 0,
+          "requests_completed": 266,
+          "ttft_ms_p50": 35.2,
+          "ttft_ms_p99": 50.7
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 780.6,
+          "tokens_out": 46845,
+          "tokens_in": 0,
+          "requests_completed": 262,
+          "ttft_ms_p50": 35.2,
+          "ttft_ms_p99": 53.1
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 783.4,
+          "tokens_out": 47022,
+          "tokens_in": 0,
+          "requests_completed": 263,
+          "ttft_ms_p50": 35.2,
+          "ttft_ms_p99": 52.3
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 778.0,
+          "tokens_out": 46682,
+          "tokens_in": 0,
+          "requests_completed": 259,
+          "ttft_ms_p50": 35.2,
+          "ttft_ms_p99": 50.6
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 790.4,
+          "tokens_out": 47403,
+          "tokens_in": 0,
+          "requests_completed": 268,
+          "ttft_ms_p50": 35.4,
+          "ttft_ms_p99": 52.3
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 780.9,
+          "tokens_out": 46849,
+          "tokens_in": 0,
+          "requests_completed": 264,
+          "ttft_ms_p50": 35.6,
+          "ttft_ms_p99": 52.0
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 774.9,
+          "tokens_out": 46503,
+          "tokens_in": 0,
+          "requests_completed": 258,
+          "ttft_ms_p50": 35.7,
+          "ttft_ms_p99": 51.7
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 789.4,
+          "tokens_out": 47353,
+          "tokens_in": 0,
+          "requests_completed": 265,
+          "ttft_ms_p50": 35.5,
+          "ttft_ms_p99": 55.6
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 780.1,
+          "tokens_out": 46810,
+          "tokens_in": 0,
+          "requests_completed": 266,
+          "ttft_ms_p50": 35.5,
+          "ttft_ms_p99": 52.2
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 779.7,
+          "tokens_out": 46798,
+          "tokens_in": 0,
+          "requests_completed": 259,
+          "ttft_ms_p50": 35.5,
+          "ttft_ms_p99": 53.6
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 789.6,
+          "tokens_out": 47370,
+          "tokens_in": 0,
+          "requests_completed": 267,
+          "ttft_ms_p50": 35.4,
+          "ttft_ms_p99": 55.0
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 777.5,
+      "throttle_ratio": 0.886,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -3160.0
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "09:53:08",
+    "run_id": "ce081f96",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_ce081f96",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T09:38:04.691953+00:00",
+    "benchmark_end_time": "2026-04-30T09:53:08.303371+00:00",
+    "benchmark_elapsed_minutes": 15.1,
+    "model_load_seconds": 74.4
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/accuracy/accuracy.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/accuracy/accuracy.json
new file mode 100644
index 00000000..60c0d5fb
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.59,
+  "baseline_delta": 0.0,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "W8A16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/offline/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/offline/result.json
new file mode 100644
index 00000000..3fa0c538
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/offline/result.json
@@ -0,0 +1,178 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T08:03:04.688337+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tPXB\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tPXB\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 3994.04,
+          "throughput_tokens_per_sec_per_chip": 3994.04,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 3995.3,
+          "throughput_tokens_per_sec_per_chip": 3995.3,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 3993.64,
+          "throughput_tokens_per_sec_per_chip": 3993.64,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 3995.48,
+          "throughput_tokens_per_sec_per_chip": 3995.48,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "08:53:38",
+    "run_id": "abbf6933",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_abbf6933",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T08:51:11.064170+00:00",
+    "benchmark_end_time": "2026-04-30T08:53:38.642090+00:00",
+    "benchmark_elapsed_minutes": 2.5,
+    "model_load_seconds": 69.7
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/online/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/online/result.json
new file mode 100644
index 00000000..8226db5c
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/online/result.json
@@ -0,0 +1,180 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T08:03:04.688337+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tPXB\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tPXB\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 36.12,
+          "ttft_ms_p90": 69.07,
+          "ttft_ms_p99": 3268.43,
+          "tpot_ms_p50": 8.37,
+          "tpot_ms_p90": 9.71,
+          "tpot_ms_p99": 14.4,
+          "elapsed_seconds_median": 64.6,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 37.67,
+          "ttft_ms_p90": 55.4,
+          "ttft_ms_p99": 66.01,
+          "tpot_ms_p50": 11.2,
+          "tpot_ms_p90": 12.6,
+          "tpot_ms_p99": 14.08,
+          "elapsed_seconds_median": 31.2,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 54.14,
+          "ttft_ms_p90": 78.87,
+          "ttft_ms_p99": 95.15,
+          "tpot_ms_p50": 30.91,
+          "tpot_ms_p90": 37.44,
+          "tpot_ms_p99": 44.41,
+          "elapsed_seconds_median": 16.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 56.58,
+          "ttft_ms_p90": 83.52,
+          "ttft_ms_p99": 102.84,
+          "tpot_ms_p50": 41.53,
+          "tpot_ms_p90": 50.68,
+          "tpot_ms_p99": 75.53,
+          "elapsed_seconds_median": 12.9,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "09:01:35",
+    "run_id": "abbf6933",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_abbf6933",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T08:55:21.326171+00:00",
+    "benchmark_end_time": "2026-04-30T09:01:35.215175+00:00",
+    "benchmark_elapsed_minutes": 6.2,
+    "model_load_seconds": 70.9
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/result.json
new file mode 100644
index 00000000..59a8d13b
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/result.json
@@ -0,0 +1,395 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T08:03:04.688337+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tPXB\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tPXB\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 3994.04,
+          "throughput_tokens_per_sec_per_chip": 3994.04,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 3995.3,
+          "throughput_tokens_per_sec_per_chip": 3995.3,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 3993.64,
+          "throughput_tokens_per_sec_per_chip": 3993.64,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 3995.48,
+          "throughput_tokens_per_sec_per_chip": 3995.48,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 36.12,
+          "ttft_ms_p90": 69.07,
+          "ttft_ms_p99": 3268.43,
+          "tpot_ms_p50": 8.37,
+          "tpot_ms_p90": 9.71,
+          "tpot_ms_p99": 14.4,
+          "elapsed_seconds_median": 64.6,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 37.67,
+          "ttft_ms_p90": 55.4,
+          "ttft_ms_p99": 66.01,
+          "tpot_ms_p50": 11.2,
+          "tpot_ms_p90": 12.6,
+          "tpot_ms_p99": 14.08,
+          "elapsed_seconds_median": 31.2,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 54.14,
+          "ttft_ms_p90": 78.87,
+          "ttft_ms_p99": 95.15,
+          "tpot_ms_p50": 30.91,
+          "tpot_ms_p90": 37.44,
+          "tpot_ms_p99": 44.41,
+          "elapsed_seconds_median": 16.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 56.58,
+          "ttft_ms_p90": 83.52,
+          "ttft_ms_p99": 102.84,
+          "tpot_ms_p50": 41.53,
+          "tpot_ms_p90": 50.68,
+          "tpot_ms_p99": 75.53,
+          "elapsed_seconds_median": 12.9,
+          "sla_met": true
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 862.6,
+          "tokens_out": 51782,
+          "tokens_in": 0,
+          "requests_completed": 285,
+          "ttft_ms_p50": 34.2,
+          "ttft_ms_p99": 3609.1
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 956.6,
+          "tokens_out": 57403,
+          "tokens_in": 0,
+          "requests_completed": 311,
+          "ttft_ms_p50": 33.0,
+          "ttft_ms_p99": 55.5
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 964.1,
+          "tokens_out": 57850,
+          "tokens_in": 0,
+          "requests_completed": 315,
+          "ttft_ms_p50": 32.9,
+          "ttft_ms_p99": 49.4
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 949.6,
+          "tokens_out": 56948,
+          "tokens_in": 0,
+          "requests_completed": 311,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 52.8
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 956.8,
+          "tokens_out": 57415,
+          "tokens_in": 0,
+          "requests_completed": 316,
+          "ttft_ms_p50": 33.1,
+          "ttft_ms_p99": 51.3
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 959.2,
+          "tokens_out": 57545,
+          "tokens_in": 0,
+          "requests_completed": 312,
+          "ttft_ms_p50": 32.7,
+          "ttft_ms_p99": 50.3
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 956.0,
+          "tokens_out": 57358,
+          "tokens_in": 0,
+          "requests_completed": 312,
+          "ttft_ms_p50": 33.0,
+          "ttft_ms_p99": 44.9
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 951.3,
+          "tokens_out": 57077,
+          "tokens_in": 0,
+          "requests_completed": 310,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 50.9
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 956.1,
+          "tokens_out": 57391,
+          "tokens_in": 0,
+          "requests_completed": 314,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 51.0
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 960.7,
+          "tokens_out": 57646,
+          "tokens_in": 0,
+          "requests_completed": 312,
+          "ttft_ms_p50": 33.1,
+          "ttft_ms_p99": 51.1
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 958.1,
+          "tokens_out": 57460,
+          "tokens_in": 0,
+          "requests_completed": 313,
+          "ttft_ms_p50": 33.1,
+          "ttft_ms_p99": 51.8
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 954.6,
+          "tokens_out": 57271,
+          "tokens_in": 0,
+          "requests_completed": 314,
+          "ttft_ms_p50": 33.5,
+          "ttft_ms_p99": 52.4
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 954.0,
+          "tokens_out": 57231,
+          "tokens_in": 0,
+          "requests_completed": 313,
+          "ttft_ms_p50": 33.4,
+          "ttft_ms_p99": 56.3
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 956.1,
+          "tokens_out": 57378,
+          "tokens_in": 0,
+          "requests_completed": 312,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 49.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 949.7,
+      "throttle_ratio": 0.895,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -3559.4
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.59,
+    "baseline_delta": 0.0,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "W8A16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "08:53:38",
+    "run_id": "abbf6933",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_abbf6933",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T08:51:11.064170+00:00",
+    "benchmark_end_time": "2026-04-30T08:53:38.642090+00:00",
+    "benchmark_elapsed_minutes": 23.7,
+    "model_load_seconds": 69.7,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/offline",
+      "online": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/online",
+      "sustained": "results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/sustained/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/sustained/result.json
new file mode 100644
index 00000000..4878ff90
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_29a32aea/w8a16/sustained/result.json
@@ -0,0 +1,278 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T08:03:04.688337+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tPXB\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tPXB\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 862.6,
+          "tokens_out": 51782,
+          "tokens_in": 0,
+          "requests_completed": 285,
+          "ttft_ms_p50": 34.2,
+          "ttft_ms_p99": 3609.1
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 956.6,
+          "tokens_out": 57403,
+          "tokens_in": 0,
+          "requests_completed": 311,
+          "ttft_ms_p50": 33.0,
+          "ttft_ms_p99": 55.5
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 964.1,
+          "tokens_out": 57850,
+          "tokens_in": 0,
+          "requests_completed": 315,
+          "ttft_ms_p50": 32.9,
+          "ttft_ms_p99": 49.4
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 949.6,
+          "tokens_out": 56948,
+          "tokens_in": 0,
+          "requests_completed": 311,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 52.8
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 956.8,
+          "tokens_out": 57415,
+          "tokens_in": 0,
+          "requests_completed": 316,
+          "ttft_ms_p50": 33.1,
+          "ttft_ms_p99": 51.3
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 959.2,
+          "tokens_out": 57545,
+          "tokens_in": 0,
+          "requests_completed": 312,
+          "ttft_ms_p50": 32.7,
+          "ttft_ms_p99": 50.3
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 956.0,
+          "tokens_out": 57358,
+          "tokens_in": 0,
+          "requests_completed": 312,
+          "ttft_ms_p50": 33.0,
+          "ttft_ms_p99": 44.9
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 951.3,
+          "tokens_out": 57077,
+          "tokens_in": 0,
+          "requests_completed": 310,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 50.9
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 956.1,
+          "tokens_out": 57391,
+          "tokens_in": 0,
+          "requests_completed": 314,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 51.0
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 960.7,
+          "tokens_out": 57646,
+          "tokens_in": 0,
+          "requests_completed": 312,
+          "ttft_ms_p50": 33.1,
+          "ttft_ms_p99": 51.1
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 958.1,
+          "tokens_out": 57460,
+          "tokens_in": 0,
+          "requests_completed": 313,
+          "ttft_ms_p50": 33.1,
+          "ttft_ms_p99": 51.8
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 954.6,
+          "tokens_out": 57271,
+          "tokens_in": 0,
+          "requests_completed": 314,
+          "ttft_ms_p50": 33.5,
+          "ttft_ms_p99": 52.4
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 954.0,
+          "tokens_out": 57231,
+          "tokens_in": 0,
+          "requests_completed": 313,
+          "ttft_ms_p50": 33.4,
+          "ttft_ms_p99": 56.3
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 956.1,
+          "tokens_out": 57378,
+          "tokens_in": 0,
+          "requests_completed": 312,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 49.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 949.7,
+      "throttle_ratio": 0.895,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -3559.4
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "09:18:39",
+    "run_id": "abbf6933",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_abbf6933",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T09:03:36.171327+00:00",
+    "benchmark_end_time": "2026-04-30T09:18:39.104477+00:00",
+    "benchmark_elapsed_minutes": 15.0,
+    "model_load_seconds": 83.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/accuracy/accuracy.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/accuracy/accuracy.json
new file mode 100644
index 00000000..37f4d824
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.55,
+  "baseline_delta": -0.01,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/env_info.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/env_info.json
new file mode 100644
index 00000000..813ac789
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/env_info.json
@@ -0,0 +1,53 @@
+{
+  "collected_at": "2026-05-09T07:24:11.439226+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA A100-SXM4-80GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 80.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "AMD EPYC 7742 64-Core Processor",
+    "physical_cores": 128,
+    "logical_cores": 128,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.7,
+  "pcie_generation": "PCIe Gen 4",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_2",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_3",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.10.20",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 12.8",
+  "pytorch_version": "2.9.1+cu128"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/interactive/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/interactive/result.json
new file mode 100644
index 00000000..c2ab1376
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/interactive/result.json
@@ -0,0 +1,136 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T07:24:11.439226+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 2971.73,
+      "ttft_ms_p90": 3094.21,
+      "ttft_ms_p99": 3156.35,
+      "tpot_ms_p50": 13.3,
+      "tpot_ms_p90": 13.34,
+      "tpot_ms_p99": 13.37,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 599.3
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "08:11:09",
+    "run_id": "4f45791f",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T07:51:10.821530+00:00",
+    "benchmark_end_time": "2026-05-09T08:11:09.370299+00:00",
+    "benchmark_elapsed_minutes": 20.0,
+    "model_load_seconds": 136.6
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/offline/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/offline/result.json
new file mode 100644
index 00000000..ad6ea168
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/offline/result.json
@@ -0,0 +1,154 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T07:24:11.439226+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 69.59,
+          "throughput_tokens_per_sec_per_chip": 69.59,
+          "elapsed_seconds_median": 184.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 69.6,
+          "throughput_tokens_per_sec_per_chip": 69.6,
+          "elapsed_seconds_median": 184.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "07:47:42",
+    "run_id": "4f45791f",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T07:29:11.140810+00:00",
+    "benchmark_end_time": "2026-05-09T07:47:42.068128+00:00",
+    "benchmark_elapsed_minutes": 18.5,
+    "model_load_seconds": 80.0
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/online/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/online/result.json
new file mode 100644
index 00000000..a86a4c36
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/online/result.json
@@ -0,0 +1,168 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T07:24:11.439226+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 5000,
+      "max_valid_qps": 0.0,
+      "results_by_qps": [
+        {
+          "target_qps": 0.5,
+          "achieved_qps": 0.5,
+          "ttft_ms_p50": 83128.05,
+          "ttft_ms_p90": 154238.99,
+          "ttft_ms_p99": 170546.4,
+          "tpot_ms_p50": 128.57,
+          "tpot_ms_p90": 211.06,
+          "tpot_ms_p99": 226.9,
+          "elapsed_seconds_median": 362.7,
+          "sla_met": false
+        },
+        {
+          "target_qps": 1,
+          "achieved_qps": 1.0,
+          "ttft_ms_p50": 116443.21,
+          "ttft_ms_p90": 209635.11,
+          "ttft_ms_p99": 237132.86,
+          "tpot_ms_p50": 128.45,
+          "tpot_ms_p90": 210.75,
+          "tpot_ms_p99": 226.84,
+          "elapsed_seconds_median": 358.6,
+          "sla_met": false
+        },
+        {
+          "target_qps": 2,
+          "achieved_qps": 2.0,
+          "ttft_ms_p50": 150859.34,
+          "ttft_ms_p90": 267250.36,
+          "ttft_ms_p99": 298587.01,
+          "tpot_ms_p50": 128.52,
+          "tpot_ms_p90": 210.81,
+          "tpot_ms_p99": 227.0,
+          "elapsed_seconds_median": 356.2,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "09:24:50",
+    "run_id": "4f45791f",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T08:48:55.526410+00:00",
+    "benchmark_end_time": "2026-05-09T09:24:50.718566+00:00",
+    "benchmark_elapsed_minutes": 35.9,
+    "model_load_seconds": 160.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/result.json
new file mode 100644
index 00000000..5eb9289c
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/result.json
@@ -0,0 +1,551 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T07:24:11.439226+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "interactive",
+      "sustained",
+      "online",
+      "speculative"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 2,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 69.59,
+          "throughput_tokens_per_sec_per_chip": 69.59,
+          "elapsed_seconds_median": 184.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 69.6,
+          "throughput_tokens_per_sec_per_chip": 69.6,
+          "elapsed_seconds_median": 184.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 2971.73,
+      "ttft_ms_p90": 3094.21,
+      "ttft_ms_p99": 3156.35,
+      "tpot_ms_p50": 13.3,
+      "tpot_ms_p90": 13.34,
+      "tpot_ms_p99": 13.37,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 599.3
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 30.0,
+          "tokens_out": 1800,
+          "tokens_in": 0,
+          "requests_completed": 8,
+          "ttft_ms_p50": 21060.2,
+          "ttft_ms_p99": 30620.3
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12973.6,
+          "ttft_ms_p99": 23038.0
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12726.7,
+          "ttft_ms_p99": 22480.6
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12784.9,
+          "ttft_ms_p99": 22623.8
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13034.8,
+          "ttft_ms_p99": 23137.4
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12580.3,
+          "ttft_ms_p99": 22602.3
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 89.4,
+          "tokens_out": 5400,
+          "tokens_in": 0,
+          "requests_completed": 24,
+          "ttft_ms_p50": 12807.2,
+          "ttft_ms_p99": 22623.1
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.2,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13020.5,
+          "ttft_ms_p99": 23312.8
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.2,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12557.1,
+          "ttft_ms_p99": 22757.3
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12436.6,
+          "ttft_ms_p99": 22553.8
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13014.2,
+          "ttft_ms_p99": 23107.3
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12519.8,
+          "ttft_ms_p99": 22658.9
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12681.3,
+          "ttft_ms_p99": 22718.9
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13039.8,
+          "ttft_ms_p99": 23137.1
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12753.6,
+          "ttft_ms_p99": 22520.4
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12791.4,
+          "ttft_ms_p99": 22658.4
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13051.0,
+          "ttft_ms_p99": 23138.2
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12589.1,
+          "ttft_ms_p99": 22595.7
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12810.6,
+          "ttft_ms_p99": 22597.2
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13023.1,
+          "ttft_ms_p99": 23253.2
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12585.5,
+          "ttft_ms_p99": 22615.8
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12514.3,
+          "ttft_ms_p99": 22785.1
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12850.5,
+          "ttft_ms_p99": 23035.8
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12578.8,
+          "ttft_ms_p99": 22827.1
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12602.0,
+          "ttft_ms_p99": 22692.5
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12942.4,
+          "ttft_ms_p99": 22964.2
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12986.3,
+          "ttft_ms_p99": 23014.5
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12792.2,
+          "ttft_ms_p99": 22573.6
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12968.9,
+          "ttft_ms_p99": 22869.8
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 61.1,
+      "throttle_ratio": 0.671,
+      "throttle_onset_minute": 2.0,
+      "ttft_p99_drift_ms": -168.2
+    },
+    "online": {
+      "sla_ttft_ms": 5000,
+      "max_valid_qps": 0.0,
+      "results_by_qps": [
+        {
+          "target_qps": 0.5,
+          "achieved_qps": 0.5,
+          "ttft_ms_p50": 83128.05,
+          "ttft_ms_p90": 154238.99,
+          "ttft_ms_p99": 170546.4,
+          "tpot_ms_p50": 128.57,
+          "tpot_ms_p90": 211.06,
+          "tpot_ms_p99": 226.9,
+          "elapsed_seconds_median": 362.7,
+          "sla_met": false
+        },
+        {
+          "target_qps": 1,
+          "achieved_qps": 1.0,
+          "ttft_ms_p50": 116443.21,
+          "ttft_ms_p90": 209635.11,
+          "ttft_ms_p99": 237132.86,
+          "tpot_ms_p50": 128.45,
+          "tpot_ms_p90": 210.75,
+          "tpot_ms_p99": 226.84,
+          "elapsed_seconds_median": 358.6,
+          "sla_met": false
+        },
+        {
+          "target_qps": 2,
+          "achieved_qps": 2.0,
+          "ttft_ms_p50": 150859.34,
+          "ttft_ms_p90": 267250.36,
+          "ttft_ms_p99": 298587.01,
+          "tpot_ms_p50": 128.52,
+          "tpot_ms_p90": 210.81,
+          "tpot_ms_p99": 227.0,
+          "elapsed_seconds_median": 356.2,
+          "sla_met": false
+        }
+      ]
+    },
+    "speculative": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 45.04,
+          "throughput_tokens_per_sec_per_chip": 45.04,
+          "elapsed_seconds_median": 285.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 45.04,
+          "throughput_tokens_per_sec_per_chip": 45.04,
+          "elapsed_seconds_median": 285.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.55,
+    "baseline_delta": -0.01,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "07:47:42",
+    "run_id": "4f45791f",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T07:29:11.140810+00:00",
+    "benchmark_end_time": "2026-05-09T07:47:42.068128+00:00",
+    "benchmark_elapsed_minutes": 133.6,
+    "model_load_seconds": 80.0,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'interactive', 'sustained', 'online', 'speculative'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/offline",
+      "interactive": "results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/interactive",
+      "sustained": "results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/sustained",
+      "online": "results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/online",
+      "speculative": "results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/speculative"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/speculative/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/speculative/result.json
new file mode 100644
index 00000000..ab653100
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/speculative/result.json
@@ -0,0 +1,154 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T07:24:11.439226+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "speculative",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "speculative": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 45.04,
+          "throughput_tokens_per_sec_per_chip": 45.04,
+          "elapsed_seconds_median": 285.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 45.04,
+          "throughput_tokens_per_sec_per_chip": 45.04,
+          "elapsed_seconds_median": 285.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "09:57:54",
+    "run_id": "4f45791f",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T09:29:15.832962+00:00",
+    "benchmark_end_time": "2026-05-09T09:57:54.181380+00:00",
+    "benchmark_elapsed_minutes": 28.6,
+    "model_load_seconds": 172.6
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/sustained/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/sustained/result.json
new file mode 100644
index 00000000..bb68a8c6
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f/sustained/result.json
@@ -0,0 +1,428 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T07:24:11.439226+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 30.0,
+          "tokens_out": 1800,
+          "tokens_in": 0,
+          "requests_completed": 8,
+          "ttft_ms_p50": 21060.2,
+          "ttft_ms_p99": 30620.3
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12973.6,
+          "ttft_ms_p99": 23038.0
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12726.7,
+          "ttft_ms_p99": 22480.6
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12784.9,
+          "ttft_ms_p99": 22623.8
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13034.8,
+          "ttft_ms_p99": 23137.4
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12580.3,
+          "ttft_ms_p99": 22602.3
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 89.4,
+          "tokens_out": 5400,
+          "tokens_in": 0,
+          "requests_completed": 24,
+          "ttft_ms_p50": 12807.2,
+          "ttft_ms_p99": 22623.1
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.2,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13020.5,
+          "ttft_ms_p99": 23312.8
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.2,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12557.1,
+          "ttft_ms_p99": 22757.3
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12436.6,
+          "ttft_ms_p99": 22553.8
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13014.2,
+          "ttft_ms_p99": 23107.3
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12519.8,
+          "ttft_ms_p99": 22658.9
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12681.3,
+          "ttft_ms_p99": 22718.9
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13039.8,
+          "ttft_ms_p99": 23137.1
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12753.6,
+          "ttft_ms_p99": 22520.4
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12791.4,
+          "ttft_ms_p99": 22658.4
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13051.0,
+          "ttft_ms_p99": 23138.2
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12589.1,
+          "ttft_ms_p99": 22595.7
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12810.6,
+          "ttft_ms_p99": 22597.2
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13023.1,
+          "ttft_ms_p99": 23253.2
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12585.5,
+          "ttft_ms_p99": 22615.8
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12514.3,
+          "ttft_ms_p99": 22785.1
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12850.5,
+          "ttft_ms_p99": 23035.8
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12578.8,
+          "ttft_ms_p99": 22827.1
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12602.0,
+          "ttft_ms_p99": 22692.5
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12942.4,
+          "ttft_ms_p99": 22964.2
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12986.3,
+          "ttft_ms_p99": 23014.5
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12792.2,
+          "ttft_ms_p99": 22573.6
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12968.9,
+          "ttft_ms_p99": 22869.8
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 61.1,
+      "throttle_ratio": 0.671,
+      "throttle_onset_minute": 2.0,
+      "ttft_p99_drift_ms": -168.2
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "08:45:00",
+    "run_id": "4f45791f",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_4f45791f",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T08:14:26.663896+00:00",
+    "benchmark_end_time": "2026-05-09T08:45:00.753253+00:00",
+    "benchmark_elapsed_minutes": 30.6,
+    "model_load_seconds": 130.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/accuracy/accuracy.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/accuracy/accuracy.json
similarity index 100%
rename from results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/accuracy/accuracy.json
rename to results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/accuracy/accuracy.json
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/env_info.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/env_info.json
new file mode 100644
index 00000000..df3069ce
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/env_info.json
@@ -0,0 +1,53 @@
+{
+  "collected_at": "2026-05-09T05:17:28.177530+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA A100-SXM4-80GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 80.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "AMD EPYC 7742 64-Core Processor",
+    "physical_cores": 128,
+    "logical_cores": 128,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.7,
+  "pcie_generation": "PCIe Gen 4",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_2",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_3",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.10.20",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 12.8",
+  "pytorch_version": "2.9.1+cu128"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/interactive/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/interactive/result.json
new file mode 100644
index 00000000..4fb147b5
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/interactive/result.json
@@ -0,0 +1,136 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T05:17:28.177530+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 17.46,
+      "ttft_ms_p90": 18.77,
+      "ttft_ms_p99": 22.71,
+      "tpot_ms_p50": 1.76,
+      "tpot_ms_p90": 1.78,
+      "tpot_ms_p99": 1.85,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 53.3
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "05:30:27",
+    "run_id": "280f3db2",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T05:27:45.855974+00:00",
+    "benchmark_end_time": "2026-05-09T05:30:27.695194+00:00",
+    "benchmark_elapsed_minutes": 2.7,
+    "model_load_seconds": 66.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/offline/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/offline/result.json
new file mode 100644
index 00000000..cb12a852
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/offline/result.json
@@ -0,0 +1,166 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T05:17:28.177530+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 34907.16,
+          "throughput_tokens_per_sec_per_chip": 34907.16,
+          "elapsed_seconds_median": 1.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 35318.72,
+          "throughput_tokens_per_sec_per_chip": 35318.72,
+          "elapsed_seconds_median": 1.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 35341.25,
+          "throughput_tokens_per_sec_per_chip": 35341.25,
+          "elapsed_seconds_median": 1.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "05:22:34",
+    "run_id": "280f3db2",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T05:22:15.032061+00:00",
+    "benchmark_end_time": "2026-05-09T05:22:34.478538+00:00",
+    "benchmark_elapsed_minutes": 0.3,
+    "model_load_seconds": 60.4
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/online/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/online/result.json
new file mode 100644
index 00000000..cece4dc2
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/online/result.json
@@ -0,0 +1,156 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T05:17:28.177530+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 40,
+      "results_by_qps": [
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 19.48,
+          "ttft_ms_p90": 32.59,
+          "ttft_ms_p99": 2242.09,
+          "tpot_ms_p50": 2.43,
+          "tpot_ms_p90": 2.85,
+          "tpot_ms_p99": 6.7,
+          "elapsed_seconds_median": 31.9,
+          "sla_met": false
+        },
+        {
+          "target_qps": 40,
+          "achieved_qps": 40.0,
+          "ttft_ms_p50": 28.79,
+          "ttft_ms_p90": 37.57,
+          "ttft_ms_p99": 44.56,
+          "tpot_ms_p50": 4.56,
+          "tpot_ms_p90": 5.59,
+          "tpot_ms_p99": 9.8,
+          "elapsed_seconds_median": 7.9,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "05:26:01",
+    "run_id": "280f3db2",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T05:24:02.675359+00:00",
+    "benchmark_end_time": "2026-05-09T05:26:01.825237+00:00",
+    "benchmark_elapsed_minutes": 2.0,
+    "model_load_seconds": 55.4
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/result.json
new file mode 100644
index 00000000..4e34a091
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/result.json
@@ -0,0 +1,371 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T05:17:28.177530+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "interactive",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 34907.16,
+          "throughput_tokens_per_sec_per_chip": 34907.16,
+          "elapsed_seconds_median": 1.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 35318.72,
+          "throughput_tokens_per_sec_per_chip": 35318.72,
+          "elapsed_seconds_median": 1.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 35341.25,
+          "throughput_tokens_per_sec_per_chip": 35341.25,
+          "elapsed_seconds_median": 1.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 40,
+      "results_by_qps": [
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 19.48,
+          "ttft_ms_p90": 32.59,
+          "ttft_ms_p99": 2242.09,
+          "tpot_ms_p50": 2.43,
+          "tpot_ms_p90": 2.85,
+          "tpot_ms_p99": 6.7,
+          "elapsed_seconds_median": 31.9,
+          "sla_met": false
+        },
+        {
+          "target_qps": 40,
+          "achieved_qps": 40.0,
+          "ttft_ms_p50": 28.79,
+          "ttft_ms_p90": 37.57,
+          "ttft_ms_p99": 44.56,
+          "tpot_ms_p50": 4.56,
+          "tpot_ms_p90": 5.59,
+          "tpot_ms_p99": 9.8,
+          "elapsed_seconds_median": 7.9,
+          "sla_met": true
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 17.46,
+      "ttft_ms_p90": 18.77,
+      "ttft_ms_p99": 22.71,
+      "tpot_ms_p50": 1.76,
+      "tpot_ms_p90": 1.78,
+      "tpot_ms_p99": 1.85,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 53.3
+    },
+    "sustained": {
+      "sustained_concurrency": 32,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7020.2,
+          "tokens_out": 421272,
+          "tokens_in": 0,
+          "requests_completed": 2260,
+          "ttft_ms_p50": 26.5,
+          "ttft_ms_p99": 3103.4
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7497.5,
+          "tokens_out": 449990,
+          "tokens_in": 0,
+          "requests_completed": 2402,
+          "ttft_ms_p50": 25.1,
+          "ttft_ms_p99": 44.6
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7463.0,
+          "tokens_out": 447695,
+          "tokens_in": 0,
+          "requests_completed": 2397,
+          "ttft_ms_p50": 22.7,
+          "ttft_ms_p99": 42.9
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7409.6,
+          "tokens_out": 444555,
+          "tokens_in": 0,
+          "requests_completed": 2372,
+          "ttft_ms_p50": 23.2,
+          "ttft_ms_p99": 44.5
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7414.8,
+          "tokens_out": 445029,
+          "tokens_in": 0,
+          "requests_completed": 2374,
+          "ttft_ms_p50": 22.1,
+          "ttft_ms_p99": 42.8
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7398.7,
+          "tokens_out": 443888,
+          "tokens_in": 0,
+          "requests_completed": 2366,
+          "ttft_ms_p50": 22.9,
+          "ttft_ms_p99": 43.5
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7454.4,
+          "tokens_out": 447128,
+          "tokens_in": 0,
+          "requests_completed": 2389,
+          "ttft_ms_p50": 22.5,
+          "ttft_ms_p99": 43.7
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7395.5,
+          "tokens_out": 443937,
+          "tokens_in": 0,
+          "requests_completed": 2372,
+          "ttft_ms_p50": 22.8,
+          "ttft_ms_p99": 43.1
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7477.0,
+          "tokens_out": 448440,
+          "tokens_in": 0,
+          "requests_completed": 2398,
+          "ttft_ms_p50": 26.0,
+          "ttft_ms_p99": 43.6
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7481.1,
+          "tokens_out": 449028,
+          "tokens_in": 0,
+          "requests_completed": 2403,
+          "ttft_ms_p50": 22.7,
+          "ttft_ms_p99": 44.0
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7420.7,
+          "tokens_out": 445294,
+          "tokens_in": 0,
+          "requests_completed": 2376,
+          "ttft_ms_p50": 21.6,
+          "ttft_ms_p99": 44.2
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7434.3,
+          "tokens_out": 446024,
+          "tokens_in": 0,
+          "requests_completed": 2386,
+          "ttft_ms_p50": 24.1,
+          "ttft_ms_p99": 44.5
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7485.4,
+          "tokens_out": 448857,
+          "tokens_in": 0,
+          "requests_completed": 2392,
+          "ttft_ms_p50": 22.7,
+          "ttft_ms_p99": 43.2
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7420.0,
+          "tokens_out": 445468,
+          "tokens_in": 0,
+          "requests_completed": 2379,
+          "ttft_ms_p50": 22.4,
+          "ttft_ms_p99": 43.8
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 7412.3,
+      "throttle_ratio": 0.936,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -3059.6
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.41,
+    "baseline_delta": 0.03,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "05:22:34",
+    "run_id": "280f3db2",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T05:22:15.032061+00:00",
+    "benchmark_end_time": "2026-05-09T05:22:34.478538+00:00",
+    "benchmark_elapsed_minutes": 20.0,
+    "model_load_seconds": 60.4,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'interactive', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/offline",
+      "online": "results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/online",
+      "interactive": "results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/interactive",
+      "sustained": "results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/sustained/result.json b/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/sustained/result.json
new file mode 100644
index 00000000..0c601e54
--- /dev/null
+++ b/results/community/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2/sustained/result.json
@@ -0,0 +1,278 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-09T05:17:28.177530+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\tNODE\t64-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\tSYS\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\tSYS\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \tSYS\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tSYS\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 32,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7020.2,
+          "tokens_out": 421272,
+          "tokens_in": 0,
+          "requests_completed": 2260,
+          "ttft_ms_p50": 26.5,
+          "ttft_ms_p99": 3103.4
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7497.5,
+          "tokens_out": 449990,
+          "tokens_in": 0,
+          "requests_completed": 2402,
+          "ttft_ms_p50": 25.1,
+          "ttft_ms_p99": 44.6
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7463.0,
+          "tokens_out": 447695,
+          "tokens_in": 0,
+          "requests_completed": 2397,
+          "ttft_ms_p50": 22.7,
+          "ttft_ms_p99": 42.9
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7409.6,
+          "tokens_out": 444555,
+          "tokens_in": 0,
+          "requests_completed": 2372,
+          "ttft_ms_p50": 23.2,
+          "ttft_ms_p99": 44.5
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7414.8,
+          "tokens_out": 445029,
+          "tokens_in": 0,
+          "requests_completed": 2374,
+          "ttft_ms_p50": 22.1,
+          "ttft_ms_p99": 42.8
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7398.7,
+          "tokens_out": 443888,
+          "tokens_in": 0,
+          "requests_completed": 2366,
+          "ttft_ms_p50": 22.9,
+          "ttft_ms_p99": 43.5
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7454.4,
+          "tokens_out": 447128,
+          "tokens_in": 0,
+          "requests_completed": 2389,
+          "ttft_ms_p50": 22.5,
+          "ttft_ms_p99": 43.7
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7395.5,
+          "tokens_out": 443937,
+          "tokens_in": 0,
+          "requests_completed": 2372,
+          "ttft_ms_p50": 22.8,
+          "ttft_ms_p99": 43.1
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7477.0,
+          "tokens_out": 448440,
+          "tokens_in": 0,
+          "requests_completed": 2398,
+          "ttft_ms_p50": 26.0,
+          "ttft_ms_p99": 43.6
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7481.1,
+          "tokens_out": 449028,
+          "tokens_in": 0,
+          "requests_completed": 2403,
+          "ttft_ms_p50": 22.7,
+          "ttft_ms_p99": 44.0
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7420.7,
+          "tokens_out": 445294,
+          "tokens_in": 0,
+          "requests_completed": 2376,
+          "ttft_ms_p50": 21.6,
+          "ttft_ms_p99": 44.2
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7434.3,
+          "tokens_out": 446024,
+          "tokens_in": 0,
+          "requests_completed": 2386,
+          "ttft_ms_p50": 24.1,
+          "ttft_ms_p99": 44.5
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7485.4,
+          "tokens_out": 448857,
+          "tokens_in": 0,
+          "requests_completed": 2392,
+          "ttft_ms_p50": 22.7,
+          "ttft_ms_p99": 43.2
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7420.0,
+          "tokens_out": 445468,
+          "tokens_in": 0,
+          "requests_completed": 2379,
+          "ttft_ms_p50": 22.4,
+          "ttft_ms_p99": 43.8
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 7412.3,
+      "throttle_ratio": 0.936,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -3059.6
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-09",
+    "time": "05:47:01",
+    "run_id": "280f3db2",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_280f3db2",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-09T05:31:59.919623+00:00",
+    "benchmark_end_time": "2026-05-09T05:47:01.325708+00:00",
+    "benchmark_elapsed_minutes": 15.0,
+    "model_load_seconds": 55.4
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/accuracy/accuracy.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/accuracy/accuracy.json
new file mode 100644
index 00000000..5b260195
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.61,
+  "baseline_delta": 0.01,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/burst/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/burst/result.json
new file mode 100644
index 00000000..87444f16
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/burst/result.json
@@ -0,0 +1,164 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T07:15:18.766397+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "burst",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "burst": {
+      "sla_ttft_ms": 500,
+      "burst_steady_qps": 5,
+      "burst_peak_qps": 25,
+      "burst_duration_seconds": 30,
+      "burst_interval_seconds": 120,
+      "steady_requests_total": 1812,
+      "burst_requests_total": 2245,
+      "steady_ttft_p50_ms": 39.79,
+      "steady_ttft_p99_ms": 88.73,
+      "burst_ttft_p50_ms": 56.4,
+      "burst_ttft_p99_ms": 91.21,
+      "sla_met_during_burst": true,
+      "burst_degradation_ratio": 1.028,
+      "results_by_cycle": [
+        {
+          "cycle": 1,
+          "steady_requests": 581,
+          "burst_requests": 760,
+          "steady_ttft_p99_ms": 765.97,
+          "burst_ttft_p99_ms": 94.46
+        },
+        {
+          "cycle": 2,
+          "steady_requests": 595,
+          "burst_requests": 734,
+          "steady_ttft_p99_ms": 60.79,
+          "burst_ttft_p99_ms": 90.42
+        },
+        {
+          "cycle": 3,
+          "steady_requests": 636,
+          "burst_requests": 751,
+          "steady_ttft_p99_ms": 62.08,
+          "burst_ttft_p99_ms": 89.13
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "08:39:28",
+    "run_id": "93928a91",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T08:31:37.700771+00:00",
+    "benchmark_end_time": "2026-05-07T08:39:28.136330+00:00",
+    "benchmark_elapsed_minutes": 7.8,
+    "model_load_seconds": 55.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/env_info.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/env_info.json
new file mode 100644
index 00000000..bf163b17
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/env_info.json
@@ -0,0 +1,53 @@
+{
+  "collected_at": "2026-05-07T07:15:18.766397+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA A800-SXM4-80GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 80.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "AMD EPYC 7763 64-Core Processor",
+    "physical_cores": 128,
+    "logical_cores": 255,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.6,
+  "pcie_generation": "PCIe Gen 4",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_2",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_3",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.10.20",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 12.8",
+  "pytorch_version": "2.9.1+cu128"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/interactive/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/interactive/result.json
new file mode 100644
index 00000000..70d8e422
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/interactive/result.json
@@ -0,0 +1,136 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T07:15:18.766397+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 31.14,
+      "ttft_ms_p90": 42.5,
+      "ttft_ms_p99": 58.74,
+      "tpot_ms_p50": 11.0,
+      "tpot_ms_p90": 11.05,
+      "tpot_ms_p99": 11.1,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 325.3
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "07:45:43",
+    "run_id": "93928a91",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T07:29:25.547148+00:00",
+    "benchmark_end_time": "2026-05-07T07:45:43.129744+00:00",
+    "benchmark_elapsed_minutes": 16.3,
+    "model_load_seconds": 61.0
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/offline/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/offline/result.json
new file mode 100644
index 00000000..a0d2d653
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/offline/result.json
@@ -0,0 +1,166 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T07:15:18.766397+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 3808.98,
+          "throughput_tokens_per_sec_per_chip": 3808.98,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 3850.31,
+          "throughput_tokens_per_sec_per_chip": 3850.31,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 3846.1,
+          "throughput_tokens_per_sec_per_chip": 3846.1,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "07:21:41",
+    "run_id": "93928a91",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T07:19:47.324731+00:00",
+    "benchmark_end_time": "2026-05-07T07:21:41.048200+00:00",
+    "benchmark_elapsed_minutes": 1.9,
+    "model_load_seconds": 59.0
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/online/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/online/result.json
new file mode 100644
index 00000000..5b921271
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/online/result.json
@@ -0,0 +1,168 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T07:15:18.766397+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 100,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 40.79,
+          "ttft_ms_p90": 59.82,
+          "ttft_ms_p99": 1339.64,
+          "tpot_ms_p50": 12.88,
+          "tpot_ms_p90": 14.51,
+          "tpot_ms_p99": 15.86,
+          "elapsed_seconds_median": 65.5,
+          "sla_met": false
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 53.32,
+          "ttft_ms_p90": 74.21,
+          "ttft_ms_p99": 86.11,
+          "tpot_ms_p50": 30.61,
+          "tpot_ms_p90": 35.77,
+          "tpot_ms_p99": 43.52,
+          "elapsed_seconds_median": 16.3,
+          "sla_met": true
+        },
+        {
+          "target_qps": 100,
+          "achieved_qps": 100.0,
+          "ttft_ms_p50": 52.52,
+          "ttft_ms_p90": 70.44,
+          "ttft_ms_p99": 166.6,
+          "tpot_ms_p50": 38.9,
+          "tpot_ms_p90": 51.41,
+          "tpot_ms_p99": 138.25,
+          "elapsed_seconds_median": 10.2,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "07:27:56",
+    "run_id": "93928a91",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T07:23:22.713485+00:00",
+    "benchmark_end_time": "2026-05-07T07:27:56.704379+00:00",
+    "benchmark_elapsed_minutes": 4.6,
+    "model_load_seconds": 73.9
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/result.json
new file mode 100644
index 00000000..e58bf7fa
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/result.json
@@ -0,0 +1,615 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T07:15:18.766397+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "interactive",
+      "sustained",
+      "speculative",
+      "burst"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 3808.98,
+          "throughput_tokens_per_sec_per_chip": 3808.98,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 3850.31,
+          "throughput_tokens_per_sec_per_chip": 3850.31,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 3846.1,
+          "throughput_tokens_per_sec_per_chip": 3846.1,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 100,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 40.79,
+          "ttft_ms_p90": 59.82,
+          "ttft_ms_p99": 1339.64,
+          "tpot_ms_p50": 12.88,
+          "tpot_ms_p90": 14.51,
+          "tpot_ms_p99": 15.86,
+          "elapsed_seconds_median": 65.5,
+          "sla_met": false
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 53.32,
+          "ttft_ms_p90": 74.21,
+          "ttft_ms_p99": 86.11,
+          "tpot_ms_p50": 30.61,
+          "tpot_ms_p90": 35.77,
+          "tpot_ms_p99": 43.52,
+          "elapsed_seconds_median": 16.3,
+          "sla_met": true
+        },
+        {
+          "target_qps": 100,
+          "achieved_qps": 100.0,
+          "ttft_ms_p50": 52.52,
+          "ttft_ms_p90": 70.44,
+          "ttft_ms_p99": 166.6,
+          "tpot_ms_p50": 38.9,
+          "tpot_ms_p90": 51.41,
+          "tpot_ms_p99": 138.25,
+          "elapsed_seconds_median": 10.2,
+          "sla_met": true
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 31.14,
+      "ttft_ms_p90": 42.5,
+      "ttft_ms_p99": 58.74,
+      "tpot_ms_p50": 11.0,
+      "tpot_ms_p90": 11.05,
+      "tpot_ms_p99": 11.1,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 325.3
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 628.4,
+          "tokens_out": 37735,
+          "tokens_in": 0,
+          "requests_completed": 201,
+          "ttft_ms_p50": 41.3,
+          "ttft_ms_p99": 1767.2
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.1,
+          "tokens_out": 39719,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 58.1
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 661.7,
+          "tokens_out": 39696,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 40.1,
+          "ttft_ms_p99": 57.4
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.7,
+          "tokens_out": 39883,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 55.7
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.5,
+          "tokens_out": 39875,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 57.4
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 672.0,
+          "tokens_out": 40332,
+          "tokens_in": 0,
+          "requests_completed": 215,
+          "ttft_ms_p50": 40.2,
+          "ttft_ms_p99": 58.2
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.2,
+          "tokens_out": 39719,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 40.5,
+          "ttft_ms_p99": 57.8
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 663.9,
+          "tokens_out": 39843,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 57.9
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.4,
+          "tokens_out": 39839,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 40.1,
+          "ttft_ms_p99": 58.4
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 661.3,
+          "tokens_out": 39692,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 56.5
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 661.0,
+          "tokens_out": 39660,
+          "tokens_in": 0,
+          "requests_completed": 209,
+          "ttft_ms_p50": 40.1,
+          "ttft_ms_p99": 58.1
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.8,
+          "tokens_out": 39901,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 58.2
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.5,
+          "tokens_out": 39869,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 58.1
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 672.2,
+          "tokens_out": 40332,
+          "tokens_in": 0,
+          "requests_completed": 214,
+          "ttft_ms_p50": 40.1,
+          "ttft_ms_p99": 57.8
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 663.5,
+          "tokens_out": 39785,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 40.2,
+          "ttft_ms_p99": 58.3
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 665.4,
+          "tokens_out": 39925,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 40.1,
+          "ttft_ms_p99": 58.2
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 661.3,
+          "tokens_out": 39683,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 40.1,
+          "ttft_ms_p99": 58.3
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 667.4,
+          "tokens_out": 40046,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 56.9
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.5,
+          "tokens_out": 39630,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 49.5
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.5,
+          "tokens_out": 39633,
+          "tokens_in": 0,
+          "requests_completed": 209,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 57.7
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.8,
+          "tokens_out": 39891,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 56.5
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 665.7,
+          "tokens_out": 39946,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.8,
+          "ttft_ms_p99": 56.4
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 668.7,
+          "tokens_out": 40114,
+          "tokens_in": 0,
+          "requests_completed": 213,
+          "ttft_ms_p50": 40.2,
+          "ttft_ms_p99": 58.2
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.8,
+          "tokens_out": 39762,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 57.4
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 666.7,
+          "tokens_out": 40006,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 57.4
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.7,
+          "tokens_out": 39882,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 40.3,
+          "ttft_ms_p99": 58.9
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.0,
+          "tokens_out": 39843,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 40.4,
+          "ttft_ms_p99": 57.1
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.5,
+          "tokens_out": 39623,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 39.8,
+          "ttft_ms_p99": 47.3
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.4,
+          "tokens_out": 39654,
+          "tokens_in": 0,
+          "requests_completed": 209,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 56.1
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 664.1,
+      "throttle_ratio": 0.982,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -2.0
+    },
+    "speculative": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 738.63,
+          "throughput_tokens_per_sec_per_chip": 738.63,
+          "elapsed_seconds_median": 46.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 739.28,
+          "throughput_tokens_per_sec_per_chip": 739.28,
+          "elapsed_seconds_median": 46.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 738.93,
+          "throughput_tokens_per_sec_per_chip": 738.93,
+          "elapsed_seconds_median": 46.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "burst": {
+      "sla_ttft_ms": 500,
+      "burst_steady_qps": 5,
+      "burst_peak_qps": 25,
+      "burst_duration_seconds": 30,
+      "burst_interval_seconds": 120,
+      "steady_requests_total": 1812,
+      "burst_requests_total": 2245,
+      "steady_ttft_p50_ms": 39.79,
+      "steady_ttft_p99_ms": 88.73,
+      "burst_ttft_p50_ms": 56.4,
+      "burst_ttft_p99_ms": 91.21,
+      "sla_met_during_burst": true,
+      "burst_degradation_ratio": 1.028,
+      "results_by_cycle": [
+        {
+          "cycle": 1,
+          "steady_requests": 581,
+          "burst_requests": 760,
+          "steady_ttft_p99_ms": 765.97,
+          "burst_ttft_p99_ms": 94.46
+        },
+        {
+          "cycle": 2,
+          "steady_requests": 595,
+          "burst_requests": 734,
+          "steady_ttft_p99_ms": 60.79,
+          "burst_ttft_p99_ms": 90.42
+        },
+        {
+          "cycle": 3,
+          "steady_requests": 636,
+          "burst_requests": 751,
+          "steady_ttft_p99_ms": 62.08,
+          "burst_ttft_p99_ms": 89.13
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.61,
+    "baseline_delta": 0.01,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "07:21:41",
+    "run_id": "93928a91",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T07:19:47.324731+00:00",
+    "benchmark_end_time": "2026-05-07T07:21:41.048200+00:00",
+    "benchmark_elapsed_minutes": 70.1,
+    "model_load_seconds": 59.0,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'interactive', 'sustained', 'speculative', 'burst'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/offline",
+      "online": "results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/online",
+      "interactive": "results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/interactive",
+      "sustained": "results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/sustained",
+      "speculative": "results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/speculative",
+      "burst": "results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/burst"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/speculative/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/speculative/result.json
new file mode 100644
index 00000000..a4acbb9e
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/speculative/result.json
@@ -0,0 +1,166 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T07:15:18.766397+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "speculative",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "speculative": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 738.63,
+          "throughput_tokens_per_sec_per_chip": 738.63,
+          "elapsed_seconds_median": 46.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 739.28,
+          "throughput_tokens_per_sec_per_chip": 739.28,
+          "elapsed_seconds_median": 46.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 738.93,
+          "throughput_tokens_per_sec_per_chip": 738.93,
+          "elapsed_seconds_median": 46.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "08:30:19",
+    "run_id": "93928a91",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T08:20:54.145018+00:00",
+    "benchmark_end_time": "2026-05-07T08:30:19.120188+00:00",
+    "benchmark_elapsed_minutes": 9.4,
+    "model_load_seconds": 166.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/sustained/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/sustained/result.json
new file mode 100644
index 00000000..f5d98b09
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91/sustained/result.json
@@ -0,0 +1,428 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T07:15:18.766397+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 628.4,
+          "tokens_out": 37735,
+          "tokens_in": 0,
+          "requests_completed": 201,
+          "ttft_ms_p50": 41.3,
+          "ttft_ms_p99": 1767.2
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.1,
+          "tokens_out": 39719,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 58.1
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 661.7,
+          "tokens_out": 39696,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 40.1,
+          "ttft_ms_p99": 57.4
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.7,
+          "tokens_out": 39883,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 55.7
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.5,
+          "tokens_out": 39875,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 57.4
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 672.0,
+          "tokens_out": 40332,
+          "tokens_in": 0,
+          "requests_completed": 215,
+          "ttft_ms_p50": 40.2,
+          "ttft_ms_p99": 58.2
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.2,
+          "tokens_out": 39719,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 40.5,
+          "ttft_ms_p99": 57.8
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 663.9,
+          "tokens_out": 39843,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 57.9
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.4,
+          "tokens_out": 39839,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 40.1,
+          "ttft_ms_p99": 58.4
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 661.3,
+          "tokens_out": 39692,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 56.5
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 661.0,
+          "tokens_out": 39660,
+          "tokens_in": 0,
+          "requests_completed": 209,
+          "ttft_ms_p50": 40.1,
+          "ttft_ms_p99": 58.1
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.8,
+          "tokens_out": 39901,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 58.2
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.5,
+          "tokens_out": 39869,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 58.1
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 672.2,
+          "tokens_out": 40332,
+          "tokens_in": 0,
+          "requests_completed": 214,
+          "ttft_ms_p50": 40.1,
+          "ttft_ms_p99": 57.8
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 663.5,
+          "tokens_out": 39785,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 40.2,
+          "ttft_ms_p99": 58.3
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 665.4,
+          "tokens_out": 39925,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 40.1,
+          "ttft_ms_p99": 58.2
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 661.3,
+          "tokens_out": 39683,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 40.1,
+          "ttft_ms_p99": 58.3
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 667.4,
+          "tokens_out": 40046,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 56.9
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.5,
+          "tokens_out": 39630,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 49.5
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.5,
+          "tokens_out": 39633,
+          "tokens_in": 0,
+          "requests_completed": 209,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 57.7
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.8,
+          "tokens_out": 39891,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 56.5
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 665.7,
+          "tokens_out": 39946,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.8,
+          "ttft_ms_p99": 56.4
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 668.7,
+          "tokens_out": 40114,
+          "tokens_in": 0,
+          "requests_completed": 213,
+          "ttft_ms_p50": 40.2,
+          "ttft_ms_p99": 58.2
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.8,
+          "tokens_out": 39762,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 57.4
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 666.7,
+          "tokens_out": 40006,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 57.4
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.7,
+          "tokens_out": 39882,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 40.3,
+          "ttft_ms_p99": 58.9
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 664.0,
+          "tokens_out": 39843,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 40.4,
+          "ttft_ms_p99": 57.1
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.5,
+          "tokens_out": 39623,
+          "tokens_in": 0,
+          "requests_completed": 210,
+          "ttft_ms_p50": 39.8,
+          "ttft_ms_p99": 47.3
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.4,
+          "tokens_out": 39654,
+          "tokens_in": 0,
+          "requests_completed": 209,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 56.1
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 664.1,
+      "throttle_ratio": 0.982,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -2.0
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "08:17:02",
+    "run_id": "93928a91",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_A_nvidia_sglang_c43a8309_93928a91",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T07:46:58.710502+00:00",
+    "benchmark_end_time": "2026-05-07T08:17:02.619499+00:00",
+    "benchmark_elapsed_minutes": 30.1,
+    "model_load_seconds": 51.3
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/accuracy/accuracy.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/accuracy/accuracy.json
new file mode 100644
index 00000000..37f4d824
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.55,
+  "baseline_delta": -0.01,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/offline/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/offline/result.json
new file mode 100644
index 00000000..ef48e8b5
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/offline/result.json
@@ -0,0 +1,178 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T08:41:03.357410+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 3839.22,
+          "throughput_tokens_per_sec_per_chip": 3839.22,
+          "elapsed_seconds_median": 9.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 3837.82,
+          "throughput_tokens_per_sec_per_chip": 3837.82,
+          "elapsed_seconds_median": 9.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 3860.62,
+          "throughput_tokens_per_sec_per_chip": 3860.62,
+          "elapsed_seconds_median": 9.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 3841.02,
+          "throughput_tokens_per_sec_per_chip": 3841.02,
+          "elapsed_seconds_median": 9.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "08:47:27",
+    "run_id": "8eb86278",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T08:44:56.309394+00:00",
+    "benchmark_end_time": "2026-05-07T08:47:27.583870+00:00",
+    "benchmark_elapsed_minutes": 2.5,
+    "model_load_seconds": 54.4
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/online/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/online/result.json
new file mode 100644
index 00000000..d9959965
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/online/result.json
@@ -0,0 +1,180 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T08:41:03.357410+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 40.62,
+          "ttft_ms_p90": 60.95,
+          "ttft_ms_p99": 2890.67,
+          "tpot_ms_p50": 12.83,
+          "tpot_ms_p90": 14.52,
+          "tpot_ms_p99": 16.44,
+          "elapsed_seconds_median": 65.5,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 42.3,
+          "ttft_ms_p90": 58.83,
+          "ttft_ms_p99": 67.63,
+          "tpot_ms_p50": 16.09,
+          "tpot_ms_p90": 18.1,
+          "tpot_ms_p99": 19.77,
+          "elapsed_seconds_median": 32.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 53.31,
+          "ttft_ms_p90": 74.03,
+          "ttft_ms_p99": 87.69,
+          "tpot_ms_p50": 30.28,
+          "tpot_ms_p90": 35.96,
+          "tpot_ms_p99": 41.34,
+          "elapsed_seconds_median": 16.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 53.07,
+          "ttft_ms_p90": 76.66,
+          "ttft_ms_p99": 94.56,
+          "tpot_ms_p50": 37.4,
+          "tpot_ms_p90": 42.77,
+          "tpot_ms_p99": 61.19,
+          "elapsed_seconds_median": 12.2,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "08:55:45",
+    "run_id": "8eb86278",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T08:49:27.978151+00:00",
+    "benchmark_end_time": "2026-05-07T08:55:45.393807+00:00",
+    "benchmark_elapsed_minutes": 6.3,
+    "model_load_seconds": 86.3
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/result.json
new file mode 100644
index 00000000..fc617f64
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/result.json
@@ -0,0 +1,395 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T08:41:03.357410+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 3839.22,
+          "throughput_tokens_per_sec_per_chip": 3839.22,
+          "elapsed_seconds_median": 9.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 3837.82,
+          "throughput_tokens_per_sec_per_chip": 3837.82,
+          "elapsed_seconds_median": 9.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 3860.62,
+          "throughput_tokens_per_sec_per_chip": 3860.62,
+          "elapsed_seconds_median": 9.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 3841.02,
+          "throughput_tokens_per_sec_per_chip": 3841.02,
+          "elapsed_seconds_median": 9.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 40.62,
+          "ttft_ms_p90": 60.95,
+          "ttft_ms_p99": 2890.67,
+          "tpot_ms_p50": 12.83,
+          "tpot_ms_p90": 14.52,
+          "tpot_ms_p99": 16.44,
+          "elapsed_seconds_median": 65.5,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 42.3,
+          "ttft_ms_p90": 58.83,
+          "ttft_ms_p99": 67.63,
+          "tpot_ms_p50": 16.09,
+          "tpot_ms_p90": 18.1,
+          "tpot_ms_p99": 19.77,
+          "elapsed_seconds_median": 32.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 53.31,
+          "ttft_ms_p90": 74.03,
+          "ttft_ms_p99": 87.69,
+          "tpot_ms_p50": 30.28,
+          "tpot_ms_p90": 35.96,
+          "tpot_ms_p99": 41.34,
+          "elapsed_seconds_median": 16.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 53.07,
+          "ttft_ms_p90": 76.66,
+          "ttft_ms_p99": 94.56,
+          "tpot_ms_p50": 37.4,
+          "tpot_ms_p90": 42.77,
+          "tpot_ms_p99": 61.19,
+          "elapsed_seconds_median": 12.2,
+          "sla_met": true
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 581.4,
+          "tokens_out": 34896,
+          "tokens_in": 0,
+          "requests_completed": 191,
+          "ttft_ms_p50": 41.3,
+          "ttft_ms_p99": 5888.1
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 650.0,
+          "tokens_out": 39002,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 56.9
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 675.0,
+          "tokens_out": 40500,
+          "tokens_in": 0,
+          "requests_completed": 217,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 49.7
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 651.6,
+          "tokens_out": 39092,
+          "tokens_in": 0,
+          "requests_completed": 216,
+          "ttft_ms_p50": 39.7,
+          "ttft_ms_p99": 56.5
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 667.4,
+          "tokens_out": 40053,
+          "tokens_in": 0,
+          "requests_completed": 216,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 58.4
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.5,
+          "tokens_out": 39637,
+          "tokens_in": 0,
+          "requests_completed": 214,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 59.0
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 672.3,
+          "tokens_out": 40332,
+          "tokens_in": 0,
+          "requests_completed": 217,
+          "ttft_ms_p50": 39.6,
+          "ttft_ms_p99": 57.1
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 655.7,
+          "tokens_out": 39336,
+          "tokens_in": 0,
+          "requests_completed": 213,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 55.2
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 668.6,
+          "tokens_out": 40119,
+          "tokens_in": 0,
+          "requests_completed": 216,
+          "ttft_ms_p50": 40.1,
+          "ttft_ms_p99": 52.8
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 654.4,
+          "tokens_out": 39267,
+          "tokens_in": 0,
+          "requests_completed": 214,
+          "ttft_ms_p50": 39.8,
+          "ttft_ms_p99": 59.7
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 663.6,
+          "tokens_out": 39823,
+          "tokens_in": 0,
+          "requests_completed": 217,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 56.3
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 656.1,
+          "tokens_out": 39344,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 56.0
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 666.9,
+          "tokens_out": 40024,
+          "tokens_in": 0,
+          "requests_completed": 217,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 56.6
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.7,
+          "tokens_out": 39767,
+          "tokens_in": 0,
+          "requests_completed": 215,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 56.0
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 656.2,
+      "throttle_ratio": 0.861,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -5832.1
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.55,
+    "baseline_delta": -0.01,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "08:47:27",
+    "run_id": "8eb86278",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T08:44:56.309394+00:00",
+    "benchmark_end_time": "2026-05-07T08:47:27.583870+00:00",
+    "benchmark_elapsed_minutes": 23.9,
+    "model_load_seconds": 54.4,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/offline",
+      "online": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/online",
+      "sustained": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/sustained/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/sustained/result.json
new file mode 100644
index 00000000..951f6b69
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/sustained/result.json
@@ -0,0 +1,278 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T08:41:03.357410+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 581.4,
+          "tokens_out": 34896,
+          "tokens_in": 0,
+          "requests_completed": 191,
+          "ttft_ms_p50": 41.3,
+          "ttft_ms_p99": 5888.1
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 650.0,
+          "tokens_out": 39002,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 40.0,
+          "ttft_ms_p99": 56.9
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 675.0,
+          "tokens_out": 40500,
+          "tokens_in": 0,
+          "requests_completed": 217,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 49.7
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 651.6,
+          "tokens_out": 39092,
+          "tokens_in": 0,
+          "requests_completed": 216,
+          "ttft_ms_p50": 39.7,
+          "ttft_ms_p99": 56.5
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 667.4,
+          "tokens_out": 40053,
+          "tokens_in": 0,
+          "requests_completed": 216,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 58.4
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.5,
+          "tokens_out": 39637,
+          "tokens_in": 0,
+          "requests_completed": 214,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 59.0
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 672.3,
+          "tokens_out": 40332,
+          "tokens_in": 0,
+          "requests_completed": 217,
+          "ttft_ms_p50": 39.6,
+          "ttft_ms_p99": 57.1
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 655.7,
+          "tokens_out": 39336,
+          "tokens_in": 0,
+          "requests_completed": 213,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 55.2
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 668.6,
+          "tokens_out": 40119,
+          "tokens_in": 0,
+          "requests_completed": 216,
+          "ttft_ms_p50": 40.1,
+          "ttft_ms_p99": 52.8
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 654.4,
+          "tokens_out": 39267,
+          "tokens_in": 0,
+          "requests_completed": 214,
+          "ttft_ms_p50": 39.8,
+          "ttft_ms_p99": 59.7
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 663.6,
+          "tokens_out": 39823,
+          "tokens_in": 0,
+          "requests_completed": 217,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 56.3
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 656.1,
+          "tokens_out": 39344,
+          "tokens_in": 0,
+          "requests_completed": 212,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 56.0
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 666.9,
+          "tokens_out": 40024,
+          "tokens_in": 0,
+          "requests_completed": 217,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 56.6
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.7,
+          "tokens_out": 39767,
+          "tokens_in": 0,
+          "requests_completed": 215,
+          "ttft_ms_p50": 39.9,
+          "ttft_ms_p99": 56.0
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 656.2,
+      "throttle_ratio": 0.861,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -5832.1
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "09:13:45",
+    "run_id": "8eb86278",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T08:58:41.478087+00:00",
+    "benchmark_end_time": "2026-05-07T09:13:45.970102+00:00",
+    "benchmark_elapsed_minutes": 15.1,
+    "model_load_seconds": 117.7
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/env_info.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/env_info.json
new file mode 100644
index 00000000..dce5e92c
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/env_info.json
@@ -0,0 +1,53 @@
+{
+  "collected_at": "2026-05-07T08:41:03.357410+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA A800-SXM4-80GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 80.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "AMD EPYC 7763 64-Core Processor",
+    "physical_cores": 128,
+    "logical_cores": 255,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.6,
+  "pcie_generation": "PCIe Gen 4",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_2",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_3",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.10.20",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 12.8",
+  "pytorch_version": "2.9.1+cu128"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/result.json
new file mode 100644
index 00000000..0d63e281
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/result.json
@@ -0,0 +1,963 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original",
+    "_note": "suite model_id. Each precision level uses its own quantized checkpoint."
+  },
+  "task": {
+    "scenarios_run": [
+      "accuracy",
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "precision_levels_run": [
+      "BF16",
+      "FP8",
+      "W8A8",
+      "W8A16",
+      "W4A16"
+    ],
+    "precision_levels_skipped": [
+      "FP16"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "quantization": {
+      "results_by_precision": [
+        {
+          "precision": "BF16",
+          "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+          "best_throughput_tokens_per_sec": 3860.62,
+          "accuracy_score": 0.55,
+          "accuracy_baseline_delta": -0.01,
+          "accuracy_valid": true,
+          "quality_efficiency": 2123.3,
+          "speedup_vs_bf16": 1.0,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 3839.22,
+              "throughput_tokens_per_sec_per_chip": 3839.22,
+              "elapsed_seconds_median": 9.3,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 3837.82,
+              "throughput_tokens_per_sec_per_chip": 3837.82,
+              "elapsed_seconds_median": 9.3,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 3860.62,
+              "throughput_tokens_per_sec_per_chip": 3860.62,
+              "elapsed_seconds_median": 9.3,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 3841.02,
+              "throughput_tokens_per_sec_per_chip": 3841.02,
+              "elapsed_seconds_median": 9.3,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "bf16",
+          "effective_dtype": "bfloat16",
+          "quantization_method": null
+        },
+        {
+          "precision": "W8A16",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+          "best_throughput_tokens_per_sec": 4024.12,
+          "accuracy_score": 0.59,
+          "accuracy_baseline_delta": 0.0,
+          "accuracy_valid": true,
+          "quality_efficiency": 2374.2,
+          "speedup_vs_bf16": 1.042,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 4024.12,
+              "throughput_tokens_per_sec_per_chip": 4024.12,
+              "elapsed_seconds_median": 8.8,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 4018.97,
+              "throughput_tokens_per_sec_per_chip": 4018.97,
+              "elapsed_seconds_median": 8.8,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 4015.67,
+              "throughput_tokens_per_sec_per_chip": 4015.67,
+              "elapsed_seconds_median": 8.8,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 4012.73,
+              "throughput_tokens_per_sec_per_chip": 4012.73,
+              "elapsed_seconds_median": 8.8,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "w8a16",
+          "effective_dtype": "auto",
+          "quantization_method": "compressed-tensors"
+        },
+        {
+          "precision": "W4A16",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+          "best_throughput_tokens_per_sec": 2227.61,
+          "accuracy_score": 0.57,
+          "accuracy_baseline_delta": 0.0,
+          "accuracy_valid": true,
+          "quality_efficiency": 1269.7,
+          "speedup_vs_bf16": 0.577,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 2210.43,
+              "throughput_tokens_per_sec_per_chip": 2210.43,
+              "elapsed_seconds_median": 15.7,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 2147.56,
+              "throughput_tokens_per_sec_per_chip": 2147.56,
+              "elapsed_seconds_median": 16.0,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 2227.61,
+              "throughput_tokens_per_sec_per_chip": 2227.61,
+              "elapsed_seconds_median": 15.7,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 2158.28,
+              "throughput_tokens_per_sec_per_chip": 2158.28,
+              "elapsed_seconds_median": 16.0,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "w4a16",
+          "effective_dtype": "auto",
+          "quantization_method": "gptq"
+        }
+      ]
+    },
+    "derived": {},
+    "quantization_online": {
+      "results_by_precision": [
+        {
+          "precision": "BF16",
+          "max_valid_qps": 50,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 40.62,
+              "ttft_ms_p90": 60.95,
+              "ttft_ms_p99": 2890.67,
+              "tpot_ms_p50": 12.83,
+              "tpot_ms_p90": 14.52,
+              "tpot_ms_p99": 16.44,
+              "elapsed_seconds_median": 65.5,
+              "sla_met": false
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 42.3,
+              "ttft_ms_p90": 58.83,
+              "ttft_ms_p99": 67.63,
+              "tpot_ms_p50": 16.09,
+              "tpot_ms_p90": 18.1,
+              "tpot_ms_p99": 19.77,
+              "elapsed_seconds_median": 32.1,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 53.31,
+              "ttft_ms_p90": 74.03,
+              "ttft_ms_p99": 87.69,
+              "tpot_ms_p50": 30.28,
+              "tpot_ms_p90": 35.96,
+              "tpot_ms_p99": 41.34,
+              "elapsed_seconds_median": 16.1,
+              "sla_met": true
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 53.07,
+              "ttft_ms_p90": 76.66,
+              "ttft_ms_p99": 94.56,
+              "tpot_ms_p50": 37.4,
+              "tpot_ms_p90": 42.77,
+              "tpot_ms_p99": 61.19,
+              "elapsed_seconds_median": 12.2,
+              "sla_met": true
+            }
+          ]
+        },
+        {
+          "precision": "W8A16",
+          "max_valid_qps": 50,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 34.62,
+              "ttft_ms_p90": 60.74,
+              "ttft_ms_p99": 1546.3,
+              "tpot_ms_p50": 8.35,
+              "tpot_ms_p90": 9.42,
+              "tpot_ms_p99": 11.52,
+              "elapsed_seconds_median": 64.6,
+              "sla_met": false
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 36.02,
+              "ttft_ms_p90": 52.89,
+              "ttft_ms_p99": 60.18,
+              "tpot_ms_p50": 11.06,
+              "tpot_ms_p90": 12.37,
+              "tpot_ms_p99": 13.91,
+              "elapsed_seconds_median": 31.2,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 53.44,
+              "ttft_ms_p90": 78.16,
+              "ttft_ms_p99": 93.72,
+              "tpot_ms_p50": 30.62,
+              "tpot_ms_p90": 36.67,
+              "tpot_ms_p99": 43.08,
+              "elapsed_seconds_median": 16.1,
+              "sla_met": true
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 54.19,
+              "ttft_ms_p90": 81.26,
+              "ttft_ms_p99": 99.23,
+              "tpot_ms_p50": 40.84,
+              "tpot_ms_p90": 47.97,
+              "tpot_ms_p99": 73.29,
+              "elapsed_seconds_median": 12.8,
+              "sla_met": true
+            }
+          ]
+        },
+        {
+          "precision": "W4A16",
+          "max_valid_qps": 50,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 55.27,
+              "ttft_ms_p90": 99.64,
+              "ttft_ms_p99": 6498.74,
+              "tpot_ms_p50": 22.31,
+              "tpot_ms_p90": 34.92,
+              "tpot_ms_p99": 43.43,
+              "elapsed_seconds_median": 66.4,
+              "sla_met": false
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 62.97,
+              "ttft_ms_p90": 84.16,
+              "ttft_ms_p99": 98.61,
+              "tpot_ms_p50": 35.03,
+              "tpot_ms_p90": 36.27,
+              "tpot_ms_p99": 39.72,
+              "elapsed_seconds_median": 34.8,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 61.87,
+              "ttft_ms_p90": 95.61,
+              "ttft_ms_p99": 113.42,
+              "tpot_ms_p50": 45.62,
+              "tpot_ms_p90": 50.37,
+              "tpot_ms_p99": 56.81,
+              "elapsed_seconds_median": 19.3,
+              "sla_met": true
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 58.8,
+              "ttft_ms_p90": 92.82,
+              "ttft_ms_p99": 117.29,
+              "tpot_ms_p50": 49.13,
+              "tpot_ms_p90": 57.9,
+              "tpot_ms_p99": 74.91,
+              "elapsed_seconds_median": 14.9,
+              "sla_met": true
+            }
+          ]
+        }
+      ]
+    },
+    "quantization_sustained": {
+      "results_by_precision": [
+        {
+          "precision": "BF16",
+          "sustained_throughput_tokens_per_sec": 656.2,
+          "throttle_ratio": 0.861,
+          "throttle_onset_minute": 1.0,
+          "ttft_p99_drift_ms": -5832.1,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 581.4,
+              "tokens_out": 34896,
+              "tokens_in": 0,
+              "requests_completed": 191,
+              "ttft_ms_p50": 41.3,
+              "ttft_ms_p99": 5888.1
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 650.0,
+              "tokens_out": 39002,
+              "tokens_in": 0,
+              "requests_completed": 211,
+              "ttft_ms_p50": 40.0,
+              "ttft_ms_p99": 56.9
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 675.0,
+              "tokens_out": 40500,
+              "tokens_in": 0,
+              "requests_completed": 217,
+              "ttft_ms_p50": 39.9,
+              "ttft_ms_p99": 49.7
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 651.6,
+              "tokens_out": 39092,
+              "tokens_in": 0,
+              "requests_completed": 216,
+              "ttft_ms_p50": 39.7,
+              "ttft_ms_p99": 56.5
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 667.4,
+              "tokens_out": 40053,
+              "tokens_in": 0,
+              "requests_completed": 216,
+              "ttft_ms_p50": 39.9,
+              "ttft_ms_p99": 58.4
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 660.5,
+              "tokens_out": 39637,
+              "tokens_in": 0,
+              "requests_completed": 214,
+              "ttft_ms_p50": 39.9,
+              "ttft_ms_p99": 59.0
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 672.3,
+              "tokens_out": 40332,
+              "tokens_in": 0,
+              "requests_completed": 217,
+              "ttft_ms_p50": 39.6,
+              "ttft_ms_p99": 57.1
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 655.7,
+              "tokens_out": 39336,
+              "tokens_in": 0,
+              "requests_completed": 213,
+              "ttft_ms_p50": 39.9,
+              "ttft_ms_p99": 55.2
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 668.6,
+              "tokens_out": 40119,
+              "tokens_in": 0,
+              "requests_completed": 216,
+              "ttft_ms_p50": 40.1,
+              "ttft_ms_p99": 52.8
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 654.4,
+              "tokens_out": 39267,
+              "tokens_in": 0,
+              "requests_completed": 214,
+              "ttft_ms_p50": 39.8,
+              "ttft_ms_p99": 59.7
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 663.6,
+              "tokens_out": 39823,
+              "tokens_in": 0,
+              "requests_completed": 217,
+              "ttft_ms_p50": 39.9,
+              "ttft_ms_p99": 56.3
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 656.1,
+              "tokens_out": 39344,
+              "tokens_in": 0,
+              "requests_completed": 212,
+              "ttft_ms_p50": 39.9,
+              "ttft_ms_p99": 56.0
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 666.9,
+              "tokens_out": 40024,
+              "tokens_in": 0,
+              "requests_completed": 217,
+              "ttft_ms_p50": 39.9,
+              "ttft_ms_p99": 56.6
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 662.7,
+              "tokens_out": 39767,
+              "tokens_in": 0,
+              "requests_completed": 215,
+              "ttft_ms_p50": 39.9,
+              "ttft_ms_p99": 56.0
+            }
+          ]
+        },
+        {
+          "precision": "W8A16",
+          "sustained_throughput_tokens_per_sec": 953.4,
+          "throttle_ratio": 0.919,
+          "throttle_onset_minute": null,
+          "ttft_p99_drift_ms": -2106.5,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 889.3,
+              "tokens_out": 53371,
+              "tokens_in": 0,
+              "requests_completed": 293,
+              "ttft_ms_p50": 34.3,
+              "ttft_ms_p99": 2159.5
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 957.0,
+              "tokens_out": 57429,
+              "tokens_in": 0,
+              "requests_completed": 313,
+              "ttft_ms_p50": 33.7,
+              "ttft_ms_p99": 52.1
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 962.6,
+              "tokens_out": 57761,
+              "tokens_in": 0,
+              "requests_completed": 316,
+              "ttft_ms_p50": 33.6,
+              "ttft_ms_p99": 51.4
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 956.0,
+              "tokens_out": 57366,
+              "tokens_in": 0,
+              "requests_completed": 316,
+              "ttft_ms_p50": 33.7,
+              "ttft_ms_p99": 53.3
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 963.5,
+              "tokens_out": 57774,
+              "tokens_in": 0,
+              "requests_completed": 314,
+              "ttft_ms_p50": 33.5,
+              "ttft_ms_p99": 50.5
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 950.6,
+              "tokens_out": 57041,
+              "tokens_in": 0,
+              "requests_completed": 311,
+              "ttft_ms_p50": 33.6,
+              "ttft_ms_p99": 51.8
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 957.8,
+              "tokens_out": 57474,
+              "tokens_in": 0,
+              "requests_completed": 313,
+              "ttft_ms_p50": 33.9,
+              "ttft_ms_p99": 53.5
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 962.7,
+              "tokens_out": 57767,
+              "tokens_in": 0,
+              "requests_completed": 315,
+              "ttft_ms_p50": 33.6,
+              "ttft_ms_p99": 52.2
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 951.5,
+              "tokens_out": 57098,
+              "tokens_in": 0,
+              "requests_completed": 311,
+              "ttft_ms_p50": 33.7,
+              "ttft_ms_p99": 53.5
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 967.2,
+              "tokens_out": 58021,
+              "tokens_in": 0,
+              "requests_completed": 316,
+              "ttft_ms_p50": 33.6,
+              "ttft_ms_p99": 52.7
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 956.2,
+              "tokens_out": 57378,
+              "tokens_in": 0,
+              "requests_completed": 317,
+              "ttft_ms_p50": 33.6,
+              "ttft_ms_p99": 52.1
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 953.3,
+              "tokens_out": 57190,
+              "tokens_in": 0,
+              "requests_completed": 310,
+              "ttft_ms_p50": 33.5,
+              "ttft_ms_p99": 52.4
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 965.3,
+              "tokens_out": 57931,
+              "tokens_in": 0,
+              "requests_completed": 316,
+              "ttft_ms_p50": 33.7,
+              "ttft_ms_p99": 53.8
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 954.5,
+              "tokens_out": 57268,
+              "tokens_in": 0,
+              "requests_completed": 312,
+              "ttft_ms_p50": 33.6,
+              "ttft_ms_p99": 53.0
+            }
+          ]
+        },
+        {
+          "precision": "W4A16",
+          "sustained_throughput_tokens_per_sec": 776.2,
+          "throttle_ratio": 0.828,
+          "throttle_onset_minute": 1.0,
+          "ttft_p99_drift_ms": -7501.6,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 656.9,
+              "tokens_out": 39423,
+              "tokens_in": 0,
+              "requests_completed": 221,
+              "ttft_ms_p50": 40.5,
+              "ttft_ms_p99": 7553.3
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 777.5,
+              "tokens_out": 46657,
+              "tokens_in": 0,
+              "requests_completed": 263,
+              "ttft_ms_p50": 35.9,
+              "ttft_ms_p99": 51.1
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 782.5,
+              "tokens_out": 46961,
+              "tokens_in": 0,
+              "requests_completed": 264,
+              "ttft_ms_p50": 35.6,
+              "ttft_ms_p99": 51.3
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 793.8,
+              "tokens_out": 47629,
+              "tokens_in": 0,
+              "requests_completed": 263,
+              "ttft_ms_p50": 35.7,
+              "ttft_ms_p99": 51.6
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 785.2,
+              "tokens_out": 47106,
+              "tokens_in": 0,
+              "requests_completed": 266,
+              "ttft_ms_p50": 35.7,
+              "ttft_ms_p99": 53.1
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 782.6,
+              "tokens_out": 46958,
+              "tokens_in": 0,
+              "requests_completed": 265,
+              "ttft_ms_p50": 35.7,
+              "ttft_ms_p99": 52.1
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 784.8,
+              "tokens_out": 47066,
+              "tokens_in": 0,
+              "requests_completed": 260,
+              "ttft_ms_p50": 35.8,
+              "ttft_ms_p99": 51.7
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 793.7,
+              "tokens_out": 47628,
+              "tokens_in": 0,
+              "requests_completed": 266,
+              "ttft_ms_p50": 35.8,
+              "ttft_ms_p99": 53.2
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 779.5,
+              "tokens_out": 46778,
+              "tokens_in": 0,
+              "requests_completed": 262,
+              "ttft_ms_p50": 35.9,
+              "ttft_ms_p99": 54.0
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 781.4,
+              "tokens_out": 46903,
+              "tokens_in": 0,
+              "requests_completed": 263,
+              "ttft_ms_p50": 35.7,
+              "ttft_ms_p99": 52.2
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 790.8,
+              "tokens_out": 47444,
+              "tokens_in": 0,
+              "requests_completed": 267,
+              "ttft_ms_p50": 35.8,
+              "ttft_ms_p99": 53.1
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 788.0,
+              "tokens_out": 47284,
+              "tokens_in": 0,
+              "requests_completed": 262,
+              "ttft_ms_p50": 35.7,
+              "ttft_ms_p99": 53.3
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 782.9,
+              "tokens_out": 46949,
+              "tokens_in": 0,
+              "requests_completed": 265,
+              "ttft_ms_p50": 35.8,
+              "ttft_ms_p99": 53.8
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 787.0,
+              "tokens_out": 47223,
+              "tokens_in": 0,
+              "requests_completed": 264,
+              "ttft_ms_p50": 35.1,
+              "ttft_ms_p99": 51.7
+            }
+          ]
+        }
+      ]
+    }
+  },
+  "accuracy": null,
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "08:47:27",
+    "run_id": "8eb86278",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T08:44:56.309394+00:00",
+    "benchmark_end_time": "2026-05-07T08:47:27.583870+00:00",
+    "benchmark_elapsed_minutes": 73.6,
+    "model_load_seconds": 54.4,
+    "benchmark_elapsed_minutes_note": "Sum of per-precision benchmark_elapsed_minutes (excludes sleep gaps and orchestrator overhead).",
+    "scenario_dirs": {
+      "bf16/offline": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/offline",
+      "bf16/online": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/online",
+      "bf16/sustained": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/bf16/sustained",
+      "fp8/offline": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/fp8/offline",
+      "fp8/online": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/fp8/online",
+      "fp8/sustained": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/fp8/sustained",
+      "w8a8/offline": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a8/offline",
+      "w8a8/online": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a8/online",
+      "w8a8/sustained": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a8/sustained",
+      "w8a16/offline": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/offline",
+      "w8a16/online": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/online",
+      "w8a16/sustained": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/sustained",
+      "w4a16/offline": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/offline",
+      "w4a16/online": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/online",
+      "w4a16/sustained": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/sustained"
+    },
+    "precision_dirs": {
+      "BF16": "bf16",
+      "FP8": "fp8",
+      "W8A8": "w8a8",
+      "W8A16": "w8a16",
+      "W4A16": "w4a16"
+    },
+    "precision_model_map": {
+      "BF16": {
+        "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+        "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+        "dtype_override": "bfloat16"
+      },
+      "FP8": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
+        "model_revision": "12fd6884d2585dd4d020373e7f39f74507b31866",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "Static per-tensor FP8 (weights + activations). Requires Ampere+ (A100, A800, H20). Skipped automatically on FP16-only hardware."
+      },
+      "W8A8": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+        "model_revision": "e2bfb7d92784ad7d1b606c2f9644d3cefb2ec708",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "INT8 weights + INT8 activations via compressed-tensors. Exercises native int8 tensor cores."
+      },
+      "W8A16": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+        "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "INT8 weights, FP16 activations. Weight-only quantization — reduces memory bandwidth, not compute dtype."
+      },
+      "W4A16": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+        "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+        "engine_kwargs": {
+          "quantization": "gptq"
+        },
+        "_note": "INT4 weights, FP16 activations via GPTQ Marlin kernels. Weight-only quantization — larger memory saving than W8A16."
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/accuracy/accuracy.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/accuracy/accuracy.json
new file mode 100644
index 00000000..e2c86fd4
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.57,
+  "baseline_delta": 0.0,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "W4A16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/offline/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/offline/result.json
new file mode 100644
index 00000000..569537fc
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/offline/result.json
@@ -0,0 +1,178 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T08:41:03.357410+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using GPTQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "auto",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 2210.43,
+          "throughput_tokens_per_sec_per_chip": 2210.43,
+          "elapsed_seconds_median": 15.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 2147.56,
+          "throughput_tokens_per_sec_per_chip": 2147.56,
+          "elapsed_seconds_median": 16.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 2227.61,
+          "throughput_tokens_per_sec_per_chip": 2227.61,
+          "elapsed_seconds_median": 15.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 2158.28,
+          "throughput_tokens_per_sec_per_chip": 2158.28,
+          "elapsed_seconds_median": 16.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "10:03:43",
+    "run_id": "b616229d",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_b616229d",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T09:59:26.530242+00:00",
+    "benchmark_end_time": "2026-05-07T10:03:43.484063+00:00",
+    "benchmark_elapsed_minutes": 4.3,
+    "model_load_seconds": 55.4
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/online/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/online/result.json
new file mode 100644
index 00000000..81812dec
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/online/result.json
@@ -0,0 +1,180 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T08:41:03.357410+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using GPTQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "auto",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 55.27,
+          "ttft_ms_p90": 99.64,
+          "ttft_ms_p99": 6498.74,
+          "tpot_ms_p50": 22.31,
+          "tpot_ms_p90": 34.92,
+          "tpot_ms_p99": 43.43,
+          "elapsed_seconds_median": 66.4,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 62.97,
+          "ttft_ms_p90": 84.16,
+          "ttft_ms_p99": 98.61,
+          "tpot_ms_p50": 35.03,
+          "tpot_ms_p90": 36.27,
+          "tpot_ms_p99": 39.72,
+          "elapsed_seconds_median": 34.8,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 61.87,
+          "ttft_ms_p90": 95.61,
+          "ttft_ms_p99": 113.42,
+          "tpot_ms_p50": 45.62,
+          "tpot_ms_p90": 50.37,
+          "tpot_ms_p99": 56.81,
+          "elapsed_seconds_median": 19.3,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 58.8,
+          "ttft_ms_p90": 92.82,
+          "ttft_ms_p99": 117.29,
+          "tpot_ms_p50": 49.13,
+          "tpot_ms_p90": 57.9,
+          "tpot_ms_p99": 74.91,
+          "elapsed_seconds_median": 14.9,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "10:13:40",
+    "run_id": "b616229d",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_b616229d",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T10:06:55.540921+00:00",
+    "benchmark_end_time": "2026-05-07T10:13:40.174051+00:00",
+    "benchmark_elapsed_minutes": 6.7,
+    "model_load_seconds": 131.5
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/result.json
new file mode 100644
index 00000000..b2cc4ef6
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/result.json
@@ -0,0 +1,395 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T08:41:03.357410+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using GPTQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "auto",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 2210.43,
+          "throughput_tokens_per_sec_per_chip": 2210.43,
+          "elapsed_seconds_median": 15.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 2147.56,
+          "throughput_tokens_per_sec_per_chip": 2147.56,
+          "elapsed_seconds_median": 16.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 2227.61,
+          "throughput_tokens_per_sec_per_chip": 2227.61,
+          "elapsed_seconds_median": 15.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 2158.28,
+          "throughput_tokens_per_sec_per_chip": 2158.28,
+          "elapsed_seconds_median": 16.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 55.27,
+          "ttft_ms_p90": 99.64,
+          "ttft_ms_p99": 6498.74,
+          "tpot_ms_p50": 22.31,
+          "tpot_ms_p90": 34.92,
+          "tpot_ms_p99": 43.43,
+          "elapsed_seconds_median": 66.4,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 62.97,
+          "ttft_ms_p90": 84.16,
+          "ttft_ms_p99": 98.61,
+          "tpot_ms_p50": 35.03,
+          "tpot_ms_p90": 36.27,
+          "tpot_ms_p99": 39.72,
+          "elapsed_seconds_median": 34.8,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 61.87,
+          "ttft_ms_p90": 95.61,
+          "ttft_ms_p99": 113.42,
+          "tpot_ms_p50": 45.62,
+          "tpot_ms_p90": 50.37,
+          "tpot_ms_p99": 56.81,
+          "elapsed_seconds_median": 19.3,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 58.8,
+          "ttft_ms_p90": 92.82,
+          "ttft_ms_p99": 117.29,
+          "tpot_ms_p50": 49.13,
+          "tpot_ms_p90": 57.9,
+          "tpot_ms_p99": 74.91,
+          "elapsed_seconds_median": 14.9,
+          "sla_met": true
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 656.9,
+          "tokens_out": 39423,
+          "tokens_in": 0,
+          "requests_completed": 221,
+          "ttft_ms_p50": 40.5,
+          "ttft_ms_p99": 7553.3
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 777.5,
+          "tokens_out": 46657,
+          "tokens_in": 0,
+          "requests_completed": 263,
+          "ttft_ms_p50": 35.9,
+          "ttft_ms_p99": 51.1
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 782.5,
+          "tokens_out": 46961,
+          "tokens_in": 0,
+          "requests_completed": 264,
+          "ttft_ms_p50": 35.6,
+          "ttft_ms_p99": 51.3
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 793.8,
+          "tokens_out": 47629,
+          "tokens_in": 0,
+          "requests_completed": 263,
+          "ttft_ms_p50": 35.7,
+          "ttft_ms_p99": 51.6
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 785.2,
+          "tokens_out": 47106,
+          "tokens_in": 0,
+          "requests_completed": 266,
+          "ttft_ms_p50": 35.7,
+          "ttft_ms_p99": 53.1
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 782.6,
+          "tokens_out": 46958,
+          "tokens_in": 0,
+          "requests_completed": 265,
+          "ttft_ms_p50": 35.7,
+          "ttft_ms_p99": 52.1
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 784.8,
+          "tokens_out": 47066,
+          "tokens_in": 0,
+          "requests_completed": 260,
+          "ttft_ms_p50": 35.8,
+          "ttft_ms_p99": 51.7
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 793.7,
+          "tokens_out": 47628,
+          "tokens_in": 0,
+          "requests_completed": 266,
+          "ttft_ms_p50": 35.8,
+          "ttft_ms_p99": 53.2
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 779.5,
+          "tokens_out": 46778,
+          "tokens_in": 0,
+          "requests_completed": 262,
+          "ttft_ms_p50": 35.9,
+          "ttft_ms_p99": 54.0
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 781.4,
+          "tokens_out": 46903,
+          "tokens_in": 0,
+          "requests_completed": 263,
+          "ttft_ms_p50": 35.7,
+          "ttft_ms_p99": 52.2
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 790.8,
+          "tokens_out": 47444,
+          "tokens_in": 0,
+          "requests_completed": 267,
+          "ttft_ms_p50": 35.8,
+          "ttft_ms_p99": 53.1
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 788.0,
+          "tokens_out": 47284,
+          "tokens_in": 0,
+          "requests_completed": 262,
+          "ttft_ms_p50": 35.7,
+          "ttft_ms_p99": 53.3
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 782.9,
+          "tokens_out": 46949,
+          "tokens_in": 0,
+          "requests_completed": 265,
+          "ttft_ms_p50": 35.8,
+          "ttft_ms_p99": 53.8
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 787.0,
+          "tokens_out": 47223,
+          "tokens_in": 0,
+          "requests_completed": 264,
+          "ttft_ms_p50": 35.1,
+          "ttft_ms_p99": 51.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 776.2,
+      "throttle_ratio": 0.828,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -7501.6
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.57,
+    "baseline_delta": 0.0,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "W4A16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "10:03:43",
+    "run_id": "b616229d",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_b616229d",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T09:59:26.530242+00:00",
+    "benchmark_end_time": "2026-05-07T10:03:43.484063+00:00",
+    "benchmark_elapsed_minutes": 26.0,
+    "model_load_seconds": 55.4,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/offline",
+      "online": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/online",
+      "sustained": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/sustained/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/sustained/result.json
new file mode 100644
index 00000000..9b214d44
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w4a16/sustained/result.json
@@ -0,0 +1,278 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T08:41:03.357410+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using AWQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "auto",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 656.9,
+          "tokens_out": 39423,
+          "tokens_in": 0,
+          "requests_completed": 221,
+          "ttft_ms_p50": 40.5,
+          "ttft_ms_p99": 7553.3
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 777.5,
+          "tokens_out": 46657,
+          "tokens_in": 0,
+          "requests_completed": 263,
+          "ttft_ms_p50": 35.9,
+          "ttft_ms_p99": 51.1
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 782.5,
+          "tokens_out": 46961,
+          "tokens_in": 0,
+          "requests_completed": 264,
+          "ttft_ms_p50": 35.6,
+          "ttft_ms_p99": 51.3
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 793.8,
+          "tokens_out": 47629,
+          "tokens_in": 0,
+          "requests_completed": 263,
+          "ttft_ms_p50": 35.7,
+          "ttft_ms_p99": 51.6
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 785.2,
+          "tokens_out": 47106,
+          "tokens_in": 0,
+          "requests_completed": 266,
+          "ttft_ms_p50": 35.7,
+          "ttft_ms_p99": 53.1
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 782.6,
+          "tokens_out": 46958,
+          "tokens_in": 0,
+          "requests_completed": 265,
+          "ttft_ms_p50": 35.7,
+          "ttft_ms_p99": 52.1
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 784.8,
+          "tokens_out": 47066,
+          "tokens_in": 0,
+          "requests_completed": 260,
+          "ttft_ms_p50": 35.8,
+          "ttft_ms_p99": 51.7
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 793.7,
+          "tokens_out": 47628,
+          "tokens_in": 0,
+          "requests_completed": 266,
+          "ttft_ms_p50": 35.8,
+          "ttft_ms_p99": 53.2
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 779.5,
+          "tokens_out": 46778,
+          "tokens_in": 0,
+          "requests_completed": 262,
+          "ttft_ms_p50": 35.9,
+          "ttft_ms_p99": 54.0
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 781.4,
+          "tokens_out": 46903,
+          "tokens_in": 0,
+          "requests_completed": 263,
+          "ttft_ms_p50": 35.7,
+          "ttft_ms_p99": 52.2
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 790.8,
+          "tokens_out": 47444,
+          "tokens_in": 0,
+          "requests_completed": 267,
+          "ttft_ms_p50": 35.8,
+          "ttft_ms_p99": 53.1
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 788.0,
+          "tokens_out": 47284,
+          "tokens_in": 0,
+          "requests_completed": 262,
+          "ttft_ms_p50": 35.7,
+          "ttft_ms_p99": 53.3
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 782.9,
+          "tokens_out": 46949,
+          "tokens_in": 0,
+          "requests_completed": 265,
+          "ttft_ms_p50": 35.8,
+          "ttft_ms_p99": 53.8
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 787.0,
+          "tokens_out": 47223,
+          "tokens_in": 0,
+          "requests_completed": 264,
+          "ttft_ms_p50": 35.1,
+          "ttft_ms_p99": 51.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 776.2,
+      "throttle_ratio": 0.828,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -7501.6
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "10:32:02",
+    "run_id": "b616229d",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_b616229d",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T10:17:00.381932+00:00",
+    "benchmark_end_time": "2026-05-07T10:32:02.983113+00:00",
+    "benchmark_elapsed_minutes": 15.0,
+    "model_load_seconds": 136.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/accuracy/accuracy.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/accuracy/accuracy.json
new file mode 100644
index 00000000..60c0d5fb
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.59,
+  "baseline_delta": 0.0,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "W8A16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/offline/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/offline/result.json
new file mode 100644
index 00000000..251c36d4
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/offline/result.json
@@ -0,0 +1,178 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T08:41:03.357410+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 4024.12,
+          "throughput_tokens_per_sec_per_chip": 4024.12,
+          "elapsed_seconds_median": 8.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 4018.97,
+          "throughput_tokens_per_sec_per_chip": 4018.97,
+          "elapsed_seconds_median": 8.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 4015.67,
+          "throughput_tokens_per_sec_per_chip": 4015.67,
+          "elapsed_seconds_median": 8.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 4012.73,
+          "throughput_tokens_per_sec_per_chip": 4012.73,
+          "elapsed_seconds_median": 8.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "09:28:57",
+    "run_id": "94455a09",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_94455a09",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T09:26:32.275114+00:00",
+    "benchmark_end_time": "2026-05-07T09:28:57.186231+00:00",
+    "benchmark_elapsed_minutes": 2.4,
+    "model_load_seconds": 42.0
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/online/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/online/result.json
new file mode 100644
index 00000000..a3ca2f9c
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/online/result.json
@@ -0,0 +1,180 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T08:41:03.357410+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 34.62,
+          "ttft_ms_p90": 60.74,
+          "ttft_ms_p99": 1546.3,
+          "tpot_ms_p50": 8.35,
+          "tpot_ms_p90": 9.42,
+          "tpot_ms_p99": 11.52,
+          "elapsed_seconds_median": 64.6,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 36.02,
+          "ttft_ms_p90": 52.89,
+          "ttft_ms_p99": 60.18,
+          "tpot_ms_p50": 11.06,
+          "tpot_ms_p90": 12.37,
+          "tpot_ms_p99": 13.91,
+          "elapsed_seconds_median": 31.2,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 53.44,
+          "ttft_ms_p90": 78.16,
+          "ttft_ms_p99": 93.72,
+          "tpot_ms_p50": 30.62,
+          "tpot_ms_p90": 36.67,
+          "tpot_ms_p99": 43.08,
+          "elapsed_seconds_median": 16.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 54.19,
+          "ttft_ms_p90": 81.26,
+          "ttft_ms_p99": 99.23,
+          "tpot_ms_p50": 40.84,
+          "tpot_ms_p90": 47.97,
+          "tpot_ms_p99": 73.29,
+          "elapsed_seconds_median": 12.8,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "09:37:01",
+    "run_id": "94455a09",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_94455a09",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T09:30:48.092008+00:00",
+    "benchmark_end_time": "2026-05-07T09:37:01.606268+00:00",
+    "benchmark_elapsed_minutes": 6.2,
+    "model_load_seconds": 84.4
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/result.json
new file mode 100644
index 00000000..6d974465
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/result.json
@@ -0,0 +1,395 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T08:41:03.357410+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 4024.12,
+          "throughput_tokens_per_sec_per_chip": 4024.12,
+          "elapsed_seconds_median": 8.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 4018.97,
+          "throughput_tokens_per_sec_per_chip": 4018.97,
+          "elapsed_seconds_median": 8.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 4015.67,
+          "throughput_tokens_per_sec_per_chip": 4015.67,
+          "elapsed_seconds_median": 8.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 4012.73,
+          "throughput_tokens_per_sec_per_chip": 4012.73,
+          "elapsed_seconds_median": 8.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 34.62,
+          "ttft_ms_p90": 60.74,
+          "ttft_ms_p99": 1546.3,
+          "tpot_ms_p50": 8.35,
+          "tpot_ms_p90": 9.42,
+          "tpot_ms_p99": 11.52,
+          "elapsed_seconds_median": 64.6,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 36.02,
+          "ttft_ms_p90": 52.89,
+          "ttft_ms_p99": 60.18,
+          "tpot_ms_p50": 11.06,
+          "tpot_ms_p90": 12.37,
+          "tpot_ms_p99": 13.91,
+          "elapsed_seconds_median": 31.2,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 53.44,
+          "ttft_ms_p90": 78.16,
+          "ttft_ms_p99": 93.72,
+          "tpot_ms_p50": 30.62,
+          "tpot_ms_p90": 36.67,
+          "tpot_ms_p99": 43.08,
+          "elapsed_seconds_median": 16.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 54.19,
+          "ttft_ms_p90": 81.26,
+          "ttft_ms_p99": 99.23,
+          "tpot_ms_p50": 40.84,
+          "tpot_ms_p90": 47.97,
+          "tpot_ms_p99": 73.29,
+          "elapsed_seconds_median": 12.8,
+          "sla_met": true
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 889.3,
+          "tokens_out": 53371,
+          "tokens_in": 0,
+          "requests_completed": 293,
+          "ttft_ms_p50": 34.3,
+          "ttft_ms_p99": 2159.5
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 957.0,
+          "tokens_out": 57429,
+          "tokens_in": 0,
+          "requests_completed": 313,
+          "ttft_ms_p50": 33.7,
+          "ttft_ms_p99": 52.1
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 962.6,
+          "tokens_out": 57761,
+          "tokens_in": 0,
+          "requests_completed": 316,
+          "ttft_ms_p50": 33.6,
+          "ttft_ms_p99": 51.4
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 956.0,
+          "tokens_out": 57366,
+          "tokens_in": 0,
+          "requests_completed": 316,
+          "ttft_ms_p50": 33.7,
+          "ttft_ms_p99": 53.3
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 963.5,
+          "tokens_out": 57774,
+          "tokens_in": 0,
+          "requests_completed": 314,
+          "ttft_ms_p50": 33.5,
+          "ttft_ms_p99": 50.5
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 950.6,
+          "tokens_out": 57041,
+          "tokens_in": 0,
+          "requests_completed": 311,
+          "ttft_ms_p50": 33.6,
+          "ttft_ms_p99": 51.8
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 957.8,
+          "tokens_out": 57474,
+          "tokens_in": 0,
+          "requests_completed": 313,
+          "ttft_ms_p50": 33.9,
+          "ttft_ms_p99": 53.5
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 962.7,
+          "tokens_out": 57767,
+          "tokens_in": 0,
+          "requests_completed": 315,
+          "ttft_ms_p50": 33.6,
+          "ttft_ms_p99": 52.2
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 951.5,
+          "tokens_out": 57098,
+          "tokens_in": 0,
+          "requests_completed": 311,
+          "ttft_ms_p50": 33.7,
+          "ttft_ms_p99": 53.5
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 967.2,
+          "tokens_out": 58021,
+          "tokens_in": 0,
+          "requests_completed": 316,
+          "ttft_ms_p50": 33.6,
+          "ttft_ms_p99": 52.7
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 956.2,
+          "tokens_out": 57378,
+          "tokens_in": 0,
+          "requests_completed": 317,
+          "ttft_ms_p50": 33.6,
+          "ttft_ms_p99": 52.1
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 953.3,
+          "tokens_out": 57190,
+          "tokens_in": 0,
+          "requests_completed": 310,
+          "ttft_ms_p50": 33.5,
+          "ttft_ms_p99": 52.4
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 965.3,
+          "tokens_out": 57931,
+          "tokens_in": 0,
+          "requests_completed": 316,
+          "ttft_ms_p50": 33.7,
+          "ttft_ms_p99": 53.8
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 954.5,
+          "tokens_out": 57268,
+          "tokens_in": 0,
+          "requests_completed": 312,
+          "ttft_ms_p50": 33.6,
+          "ttft_ms_p99": 53.0
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 953.4,
+      "throttle_ratio": 0.919,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -2106.5
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.59,
+    "baseline_delta": 0.0,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "W8A16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "09:28:57",
+    "run_id": "94455a09",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_94455a09",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T09:26:32.275114+00:00",
+    "benchmark_end_time": "2026-05-07T09:28:57.186231+00:00",
+    "benchmark_elapsed_minutes": 23.7,
+    "model_load_seconds": 42.0,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/offline",
+      "online": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/online",
+      "sustained": "results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/sustained/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/sustained/result.json
new file mode 100644
index 00000000..78ca4db7
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_8eb86278/w8a16/sustained/result.json
@@ -0,0 +1,278 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T08:41:03.357410+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 889.3,
+          "tokens_out": 53371,
+          "tokens_in": 0,
+          "requests_completed": 293,
+          "ttft_ms_p50": 34.3,
+          "ttft_ms_p99": 2159.5
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 957.0,
+          "tokens_out": 57429,
+          "tokens_in": 0,
+          "requests_completed": 313,
+          "ttft_ms_p50": 33.7,
+          "ttft_ms_p99": 52.1
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 962.6,
+          "tokens_out": 57761,
+          "tokens_in": 0,
+          "requests_completed": 316,
+          "ttft_ms_p50": 33.6,
+          "ttft_ms_p99": 51.4
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 956.0,
+          "tokens_out": 57366,
+          "tokens_in": 0,
+          "requests_completed": 316,
+          "ttft_ms_p50": 33.7,
+          "ttft_ms_p99": 53.3
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 963.5,
+          "tokens_out": 57774,
+          "tokens_in": 0,
+          "requests_completed": 314,
+          "ttft_ms_p50": 33.5,
+          "ttft_ms_p99": 50.5
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 950.6,
+          "tokens_out": 57041,
+          "tokens_in": 0,
+          "requests_completed": 311,
+          "ttft_ms_p50": 33.6,
+          "ttft_ms_p99": 51.8
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 957.8,
+          "tokens_out": 57474,
+          "tokens_in": 0,
+          "requests_completed": 313,
+          "ttft_ms_p50": 33.9,
+          "ttft_ms_p99": 53.5
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 962.7,
+          "tokens_out": 57767,
+          "tokens_in": 0,
+          "requests_completed": 315,
+          "ttft_ms_p50": 33.6,
+          "ttft_ms_p99": 52.2
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 951.5,
+          "tokens_out": 57098,
+          "tokens_in": 0,
+          "requests_completed": 311,
+          "ttft_ms_p50": 33.7,
+          "ttft_ms_p99": 53.5
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 967.2,
+          "tokens_out": 58021,
+          "tokens_in": 0,
+          "requests_completed": 316,
+          "ttft_ms_p50": 33.6,
+          "ttft_ms_p99": 52.7
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 956.2,
+          "tokens_out": 57378,
+          "tokens_in": 0,
+          "requests_completed": 317,
+          "ttft_ms_p50": 33.6,
+          "ttft_ms_p99": 52.1
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 953.3,
+          "tokens_out": 57190,
+          "tokens_in": 0,
+          "requests_completed": 310,
+          "ttft_ms_p50": 33.5,
+          "ttft_ms_p99": 52.4
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 965.3,
+          "tokens_out": 57931,
+          "tokens_in": 0,
+          "requests_completed": 316,
+          "ttft_ms_p50": 33.7,
+          "ttft_ms_p99": 53.8
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 954.5,
+          "tokens_out": 57268,
+          "tokens_in": 0,
+          "requests_completed": 312,
+          "ttft_ms_p50": 33.6,
+          "ttft_ms_p99": 53.0
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 953.4,
+      "throttle_ratio": 0.919,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -2106.5
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "09:54:07",
+    "run_id": "94455a09",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_C_nvidia_sglang_c43a8309_94455a09",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T09:39:03.943887+00:00",
+    "benchmark_end_time": "2026-05-07T09:54:07.201192+00:00",
+    "benchmark_elapsed_minutes": 15.1,
+    "model_load_seconds": 97.8
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/accuracy/accuracy.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/accuracy/accuracy.json
new file mode 100644
index 00000000..37f4d824
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.55,
+  "baseline_delta": -0.01,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/env_info.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/env_info.json
new file mode 100644
index 00000000..da48eb3c
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/env_info.json
@@ -0,0 +1,53 @@
+{
+  "collected_at": "2026-05-07T10:52:35.716348+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA A800-SXM4-80GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 80.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "AMD EPYC 7763 64-Core Processor",
+    "physical_cores": 128,
+    "logical_cores": 255,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.6,
+  "pcie_generation": "PCIe Gen 4",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_2",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_3",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.10.20",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 12.8",
+  "pytorch_version": "2.9.1+cu128"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/interactive/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/interactive/result.json
new file mode 100644
index 00000000..85d7c9c6
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/interactive/result.json
@@ -0,0 +1,136 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T10:52:35.716348+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 2930.57,
+      "ttft_ms_p90": 3048.65,
+      "ttft_ms_p99": 3114.45,
+      "tpot_ms_p50": 13.29,
+      "tpot_ms_p90": 13.34,
+      "tpot_ms_p99": 13.37,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 593.1
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "11:37:20",
+    "run_id": "1992bcc0",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T11:17:34.002807+00:00",
+    "benchmark_end_time": "2026-05-07T11:37:20.392956+00:00",
+    "benchmark_elapsed_minutes": 19.8,
+    "model_load_seconds": 122.5
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/offline/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/offline/result.json
new file mode 100644
index 00000000..493f3ee0
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/offline/result.json
@@ -0,0 +1,154 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T10:52:35.716348+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 69.83,
+          "throughput_tokens_per_sec_per_chip": 69.83,
+          "elapsed_seconds_median": 184.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 69.8,
+          "throughput_tokens_per_sec_per_chip": 69.8,
+          "elapsed_seconds_median": 184.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "11:14:30",
+    "run_id": "1992bcc0",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T10:56:04.137696+00:00",
+    "benchmark_end_time": "2026-05-07T11:14:30.254228+00:00",
+    "benchmark_elapsed_minutes": 18.4,
+    "model_load_seconds": 48.4
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/online/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/online/result.json
new file mode 100644
index 00000000..21481fa6
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/online/result.json
@@ -0,0 +1,168 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T10:52:35.716348+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 5000,
+      "max_valid_qps": 0.0,
+      "results_by_qps": [
+        {
+          "target_qps": 0.5,
+          "achieved_qps": 0.5,
+          "ttft_ms_p50": 81189.61,
+          "ttft_ms_p90": 150755.95,
+          "ttft_ms_p99": 166489.39,
+          "tpot_ms_p50": 128.63,
+          "tpot_ms_p90": 211.13,
+          "tpot_ms_p99": 227.02,
+          "elapsed_seconds_median": 360.8,
+          "sla_met": false
+        },
+        {
+          "target_qps": 1,
+          "achieved_qps": 1.0,
+          "ttft_ms_p50": 116802.47,
+          "ttft_ms_p90": 210291.74,
+          "ttft_ms_p99": 238549.48,
+          "tpot_ms_p50": 128.73,
+          "tpot_ms_p90": 211.59,
+          "tpot_ms_p99": 227.45,
+          "elapsed_seconds_median": 359.7,
+          "sla_met": false
+        },
+        {
+          "target_qps": 2,
+          "achieved_qps": 2.0,
+          "ttft_ms_p50": 151139.12,
+          "ttft_ms_p90": 267788.55,
+          "ttft_ms_p99": 299162.46,
+          "tpot_ms_p50": 128.7,
+          "tpot_ms_p90": 211.15,
+          "tpot_ms_p99": 226.99,
+          "elapsed_seconds_median": 356.5,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "12:48:18",
+    "run_id": "1992bcc0",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T12:12:24.408117+00:00",
+    "benchmark_end_time": "2026-05-07T12:48:18.444325+00:00",
+    "benchmark_elapsed_minutes": 35.9,
+    "model_load_seconds": 62.8
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/result.json
new file mode 100644
index 00000000..19795a21
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/result.json
@@ -0,0 +1,551 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T10:52:35.716348+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "interactive",
+      "sustained",
+      "online",
+      "speculative"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 2,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 69.83,
+          "throughput_tokens_per_sec_per_chip": 69.83,
+          "elapsed_seconds_median": 184.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 69.8,
+          "throughput_tokens_per_sec_per_chip": 69.8,
+          "elapsed_seconds_median": 184.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 2930.57,
+      "ttft_ms_p90": 3048.65,
+      "ttft_ms_p99": 3114.45,
+      "tpot_ms_p50": 13.29,
+      "tpot_ms_p90": 13.34,
+      "tpot_ms_p99": 13.37,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 593.1
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 30.0,
+          "tokens_out": 1800,
+          "tokens_in": 0,
+          "requests_completed": 8,
+          "ttft_ms_p50": 18025.2,
+          "ttft_ms_p99": 27610.2
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13000.0,
+          "ttft_ms_p99": 23096.7
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12720.3,
+          "ttft_ms_p99": 22496.6
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 89.5,
+          "tokens_out": 5400,
+          "tokens_in": 0,
+          "requests_completed": 24,
+          "ttft_ms_p50": 12766.5,
+          "ttft_ms_p99": 22799.3
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.3,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13037.1,
+          "ttft_ms_p99": 23097.1
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12530.2,
+          "ttft_ms_p99": 22328.5
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12713.3,
+          "ttft_ms_p99": 22593.6
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12988.3,
+          "ttft_ms_p99": 23269.9
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12533.6,
+          "ttft_ms_p99": 22729.7
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12410.6,
+          "ttft_ms_p99": 22526.8
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12997.3,
+          "ttft_ms_p99": 23090.1
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12444.4,
+          "ttft_ms_p99": 22530.8
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12602.2,
+          "ttft_ms_p99": 22621.0
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12976.9,
+          "ttft_ms_p99": 23039.4
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12667.5,
+          "ttft_ms_p99": 22389.1
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12697.4,
+          "ttft_ms_p99": 22485.3
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12990.7,
+          "ttft_ms_p99": 23079.3
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12545.7,
+          "ttft_ms_p99": 22560.3
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12772.7,
+          "ttft_ms_p99": 22571.4
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12985.6,
+          "ttft_ms_p99": 23236.6
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12553.3,
+          "ttft_ms_p99": 22583.1
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12485.0,
+          "ttft_ms_p99": 22745.9
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12820.4,
+          "ttft_ms_p99": 23010.5
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12564.8,
+          "ttft_ms_p99": 22787.1
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 89.5,
+          "tokens_out": 5400,
+          "tokens_in": 0,
+          "requests_completed": 24,
+          "ttft_ms_p50": 12760.2,
+          "ttft_ms_p99": 23006.6
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.1,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13106.9,
+          "ttft_ms_p99": 23269.8
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.3,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12823.8,
+          "ttft_ms_p99": 22659.3
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12859.9,
+          "ttft_ms_p99": 22780.9
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13104.1,
+          "ttft_ms_p99": 23273.1
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 62.1,
+      "throttle_ratio": 0.67,
+      "throttle_onset_minute": 2.0,
+      "ttft_p99_drift_ms": 176.4
+    },
+    "online": {
+      "sla_ttft_ms": 5000,
+      "max_valid_qps": 0.0,
+      "results_by_qps": [
+        {
+          "target_qps": 0.5,
+          "achieved_qps": 0.5,
+          "ttft_ms_p50": 81189.61,
+          "ttft_ms_p90": 150755.95,
+          "ttft_ms_p99": 166489.39,
+          "tpot_ms_p50": 128.63,
+          "tpot_ms_p90": 211.13,
+          "tpot_ms_p99": 227.02,
+          "elapsed_seconds_median": 360.8,
+          "sla_met": false
+        },
+        {
+          "target_qps": 1,
+          "achieved_qps": 1.0,
+          "ttft_ms_p50": 116802.47,
+          "ttft_ms_p90": 210291.74,
+          "ttft_ms_p99": 238549.48,
+          "tpot_ms_p50": 128.73,
+          "tpot_ms_p90": 211.59,
+          "tpot_ms_p99": 227.45,
+          "elapsed_seconds_median": 359.7,
+          "sla_met": false
+        },
+        {
+          "target_qps": 2,
+          "achieved_qps": 2.0,
+          "ttft_ms_p50": 151139.12,
+          "ttft_ms_p90": 267788.55,
+          "ttft_ms_p99": 299162.46,
+          "tpot_ms_p50": 128.7,
+          "tpot_ms_p90": 211.15,
+          "tpot_ms_p99": 226.99,
+          "elapsed_seconds_median": 356.5,
+          "sla_met": false
+        }
+      ]
+    },
+    "speculative": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 45.11,
+          "throughput_tokens_per_sec_per_chip": 45.11,
+          "elapsed_seconds_median": 284.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 45.11,
+          "throughput_tokens_per_sec_per_chip": 45.11,
+          "elapsed_seconds_median": 284.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.55,
+    "baseline_delta": -0.01,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "11:14:30",
+    "run_id": "1992bcc0",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T10:56:04.137696+00:00",
+    "benchmark_end_time": "2026-05-07T11:14:30.254228+00:00",
+    "benchmark_elapsed_minutes": 133.5,
+    "model_load_seconds": 48.4,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'interactive', 'sustained', 'online', 'speculative'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/offline",
+      "interactive": "results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/interactive",
+      "sustained": "results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/sustained",
+      "online": "results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/online",
+      "speculative": "results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/speculative"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/speculative/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/speculative/result.json
new file mode 100644
index 00000000..d6108caf
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/speculative/result.json
@@ -0,0 +1,154 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T10:52:35.716348+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "speculative",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "speculative": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 45.11,
+          "throughput_tokens_per_sec_per_chip": 45.11,
+          "elapsed_seconds_median": 284.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 45.11,
+          "throughput_tokens_per_sec_per_chip": 45.11,
+          "elapsed_seconds_median": 284.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "13:20:08",
+    "run_id": "1992bcc0",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T12:51:36.084339+00:00",
+    "benchmark_end_time": "2026-05-07T13:20:08.289202+00:00",
+    "benchmark_elapsed_minutes": 28.5,
+    "model_load_seconds": 136.3
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/sustained/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/sustained/result.json
new file mode 100644
index 00000000..690f3a3b
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0/sustained/result.json
@@ -0,0 +1,428 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T10:52:35.716348+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tPXB\tNODE\t64-127,192-254\t1\t\tN/A\nNIC0\tSYS\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tPXB\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tNODE\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 30.0,
+          "tokens_out": 1800,
+          "tokens_in": 0,
+          "requests_completed": 8,
+          "ttft_ms_p50": 18025.2,
+          "ttft_ms_p99": 27610.2
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13000.0,
+          "ttft_ms_p99": 23096.7
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12720.3,
+          "ttft_ms_p99": 22496.6
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 89.5,
+          "tokens_out": 5400,
+          "tokens_in": 0,
+          "requests_completed": 24,
+          "ttft_ms_p50": 12766.5,
+          "ttft_ms_p99": 22799.3
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.3,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13037.1,
+          "ttft_ms_p99": 23097.1
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12530.2,
+          "ttft_ms_p99": 22328.5
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12713.3,
+          "ttft_ms_p99": 22593.6
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12988.3,
+          "ttft_ms_p99": 23269.9
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12533.6,
+          "ttft_ms_p99": 22729.7
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12410.6,
+          "ttft_ms_p99": 22526.8
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12997.3,
+          "ttft_ms_p99": 23090.1
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12444.4,
+          "ttft_ms_p99": 22530.8
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12602.2,
+          "ttft_ms_p99": 22621.0
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12976.9,
+          "ttft_ms_p99": 23039.4
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12667.5,
+          "ttft_ms_p99": 22389.1
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12697.4,
+          "ttft_ms_p99": 22485.3
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12990.7,
+          "ttft_ms_p99": 23079.3
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12545.7,
+          "ttft_ms_p99": 22560.3
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12772.7,
+          "ttft_ms_p99": 22571.4
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12985.6,
+          "ttft_ms_p99": 23236.6
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12553.3,
+          "ttft_ms_p99": 22583.1
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12485.0,
+          "ttft_ms_p99": 22745.9
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12820.4,
+          "ttft_ms_p99": 23010.5
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12564.8,
+          "ttft_ms_p99": 22787.1
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 89.5,
+          "tokens_out": 5400,
+          "tokens_in": 0,
+          "requests_completed": 24,
+          "ttft_ms_p50": 12760.2,
+          "ttft_ms_p99": 23006.6
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.1,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13106.9,
+          "ttft_ms_p99": 23269.8
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.3,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12823.8,
+          "ttft_ms_p99": 22659.3
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 12859.9,
+          "ttft_ms_p99": 22780.9
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 60.0,
+          "tokens_out": 3600,
+          "tokens_in": 0,
+          "requests_completed": 16,
+          "ttft_ms_p50": 13104.1,
+          "ttft_ms_p99": 23273.1
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 62.1,
+      "throttle_ratio": 0.67,
+      "throttle_onset_minute": 2.0,
+      "ttft_p99_drift_ms": 176.4
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "12:10:57",
+    "run_id": "1992bcc0",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_D_nvidia_sglang_c43a8309_1992bcc0",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T11:40:03.456694+00:00",
+    "benchmark_end_time": "2026-05-07T12:10:57.486926+00:00",
+    "benchmark_elapsed_minutes": 30.9,
+    "model_load_seconds": 106.8
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/accuracy/accuracy.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/accuracy/accuracy.json
new file mode 100644
index 00000000..c35ff064
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.41,
+  "baseline_delta": 0.03,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/env_info.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/env_info.json
new file mode 100644
index 00000000..6f79d2a5
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/env_info.json
@@ -0,0 +1,53 @@
+{
+  "collected_at": "2026-05-08T06:01:51.625025+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA A800-SXM4-80GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 80.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tNODE\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tNODE\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "AMD EPYC 7763 64-Core Processor",
+    "physical_cores": 128,
+    "logical_cores": 255,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.6,
+  "pcie_generation": "PCIe Gen 4",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_2",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_3",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.10.20",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 12.8",
+  "pytorch_version": "2.9.1+cu128"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/interactive/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/interactive/result.json
new file mode 100644
index 00000000..d67c576f
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/interactive/result.json
@@ -0,0 +1,136 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-08T06:01:51.625025+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tNODE\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tNODE\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 16.88,
+      "ttft_ms_p90": 17.77,
+      "ttft_ms_p99": 19.3,
+      "tpot_ms_p50": 1.75,
+      "tpot_ms_p90": 1.77,
+      "tpot_ms_p99": 1.84,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 52.7
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-08",
+    "time": "06:12:29",
+    "run_id": "088cfa14",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-08T06:09:51.154286+00:00",
+    "benchmark_end_time": "2026-05-08T06:12:29.847870+00:00",
+    "benchmark_elapsed_minutes": 2.6,
+    "model_load_seconds": 32.8
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/offline/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/offline/result.json
new file mode 100644
index 00000000..62b6ca4e
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/offline/result.json
@@ -0,0 +1,166 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-08T06:01:51.625025+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tNODE\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tNODE\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 36706.23,
+          "throughput_tokens_per_sec_per_chip": 36706.23,
+          "elapsed_seconds_median": 1.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 36713.29,
+          "throughput_tokens_per_sec_per_chip": 36713.29,
+          "elapsed_seconds_median": 1.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 36252.6,
+          "throughput_tokens_per_sec_per_chip": 36252.6,
+          "elapsed_seconds_median": 1.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-08",
+    "time": "06:05:58",
+    "run_id": "088cfa14",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-08T06:05:43.054956+00:00",
+    "benchmark_end_time": "2026-05-08T06:05:58.915362+00:00",
+    "benchmark_elapsed_minutes": 0.3,
+    "model_load_seconds": 40.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/online/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/online/result.json
new file mode 100644
index 00000000..f25540d1
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/online/result.json
@@ -0,0 +1,156 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-08T06:01:51.625025+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tNODE\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tNODE\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 40,
+      "results_by_qps": [
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 19.18,
+          "ttft_ms_p90": 31.91,
+          "ttft_ms_p99": 1454.8,
+          "tpot_ms_p50": 2.42,
+          "tpot_ms_p90": 2.82,
+          "tpot_ms_p99": 3.64,
+          "elapsed_seconds_median": 31.9,
+          "sla_met": false
+        },
+        {
+          "target_qps": 40,
+          "achieved_qps": 40.0,
+          "ttft_ms_p50": 28.84,
+          "ttft_ms_p90": 38.18,
+          "ttft_ms_p99": 45.64,
+          "tpot_ms_p50": 4.62,
+          "tpot_ms_p90": 5.66,
+          "tpot_ms_p99": 10.56,
+          "elapsed_seconds_median": 7.9,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-08",
+    "time": "06:08:54",
+    "run_id": "088cfa14",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-08T06:06:56.193536+00:00",
+    "benchmark_end_time": "2026-05-08T06:08:54.919553+00:00",
+    "benchmark_elapsed_minutes": 2.0,
+    "model_load_seconds": 32.8
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/result.json
new file mode 100644
index 00000000..45b66d4e
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/result.json
@@ -0,0 +1,371 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-08T06:01:51.625025+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tNODE\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tNODE\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "interactive",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 36706.23,
+          "throughput_tokens_per_sec_per_chip": 36706.23,
+          "elapsed_seconds_median": 1.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 36713.29,
+          "throughput_tokens_per_sec_per_chip": 36713.29,
+          "elapsed_seconds_median": 1.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 36252.6,
+          "throughput_tokens_per_sec_per_chip": 36252.6,
+          "elapsed_seconds_median": 1.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 40,
+      "results_by_qps": [
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 19.18,
+          "ttft_ms_p90": 31.91,
+          "ttft_ms_p99": 1454.8,
+          "tpot_ms_p50": 2.42,
+          "tpot_ms_p90": 2.82,
+          "tpot_ms_p99": 3.64,
+          "elapsed_seconds_median": 31.9,
+          "sla_met": false
+        },
+        {
+          "target_qps": 40,
+          "achieved_qps": 40.0,
+          "ttft_ms_p50": 28.84,
+          "ttft_ms_p90": 38.18,
+          "ttft_ms_p99": 45.64,
+          "tpot_ms_p50": 4.62,
+          "tpot_ms_p90": 5.66,
+          "tpot_ms_p99": 10.56,
+          "elapsed_seconds_median": 7.9,
+          "sla_met": true
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 16.88,
+      "ttft_ms_p90": 17.77,
+      "ttft_ms_p99": 19.3,
+      "tpot_ms_p50": 1.75,
+      "tpot_ms_p90": 1.77,
+      "tpot_ms_p99": 1.84,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 52.7
+    },
+    "sustained": {
+      "sustained_concurrency": 32,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6876.7,
+          "tokens_out": 412634,
+          "tokens_in": 0,
+          "requests_completed": 2224,
+          "ttft_ms_p50": 24.7,
+          "ttft_ms_p99": 3802.2
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7317.5,
+          "tokens_out": 439021,
+          "tokens_in": 0,
+          "requests_completed": 2366,
+          "ttft_ms_p50": 21.2,
+          "ttft_ms_p99": 43.5
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7381.5,
+          "tokens_out": 443011,
+          "tokens_in": 0,
+          "requests_completed": 2380,
+          "ttft_ms_p50": 21.1,
+          "ttft_ms_p99": 44.0
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7335.8,
+          "tokens_out": 440208,
+          "tokens_in": 0,
+          "requests_completed": 2361,
+          "ttft_ms_p50": 21.5,
+          "ttft_ms_p99": 44.9
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7359.0,
+          "tokens_out": 441625,
+          "tokens_in": 0,
+          "requests_completed": 2373,
+          "ttft_ms_p50": 21.4,
+          "ttft_ms_p99": 44.6
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7434.2,
+          "tokens_out": 445978,
+          "tokens_in": 0,
+          "requests_completed": 2394,
+          "ttft_ms_p50": 22.9,
+          "ttft_ms_p99": 45.2
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7321.9,
+          "tokens_out": 439358,
+          "tokens_in": 0,
+          "requests_completed": 2367,
+          "ttft_ms_p50": 21.5,
+          "ttft_ms_p99": 45.8
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7325.9,
+          "tokens_out": 439445,
+          "tokens_in": 0,
+          "requests_completed": 2360,
+          "ttft_ms_p50": 21.6,
+          "ttft_ms_p99": 45.0
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7424.4,
+          "tokens_out": 445514,
+          "tokens_in": 0,
+          "requests_completed": 2390,
+          "ttft_ms_p50": 21.4,
+          "ttft_ms_p99": 45.8
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7356.4,
+          "tokens_out": 441402,
+          "tokens_in": 0,
+          "requests_completed": 2367,
+          "ttft_ms_p50": 21.5,
+          "ttft_ms_p99": 44.9
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7302.7,
+          "tokens_out": 438070,
+          "tokens_in": 0,
+          "requests_completed": 2353,
+          "ttft_ms_p50": 21.0,
+          "ttft_ms_p99": 44.8
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7383.8,
+          "tokens_out": 443102,
+          "tokens_in": 0,
+          "requests_completed": 2384,
+          "ttft_ms_p50": 22.1,
+          "ttft_ms_p99": 45.4
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7300.9,
+          "tokens_out": 438169,
+          "tokens_in": 0,
+          "requests_completed": 2355,
+          "ttft_ms_p50": 21.3,
+          "ttft_ms_p99": 45.5
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7405.6,
+          "tokens_out": 444160,
+          "tokens_in": 0,
+          "requests_completed": 2392,
+          "ttft_ms_p50": 26.9,
+          "ttft_ms_p99": 46.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 7323.3,
+      "throttle_ratio": 0.925,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -3755.5
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.41,
+    "baseline_delta": 0.03,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-08",
+    "time": "06:05:58",
+    "run_id": "088cfa14",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-08T06:05:43.054956+00:00",
+    "benchmark_end_time": "2026-05-08T06:05:58.915362+00:00",
+    "benchmark_elapsed_minutes": 19.9,
+    "model_load_seconds": 40.2,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'interactive', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/offline",
+      "online": "results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/online",
+      "interactive": "results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/interactive",
+      "sustained": "results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/sustained/result.json b/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/sustained/result.json
new file mode 100644
index 00000000..87c9ef54
--- /dev/null
+++ b/results/community/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14/sustained/result.json
@@ -0,0 +1,278 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA A800-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-08T06:01:51.625025+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A800-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tNODE\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tNODE\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tNODE\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7763 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 32,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6876.7,
+          "tokens_out": 412634,
+          "tokens_in": 0,
+          "requests_completed": 2224,
+          "ttft_ms_p50": 24.7,
+          "ttft_ms_p99": 3802.2
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7317.5,
+          "tokens_out": 439021,
+          "tokens_in": 0,
+          "requests_completed": 2366,
+          "ttft_ms_p50": 21.2,
+          "ttft_ms_p99": 43.5
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7381.5,
+          "tokens_out": 443011,
+          "tokens_in": 0,
+          "requests_completed": 2380,
+          "ttft_ms_p50": 21.1,
+          "ttft_ms_p99": 44.0
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7335.8,
+          "tokens_out": 440208,
+          "tokens_in": 0,
+          "requests_completed": 2361,
+          "ttft_ms_p50": 21.5,
+          "ttft_ms_p99": 44.9
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7359.0,
+          "tokens_out": 441625,
+          "tokens_in": 0,
+          "requests_completed": 2373,
+          "ttft_ms_p50": 21.4,
+          "ttft_ms_p99": 44.6
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7434.2,
+          "tokens_out": 445978,
+          "tokens_in": 0,
+          "requests_completed": 2394,
+          "ttft_ms_p50": 22.9,
+          "ttft_ms_p99": 45.2
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7321.9,
+          "tokens_out": 439358,
+          "tokens_in": 0,
+          "requests_completed": 2367,
+          "ttft_ms_p50": 21.5,
+          "ttft_ms_p99": 45.8
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7325.9,
+          "tokens_out": 439445,
+          "tokens_in": 0,
+          "requests_completed": 2360,
+          "ttft_ms_p50": 21.6,
+          "ttft_ms_p99": 45.0
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7424.4,
+          "tokens_out": 445514,
+          "tokens_in": 0,
+          "requests_completed": 2390,
+          "ttft_ms_p50": 21.4,
+          "ttft_ms_p99": 45.8
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7356.4,
+          "tokens_out": 441402,
+          "tokens_in": 0,
+          "requests_completed": 2367,
+          "ttft_ms_p50": 21.5,
+          "ttft_ms_p99": 44.9
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7302.7,
+          "tokens_out": 438070,
+          "tokens_in": 0,
+          "requests_completed": 2353,
+          "ttft_ms_p50": 21.0,
+          "ttft_ms_p99": 44.8
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7383.8,
+          "tokens_out": 443102,
+          "tokens_in": 0,
+          "requests_completed": 2384,
+          "ttft_ms_p50": 22.1,
+          "ttft_ms_p99": 45.4
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7300.9,
+          "tokens_out": 438169,
+          "tokens_in": 0,
+          "requests_completed": 2355,
+          "ttft_ms_p50": 21.3,
+          "ttft_ms_p99": 45.5
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 7405.6,
+          "tokens_out": 444160,
+          "tokens_in": 0,
+          "requests_completed": 2392,
+          "ttft_ms_p50": 26.9,
+          "ttft_ms_p99": 46.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 7323.3,
+      "throttle_ratio": 0.925,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -3755.5
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-08",
+    "time": "06:28:51",
+    "run_id": "088cfa14",
+    "run_name": "nvidia_a800_sxm4_80gbx1_suite_F_nvidia_sglang_c43a8309_088cfa14",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-08T06:13:50.418242+00:00",
+    "benchmark_end_time": "2026-05-08T06:28:51.771182+00:00",
+    "benchmark_elapsed_minutes": 15.0,
+    "model_load_seconds": 51.5
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/accuracy/accuracy.json b/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/accuracy/accuracy.json
new file mode 100644
index 00000000..c700e987
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.62,
+  "baseline_delta": 0.02,
+  "valid": true,
+  "framework": "vLLM",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/burst/result.json b/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/burst/result.json
new file mode 100644
index 00000000..0693ae77
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/burst/result.json
@@ -0,0 +1,161 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T10:08:28.757340+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "burst",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "burst": {
+      "sla_ttft_ms": 500,
+      "burst_steady_qps": 5,
+      "burst_peak_qps": 25,
+      "burst_duration_seconds": 30,
+      "burst_interval_seconds": 120,
+      "steady_requests_total": 1812,
+      "burst_requests_total": 2245,
+      "steady_ttft_p50_ms": 14.03,
+      "steady_ttft_p99_ms": 26.7,
+      "burst_ttft_p50_ms": 15.6,
+      "burst_ttft_p99_ms": 22.99,
+      "sla_met_during_burst": true,
+      "burst_degradation_ratio": 0.861,
+      "recovery_time_seconds": 1.0,
+      "recovery_time_seconds_per_cycle": [
+        1.54,
+        0.47
+      ],
+      "_recovery_definition": "Median seconds within the post-burst steady window before rolling TTFT p99 drops below 1.5x the long-term steady baseline. Lower is better; None means it never recovered within the window.",
+      "results_by_cycle": [
+        {
+          "cycle": 1,
+          "steady_requests": 581,
+          "burst_requests": 760,
+          "steady_ttft_p99_ms": 33.62,
+          "burst_ttft_p99_ms": 22.98
+        },
+        {
+          "cycle": 2,
+          "steady_requests": 595,
+          "burst_requests": 734,
+          "steady_ttft_p99_ms": 19.28,
+          "burst_ttft_p99_ms": 22.86
+        },
+        {
+          "cycle": 3,
+          "steady_requests": 636,
+          "burst_requests": 751,
+          "steady_ttft_p99_ms": 19.65,
+          "burst_ttft_p99_ms": 23.15
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "12:15:49",
+    "run_id": "2c345026",
+    "run_name": "nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T12:07:57.193442+00:00",
+    "benchmark_end_time": "2026-05-22T12:15:49.102951+00:00",
+    "benchmark_elapsed_minutes": 7.9,
+    "model_load_seconds": 14.6
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/env_info.json b/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/env_info.json
new file mode 100644
index 00000000..0e3dfd10
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/env_info.json
@@ -0,0 +1,44 @@
+{
+  "collected_at": "2026-05-22T10:08:28.757340+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA B200",
+      "vendor": "NVIDIA",
+      "memory_gb": 179.1,
+      "driver_version": "595.71.05",
+      "firmware_version": null,
+      "compute_capability": "10.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_platform": "nvidia",
+  "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "Intel(R) Xeon(R) Platinum 8559C",
+    "physical_cores": 96,
+    "logical_cores": 192,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1996.0,
+  "pcie_generation": "PCIe Gen 5",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.5 LTS",
+  "python_version": "3.12.13",
+  "kernel_version": "6.17.0-1013-aws",
+  "runtime_version": "CUDA 13.0",
+  "pytorch_version": "2.11.0+cu130"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/interactive/result.json b/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/interactive/result.json
new file mode 100644
index 00000000..d230dd40
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/interactive/result.json
@@ -0,0 +1,139 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T10:08:28.757340+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 10.26,
+      "ttft_ms_p90": 16.13,
+      "ttft_ms_p99": 24.23,
+      "tpot_ms_p50": 3.45,
+      "tpot_ms_p90": 3.45,
+      "tpot_ms_p99": 3.49,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 182.7,
+      "ttft_ms_p99_reliability": {
+        "n": 3,
+        "mean": 19.4,
+        "std": 13.18,
+        "cv_pct": 67.91,
+        "stability": "high-variance",
+        "runs": [
+          34.58,
+          12.75,
+          10.88
+        ]
+      }
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "11:36:27",
+    "run_id": "2c345026",
+    "run_name": "nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:27:18.873868+00:00",
+    "benchmark_end_time": "2026-05-22T11:36:27.754777+00:00",
+    "benchmark_elapsed_minutes": 9.1,
+    "model_load_seconds": 14.5
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/offline/result.json b/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/offline/result.json
new file mode 100644
index 00000000..7068ab41
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/offline/result.json
@@ -0,0 +1,196 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T10:08:28.757340+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 14604.36,
+          "throughput_tokens_per_sec_per_chip": 14604.36,
+          "throughput_tokens_per_sec_total": 25459.53,
+          "elapsed_seconds_median": 2.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 14634.97,
+            "std": 124.14,
+            "cv_pct": 0.85,
+            "stability": "stable",
+            "runs": [
+              14529.0,
+              14604.36,
+              14771.55
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 14716.67,
+          "throughput_tokens_per_sec_per_chip": 14716.67,
+          "throughput_tokens_per_sec_total": 25633.8,
+          "elapsed_seconds_median": 2.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 14720.92,
+            "std": 25.18,
+            "cv_pct": 0.17,
+            "stability": "stable",
+            "runs": [
+              14716.67,
+              14698.13,
+              14747.96
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 14609.48,
+          "throughput_tokens_per_sec_per_chip": 14609.48,
+          "throughput_tokens_per_sec_total": 25506.35,
+          "elapsed_seconds_median": 2.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 14662.48,
+            "std": 95.33,
+            "cv_pct": 0.65,
+            "stability": "stable",
+            "runs": [
+              14605.42,
+              14609.48,
+              14772.53
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "10:20:27",
+    "run_id": "2c345026",
+    "run_name": "nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T10:19:58.812205+00:00",
+    "benchmark_end_time": "2026-05-22T10:20:27.746312+00:00",
+    "benchmark_elapsed_minutes": 0.5,
+    "model_load_seconds": 22.6
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/online/result.json b/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/online/result.json
new file mode 100644
index 00000000..958a5578
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/online/result.json
@@ -0,0 +1,195 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T10:08:28.757340+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 25,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 14.4,
+          "ttft_ms_p90": 21.6,
+          "ttft_ms_p99": 32.36,
+          "tpot_ms_p50": 3.63,
+          "tpot_ms_p90": 3.73,
+          "tpot_ms_p99": 3.86,
+          "elapsed_seconds_median": 64.3,
+          "sla_met": true,
+          "ttft_ms_p99_reliability": {
+            "n": 3,
+            "mean": 25.5,
+            "std": 9.68,
+            "cv_pct": 37.95,
+            "stability": "high-variance",
+            "runs": [
+              36.67,
+              19.85,
+              19.97
+            ]
+          }
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 15.89,
+          "ttft_ms_p90": 20.5,
+          "ttft_ms_p99": 23.82,
+          "tpot_ms_p50": 4.28,
+          "tpot_ms_p90": 4.42,
+          "tpot_ms_p99": 4.58,
+          "elapsed_seconds_median": 13.6,
+          "sla_met": true,
+          "ttft_ms_p99_reliability": {
+            "n": 3,
+            "mean": 23.61,
+            "std": 0.42,
+            "cv_pct": 1.8,
+            "stability": "stable",
+            "runs": [
+              23.88,
+              23.12,
+              23.82
+            ]
+          }
+        },
+        {
+          "target_qps": 100,
+          "achieved_qps": 100.0,
+          "ttft_ms_p50": 28.96,
+          "ttft_ms_p90": 1153.67,
+          "ttft_ms_p99": 1319.26,
+          "tpot_ms_p50": 5.78,
+          "tpot_ms_p90": 6.18,
+          "tpot_ms_p99": 7.6,
+          "elapsed_seconds_median": 6.5,
+          "sla_met": false,
+          "ttft_ms_p99_reliability": {
+            "n": 3,
+            "mean": 1276.65,
+            "std": 70.77,
+            "cv_pct": 5.54,
+            "stability": "noisy",
+            "runs": [
+              1196.4,
+              1303.42,
+              1330.13
+            ]
+          }
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "10:25:43",
+    "run_id": "2c345026",
+    "run_name": "nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T10:21:19.944643+00:00",
+    "benchmark_end_time": "2026-05-22T10:25:43.914737+00:00",
+    "benchmark_elapsed_minutes": 4.4,
+    "model_load_seconds": 32.9
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/result.json b/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/result.json
new file mode 100644
index 00000000..b1e1251f
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/result.json
@@ -0,0 +1,694 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T10:08:28.757340+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "interactive",
+      "sustained",
+      "burst"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 14604.36,
+          "throughput_tokens_per_sec_per_chip": 14604.36,
+          "throughput_tokens_per_sec_total": 25459.53,
+          "elapsed_seconds_median": 2.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 14634.97,
+            "std": 124.14,
+            "cv_pct": 0.85,
+            "stability": "stable",
+            "runs": [
+              14529.0,
+              14604.36,
+              14771.55
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 14716.67,
+          "throughput_tokens_per_sec_per_chip": 14716.67,
+          "throughput_tokens_per_sec_total": 25633.8,
+          "elapsed_seconds_median": 2.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 14720.92,
+            "std": 25.18,
+            "cv_pct": 0.17,
+            "stability": "stable",
+            "runs": [
+              14716.67,
+              14698.13,
+              14747.96
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 14609.48,
+          "throughput_tokens_per_sec_per_chip": 14609.48,
+          "throughput_tokens_per_sec_total": 25506.35,
+          "elapsed_seconds_median": 2.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 14662.48,
+            "std": 95.33,
+            "cv_pct": 0.65,
+            "stability": "stable",
+            "runs": [
+              14605.42,
+              14609.48,
+              14772.53
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 25,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 14.4,
+          "ttft_ms_p90": 21.6,
+          "ttft_ms_p99": 32.36,
+          "tpot_ms_p50": 3.63,
+          "tpot_ms_p90": 3.73,
+          "tpot_ms_p99": 3.86,
+          "elapsed_seconds_median": 64.3,
+          "sla_met": true,
+          "ttft_ms_p99_reliability": {
+            "n": 3,
+            "mean": 25.5,
+            "std": 9.68,
+            "cv_pct": 37.95,
+            "stability": "high-variance",
+            "runs": [
+              36.67,
+              19.85,
+              19.97
+            ]
+          }
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 15.89,
+          "ttft_ms_p90": 20.5,
+          "ttft_ms_p99": 23.82,
+          "tpot_ms_p50": 4.28,
+          "tpot_ms_p90": 4.42,
+          "tpot_ms_p99": 4.58,
+          "elapsed_seconds_median": 13.6,
+          "sla_met": true,
+          "ttft_ms_p99_reliability": {
+            "n": 3,
+            "mean": 23.61,
+            "std": 0.42,
+            "cv_pct": 1.8,
+            "stability": "stable",
+            "runs": [
+              23.88,
+              23.12,
+              23.82
+            ]
+          }
+        },
+        {
+          "target_qps": 100,
+          "achieved_qps": 100.0,
+          "ttft_ms_p50": 28.96,
+          "ttft_ms_p90": 1153.67,
+          "ttft_ms_p99": 1319.26,
+          "tpot_ms_p50": 5.78,
+          "tpot_ms_p90": 6.18,
+          "tpot_ms_p99": 7.6,
+          "elapsed_seconds_median": 6.5,
+          "sla_met": false,
+          "ttft_ms_p99_reliability": {
+            "n": 3,
+            "mean": 1276.65,
+            "std": 70.77,
+            "cv_pct": 5.54,
+            "stability": "noisy",
+            "runs": [
+              1196.4,
+              1303.42,
+              1330.13
+            ]
+          }
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 10.26,
+      "ttft_ms_p90": 16.13,
+      "ttft_ms_p99": 24.23,
+      "tpot_ms_p50": 3.45,
+      "tpot_ms_p90": 3.45,
+      "tpot_ms_p99": 3.49,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 182.7,
+      "ttft_ms_p99_reliability": {
+        "n": 3,
+        "mean": 19.4,
+        "std": 13.18,
+        "cv_pct": 67.91,
+        "stability": "high-variance",
+        "runs": [
+          34.58,
+          12.75,
+          10.88
+        ]
+      }
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 2144.2,
+          "tokens_out": 128750,
+          "tokens_in": 0,
+          "requests_completed": 373,
+          "ttft_ms_p50": 14.4,
+          "ttft_ms_p99": 72.0
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2164.2,
+          "tokens_out": 129771,
+          "tokens_in": 0,
+          "requests_completed": 376,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 19.5
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2185.9,
+          "tokens_out": 131171,
+          "tokens_in": 0,
+          "requests_completed": 375,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 18.8
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2179.1,
+          "tokens_out": 130749,
+          "tokens_in": 0,
+          "requests_completed": 375,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 17.9
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2185.8,
+          "tokens_out": 131201,
+          "tokens_in": 0,
+          "requests_completed": 378,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.5
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2167.9,
+          "tokens_out": 130007,
+          "tokens_in": 0,
+          "requests_completed": 374,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 18.1
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2174.5,
+          "tokens_out": 130510,
+          "tokens_in": 0,
+          "requests_completed": 376,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.3
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2186.6,
+          "tokens_out": 131197,
+          "tokens_in": 0,
+          "requests_completed": 377,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 17.0
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2171.7,
+          "tokens_out": 130309,
+          "tokens_in": 0,
+          "requests_completed": 377,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.5
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2186.5,
+          "tokens_out": 131207,
+          "tokens_in": 0,
+          "requests_completed": 376,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.2
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2186.8,
+          "tokens_out": 131151,
+          "tokens_in": 0,
+          "requests_completed": 379,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 19.3
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2177.8,
+          "tokens_out": 130678,
+          "tokens_in": 0,
+          "requests_completed": 373,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.4
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2175.4,
+          "tokens_out": 130541,
+          "tokens_in": 0,
+          "requests_completed": 377,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 19.8
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2180.4,
+          "tokens_out": 130871,
+          "tokens_in": 0,
+          "requests_completed": 376,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 19.7
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2167.4,
+          "tokens_out": 130014,
+          "tokens_in": 0,
+          "requests_completed": 377,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 18.0
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2177.5,
+          "tokens_out": 130582,
+          "tokens_in": 0,
+          "requests_completed": 373,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.4
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2190.4,
+          "tokens_out": 131464,
+          "tokens_in": 0,
+          "requests_completed": 380,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 18.8
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2179.4,
+          "tokens_out": 130785,
+          "tokens_in": 0,
+          "requests_completed": 375,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 18.0
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2174.4,
+          "tokens_out": 130481,
+          "tokens_in": 0,
+          "requests_completed": 378,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 19.3
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2191.7,
+          "tokens_out": 131426,
+          "tokens_in": 0,
+          "requests_completed": 377,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 17.5
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2154.5,
+          "tokens_out": 129318,
+          "tokens_in": 0,
+          "requests_completed": 371,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.4
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2205.5,
+          "tokens_out": 132338,
+          "tokens_in": 0,
+          "requests_completed": 380,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.3
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2166.4,
+          "tokens_out": 130010,
+          "tokens_in": 0,
+          "requests_completed": 377,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 19.3
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2181.6,
+          "tokens_out": 130895,
+          "tokens_in": 0,
+          "requests_completed": 376,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.3
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2187.3,
+          "tokens_out": 131242,
+          "tokens_in": 0,
+          "requests_completed": 376,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 17.3
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2174.6,
+          "tokens_out": 130401,
+          "tokens_in": 0,
+          "requests_completed": 375,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.3
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2162.4,
+          "tokens_out": 129814,
+          "tokens_in": 0,
+          "requests_completed": 374,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.3
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2182.0,
+          "tokens_out": 130929,
+          "tokens_in": 0,
+          "requests_completed": 376,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 19.0
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2188.6,
+          "tokens_out": 131229,
+          "tokens_in": 0,
+          "requests_completed": 375,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 19.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 2178.8,
+      "throttle_ratio": 0.977,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": 0.2,
+      "throughput_post_warmup_reliability": {
+        "n": 28,
+        "mean": 2178.8,
+        "std": 10.7,
+        "cv_pct": 0.49,
+        "stability": "stable",
+        "runs": [
+          2164.2,
+          2185.9,
+          2179.1,
+          2185.8,
+          2167.9,
+          2174.5,
+          2186.6,
+          2171.7,
+          2186.5,
+          2186.8,
+          2177.8,
+          2175.4,
+          2180.4,
+          2167.4,
+          2177.5,
+          2190.4,
+          2179.4,
+          2174.4,
+          2191.7,
+          2154.5,
+          2205.5,
+          2166.4,
+          2181.6,
+          2187.3,
+          2174.6,
+          2162.4,
+          2182.0,
+          2188.6
+        ]
+      }
+    },
+    "burst": {
+      "sla_ttft_ms": 500,
+      "burst_steady_qps": 5,
+      "burst_peak_qps": 25,
+      "burst_duration_seconds": 30,
+      "burst_interval_seconds": 120,
+      "steady_requests_total": 1812,
+      "burst_requests_total": 2245,
+      "steady_ttft_p50_ms": 14.03,
+      "steady_ttft_p99_ms": 26.7,
+      "burst_ttft_p50_ms": 15.6,
+      "burst_ttft_p99_ms": 22.99,
+      "sla_met_during_burst": true,
+      "burst_degradation_ratio": 0.861,
+      "recovery_time_seconds": 1.0,
+      "recovery_time_seconds_per_cycle": [
+        1.54,
+        0.47
+      ],
+      "_recovery_definition": "Median seconds within the post-burst steady window before rolling TTFT p99 drops below 1.5x the long-term steady baseline. Lower is better; None means it never recovered within the window.",
+      "results_by_cycle": [
+        {
+          "cycle": 1,
+          "steady_requests": 581,
+          "burst_requests": 760,
+          "steady_ttft_p99_ms": 33.62,
+          "burst_ttft_p99_ms": 22.98
+        },
+        {
+          "cycle": 2,
+          "steady_requests": 595,
+          "burst_requests": 734,
+          "steady_ttft_p99_ms": 19.28,
+          "burst_ttft_p99_ms": 22.86
+        },
+        {
+          "cycle": 3,
+          "steady_requests": 636,
+          "burst_requests": 751,
+          "steady_ttft_p99_ms": 19.65,
+          "burst_ttft_p99_ms": 23.15
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.62,
+    "baseline_delta": 0.02,
+    "valid": true,
+    "framework": "vLLM",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "10:20:27",
+    "run_id": "2c345026",
+    "run_name": "nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": "Partial run: ['offline', 'online', 'interactive', 'sustained', 'burst'] succeeded, ['speculative'] failed.",
+    "benchmark_start_time": "2026-05-22T10:19:58.812205+00:00",
+    "benchmark_end_time": "2026-05-22T10:20:27.746312+00:00",
+    "benchmark_elapsed_minutes": 51.9,
+    "model_load_seconds": 22.6,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'interactive', 'sustained', 'burst'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/offline",
+      "online": "results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/online",
+      "interactive": "results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/interactive",
+      "sustained": "results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/sustained",
+      "burst": "results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/burst"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/sustained/result.json b/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/sustained/result.json
new file mode 100644
index 00000000..80ec4dad
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026/sustained/result.json
@@ -0,0 +1,456 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T10:08:28.757340+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 2144.2,
+          "tokens_out": 128750,
+          "tokens_in": 0,
+          "requests_completed": 373,
+          "ttft_ms_p50": 14.4,
+          "ttft_ms_p99": 72.0
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2164.2,
+          "tokens_out": 129771,
+          "tokens_in": 0,
+          "requests_completed": 376,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 19.5
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2185.9,
+          "tokens_out": 131171,
+          "tokens_in": 0,
+          "requests_completed": 375,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 18.8
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2179.1,
+          "tokens_out": 130749,
+          "tokens_in": 0,
+          "requests_completed": 375,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 17.9
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2185.8,
+          "tokens_out": 131201,
+          "tokens_in": 0,
+          "requests_completed": 378,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.5
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2167.9,
+          "tokens_out": 130007,
+          "tokens_in": 0,
+          "requests_completed": 374,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 18.1
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2174.5,
+          "tokens_out": 130510,
+          "tokens_in": 0,
+          "requests_completed": 376,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.3
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2186.6,
+          "tokens_out": 131197,
+          "tokens_in": 0,
+          "requests_completed": 377,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 17.0
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2171.7,
+          "tokens_out": 130309,
+          "tokens_in": 0,
+          "requests_completed": 377,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.5
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2186.5,
+          "tokens_out": 131207,
+          "tokens_in": 0,
+          "requests_completed": 376,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.2
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2186.8,
+          "tokens_out": 131151,
+          "tokens_in": 0,
+          "requests_completed": 379,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 19.3
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2177.8,
+          "tokens_out": 130678,
+          "tokens_in": 0,
+          "requests_completed": 373,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.4
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2175.4,
+          "tokens_out": 130541,
+          "tokens_in": 0,
+          "requests_completed": 377,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 19.8
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2180.4,
+          "tokens_out": 130871,
+          "tokens_in": 0,
+          "requests_completed": 376,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 19.7
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2167.4,
+          "tokens_out": 130014,
+          "tokens_in": 0,
+          "requests_completed": 377,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 18.0
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2177.5,
+          "tokens_out": 130582,
+          "tokens_in": 0,
+          "requests_completed": 373,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.4
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2190.4,
+          "tokens_out": 131464,
+          "tokens_in": 0,
+          "requests_completed": 380,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 18.8
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2179.4,
+          "tokens_out": 130785,
+          "tokens_in": 0,
+          "requests_completed": 375,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 18.0
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2174.4,
+          "tokens_out": 130481,
+          "tokens_in": 0,
+          "requests_completed": 378,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 19.3
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2191.7,
+          "tokens_out": 131426,
+          "tokens_in": 0,
+          "requests_completed": 377,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 17.5
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2154.5,
+          "tokens_out": 129318,
+          "tokens_in": 0,
+          "requests_completed": 371,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.4
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2205.5,
+          "tokens_out": 132338,
+          "tokens_in": 0,
+          "requests_completed": 380,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.3
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2166.4,
+          "tokens_out": 130010,
+          "tokens_in": 0,
+          "requests_completed": 377,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 19.3
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2181.6,
+          "tokens_out": 130895,
+          "tokens_in": 0,
+          "requests_completed": 376,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.3
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2187.3,
+          "tokens_out": 131242,
+          "tokens_in": 0,
+          "requests_completed": 376,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 17.3
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2174.6,
+          "tokens_out": 130401,
+          "tokens_in": 0,
+          "requests_completed": 375,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.3
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2162.4,
+          "tokens_out": 129814,
+          "tokens_in": 0,
+          "requests_completed": 374,
+          "ttft_ms_p50": 14.1,
+          "ttft_ms_p99": 19.3
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2182.0,
+          "tokens_out": 130929,
+          "tokens_in": 0,
+          "requests_completed": 376,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 19.0
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2188.6,
+          "tokens_out": 131229,
+          "tokens_in": 0,
+          "requests_completed": 375,
+          "ttft_ms_p50": 14.2,
+          "ttft_ms_p99": 19.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 2178.8,
+      "throttle_ratio": 0.977,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": 0.2,
+      "throughput_post_warmup_reliability": {
+        "n": 28,
+        "mean": 2178.8,
+        "std": 10.7,
+        "cv_pct": 0.49,
+        "stability": "stable",
+        "runs": [
+          2164.2,
+          2185.9,
+          2179.1,
+          2185.8,
+          2167.9,
+          2174.5,
+          2186.6,
+          2171.7,
+          2186.5,
+          2186.8,
+          2177.8,
+          2175.4,
+          2180.4,
+          2167.4,
+          2177.5,
+          2190.4,
+          2179.4,
+          2174.4,
+          2191.7,
+          2154.5,
+          2205.5,
+          2166.4,
+          2181.6,
+          2187.3,
+          2174.6,
+          2162.4,
+          2182.0,
+          2188.6
+        ]
+      }
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "12:07:04",
+    "run_id": "2c345026",
+    "run_name": "nvidia_b200x1_suite_A_nvidia_vllm020_0f6c56e4_2c345026",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:37:01.802929+00:00",
+    "benchmark_end_time": "2026-05-22T12:07:04.553612+00:00",
+    "benchmark_elapsed_minutes": 30.0,
+    "model_load_seconds": 15.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/bf16/accuracy/accuracy.json b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/bf16/accuracy/accuracy.json
new file mode 100644
index 00000000..95fced50
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/bf16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.56,
+  "baseline_delta": 0.0,
+  "valid": true,
+  "framework": "vLLM",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/bf16/offline/result.json b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/bf16/offline/result.json
new file mode 100644
index 00000000..9d89c5b8
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/bf16/offline/result.json
@@ -0,0 +1,221 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:00:26.756229+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 14871.53,
+          "throughput_tokens_per_sec_per_chip": 14871.53,
+          "throughput_tokens_per_sec_total": 26690.47,
+          "elapsed_seconds_median": 2.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 14902.66,
+            "std": 138.54,
+            "cv_pct": 0.93,
+            "stability": "stable",
+            "runs": [
+              15054.11,
+              14871.53,
+              14782.33
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 14978.33,
+          "throughput_tokens_per_sec_per_chip": 14978.33,
+          "throughput_tokens_per_sec_total": 26758.41,
+          "elapsed_seconds_median": 2.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 14974.05,
+            "std": 48.37,
+            "cv_pct": 0.32,
+            "stability": "stable",
+            "runs": [
+              14923.68,
+              14978.33,
+              15020.15
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 14970.34,
+          "throughput_tokens_per_sec_per_chip": 14970.34,
+          "throughput_tokens_per_sec_total": 26748.09,
+          "elapsed_seconds_median": 2.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 14952.51,
+            "std": 88.96,
+            "cv_pct": 0.59,
+            "stability": "stable",
+            "runs": [
+              15031.2,
+              14855.98,
+              14970.34
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 14891.3,
+          "throughput_tokens_per_sec_per_chip": 14891.3,
+          "throughput_tokens_per_sec_total": 26609.16,
+          "elapsed_seconds_median": 2.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 14889.98,
+            "std": 10.96,
+            "cv_pct": 0.07,
+            "stability": "stable",
+            "runs": [
+              14900.22,
+              14878.42,
+              14891.3
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "11:02:20",
+    "run_id": "ea976bca",
+    "run_name": "nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:01:41.553802+00:00",
+    "benchmark_end_time": "2026-05-22T11:02:20.190019+00:00",
+    "benchmark_elapsed_minutes": 0.6,
+    "model_load_seconds": 16.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/bf16/result.json b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/bf16/result.json
new file mode 100644
index 00000000..e9939d8b
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/bf16/result.json
@@ -0,0 +1,228 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:00:26.756229+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 14871.53,
+          "throughput_tokens_per_sec_per_chip": 14871.53,
+          "throughput_tokens_per_sec_total": 26690.47,
+          "elapsed_seconds_median": 2.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 14902.66,
+            "std": 138.54,
+            "cv_pct": 0.93,
+            "stability": "stable",
+            "runs": [
+              15054.11,
+              14871.53,
+              14782.33
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 14978.33,
+          "throughput_tokens_per_sec_per_chip": 14978.33,
+          "throughput_tokens_per_sec_total": 26758.41,
+          "elapsed_seconds_median": 2.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 14974.05,
+            "std": 48.37,
+            "cv_pct": 0.32,
+            "stability": "stable",
+            "runs": [
+              14923.68,
+              14978.33,
+              15020.15
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 14970.34,
+          "throughput_tokens_per_sec_per_chip": 14970.34,
+          "throughput_tokens_per_sec_total": 26748.09,
+          "elapsed_seconds_median": 2.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 14952.51,
+            "std": 88.96,
+            "cv_pct": 0.59,
+            "stability": "stable",
+            "runs": [
+              15031.2,
+              14855.98,
+              14970.34
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 14891.3,
+          "throughput_tokens_per_sec_per_chip": 14891.3,
+          "throughput_tokens_per_sec_total": 26609.16,
+          "elapsed_seconds_median": 2.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 14889.98,
+            "std": 10.96,
+            "cv_pct": 0.07,
+            "stability": "stable",
+            "runs": [
+              14900.22,
+              14878.42,
+              14891.3
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.56,
+    "baseline_delta": 0.0,
+    "valid": true,
+    "framework": "vLLM",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "11:02:20",
+    "run_id": "ea976bca",
+    "run_name": "nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:01:41.553802+00:00",
+    "benchmark_end_time": "2026-05-22T11:02:20.190019+00:00",
+    "benchmark_elapsed_minutes": 0.6,
+    "model_load_seconds": 16.2,
+    "benchmark_elapsed_minutes_note": "Total across ['offline'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/bf16/offline"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/env_info.json b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/env_info.json
new file mode 100644
index 00000000..23ec7e16
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/env_info.json
@@ -0,0 +1,44 @@
+{
+  "collected_at": "2026-05-22T11:00:26.756229+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA B200",
+      "vendor": "NVIDIA",
+      "memory_gb": 179.1,
+      "driver_version": "595.71.05",
+      "firmware_version": null,
+      "compute_capability": "10.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_platform": "nvidia",
+  "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "Intel(R) Xeon(R) Platinum 8559C",
+    "physical_cores": 96,
+    "logical_cores": 192,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1996.0,
+  "pcie_generation": "PCIe Gen 5",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.5 LTS",
+  "python_version": "3.12.13",
+  "kernel_version": "6.17.0-1013-aws",
+  "runtime_version": "CUDA 13.0",
+  "pytorch_version": "2.11.0+cu130"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/fp8/accuracy/accuracy.json b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/fp8/accuracy/accuracy.json
new file mode 100644
index 00000000..d345a007
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/fp8/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.56,
+  "baseline_delta": -0.02,
+  "valid": true,
+  "framework": "vLLM",
+  "precision": "FP8",
+  "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/fp8/offline/result.json b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/fp8/offline/result.json
new file mode 100644
index 00000000..2dc2f5f0
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/fp8/offline/result.json
@@ -0,0 +1,221 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:00:26.756229+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
+    "model_revision": "12fd6884d2585dd4d020373e7f39f74507b31866",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "bfloat16",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 19428.03,
+          "throughput_tokens_per_sec_per_chip": 19428.03,
+          "throughput_tokens_per_sec_total": 34894.39,
+          "elapsed_seconds_median": 1.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 19417.97,
+            "std": 21.51,
+            "cv_pct": 0.11,
+            "stability": "stable",
+            "runs": [
+              19393.27,
+              19428.03,
+              19432.61
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 19471.49,
+          "throughput_tokens_per_sec_per_chip": 19471.49,
+          "throughput_tokens_per_sec_total": 34942.69,
+          "elapsed_seconds_median": 1.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 19468.3,
+            "std": 62.9,
+            "cv_pct": 0.32,
+            "stability": "stable",
+            "runs": [
+              19471.49,
+              19403.86,
+              19529.54
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 19439.08,
+          "throughput_tokens_per_sec_per_chip": 19439.08,
+          "throughput_tokens_per_sec_total": 34947.06,
+          "elapsed_seconds_median": 1.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 19377.92,
+            "std": 122.1,
+            "cv_pct": 0.63,
+            "stability": "stable",
+            "runs": [
+              19237.32,
+              19439.08,
+              19457.35
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 19515.3,
+          "throughput_tokens_per_sec_per_chip": 19515.3,
+          "throughput_tokens_per_sec_total": 34972.45,
+          "elapsed_seconds_median": 1.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 19518.76,
+            "std": 6.5,
+            "cv_pct": 0.03,
+            "stability": "stable",
+            "runs": [
+              19515.3,
+              19514.73,
+              19526.26
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "11:04:38",
+    "run_id": "87ccc74d",
+    "run_name": "nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_87ccc74d",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:04:09.398030+00:00",
+    "benchmark_end_time": "2026-05-22T11:04:38.829294+00:00",
+    "benchmark_elapsed_minutes": 0.5,
+    "model_load_seconds": 31.3
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/fp8/result.json b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/fp8/result.json
new file mode 100644
index 00000000..b7deb51f
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/fp8/result.json
@@ -0,0 +1,228 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:00:26.756229+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
+    "model_revision": "12fd6884d2585dd4d020373e7f39f74507b31866",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "bfloat16",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 19428.03,
+          "throughput_tokens_per_sec_per_chip": 19428.03,
+          "throughput_tokens_per_sec_total": 34894.39,
+          "elapsed_seconds_median": 1.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 19417.97,
+            "std": 21.51,
+            "cv_pct": 0.11,
+            "stability": "stable",
+            "runs": [
+              19393.27,
+              19428.03,
+              19432.61
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 19471.49,
+          "throughput_tokens_per_sec_per_chip": 19471.49,
+          "throughput_tokens_per_sec_total": 34942.69,
+          "elapsed_seconds_median": 1.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 19468.3,
+            "std": 62.9,
+            "cv_pct": 0.32,
+            "stability": "stable",
+            "runs": [
+              19471.49,
+              19403.86,
+              19529.54
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 19439.08,
+          "throughput_tokens_per_sec_per_chip": 19439.08,
+          "throughput_tokens_per_sec_total": 34947.06,
+          "elapsed_seconds_median": 1.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 19377.92,
+            "std": 122.1,
+            "cv_pct": 0.63,
+            "stability": "stable",
+            "runs": [
+              19237.32,
+              19439.08,
+              19457.35
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 19515.3,
+          "throughput_tokens_per_sec_per_chip": 19515.3,
+          "throughput_tokens_per_sec_total": 34972.45,
+          "elapsed_seconds_median": 1.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 19518.76,
+            "std": 6.5,
+            "cv_pct": 0.03,
+            "stability": "stable",
+            "runs": [
+              19515.3,
+              19514.73,
+              19526.26
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.56,
+    "baseline_delta": -0.02,
+    "valid": true,
+    "framework": "vLLM",
+    "precision": "FP8",
+    "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "11:04:38",
+    "run_id": "87ccc74d",
+    "run_name": "nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_87ccc74d",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:04:09.398030+00:00",
+    "benchmark_end_time": "2026-05-22T11:04:38.829294+00:00",
+    "benchmark_elapsed_minutes": 0.5,
+    "model_load_seconds": 31.3,
+    "benchmark_elapsed_minutes_note": "Total across ['offline'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/fp8/offline"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/result.json b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/result.json
new file mode 100644
index 00000000..9808c373
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/result.json
@@ -0,0 +1,603 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original",
+    "_note": "suite model_id. Each precision level uses its own quantized checkpoint."
+  },
+  "task": {
+    "scenarios_run": [
+      "accuracy",
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "precision_levels_run": [
+      "BF16",
+      "FP8",
+      "W8A8",
+      "W8A16",
+      "W4A16"
+    ],
+    "precision_levels_skipped": [
+      "FP16"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "quantization": {
+      "results_by_precision": [
+        {
+          "precision": "BF16",
+          "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+          "best_throughput_tokens_per_sec": 14978.33,
+          "accuracy_score": 0.56,
+          "accuracy_baseline_delta": 0.0,
+          "accuracy_valid": true,
+          "quality_efficiency": 8387.9,
+          "speedup_vs_bf16": 1.0,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 14871.53,
+              "throughput_tokens_per_sec_per_chip": 14871.53,
+              "throughput_tokens_per_sec_total": 26690.47,
+              "elapsed_seconds_median": 2.4,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "throughput_tokens_per_sec_reliability": {
+                "n": 3,
+                "mean": 14902.66,
+                "std": 138.54,
+                "cv_pct": 0.93,
+                "stability": "stable",
+                "runs": [
+                  15054.11,
+                  14871.53,
+                  14782.33
+                ]
+              },
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 14978.33,
+              "throughput_tokens_per_sec_per_chip": 14978.33,
+              "throughput_tokens_per_sec_total": 26758.41,
+              "elapsed_seconds_median": 2.4,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "throughput_tokens_per_sec_reliability": {
+                "n": 3,
+                "mean": 14974.05,
+                "std": 48.37,
+                "cv_pct": 0.32,
+                "stability": "stable",
+                "runs": [
+                  14923.68,
+                  14978.33,
+                  15020.15
+                ]
+              },
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 14970.34,
+              "throughput_tokens_per_sec_per_chip": 14970.34,
+              "throughput_tokens_per_sec_total": 26748.09,
+              "elapsed_seconds_median": 2.4,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "throughput_tokens_per_sec_reliability": {
+                "n": 3,
+                "mean": 14952.51,
+                "std": 88.96,
+                "cv_pct": 0.59,
+                "stability": "stable",
+                "runs": [
+                  15031.2,
+                  14855.98,
+                  14970.34
+                ]
+              },
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 14891.3,
+              "throughput_tokens_per_sec_per_chip": 14891.3,
+              "throughput_tokens_per_sec_total": 26609.16,
+              "elapsed_seconds_median": 2.4,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "throughput_tokens_per_sec_reliability": {
+                "n": 3,
+                "mean": 14889.98,
+                "std": 10.96,
+                "cv_pct": 0.07,
+                "stability": "stable",
+                "runs": [
+                  14900.22,
+                  14878.42,
+                  14891.3
+                ]
+              },
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "bf16",
+          "effective_dtype": "bfloat16",
+          "quantization_method": null
+        },
+        {
+          "precision": "FP8",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
+          "best_throughput_tokens_per_sec": 19515.3,
+          "accuracy_score": 0.56,
+          "accuracy_baseline_delta": -0.02,
+          "accuracy_valid": true,
+          "quality_efficiency": 10928.6,
+          "speedup_vs_bf16": 1.303,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 19428.03,
+              "throughput_tokens_per_sec_per_chip": 19428.03,
+              "throughput_tokens_per_sec_total": 34894.39,
+              "elapsed_seconds_median": 1.8,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "throughput_tokens_per_sec_reliability": {
+                "n": 3,
+                "mean": 19417.97,
+                "std": 21.51,
+                "cv_pct": 0.11,
+                "stability": "stable",
+                "runs": [
+                  19393.27,
+                  19428.03,
+                  19432.61
+                ]
+              },
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 19471.49,
+              "throughput_tokens_per_sec_per_chip": 19471.49,
+              "throughput_tokens_per_sec_total": 34942.69,
+              "elapsed_seconds_median": 1.8,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "throughput_tokens_per_sec_reliability": {
+                "n": 3,
+                "mean": 19468.3,
+                "std": 62.9,
+                "cv_pct": 0.32,
+                "stability": "stable",
+                "runs": [
+                  19471.49,
+                  19403.86,
+                  19529.54
+                ]
+              },
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 19439.08,
+              "throughput_tokens_per_sec_per_chip": 19439.08,
+              "throughput_tokens_per_sec_total": 34947.06,
+              "elapsed_seconds_median": 1.8,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "throughput_tokens_per_sec_reliability": {
+                "n": 3,
+                "mean": 19377.92,
+                "std": 122.1,
+                "cv_pct": 0.63,
+                "stability": "stable",
+                "runs": [
+                  19237.32,
+                  19439.08,
+                  19457.35
+                ]
+              },
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 19515.3,
+              "throughput_tokens_per_sec_per_chip": 19515.3,
+              "throughput_tokens_per_sec_total": 34972.45,
+              "elapsed_seconds_median": 1.8,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "throughput_tokens_per_sec_reliability": {
+                "n": 3,
+                "mean": 19518.76,
+                "std": 6.5,
+                "cv_pct": 0.03,
+                "stability": "stable",
+                "runs": [
+                  19515.3,
+                  19514.73,
+                  19526.26
+                ]
+              },
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "fp8",
+          "effective_dtype": "bfloat16",
+          "quantization_method": "compressed-tensors"
+        },
+        {
+          "precision": "W8A16",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+          "best_throughput_tokens_per_sec": 8893.35,
+          "accuracy_score": 0.59,
+          "accuracy_baseline_delta": 0.0,
+          "accuracy_valid": true,
+          "quality_efficiency": 5247.1,
+          "speedup_vs_bf16": 0.594,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 8851.94,
+              "throughput_tokens_per_sec_per_chip": 8851.94,
+              "throughput_tokens_per_sec_total": 15963.0,
+              "elapsed_seconds_median": 4.0,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "throughput_tokens_per_sec_reliability": {
+                "n": 3,
+                "mean": 8806.03,
+                "std": 91.92,
+                "cv_pct": 1.04,
+                "stability": "stable",
+                "runs": [
+                  8851.94,
+                  8700.21,
+                  8865.95
+                ]
+              },
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 8857.65,
+              "throughput_tokens_per_sec_per_chip": 8857.65,
+              "throughput_tokens_per_sec_total": 16020.53,
+              "elapsed_seconds_median": 3.9,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "throughput_tokens_per_sec_reliability": {
+                "n": 3,
+                "mean": 8829.44,
+                "std": 137.52,
+                "cv_pct": 1.56,
+                "stability": "stable",
+                "runs": [
+                  8950.67,
+                  8680.0,
+                  8857.65
+                ]
+              },
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 8893.35,
+              "throughput_tokens_per_sec_per_chip": 8893.35,
+              "throughput_tokens_per_sec_total": 15970.82,
+              "elapsed_seconds_median": 4.0,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "throughput_tokens_per_sec_reliability": {
+                "n": 3,
+                "mean": 8827.61,
+                "std": 134.27,
+                "cv_pct": 1.52,
+                "stability": "stable",
+                "runs": [
+                  8916.33,
+                  8673.13,
+                  8893.35
+                ]
+              },
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 8773.67,
+              "throughput_tokens_per_sec_per_chip": 8773.67,
+              "throughput_tokens_per_sec_total": 15760.82,
+              "elapsed_seconds_median": 4.0,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "throughput_tokens_per_sec_reliability": {
+                "n": 3,
+                "mean": 8771.22,
+                "std": 98.04,
+                "cv_pct": 1.12,
+                "stability": "stable",
+                "runs": [
+                  8773.67,
+                  8671.98,
+                  8868.01
+                ]
+              },
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "w8a16",
+          "effective_dtype": "bfloat16",
+          "quantization_method": "compressed-tensors"
+        },
+        {
+          "precision": "W4A16",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+          "best_throughput_tokens_per_sec": 6347.31,
+          "accuracy_score": 0.55,
+          "accuracy_baseline_delta": -0.02,
+          "accuracy_valid": true,
+          "quality_efficiency": 3491.0,
+          "speedup_vs_bf16": 0.424,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 6347.31,
+              "throughput_tokens_per_sec_per_chip": 6347.31,
+              "throughput_tokens_per_sec_total": 11527.65,
+              "elapsed_seconds_median": 5.4,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "throughput_tokens_per_sec_reliability": {
+                "n": 3,
+                "mean": 6344.22,
+                "std": 6.19,
+                "cv_pct": 0.1,
+                "stability": "stable",
+                "runs": [
+                  6347.31,
+                  6348.26,
+                  6337.09
+                ]
+              },
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 6317.14,
+              "throughput_tokens_per_sec_per_chip": 6317.14,
+              "throughput_tokens_per_sec_total": 11472.26,
+              "elapsed_seconds_median": 5.5,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "throughput_tokens_per_sec_reliability": {
+                "n": 3,
+                "mean": 6222.32,
+                "std": 180.96,
+                "cv_pct": 2.91,
+                "stability": "stable",
+                "runs": [
+                  6336.15,
+                  6013.65,
+                  6317.14
+                ]
+              },
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 6340.01,
+              "throughput_tokens_per_sec_per_chip": 6340.01,
+              "throughput_tokens_per_sec_total": 11514.39,
+              "elapsed_seconds_median": 5.4,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "throughput_tokens_per_sec_reliability": {
+                "n": 3,
+                "mean": 6510.1,
+                "std": 295.71,
+                "cv_pct": 4.54,
+                "stability": "noisy",
+                "runs": [
+                  6338.74,
+                  6851.56,
+                  6340.01
+                ]
+              },
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 6343.3,
+              "throughput_tokens_per_sec_per_chip": 6343.3,
+              "throughput_tokens_per_sec_total": 11520.37,
+              "elapsed_seconds_median": 5.4,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "throughput_tokens_per_sec_reliability": {
+                "n": 3,
+                "mean": 6338.61,
+                "std": 10.39,
+                "cv_pct": 0.16,
+                "stability": "stable",
+                "runs": [
+                  6343.3,
+                  6345.83,
+                  6326.7
+                ]
+              },
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "w4a16",
+          "effective_dtype": "float16",
+          "quantization_method": "gptq"
+        }
+      ]
+    },
+    "derived": {}
+  },
+  "accuracy": null,
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "11:02:20",
+    "run_id": "ea976bca",
+    "run_name": "nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:01:41.553802+00:00",
+    "benchmark_end_time": "2026-05-22T11:02:20.190019+00:00",
+    "benchmark_elapsed_minutes": 3.7,
+    "model_load_seconds": 16.2,
+    "benchmark_elapsed_minutes_note": "Sum of per-precision benchmark_elapsed_minutes (excludes sleep gaps and orchestrator overhead).",
+    "scenario_dirs": {
+      "bf16/offline": "results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/bf16/offline",
+      "fp8/offline": "results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/fp8/offline",
+      "w8a8/offline": "results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a8/offline",
+      "w8a8/online": "results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a8/online",
+      "w8a8/sustained": "results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a8/sustained",
+      "w8a16/offline": "results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a16/offline",
+      "w4a16/offline": "results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w4a16/offline"
+    },
+    "precision_dirs": {
+      "BF16": "bf16",
+      "FP8": "fp8",
+      "W8A8": "w8a8",
+      "W8A16": "w8a16",
+      "W4A16": "w4a16"
+    },
+    "precision_model_map": {
+      "BF16": {
+        "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+        "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+        "dtype_override": "bfloat16"
+      },
+      "FP8": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
+        "model_revision": "12fd6884d2585dd4d020373e7f39f74507b31866",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "Static per-tensor FP8 (weights + activations). Requires Ampere+ (A100, A800, H20). Skipped automatically on FP16-only hardware."
+      },
+      "W8A8": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+        "model_revision": "e2bfb7d92784ad7d1b606c2f9644d3cefb2ec708",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "INT8 weights + INT8 activations via compressed-tensors. Exercises native int8 tensor cores."
+      },
+      "W8A16": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+        "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "INT8 weights, FP16 activations. Weight-only quantization \u2014 reduces memory bandwidth, not compute dtype."
+      },
+      "W4A16": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+        "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+        "engine_kwargs": {
+          "quantization": "gptq"
+        },
+        "_note": "INT4 weights, FP16 activations via GPTQ Marlin kernels. Weight-only quantization \u2014 larger memory saving than W8A16."
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w4a16/accuracy/accuracy.json b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w4a16/accuracy/accuracy.json
new file mode 100644
index 00000000..9443ecf5
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w4a16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.55,
+  "baseline_delta": -0.02,
+  "valid": true,
+  "framework": "vLLM",
+  "precision": "W4A16",
+  "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w4a16/offline/result.json b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w4a16/offline/result.json
new file mode 100644
index 00000000..29559b78
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w4a16/offline/result.json
@@ -0,0 +1,221 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:00:26.756229+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "float16",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 6347.31,
+          "throughput_tokens_per_sec_per_chip": 6347.31,
+          "throughput_tokens_per_sec_total": 11527.65,
+          "elapsed_seconds_median": 5.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 6344.22,
+            "std": 6.19,
+            "cv_pct": 0.1,
+            "stability": "stable",
+            "runs": [
+              6347.31,
+              6348.26,
+              6337.09
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 6317.14,
+          "throughput_tokens_per_sec_per_chip": 6317.14,
+          "throughput_tokens_per_sec_total": 11472.26,
+          "elapsed_seconds_median": 5.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 6222.32,
+            "std": 180.96,
+            "cv_pct": 2.91,
+            "stability": "stable",
+            "runs": [
+              6336.15,
+              6013.65,
+              6317.14
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 6340.01,
+          "throughput_tokens_per_sec_per_chip": 6340.01,
+          "throughput_tokens_per_sec_total": 11514.39,
+          "elapsed_seconds_median": 5.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 6510.1,
+            "std": 295.71,
+            "cv_pct": 4.54,
+            "stability": "noisy",
+            "runs": [
+              6338.74,
+              6851.56,
+              6340.01
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 6343.3,
+          "throughput_tokens_per_sec_per_chip": 6343.3,
+          "throughput_tokens_per_sec_total": 11520.37,
+          "elapsed_seconds_median": 5.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 6338.61,
+            "std": 10.39,
+            "cv_pct": 0.16,
+            "stability": "stable",
+            "runs": [
+              6343.3,
+              6345.83,
+              6326.7
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "11:12:27",
+    "run_id": "109a7792",
+    "run_name": "nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_109a7792",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:10:59.753021+00:00",
+    "benchmark_end_time": "2026-05-22T11:12:27.008821+00:00",
+    "benchmark_elapsed_minutes": 1.5,
+    "model_load_seconds": 16.9
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w4a16/result.json b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w4a16/result.json
new file mode 100644
index 00000000..5ca60e8e
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w4a16/result.json
@@ -0,0 +1,228 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:00:26.756229+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "float16",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 6347.31,
+          "throughput_tokens_per_sec_per_chip": 6347.31,
+          "throughput_tokens_per_sec_total": 11527.65,
+          "elapsed_seconds_median": 5.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 6344.22,
+            "std": 6.19,
+            "cv_pct": 0.1,
+            "stability": "stable",
+            "runs": [
+              6347.31,
+              6348.26,
+              6337.09
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 6317.14,
+          "throughput_tokens_per_sec_per_chip": 6317.14,
+          "throughput_tokens_per_sec_total": 11472.26,
+          "elapsed_seconds_median": 5.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 6222.32,
+            "std": 180.96,
+            "cv_pct": 2.91,
+            "stability": "stable",
+            "runs": [
+              6336.15,
+              6013.65,
+              6317.14
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 6340.01,
+          "throughput_tokens_per_sec_per_chip": 6340.01,
+          "throughput_tokens_per_sec_total": 11514.39,
+          "elapsed_seconds_median": 5.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 6510.1,
+            "std": 295.71,
+            "cv_pct": 4.54,
+            "stability": "noisy",
+            "runs": [
+              6338.74,
+              6851.56,
+              6340.01
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 6343.3,
+          "throughput_tokens_per_sec_per_chip": 6343.3,
+          "throughput_tokens_per_sec_total": 11520.37,
+          "elapsed_seconds_median": 5.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 6338.61,
+            "std": 10.39,
+            "cv_pct": 0.16,
+            "stability": "stable",
+            "runs": [
+              6343.3,
+              6345.83,
+              6326.7
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.55,
+    "baseline_delta": -0.02,
+    "valid": true,
+    "framework": "vLLM",
+    "precision": "W4A16",
+    "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "11:12:27",
+    "run_id": "109a7792",
+    "run_name": "nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_109a7792",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:10:59.753021+00:00",
+    "benchmark_end_time": "2026-05-22T11:12:27.008821+00:00",
+    "benchmark_elapsed_minutes": 1.5,
+    "model_load_seconds": 16.9,
+    "benchmark_elapsed_minutes_note": "Total across ['offline'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w4a16/offline"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a16/accuracy/accuracy.json b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a16/accuracy/accuracy.json
new file mode 100644
index 00000000..296afa87
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.59,
+  "baseline_delta": 0.0,
+  "valid": true,
+  "framework": "vLLM",
+  "precision": "W8A16",
+  "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a16/offline/result.json b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a16/offline/result.json
new file mode 100644
index 00000000..e0604587
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a16/offline/result.json
@@ -0,0 +1,221 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:00:26.756229+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 8851.94,
+          "throughput_tokens_per_sec_per_chip": 8851.94,
+          "throughput_tokens_per_sec_total": 15963.0,
+          "elapsed_seconds_median": 4.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 8806.03,
+            "std": 91.92,
+            "cv_pct": 1.04,
+            "stability": "stable",
+            "runs": [
+              8851.94,
+              8700.21,
+              8865.95
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 8857.65,
+          "throughput_tokens_per_sec_per_chip": 8857.65,
+          "throughput_tokens_per_sec_total": 16020.53,
+          "elapsed_seconds_median": 3.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 8829.44,
+            "std": 137.52,
+            "cv_pct": 1.56,
+            "stability": "stable",
+            "runs": [
+              8950.67,
+              8680.0,
+              8857.65
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 8893.35,
+          "throughput_tokens_per_sec_per_chip": 8893.35,
+          "throughput_tokens_per_sec_total": 15970.82,
+          "elapsed_seconds_median": 4.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 8827.61,
+            "std": 134.27,
+            "cv_pct": 1.52,
+            "stability": "stable",
+            "runs": [
+              8916.33,
+              8673.13,
+              8893.35
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 8773.67,
+          "throughput_tokens_per_sec_per_chip": 8773.67,
+          "throughput_tokens_per_sec_total": 15760.82,
+          "elapsed_seconds_median": 4.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 8771.22,
+            "std": 98.04,
+            "cv_pct": 1.12,
+            "stability": "stable",
+            "runs": [
+              8773.67,
+              8671.98,
+              8868.01
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "11:09:15",
+    "run_id": "051e6316",
+    "run_name": "nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_051e6316",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:08:09.836599+00:00",
+    "benchmark_end_time": "2026-05-22T11:09:15.364459+00:00",
+    "benchmark_elapsed_minutes": 1.1,
+    "model_load_seconds": 18.0
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a16/result.json b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a16/result.json
new file mode 100644
index 00000000..0ada0247
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a16/result.json
@@ -0,0 +1,228 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:00:26.756229+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 8851.94,
+          "throughput_tokens_per_sec_per_chip": 8851.94,
+          "throughput_tokens_per_sec_total": 15963.0,
+          "elapsed_seconds_median": 4.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 8806.03,
+            "std": 91.92,
+            "cv_pct": 1.04,
+            "stability": "stable",
+            "runs": [
+              8851.94,
+              8700.21,
+              8865.95
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 8857.65,
+          "throughput_tokens_per_sec_per_chip": 8857.65,
+          "throughput_tokens_per_sec_total": 16020.53,
+          "elapsed_seconds_median": 3.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 8829.44,
+            "std": 137.52,
+            "cv_pct": 1.56,
+            "stability": "stable",
+            "runs": [
+              8950.67,
+              8680.0,
+              8857.65
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 8893.35,
+          "throughput_tokens_per_sec_per_chip": 8893.35,
+          "throughput_tokens_per_sec_total": 15970.82,
+          "elapsed_seconds_median": 4.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 8827.61,
+            "std": 134.27,
+            "cv_pct": 1.52,
+            "stability": "stable",
+            "runs": [
+              8916.33,
+              8673.13,
+              8893.35
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 8773.67,
+          "throughput_tokens_per_sec_per_chip": 8773.67,
+          "throughput_tokens_per_sec_total": 15760.82,
+          "elapsed_seconds_median": 4.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 8771.22,
+            "std": 98.04,
+            "cv_pct": 1.12,
+            "stability": "stable",
+            "runs": [
+              8773.67,
+              8671.98,
+              8868.01
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.59,
+    "baseline_delta": 0.0,
+    "valid": true,
+    "framework": "vLLM",
+    "precision": "W8A16",
+    "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "11:09:15",
+    "run_id": "051e6316",
+    "run_name": "nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_051e6316",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:08:09.836599+00:00",
+    "benchmark_end_time": "2026-05-22T11:09:15.364459+00:00",
+    "benchmark_elapsed_minutes": 1.1,
+    "model_load_seconds": 18.0,
+    "benchmark_elapsed_minutes_note": "Total across ['offline'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_b200x1_suite_C_nvidia_vllm020_0f6c56e4_ea976bca/w8a16/offline"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/accuracy/accuracy.json b/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/accuracy/accuracy.json
new file mode 100644
index 00000000..3e6d6c6c
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.55,
+  "baseline_delta": -0.01,
+  "valid": true,
+  "framework": "vLLM",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/env_info.json b/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/env_info.json
new file mode 100644
index 00000000..f2bb120e
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/env_info.json
@@ -0,0 +1,44 @@
+{
+  "collected_at": "2026-05-22T11:13:53.452954+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA B200",
+      "vendor": "NVIDIA",
+      "memory_gb": 179.1,
+      "driver_version": "595.71.05",
+      "firmware_version": null,
+      "compute_capability": "10.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_platform": "nvidia",
+  "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "Intel(R) Xeon(R) Platinum 8559C",
+    "physical_cores": 96,
+    "logical_cores": 192,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1996.0,
+  "pcie_generation": "PCIe Gen 5",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.5 LTS",
+  "python_version": "3.12.13",
+  "kernel_version": "6.17.0-1013-aws",
+  "runtime_version": "CUDA 13.0",
+  "pytorch_version": "2.11.0+cu130"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/interactive/result.json b/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/interactive/result.json
new file mode 100644
index 00000000..36b21c41
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/interactive/result.json
@@ -0,0 +1,138 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:13:53.452954+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 599.84,
+      "ttft_ms_p90": 620.68,
+      "ttft_ms_p99": 636.57,
+      "tpot_ms_p50": 4.09,
+      "tpot_ms_p90": 4.11,
+      "tpot_ms_p99": 4.12,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 164.1,
+      "ttft_ms_p99_reliability": {
+        "n": 2,
+        "mean": 637.52,
+        "std": 2.57,
+        "cv_pct": 0.4,
+        "stability": "stable",
+        "runs": [
+          639.34,
+          635.7
+        ]
+      }
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-25",
+    "time": "08:49:00",
+    "run_id": "c35cf907",
+    "run_name": "nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-25T08:43:32.662876+00:00",
+    "benchmark_end_time": "2026-05-25T08:49:00.780269+00:00",
+    "benchmark_elapsed_minutes": 5.5,
+    "model_load_seconds": 32.5
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/offline/result.json b/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/offline/result.json
new file mode 100644
index 00000000..b3228379
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/offline/result.json
@@ -0,0 +1,169 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:13:53.452954+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 392.64,
+          "throughput_tokens_per_sec_per_chip": 392.64,
+          "throughput_tokens_per_sec_total": 44317.41,
+          "elapsed_seconds_median": 32.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 2,
+            "mean": 392.64,
+            "std": 0.28,
+            "cv_pct": 0.07,
+            "stability": "stable",
+            "runs": [
+              392.84,
+              392.44
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 392.15,
+          "throughput_tokens_per_sec_per_chip": 392.15,
+          "throughput_tokens_per_sec_total": 44262.41,
+          "elapsed_seconds_median": 32.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 2,
+            "mean": 392.15,
+            "std": 0.02,
+            "cv_pct": 0.01,
+            "stability": "stable",
+            "runs": [
+              392.17,
+              392.14
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "11:18:17",
+    "run_id": "c35cf907",
+    "run_name": "nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:15:01.295237+00:00",
+    "benchmark_end_time": "2026-05-22T11:18:17.104254+00:00",
+    "benchmark_elapsed_minutes": 3.3,
+    "model_load_seconds": 15.7
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/online/result.json b/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/online/result.json
new file mode 100644
index 00000000..e299f948
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/online/result.json
@@ -0,0 +1,192 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:13:53.452954+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 5000,
+      "max_valid_qps": 1,
+      "results_by_qps": [
+        {
+          "target_qps": 0.5,
+          "achieved_qps": 0.5,
+          "ttft_ms_p50": 627.57,
+          "ttft_ms_p90": 1041.15,
+          "ttft_ms_p99": 1676.04,
+          "tpot_ms_p50": 6.44,
+          "tpot_ms_p90": 11.11,
+          "tpot_ms_p99": 24.51,
+          "elapsed_seconds_median": 201.7,
+          "sla_met": true,
+          "ttft_ms_p99_reliability": {
+            "n": 2,
+            "mean": 1470.83,
+            "std": 392.14,
+            "cv_pct": 26.66,
+            "stability": "high-variance",
+            "runs": [
+              1748.11,
+              1193.54
+            ]
+          }
+        },
+        {
+          "target_qps": 1,
+          "achieved_qps": 1.0,
+          "ttft_ms_p50": 733.03,
+          "ttft_ms_p90": 1441.58,
+          "ttft_ms_p99": 1987.27,
+          "tpot_ms_p50": 10.92,
+          "tpot_ms_p90": 22.24,
+          "tpot_ms_p99": 29.0,
+          "elapsed_seconds_median": 114.9,
+          "sla_met": true,
+          "ttft_ms_p99_reliability": {
+            "n": 2,
+            "mean": 1955.72,
+            "std": 44.61,
+            "cv_pct": 2.28,
+            "stability": "stable",
+            "runs": [
+              1987.27,
+              1924.18
+            ]
+          }
+        },
+        {
+          "target_qps": 2,
+          "achieved_qps": 2.0,
+          "ttft_ms_p50": 9263.55,
+          "ttft_ms_p90": 17389.95,
+          "ttft_ms_p99": 22195.2,
+          "tpot_ms_p50": 48.55,
+          "tpot_ms_p90": 49.18,
+          "tpot_ms_p99": 49.35,
+          "elapsed_seconds_median": 70.0,
+          "sla_met": false,
+          "ttft_ms_p99_reliability": {
+            "n": 2,
+            "mean": 19059.25,
+            "std": 4927.55,
+            "cv_pct": 25.85,
+            "stability": "high-variance",
+            "runs": [
+              22543.56,
+              15574.95
+            ]
+          }
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-25",
+    "time": "09:33:20",
+    "run_id": "c35cf907",
+    "run_name": "nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-25T09:20:11.033130+00:00",
+    "benchmark_end_time": "2026-05-25T09:33:20.789590+00:00",
+    "benchmark_elapsed_minutes": 13.2,
+    "model_load_seconds": 13.9
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/result.json b/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/result.json
new file mode 100644
index 00000000..01711191
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/result.json
@@ -0,0 +1,617 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:13:53.452954+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "interactive",
+      "sustained",
+      "online"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 2,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 392.64,
+          "throughput_tokens_per_sec_per_chip": 392.64,
+          "throughput_tokens_per_sec_total": 44317.41,
+          "elapsed_seconds_median": 32.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 2,
+            "mean": 392.64,
+            "std": 0.28,
+            "cv_pct": 0.07,
+            "stability": "stable",
+            "runs": [
+              392.84,
+              392.44
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 392.15,
+          "throughput_tokens_per_sec_per_chip": 392.15,
+          "throughput_tokens_per_sec_total": 44262.41,
+          "elapsed_seconds_median": 32.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 2,
+            "mean": 392.15,
+            "std": 0.02,
+            "cv_pct": 0.01,
+            "stability": "stable",
+            "runs": [
+              392.17,
+              392.14
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 599.84,
+      "ttft_ms_p90": 620.68,
+      "ttft_ms_p99": 636.57,
+      "tpot_ms_p50": 4.09,
+      "tpot_ms_p90": 4.11,
+      "tpot_ms_p99": 4.12,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 164.1,
+      "ttft_ms_p99_reliability": {
+        "n": 2,
+        "mean": 637.52,
+        "std": 2.57,
+        "cv_pct": 0.4,
+        "stability": "stable",
+        "runs": [
+          639.34,
+          635.7
+        ]
+      }
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 311.4,
+          "tokens_out": 18688,
+          "tokens_in": 0,
+          "requests_completed": 73,
+          "ttft_ms_p50": 1017.3,
+          "ttft_ms_p99": 4280.7
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.2,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 970.6,
+          "ttft_ms_p99": 1157.1
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 1020.2,
+          "ttft_ms_p99": 1128.3
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 337.2,
+          "tokens_out": 20224,
+          "tokens_in": 0,
+          "requests_completed": 79,
+          "ttft_ms_p50": 982.6,
+          "ttft_ms_p99": 1092.7
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 983.7,
+          "ttft_ms_p99": 1170.5
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.2,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 999.2,
+          "ttft_ms_p99": 1113.0
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.5,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 954.9,
+          "ttft_ms_p99": 1161.0
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.2,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 1026.8,
+          "ttft_ms_p99": 1161.2
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.5,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 982.9,
+          "ttft_ms_p99": 1110.2
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.1,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 982.9,
+          "ttft_ms_p99": 1155.2
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 997.9,
+          "ttft_ms_p99": 1113.3
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.4,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 955.1,
+          "ttft_ms_p99": 1164.9
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 1019.6,
+          "ttft_ms_p99": 1245.2
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 984.3,
+          "ttft_ms_p99": 1109.2
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 337.2,
+          "tokens_out": 20224,
+          "tokens_in": 0,
+          "requests_completed": 79,
+          "ttft_ms_p50": 986.7,
+          "ttft_ms_p99": 1171.3
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.4,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 1002.4,
+          "ttft_ms_p99": 1122.4
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 975.1,
+          "ttft_ms_p99": 1162.6
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 337.0,
+          "tokens_out": 20224,
+          "tokens_in": 0,
+          "requests_completed": 79,
+          "ttft_ms_p50": 1053.6,
+          "ttft_ms_p99": 1161.7
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.2,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 981.7,
+          "ttft_ms_p99": 1092.2
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 337.1,
+          "tokens_out": 20224,
+          "tokens_in": 0,
+          "requests_completed": 79,
+          "ttft_ms_p50": 982.7,
+          "ttft_ms_p99": 1169.6
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.4,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 999.7,
+          "ttft_ms_p99": 1197.2
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.4,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 975.5,
+          "ttft_ms_p99": 1160.0
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 337.2,
+          "tokens_out": 20224,
+          "tokens_in": 0,
+          "requests_completed": 79,
+          "ttft_ms_p50": 1021.8,
+          "ttft_ms_p99": 1165.8
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 980.9,
+          "ttft_ms_p99": 1093.8
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 337.0,
+          "tokens_out": 20224,
+          "tokens_in": 0,
+          "requests_completed": 79,
+          "ttft_ms_p50": 981.8,
+          "ttft_ms_p99": 1157.1
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 1018.1,
+          "ttft_ms_p99": 1131.0
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 337.1,
+          "tokens_out": 20224,
+          "tokens_in": 0,
+          "requests_completed": 79,
+          "ttft_ms_p50": 979.7,
+          "ttft_ms_p99": 1159.3
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 1021.3,
+          "ttft_ms_p99": 1131.5
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 980.8,
+          "ttft_ms_p99": 1085.5
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 340.3,
+      "throttle_ratio": 0.987,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -71.6,
+      "throughput_post_warmup_reliability": {
+        "n": 28,
+        "mean": 340.3,
+        "std": 1.9,
+        "cv_pct": 0.54,
+        "stability": "stable",
+        "runs": [
+          341.2,
+          341.3,
+          337.2,
+          341.3,
+          341.2,
+          341.5,
+          341.2,
+          341.5,
+          341.1,
+          341.3,
+          341.4,
+          341.3,
+          341.3,
+          337.2,
+          341.4,
+          341.3,
+          337.0,
+          341.2,
+          337.1,
+          341.4,
+          341.4,
+          337.2,
+          341.3,
+          337.0,
+          341.3,
+          337.1,
+          341.3,
+          341.3
+        ]
+      }
+    },
+    "online": {
+      "sla_ttft_ms": 5000,
+      "max_valid_qps": 1,
+      "results_by_qps": [
+        {
+          "target_qps": 0.5,
+          "achieved_qps": 0.5,
+          "ttft_ms_p50": 627.57,
+          "ttft_ms_p90": 1041.15,
+          "ttft_ms_p99": 1676.04,
+          "tpot_ms_p50": 6.44,
+          "tpot_ms_p90": 11.11,
+          "tpot_ms_p99": 24.51,
+          "elapsed_seconds_median": 201.7,
+          "sla_met": true,
+          "ttft_ms_p99_reliability": {
+            "n": 2,
+            "mean": 1470.83,
+            "std": 392.14,
+            "cv_pct": 26.66,
+            "stability": "high-variance",
+            "runs": [
+              1748.11,
+              1193.54
+            ]
+          }
+        },
+        {
+          "target_qps": 1,
+          "achieved_qps": 1.0,
+          "ttft_ms_p50": 733.03,
+          "ttft_ms_p90": 1441.58,
+          "ttft_ms_p99": 1987.27,
+          "tpot_ms_p50": 10.92,
+          "tpot_ms_p90": 22.24,
+          "tpot_ms_p99": 29.0,
+          "elapsed_seconds_median": 114.9,
+          "sla_met": true,
+          "ttft_ms_p99_reliability": {
+            "n": 2,
+            "mean": 1955.72,
+            "std": 44.61,
+            "cv_pct": 2.28,
+            "stability": "stable",
+            "runs": [
+              1987.27,
+              1924.18
+            ]
+          }
+        },
+        {
+          "target_qps": 2,
+          "achieved_qps": 2.0,
+          "ttft_ms_p50": 9263.55,
+          "ttft_ms_p90": 17389.95,
+          "ttft_ms_p99": 22195.2,
+          "tpot_ms_p50": 48.55,
+          "tpot_ms_p90": 49.18,
+          "tpot_ms_p99": 49.35,
+          "elapsed_seconds_median": 70.0,
+          "sla_met": false,
+          "ttft_ms_p99_reliability": {
+            "n": 2,
+            "mean": 19059.25,
+            "std": 4927.55,
+            "cv_pct": 25.85,
+            "stability": "high-variance",
+            "runs": [
+              22543.56,
+              15574.95
+            ]
+          }
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.55,
+    "baseline_delta": -0.01,
+    "valid": true,
+    "framework": "vLLM",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "11:18:17",
+    "run_id": "c35cf907",
+    "run_name": "nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:15:01.295237+00:00",
+    "benchmark_end_time": "2026-05-22T11:18:17.104254+00:00",
+    "benchmark_elapsed_minutes": 52.1,
+    "model_load_seconds": 15.7,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'interactive', 'sustained', 'online'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/offline",
+      "interactive": "results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/interactive",
+      "sustained": "results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/sustained",
+      "online": "results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/online"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/sustained/result.json b/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/sustained/result.json
new file mode 100644
index 00000000..0b3813c5
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907/sustained/result.json
@@ -0,0 +1,456 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:13:53.452954+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 311.4,
+          "tokens_out": 18688,
+          "tokens_in": 0,
+          "requests_completed": 73,
+          "ttft_ms_p50": 1017.3,
+          "ttft_ms_p99": 4280.7
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.2,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 970.6,
+          "ttft_ms_p99": 1157.1
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 1020.2,
+          "ttft_ms_p99": 1128.3
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 337.2,
+          "tokens_out": 20224,
+          "tokens_in": 0,
+          "requests_completed": 79,
+          "ttft_ms_p50": 982.6,
+          "ttft_ms_p99": 1092.7
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 983.7,
+          "ttft_ms_p99": 1170.5
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.2,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 999.2,
+          "ttft_ms_p99": 1113.0
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.5,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 954.9,
+          "ttft_ms_p99": 1161.0
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.2,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 1026.8,
+          "ttft_ms_p99": 1161.2
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.5,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 982.9,
+          "ttft_ms_p99": 1110.2
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.1,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 982.9,
+          "ttft_ms_p99": 1155.2
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 997.9,
+          "ttft_ms_p99": 1113.3
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.4,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 955.1,
+          "ttft_ms_p99": 1164.9
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 1019.6,
+          "ttft_ms_p99": 1245.2
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 984.3,
+          "ttft_ms_p99": 1109.2
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 337.2,
+          "tokens_out": 20224,
+          "tokens_in": 0,
+          "requests_completed": 79,
+          "ttft_ms_p50": 986.7,
+          "ttft_ms_p99": 1171.3
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.4,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 1002.4,
+          "ttft_ms_p99": 1122.4
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 975.1,
+          "ttft_ms_p99": 1162.6
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 337.0,
+          "tokens_out": 20224,
+          "tokens_in": 0,
+          "requests_completed": 79,
+          "ttft_ms_p50": 1053.6,
+          "ttft_ms_p99": 1161.7
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.2,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 981.7,
+          "ttft_ms_p99": 1092.2
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 337.1,
+          "tokens_out": 20224,
+          "tokens_in": 0,
+          "requests_completed": 79,
+          "ttft_ms_p50": 982.7,
+          "ttft_ms_p99": 1169.6
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.4,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 999.7,
+          "ttft_ms_p99": 1197.2
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.4,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 975.5,
+          "ttft_ms_p99": 1160.0
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 337.2,
+          "tokens_out": 20224,
+          "tokens_in": 0,
+          "requests_completed": 79,
+          "ttft_ms_p50": 1021.8,
+          "ttft_ms_p99": 1165.8
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 980.9,
+          "ttft_ms_p99": 1093.8
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 337.0,
+          "tokens_out": 20224,
+          "tokens_in": 0,
+          "requests_completed": 79,
+          "ttft_ms_p50": 981.8,
+          "ttft_ms_p99": 1157.1
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 1018.1,
+          "ttft_ms_p99": 1131.0
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 337.1,
+          "tokens_out": 20224,
+          "tokens_in": 0,
+          "requests_completed": 79,
+          "ttft_ms_p50": 979.7,
+          "ttft_ms_p99": 1159.3
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 1021.3,
+          "ttft_ms_p99": 1131.5
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 341.3,
+          "tokens_out": 20480,
+          "tokens_in": 0,
+          "requests_completed": 80,
+          "ttft_ms_p50": 980.8,
+          "ttft_ms_p99": 1085.5
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 340.3,
+      "throttle_ratio": 0.987,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -71.6,
+      "throughput_post_warmup_reliability": {
+        "n": 28,
+        "mean": 340.3,
+        "std": 1.9,
+        "cv_pct": 0.54,
+        "stability": "stable",
+        "runs": [
+          341.2,
+          341.3,
+          337.2,
+          341.3,
+          341.2,
+          341.5,
+          341.2,
+          341.5,
+          341.1,
+          341.3,
+          341.4,
+          341.3,
+          341.3,
+          337.2,
+          341.4,
+          341.3,
+          337.0,
+          341.2,
+          337.1,
+          341.4,
+          341.4,
+          337.2,
+          341.3,
+          337.0,
+          341.3,
+          337.1,
+          341.3,
+          341.3
+        ]
+      }
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-25",
+    "time": "09:19:38",
+    "run_id": "c35cf907",
+    "run_name": "nvidia_b200x1_suite_D_nvidia_vllm020_0f6c56e4_c35cf907",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-25T08:49:32.474610+00:00",
+    "benchmark_end_time": "2026-05-25T09:19:38.964660+00:00",
+    "benchmark_elapsed_minutes": 30.1,
+    "model_load_seconds": 13.6
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/accuracy/accuracy.json b/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/accuracy/accuracy.json
new file mode 100644
index 00000000..66467933
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.4,
+  "baseline_delta": 0.02,
+  "valid": true,
+  "framework": "vLLM",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/env_info.json b/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/env_info.json
new file mode 100644
index 00000000..8e183ac0
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/env_info.json
@@ -0,0 +1,44 @@
+{
+  "collected_at": "2026-05-22T11:19:19.953009+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA B200",
+      "vendor": "NVIDIA",
+      "memory_gb": 179.1,
+      "driver_version": "595.71.05",
+      "firmware_version": null,
+      "compute_capability": "10.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_platform": "nvidia",
+  "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "Intel(R) Xeon(R) Platinum 8559C",
+    "physical_cores": 96,
+    "logical_cores": 192,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1996.0,
+  "pcie_generation": "PCIe Gen 5",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.5 LTS",
+  "python_version": "3.12.13",
+  "kernel_version": "6.17.0-1013-aws",
+  "runtime_version": "CUDA 13.0",
+  "pytorch_version": "2.11.0+cu130"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/interactive/result.json b/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/interactive/result.json
new file mode 100644
index 00000000..0e4329b2
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/interactive/result.json
@@ -0,0 +1,139 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:19:19.953009+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 6.14,
+      "ttft_ms_p90": 6.66,
+      "ttft_ms_p99": 7.75,
+      "tpot_ms_p50": 1.27,
+      "tpot_ms_p90": 1.29,
+      "tpot_ms_p99": 1.34,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 40.8,
+      "ttft_ms_p99_reliability": {
+        "n": 3,
+        "mean": 7.57,
+        "std": 0.95,
+        "cv_pct": 12.48,
+        "stability": "high-variance",
+        "runs": [
+          8.43,
+          7.72,
+          6.56
+        ]
+      }
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "11:25:57",
+    "run_id": "ef7c9362",
+    "run_name": "nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:23:54.757444+00:00",
+    "benchmark_end_time": "2026-05-22T11:25:57.449221+00:00",
+    "benchmark_elapsed_minutes": 2.0,
+    "model_load_seconds": 10.3
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/offline/result.json b/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/offline/result.json
new file mode 100644
index 00000000..d7685d4b
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/offline/result.json
@@ -0,0 +1,196 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:19:19.953009+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 40406.2,
+          "throughput_tokens_per_sec_per_chip": 40406.2,
+          "throughput_tokens_per_sec_total": 60226.68,
+          "elapsed_seconds_median": 1.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 40333.75,
+            "std": 326.78,
+            "cv_pct": 0.81,
+            "stability": "stable",
+            "runs": [
+              40618.23,
+              40406.2,
+              39976.83
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 40280.84,
+          "throughput_tokens_per_sec_per_chip": 40280.84,
+          "throughput_tokens_per_sec_total": 60077.93,
+          "elapsed_seconds_median": 1.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 40327.57,
+            "std": 183.4,
+            "cv_pct": 0.45,
+            "stability": "stable",
+            "runs": [
+              40280.84,
+              40529.82,
+              40172.05
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 40387.24,
+          "throughput_tokens_per_sec_per_chip": 40387.24,
+          "throughput_tokens_per_sec_total": 60236.62,
+          "elapsed_seconds_median": 1.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 40423.33,
+            "std": 134.85,
+            "cv_pct": 0.33,
+            "stability": "stable",
+            "runs": [
+              40310.2,
+              40572.56,
+              40387.24
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "11:20:39",
+    "run_id": "ef7c9362",
+    "run_name": "nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:20:26.814666+00:00",
+    "benchmark_end_time": "2026-05-22T11:20:39.423697+00:00",
+    "benchmark_elapsed_minutes": 0.2,
+    "model_load_seconds": 17.4
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/online/result.json b/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/online/result.json
new file mode 100644
index 00000000..94b805d6
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/online/result.json
@@ -0,0 +1,171 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:19:19.953009+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 40,
+      "results_by_qps": [
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 8.15,
+          "ttft_ms_p90": 8.73,
+          "ttft_ms_p99": 12.48,
+          "tpot_ms_p50": 1.39,
+          "tpot_ms_p90": 1.5,
+          "tpot_ms_p99": 1.61,
+          "elapsed_seconds_median": 31.8,
+          "sla_met": true,
+          "ttft_ms_p99_reliability": {
+            "n": 3,
+            "mean": 12.48,
+            "std": 0.6,
+            "cv_pct": 4.78,
+            "stability": "noisy",
+            "runs": [
+              13.16,
+              12.19,
+              12.08
+            ]
+          }
+        },
+        {
+          "target_qps": 40,
+          "achieved_qps": 40.0,
+          "ttft_ms_p50": 9.25,
+          "ttft_ms_p90": 13.26,
+          "ttft_ms_p99": 16.48,
+          "tpot_ms_p50": 1.86,
+          "tpot_ms_p90": 2.13,
+          "tpot_ms_p99": 2.28,
+          "elapsed_seconds_median": 7.7,
+          "sla_met": true,
+          "ttft_ms_p99_reliability": {
+            "n": 3,
+            "mean": 16.0,
+            "std": 1.57,
+            "cv_pct": 9.83,
+            "stability": "high-variance",
+            "runs": [
+              17.22,
+              14.23,
+              16.56
+            ]
+          }
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "11:23:25",
+    "run_id": "ef7c9362",
+    "run_name": "nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:21:24.333603+00:00",
+    "benchmark_end_time": "2026-05-22T11:23:25.745227+00:00",
+    "benchmark_elapsed_minutes": 2.0,
+    "model_load_seconds": 26.3
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/result.json b/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/result.json
new file mode 100644
index 00000000..523af6a1
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/result.json
@@ -0,0 +1,460 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:19:19.953009+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "interactive",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 40406.2,
+          "throughput_tokens_per_sec_per_chip": 40406.2,
+          "throughput_tokens_per_sec_total": 60226.68,
+          "elapsed_seconds_median": 1.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 40333.75,
+            "std": 326.78,
+            "cv_pct": 0.81,
+            "stability": "stable",
+            "runs": [
+              40618.23,
+              40406.2,
+              39976.83
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 40280.84,
+          "throughput_tokens_per_sec_per_chip": 40280.84,
+          "throughput_tokens_per_sec_total": 60077.93,
+          "elapsed_seconds_median": 1.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 40327.57,
+            "std": 183.4,
+            "cv_pct": 0.45,
+            "stability": "stable",
+            "runs": [
+              40280.84,
+              40529.82,
+              40172.05
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 40387.24,
+          "throughput_tokens_per_sec_per_chip": 40387.24,
+          "throughput_tokens_per_sec_total": 60236.62,
+          "elapsed_seconds_median": 1.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "throughput_tokens_per_sec_reliability": {
+            "n": 3,
+            "mean": 40423.33,
+            "std": 134.85,
+            "cv_pct": 0.33,
+            "stability": "stable",
+            "runs": [
+              40310.2,
+              40572.56,
+              40387.24
+            ]
+          },
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 40,
+      "results_by_qps": [
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 8.15,
+          "ttft_ms_p90": 8.73,
+          "ttft_ms_p99": 12.48,
+          "tpot_ms_p50": 1.39,
+          "tpot_ms_p90": 1.5,
+          "tpot_ms_p99": 1.61,
+          "elapsed_seconds_median": 31.8,
+          "sla_met": true,
+          "ttft_ms_p99_reliability": {
+            "n": 3,
+            "mean": 12.48,
+            "std": 0.6,
+            "cv_pct": 4.78,
+            "stability": "noisy",
+            "runs": [
+              13.16,
+              12.19,
+              12.08
+            ]
+          }
+        },
+        {
+          "target_qps": 40,
+          "achieved_qps": 40.0,
+          "ttft_ms_p50": 9.25,
+          "ttft_ms_p90": 13.26,
+          "ttft_ms_p99": 16.48,
+          "tpot_ms_p50": 1.86,
+          "tpot_ms_p90": 2.13,
+          "tpot_ms_p99": 2.28,
+          "elapsed_seconds_median": 7.7,
+          "sla_met": true,
+          "ttft_ms_p99_reliability": {
+            "n": 3,
+            "mean": 16.0,
+            "std": 1.57,
+            "cv_pct": 9.83,
+            "stability": "high-variance",
+            "runs": [
+              17.22,
+              14.23,
+              16.56
+            ]
+          }
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 6.14,
+      "ttft_ms_p90": 6.66,
+      "ttft_ms_p99": 7.75,
+      "tpot_ms_p50": 1.27,
+      "tpot_ms_p90": 1.29,
+      "tpot_ms_p99": 1.34,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 40.8,
+      "ttft_ms_p99_reliability": {
+        "n": 3,
+        "mean": 7.57,
+        "std": 0.95,
+        "cv_pct": 12.48,
+        "stability": "high-variance",
+        "runs": [
+          8.43,
+          7.72,
+          6.56
+        ]
+      }
+    },
+    "sustained": {
+      "sustained_concurrency": 32,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13004.8,
+          "tokens_out": 780773,
+          "tokens_in": 0,
+          "requests_completed": 3739,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 39.8
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13172.1,
+          "tokens_out": 790104,
+          "tokens_in": 0,
+          "requests_completed": 3781,
+          "ttft_ms_p50": 9.7,
+          "ttft_ms_p99": 17.3
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13168.2,
+          "tokens_out": 789862,
+          "tokens_in": 0,
+          "requests_completed": 3786,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 17.3
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13279.4,
+          "tokens_out": 797350,
+          "tokens_in": 0,
+          "requests_completed": 3826,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 17.2
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13197.2,
+          "tokens_out": 791576,
+          "tokens_in": 0,
+          "requests_completed": 3789,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 17.3
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13141.7,
+          "tokens_out": 788568,
+          "tokens_in": 0,
+          "requests_completed": 3794,
+          "ttft_ms_p50": 9.7,
+          "ttft_ms_p99": 17.4
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13155.2,
+          "tokens_out": 788960,
+          "tokens_in": 0,
+          "requests_completed": 3787,
+          "ttft_ms_p50": 9.7,
+          "ttft_ms_p99": 17.3
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13167.2,
+          "tokens_out": 790032,
+          "tokens_in": 0,
+          "requests_completed": 3793,
+          "ttft_ms_p50": 9.7,
+          "ttft_ms_p99": 17.3
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13199.1,
+          "tokens_out": 792415,
+          "tokens_in": 0,
+          "requests_completed": 3791,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 17.4
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13284.6,
+          "tokens_out": 796782,
+          "tokens_in": 0,
+          "requests_completed": 3818,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 17.3
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13196.6,
+          "tokens_out": 792186,
+          "tokens_in": 0,
+          "requests_completed": 3799,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 17.2
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13155.4,
+          "tokens_out": 788693,
+          "tokens_in": 0,
+          "requests_completed": 3786,
+          "ttft_ms_p50": 9.7,
+          "ttft_ms_p99": 17.4
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13191.5,
+          "tokens_out": 791827,
+          "tokens_in": 0,
+          "requests_completed": 3796,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 17.3
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13135.2,
+          "tokens_out": 787979,
+          "tokens_in": 0,
+          "requests_completed": 3779,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 17.4
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 13174.9,
+      "throttle_ratio": 0.979,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -22.4,
+      "throughput_post_warmup_reliability": {
+        "n": 14,
+        "mean": 13174.9,
+        "std": 66.4,
+        "cv_pct": 0.5,
+        "stability": "stable",
+        "runs": [
+          13004.8,
+          13172.1,
+          13168.2,
+          13279.4,
+          13197.2,
+          13141.7,
+          13155.2,
+          13167.2,
+          13199.1,
+          13284.6,
+          13196.6,
+          13155.4,
+          13191.5,
+          13135.2
+        ]
+      }
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.4,
+    "baseline_delta": 0.02,
+    "valid": true,
+    "framework": "vLLM",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-22",
+    "time": "11:20:39",
+    "run_id": "ef7c9362",
+    "run_name": "nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-22T11:20:26.814666+00:00",
+    "benchmark_end_time": "2026-05-22T11:20:39.423697+00:00",
+    "benchmark_elapsed_minutes": 19.2,
+    "model_load_seconds": 17.4,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'interactive', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/offline",
+      "online": "results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/online",
+      "interactive": "results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/interactive",
+      "sustained": "results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/sustained/result.json b/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/sustained/result.json
new file mode 100644
index 00000000..9c1844ce
--- /dev/null
+++ b/results/community/nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362/sustained/result.json
@@ -0,0 +1,292 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA B200",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 179.1,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-22T11:19:19.953009+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA B200",
+        "vendor": "NVIDIA",
+        "memory_gb": 179.1,
+        "driver_version": "595.71.05",
+        "firmware_version": null,
+        "compute_capability": "10.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-47,96-143\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8559C",
+      "physical_cores": 96,
+      "logical_cores": 192,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1996.0,
+    "pcie_generation": "PCIe Gen 5",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13",
+    "kernel_version": "6.17.0-1013-aws",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.9.0",
+    "driver_version": "595.71.05",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.5 LTS",
+    "python_version": "3.12.13"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 32,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13004.8,
+          "tokens_out": 780773,
+          "tokens_in": 0,
+          "requests_completed": 3739,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 39.8
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13172.1,
+          "tokens_out": 790104,
+          "tokens_in": 0,
+          "requests_completed": 3781,
+          "ttft_ms_p50": 9.7,
+          "ttft_ms_p99": 17.3
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13168.2,
+          "tokens_out": 789862,
+          "tokens_in": 0,
+          "requests_completed": 3786,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 17.3
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13279.4,
+          "tokens_out": 797350,
+          "tokens_in": 0,
+          "requests_completed": 3826,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 17.2
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13197.2,
+          "tokens_out": 791576,
+          "tokens_in": 0,
+          "requests_completed": 3789,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 17.3
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13141.7,
+          "tokens_out": 788568,
+          "tokens_in": 0,
+          "requests_completed": 3794,
+          "ttft_ms_p50": 9.7,
+          "ttft_ms_p99": 17.4
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13155.2,
+          "tokens_out": 788960,
+          "tokens_in": 0,
+          "requests_completed": 3787,
+          "ttft_ms_p50": 9.7,
+          "ttft_ms_p99": 17.3
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13167.2,
+          "tokens_out": 790032,
+          "tokens_in": 0,
+          "requests_completed": 3793,
+          "ttft_ms_p50": 9.7,
+          "ttft_ms_p99": 17.3
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13199.1,
+          "tokens_out": 792415,
+          "tokens_in": 0,
+          "requests_completed": 3791,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 17.4
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13284.6,
+          "tokens_out": 796782,
+          "tokens_in": 0,
+          "requests_completed": 3818,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 17.3
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13196.6,
+          "tokens_out": 792186,
+          "tokens_in": 0,
+          "requests_completed": 3799,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 17.2
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13155.4,
+          "tokens_out": 788693,
+          "tokens_in": 0,
+          "requests_completed": 3786,
+          "ttft_ms_p50": 9.7,
+          "ttft_ms_p99": 17.4
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13191.5,
+          "tokens_out": 791827,
+          "tokens_in": 0,
+          "requests_completed": 3796,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 17.3
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 13135.2,
+          "tokens_out": 787979,
+          "tokens_in": 0,
+          "requests_completed": 3779,
+          "ttft_ms_p50": 9.6,
+          "ttft_ms_p99": 17.4
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 13174.9,
+      "throttle_ratio": 0.979,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -22.4,
+      "throughput_post_warmup_reliability": {
+        "n": 14,
+        "mean": 13174.9,
+        "std": 66.4,
+        "cv_pct": 0.5,
+        "stability": "stable",
+        "runs": [
+          13004.8,
+          13172.1,
+          13168.2,
+          13279.4,
+          13197.2,
+          13141.7,
+          13155.2,
+          13167.2,
+          13199.1,
+          13284.6,
+          13196.6,
+          13155.4,
+          13191.5,
+          13135.2
+        ]
+      }
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-25",
+    "time": "09:59:33",
+    "run_id": "ef7c9362",
+    "run_name": "nvidia_b200x1_suite_F_nvidia_vllm020_0f6c56e4_ef7c9362",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-25T09:44:32.432302+00:00",
+    "benchmark_end_time": "2026-05-25T09:59:33.602088+00:00",
+    "benchmark_elapsed_minutes": 15.0,
+    "model_load_seconds": 25.0
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/accuracy/accuracy.json b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/accuracy/accuracy.json
new file mode 100644
index 00000000..ca1b4692
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.6,
+  "baseline_delta": 0.0,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/burst/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/burst/result.json
new file mode 100644
index 00000000..7c9ab3df
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/burst/result.json
@@ -0,0 +1,143 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T07:39:40.369666+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "burst",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "burst": {
+      "sla_ttft_ms": 500,
+      "burst_steady_qps": 5,
+      "burst_peak_qps": 25,
+      "burst_duration_seconds": 30,
+      "burst_interval_seconds": 120,
+      "steady_requests_total": 1812,
+      "burst_requests_total": 2245,
+      "steady_ttft_p50_ms": 65.89,
+      "steady_ttft_p99_ms": 6708.31,
+      "burst_ttft_p50_ms": 9683.26,
+      "burst_ttft_p99_ms": 22975.84,
+      "sla_met_during_burst": false,
+      "burst_degradation_ratio": 3.425,
+      "results_by_cycle": [
+        {
+          "cycle": 1,
+          "steady_requests": 581,
+          "burst_requests": 760,
+          "steady_ttft_p99_ms": 7772.85,
+          "burst_ttft_p99_ms": 23080.27
+        },
+        {
+          "cycle": 2,
+          "steady_requests": 595,
+          "burst_requests": 734,
+          "steady_ttft_p99_ms": 119.28,
+          "burst_ttft_p99_ms": 22460.91
+        },
+        {
+          "cycle": 3,
+          "steady_requests": 636,
+          "burst_requests": 751,
+          "steady_ttft_p99_ms": 123.02,
+          "burst_ttft_p99_ms": 22360.77
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "09:30:01",
+    "run_id": "9b2b01fd",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T09:20:56.451446+00:00",
+    "benchmark_end_time": "2026-05-07T09:30:01.912668+00:00",
+    "benchmark_elapsed_minutes": 9.1,
+    "model_load_seconds": 93.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/env_info.json b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/env_info.json
new file mode 100644
index 00000000..de151680
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/env_info.json
@@ -0,0 +1,32 @@
+{
+  "collected_at": "2026-05-07T07:39:40.369666+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA GeForce RTX 4090",
+      "vendor": "NVIDIA",
+      "memory_gb": 24.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.9",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "INTEL(R) XEON(R) GOLD 6530",
+    "physical_cores": 64,
+    "logical_cores": 128,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.6,
+  "pcie_generation": "PCIe Gen 1",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": null,
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.10.20",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 12.8",
+  "pytorch_version": "2.9.1+cu128"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/interactive/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/interactive/result.json
new file mode 100644
index 00000000..04ecf18d
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/interactive/result.json
@@ -0,0 +1,115 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T07:39:40.369666+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 34.55,
+      "ttft_ms_p90": 56.25,
+      "ttft_ms_p99": 80.59,
+      "tpot_ms_p50": 16.58,
+      "tpot_ms_p90": 16.64,
+      "tpot_ms_p99": 16.75,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 485.8
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "08:25:51",
+    "run_id": "9b2b01fd",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T08:01:18.850226+00:00",
+    "benchmark_end_time": "2026-05-07T08:25:51.605313+00:00",
+    "benchmark_elapsed_minutes": 24.5,
+    "model_load_seconds": 163.0
+  }
+}
\ No newline at end of file
diff --git a/results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/offline/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/offline/result.json
similarity index 54%
rename from results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/offline/result.json
rename to results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/offline/result.json
index a050fe47..9a373f6d 100644
--- a/results/community/mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0/offline/result.json
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/offline/result.json
@@ -1,70 +1,54 @@
 {
   "schema_version": "1.0",
   "suite_id": "suite_A",
-  "implementation_id": "moorethreads_vllm_musa_f2f6f965",
+  "implementation_id": "nvidia_sglang_c43a8309",
   "chip": {
-    "name": "MTT S4000",
-    "vendor": "Moore Threads",
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
     "count": 1,
-    "memory_gb": 48.0,
+    "memory_gb": 24.0,
     "interconnect_intra_node": null,
     "interconnect_inter_node": null
   },
   "environment": {
-    "collected_at": "2026-05-18T09:21:31.092840+00:00",
+    "collected_at": "2026-05-07T07:39:40.369666+00:00",
     "accelerators": [
       {
         "index": 0,
-        "name": "MTT S4000",
-        "vendor": "Moore Threads",
-        "memory_gb": 48.0,
-        "driver_version": "2.7.0",
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
         "firmware_version": null,
+        "compute_capability": "8.9",
         "supports_bf16": true
       }
     ],
-    "accelerator_platform": "moorethreads",
-    "accelerator_topology": null,
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
     "intra_node_interconnect": null,
     "cpu": {
-      "model": "Intel(R) Xeon(R) Gold 6430",
+      "model": "INTEL(R) XEON(R) GOLD 6530",
       "physical_cores": 64,
       "logical_cores": 128,
       "numa_nodes": 2
     },
-    "system_memory_gb": 1007.5,
-    "pcie_generation": "PCIe 16x/16x",
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
     "cpu_accelerator_bandwidth_gbs": null,
-    "network_interfaces": [
-      {
-        "name": "mlx5_0",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_1",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      },
-      {
-        "name": "mlx5_bond_0",
-        "type": "InfiniBand/RoCE",
-        "bandwidth_gbps": null
-      }
-    ],
-    "os": "Ubuntu Jammy Jellyfish (development branch)",
-    "python_version": "3.10.8",
-    "kernel_version": "5.15.0-105-generic",
-    "runtime_version": "Moore Threads Driver 2.7.0",
-    "pytorch_version": "2.2.0"
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
   },
   "software": {
-    "framework": "vllm-musa",
-    "framework_version": "0.4.2",
-    "driver_version": "2.7.0",
-    "runtime_version": "Moore Threads Driver 2.7.0",
-    "os": "Ubuntu Jammy Jellyfish (development branch)",
-    "python_version": "3.10.8"
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
   },
   "model": {
     "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
@@ -75,7 +59,7 @@
     "architecture": "dense",
     "parameter_count_b": 8.0,
     "precision": "BF16",
-    "effective_dtype": "float16",
+    "effective_dtype": "bfloat16",
     "quantization_method": null,
     "model_format": "HuggingFace original"
   },
@@ -97,10 +81,9 @@
       "results_by_concurrency": [
         {
           "client_concurrency": 8,
-          "throughput_tokens_per_sec": 332.62,
-          "throughput_tokens_per_sec_per_chip": 332.62,
-          "throughput_tokens_per_sec_total": 922.83,
-          "elapsed_seconds_median": 43.4,
+          "throughput_tokens_per_sec": 2026.75,
+          "throughput_tokens_per_sec_per_chip": 2026.75,
+          "elapsed_seconds_median": 17.1,
           "peak_memory_gb": null,
           "power_watts_avg": null,
           "power_watts_peak": null,
@@ -110,10 +93,9 @@
         },
         {
           "client_concurrency": 32,
-          "throughput_tokens_per_sec": 331.64,
-          "throughput_tokens_per_sec_per_chip": 331.64,
-          "throughput_tokens_per_sec_total": 920.1,
-          "elapsed_seconds_median": 43.6,
+          "throughput_tokens_per_sec": 1967.36,
+          "throughput_tokens_per_sec_per_chip": 1967.36,
+          "elapsed_seconds_median": 17.4,
           "peak_memory_gb": null,
           "power_watts_avg": null,
           "power_watts_peak": null,
@@ -123,10 +105,9 @@
         },
         {
           "client_concurrency": 128,
-          "throughput_tokens_per_sec": 331.76,
-          "throughput_tokens_per_sec_per_chip": 331.76,
-          "throughput_tokens_per_sec_total": 920.46,
-          "elapsed_seconds_median": 43.6,
+          "throughput_tokens_per_sec": 1976.69,
+          "throughput_tokens_per_sec_per_chip": 1976.69,
+          "elapsed_seconds_median": 17.4,
           "peak_memory_gb": null,
           "power_watts_avg": null,
           "power_watts_peak": null,
@@ -144,21 +125,21 @@
     "notes": "Run --scenario accuracy to check model accuracy."
   },
   "meta": {
-    "submitted_by": "JuhaoLiang1997",
+    "submitted_by": "Gong-K",
     "submission_type": "individual",
-    "date": "2026-05-18",
-    "time": "17:34:52",
-    "run_id": "cabb7bd0",
-    "run_name": "mtt_s4000x1_suite_A_moorethreads_vllm_musa_f2f6f965_cabb7bd0",
+    "date": "2026-05-07",
+    "time": "07:49:17",
+    "run_id": "9b2b01fd",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd",
     "flagged": null,
-    "reproduce_script": "runners/moorethreads_vllm_musa_f2f6f965/runner.py",
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
     "env_info_file": "../env_info.json",
     "log_file": "run.log",
     "samples_file": "samples.jsonl",
     "notes": null,
-    "benchmark_start_time": "2026-05-18T09:26:10.676960+00:00",
-    "benchmark_end_time": "2026-05-18T09:34:52.667112+00:00",
-    "benchmark_elapsed_minutes": 8.7,
-    "model_load_seconds": 116.8
+    "benchmark_start_time": "2026-05-07T07:45:42.464403+00:00",
+    "benchmark_end_time": "2026-05-07T07:49:17.840697+00:00",
+    "benchmark_elapsed_minutes": 3.6,
+    "model_load_seconds": 91.1
   }
 }
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/online/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/online/result.json
new file mode 100644
index 00000000..b1edfa56
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/online/result.json
@@ -0,0 +1,147 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T07:39:40.369666+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 0.0,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 70.25,
+          "ttft_ms_p90": 102.66,
+          "ttft_ms_p99": 2415.93,
+          "tpot_ms_p50": 23.17,
+          "tpot_ms_p90": 25.89,
+          "tpot_ms_p99": 31.86,
+          "elapsed_seconds_median": 67.2,
+          "sla_met": false
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 4399.78,
+          "ttft_ms_p90": 8909.53,
+          "ttft_ms_p99": 9633.91,
+          "tpot_ms_p50": 40.9,
+          "tpot_ms_p90": 43.39,
+          "tpot_ms_p99": 113.85,
+          "elapsed_seconds_median": 25.9,
+          "sla_met": false
+        },
+        {
+          "target_qps": 100,
+          "achieved_qps": 100.0,
+          "ttft_ms_p50": 8348.54,
+          "ttft_ms_p90": 16328.81,
+          "ttft_ms_p99": 16742.85,
+          "tpot_ms_p50": 35.32,
+          "tpot_ms_p90": 43.82,
+          "tpot_ms_p99": 842.87,
+          "elapsed_seconds_median": 24.8,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "07:57:27",
+    "run_id": "9b2b01fd",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T07:51:35.930159+00:00",
+    "benchmark_end_time": "2026-05-07T07:57:27.625509+00:00",
+    "benchmark_elapsed_minutes": 5.9,
+    "model_load_seconds": 96.0
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/result.json
new file mode 100644
index 00000000..1d7fa650
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/result.json
@@ -0,0 +1,594 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T07:39:40.369666+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "interactive",
+      "sustained",
+      "speculative",
+      "burst"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 2026.75,
+          "throughput_tokens_per_sec_per_chip": 2026.75,
+          "elapsed_seconds_median": 17.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 1967.36,
+          "throughput_tokens_per_sec_per_chip": 1967.36,
+          "elapsed_seconds_median": 17.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 1976.69,
+          "throughput_tokens_per_sec_per_chip": 1976.69,
+          "elapsed_seconds_median": 17.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 0.0,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 70.25,
+          "ttft_ms_p90": 102.66,
+          "ttft_ms_p99": 2415.93,
+          "tpot_ms_p50": 23.17,
+          "tpot_ms_p90": 25.89,
+          "tpot_ms_p99": 31.86,
+          "elapsed_seconds_median": 67.2,
+          "sla_met": false
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 4399.78,
+          "ttft_ms_p90": 8909.53,
+          "ttft_ms_p99": 9633.91,
+          "tpot_ms_p50": 40.9,
+          "tpot_ms_p90": 43.39,
+          "tpot_ms_p99": 113.85,
+          "elapsed_seconds_median": 25.9,
+          "sla_met": false
+        },
+        {
+          "target_qps": 100,
+          "achieved_qps": 100.0,
+          "ttft_ms_p50": 8348.54,
+          "ttft_ms_p90": 16328.81,
+          "ttft_ms_p99": 16742.85,
+          "tpot_ms_p50": 35.32,
+          "tpot_ms_p90": 43.82,
+          "tpot_ms_p99": 842.87,
+          "elapsed_seconds_median": 24.8,
+          "sla_met": false
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 34.55,
+      "ttft_ms_p90": 56.25,
+      "ttft_ms_p99": 80.59,
+      "tpot_ms_p50": 16.58,
+      "tpot_ms_p90": 16.64,
+      "tpot_ms_p99": 16.75,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 485.8
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 350.8,
+          "tokens_out": 21065,
+          "tokens_in": 0,
+          "requests_completed": 112,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 8410.1
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 442.0,
+          "tokens_out": 26516,
+          "tokens_in": 0,
+          "requests_completed": 142,
+          "ttft_ms_p50": 49.7,
+          "ttft_ms_p99": 61.8
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 435.8,
+          "tokens_out": 26138,
+          "tokens_in": 0,
+          "requests_completed": 138,
+          "ttft_ms_p50": 49.7,
+          "ttft_ms_p99": 57.7
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 441.5,
+          "tokens_out": 26496,
+          "tokens_in": 0,
+          "requests_completed": 141,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 71.7
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.4,
+          "tokens_out": 26234,
+          "tokens_in": 0,
+          "requests_completed": 139,
+          "ttft_ms_p50": 49.8,
+          "ttft_ms_p99": 63.6
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 443.0,
+          "tokens_out": 26582,
+          "tokens_in": 0,
+          "requests_completed": 139,
+          "ttft_ms_p50": 50.0,
+          "ttft_ms_p99": 65.5
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 434.5,
+          "tokens_out": 26066,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 63.4
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 443.9,
+          "tokens_out": 26649,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 64.9
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 440.1,
+          "tokens_out": 26393,
+          "tokens_in": 0,
+          "requests_completed": 139,
+          "ttft_ms_p50": 49.8,
+          "ttft_ms_p99": 65.4
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.3,
+          "tokens_out": 26252,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 64.3
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 436.4,
+          "tokens_out": 26183,
+          "tokens_in": 0,
+          "requests_completed": 137,
+          "ttft_ms_p50": 50.0,
+          "ttft_ms_p99": 55.7
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 442.9,
+          "tokens_out": 26576,
+          "tokens_in": 0,
+          "requests_completed": 143,
+          "ttft_ms_p50": 49.8,
+          "ttft_ms_p99": 62.7
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 439.4,
+          "tokens_out": 26344,
+          "tokens_in": 0,
+          "requests_completed": 138,
+          "ttft_ms_p50": 50.0,
+          "ttft_ms_p99": 62.9
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 434.2,
+          "tokens_out": 26066,
+          "tokens_in": 0,
+          "requests_completed": 137,
+          "ttft_ms_p50": 50.3,
+          "ttft_ms_p99": 66.4
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 444.4,
+          "tokens_out": 26668,
+          "tokens_in": 0,
+          "requests_completed": 142,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 66.2
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 428.1,
+          "tokens_out": 25687,
+          "tokens_in": 0,
+          "requests_completed": 136,
+          "ttft_ms_p50": 49.9,
+          "ttft_ms_p99": 66.6
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 442.3,
+          "tokens_out": 26531,
+          "tokens_in": 0,
+          "requests_completed": 141,
+          "ttft_ms_p50": 49.8,
+          "ttft_ms_p99": 61.2
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 439.0,
+          "tokens_out": 26347,
+          "tokens_in": 0,
+          "requests_completed": 139,
+          "ttft_ms_p50": 50.0,
+          "ttft_ms_p99": 61.3
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.0,
+          "tokens_out": 26202,
+          "tokens_in": 0,
+          "requests_completed": 138,
+          "ttft_ms_p50": 49.9,
+          "ttft_ms_p99": 60.3
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 440.3,
+          "tokens_out": 26417,
+          "tokens_in": 0,
+          "requests_completed": 141,
+          "ttft_ms_p50": 50.0,
+          "ttft_ms_p99": 66.2
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 443.4,
+          "tokens_out": 26621,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 49.8,
+          "ttft_ms_p99": 63.3
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.7,
+          "tokens_out": 26254,
+          "tokens_in": 0,
+          "requests_completed": 139,
+          "ttft_ms_p50": 49.9,
+          "ttft_ms_p99": 62.3
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 434.6,
+          "tokens_out": 26070,
+          "tokens_in": 0,
+          "requests_completed": 138,
+          "ttft_ms_p50": 49.8,
+          "ttft_ms_p99": 61.4
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 447.5,
+          "tokens_out": 26855,
+          "tokens_in": 0,
+          "requests_completed": 141,
+          "ttft_ms_p50": 50.4,
+          "ttft_ms_p99": 63.2
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.5,
+          "tokens_out": 26252,
+          "tokens_in": 0,
+          "requests_completed": 141,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 58.9
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 435.9,
+          "tokens_out": 26161,
+          "tokens_in": 0,
+          "requests_completed": 137,
+          "ttft_ms_p50": 50.2,
+          "ttft_ms_p99": 63.0
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.5,
+          "tokens_out": 26231,
+          "tokens_in": 0,
+          "requests_completed": 139,
+          "ttft_ms_p50": 50.0,
+          "ttft_ms_p99": 63.3
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 440.4,
+          "tokens_out": 26443,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.5,
+          "ttft_ms_p99": 65.2
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.4,
+          "tokens_out": 26242,
+          "tokens_in": 0,
+          "requests_completed": 138,
+          "ttft_ms_p50": 50.3,
+          "ttft_ms_p99": 62.0
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 439.0,
+      "throttle_ratio": 0.957,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": 0.2
+    },
+    "speculative": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 434.67,
+          "throughput_tokens_per_sec_per_chip": 434.67,
+          "elapsed_seconds_median": 79.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 439.2,
+          "throughput_tokens_per_sec_per_chip": 439.2,
+          "elapsed_seconds_median": 78.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 439.01,
+          "throughput_tokens_per_sec_per_chip": 439.01,
+          "elapsed_seconds_median": 78.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "burst": {
+      "sla_ttft_ms": 500,
+      "burst_steady_qps": 5,
+      "burst_peak_qps": 25,
+      "burst_duration_seconds": 30,
+      "burst_interval_seconds": 120,
+      "steady_requests_total": 1812,
+      "burst_requests_total": 2245,
+      "steady_ttft_p50_ms": 65.89,
+      "steady_ttft_p99_ms": 6708.31,
+      "burst_ttft_p50_ms": 9683.26,
+      "burst_ttft_p99_ms": 22975.84,
+      "sla_met_during_burst": false,
+      "burst_degradation_ratio": 3.425,
+      "results_by_cycle": [
+        {
+          "cycle": 1,
+          "steady_requests": 581,
+          "burst_requests": 760,
+          "steady_ttft_p99_ms": 7772.85,
+          "burst_ttft_p99_ms": 23080.27
+        },
+        {
+          "cycle": 2,
+          "steady_requests": 595,
+          "burst_requests": 734,
+          "steady_ttft_p99_ms": 119.28,
+          "burst_ttft_p99_ms": 22460.91
+        },
+        {
+          "cycle": 3,
+          "steady_requests": 636,
+          "burst_requests": 751,
+          "steady_ttft_p99_ms": 123.02,
+          "burst_ttft_p99_ms": 22360.77
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.6,
+    "baseline_delta": 0.0,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "07:49:17",
+    "run_id": "9b2b01fd",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T07:45:42.464403+00:00",
+    "benchmark_end_time": "2026-05-07T07:49:17.840697+00:00",
+    "benchmark_elapsed_minutes": 89.2,
+    "model_load_seconds": 91.1,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'interactive', 'sustained', 'speculative', 'burst'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/offline",
+      "online": "results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/online",
+      "interactive": "results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/interactive",
+      "sustained": "results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/sustained",
+      "speculative": "results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/speculative",
+      "burst": "results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/burst"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/offline/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/speculative/result.json
similarity index 50%
rename from results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/offline/result.json
rename to results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/speculative/result.json
index 2e6fc7fc..6d376d23 100644
--- a/results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/offline/result.json
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/speculative/result.json
@@ -1,55 +1,54 @@
 {
   "schema_version": "1.0",
   "suite_id": "suite_A",
-  "implementation_id": "nvidia_onecat_vllm_12a253c2",
+  "implementation_id": "nvidia_sglang_c43a8309",
   "chip": {
-    "name": "Tesla V100-PCIE-32GB",
+    "name": "NVIDIA GeForce RTX 4090",
     "vendor": "NVIDIA",
     "count": 1,
-    "memory_gb": 32.0,
+    "memory_gb": 24.0,
     "interconnect_intra_node": null,
     "interconnect_inter_node": null
   },
   "environment": {
-    "collected_at": "2026-05-18T09:38:50.346241+00:00",
+    "collected_at": "2026-05-07T07:39:40.369666+00:00",
     "accelerators": [
       {
         "index": 0,
-        "name": "Tesla V100-PCIE-32GB",
+        "name": "NVIDIA GeForce RTX 4090",
         "vendor": "NVIDIA",
-        "memory_gb": 32.0,
-        "driver_version": "580.82.07",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
         "firmware_version": null,
-        "compute_capability": "7.0",
-        "supports_bf16": false
+        "compute_capability": "8.9",
+        "supports_bf16": true
       }
     ],
-    "accelerator_platform": "nvidia",
-    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-25\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
     "intra_node_interconnect": null,
     "cpu": {
-      "model": "Intel(R) Xeon(R) Gold 6230 CPU @ 2.10GHz",
-      "physical_cores": 26,
-      "logical_cores": 52,
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
       "numa_nodes": 2
     },
-    "system_memory_gb": 214.5,
-    "pcie_generation": "PCIe Gen 3",
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
     "cpu_accelerator_bandwidth_gbs": null,
     "network_interfaces": null,
-    "os": "Ubuntu 22.04.5 LTS",
-    "python_version": "3.12.13",
-    "kernel_version": "5.4.0-149-generic",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
     "runtime_version": "CUDA 12.8",
     "pytorch_version": "2.9.1+cu128"
   },
   "software": {
-    "framework": "1Cat-vLLM",
-    "framework_version": "1.0.0+flash_attn_v100-1.0.0",
-    "driver_version": "580.82.07",
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
     "runtime_version": "CUDA 12.8",
-    "os": "Ubuntu 22.04.5 LTS",
-    "python_version": "3.12.13"
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
   },
   "model": {
     "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
@@ -59,13 +58,13 @@
     "model_source": "local",
     "architecture": "dense",
     "parameter_count_b": 8.0,
-    "precision": "FP16",
-    "effective_dtype": "float16",
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
     "quantization_method": null,
     "model_format": "HuggingFace original"
   },
   "task": {
-    "scenario": "offline",
+    "scenario": "speculative",
     "num_runs": 3,
     "warmup_runs": 1,
     "parallelism": {
@@ -74,28 +73,17 @@
       "expert_parallel_size": 1,
       "data_parallel_size": 1
     },
-    "extra_config": {
-      "tensor_parallel_size": 1,
-      "enforce_eager": false,
-      "max_num_seqs": 512,
-      "gpu_memory_utilization": 0.9,
-      "engine_kwargs": {
-        "enable_prefix_caching": false,
-        "enable_chunked_prefill": false,
-        "kv_cache_auto_trim_ratio": 0.0
-      }
-    },
+    "extra_config": null,
     "runtime_metrics": null
   },
   "metrics": {
-    "offline": {
+    "speculative": {
       "results_by_concurrency": [
         {
           "client_concurrency": 8,
-          "throughput_tokens_per_sec": 671.32,
-          "throughput_tokens_per_sec_per_chip": 671.32,
-          "throughput_tokens_per_sec_total": 1168.67,
-          "elapsed_seconds_median": 51.6,
+          "throughput_tokens_per_sec": 434.67,
+          "throughput_tokens_per_sec_per_chip": 434.67,
+          "elapsed_seconds_median": 79.6,
           "peak_memory_gb": null,
           "power_watts_avg": null,
           "power_watts_peak": null,
@@ -105,10 +93,9 @@
         },
         {
           "client_concurrency": 32,
-          "throughput_tokens_per_sec": 670.99,
-          "throughput_tokens_per_sec_per_chip": 670.99,
-          "throughput_tokens_per_sec_total": 1168.09,
-          "elapsed_seconds_median": 51.6,
+          "throughput_tokens_per_sec": 439.2,
+          "throughput_tokens_per_sec_per_chip": 439.2,
+          "elapsed_seconds_median": 78.6,
           "peak_memory_gb": null,
           "power_watts_avg": null,
           "power_watts_peak": null,
@@ -118,10 +105,9 @@
         },
         {
           "client_concurrency": 128,
-          "throughput_tokens_per_sec": 671.43,
-          "throughput_tokens_per_sec_per_chip": 671.43,
-          "throughput_tokens_per_sec_total": 1168.44,
-          "elapsed_seconds_median": 51.6,
+          "throughput_tokens_per_sec": 439.01,
+          "throughput_tokens_per_sec_per_chip": 439.01,
+          "elapsed_seconds_median": 78.7,
           "peak_memory_gb": null,
           "power_watts_avg": null,
           "power_watts_peak": null,
@@ -139,21 +125,21 @@
     "notes": "Run --scenario accuracy to check model accuracy."
   },
   "meta": {
-    "submitted_by": "JuhaoLiang1997",
+    "submitted_by": "Gong-K",
     "submission_type": "individual",
-    "date": "2026-05-18",
-    "time": "18:03:39",
-    "run_id": "4e0e6eba",
-    "run_name": "tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba",
+    "date": "2026-05-07",
+    "time": "09:18:46",
+    "run_id": "9b2b01fd",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd",
     "flagged": null,
-    "reproduce_script": "runners/nvidia_onecat_vllm_12a253c2/runner.py",
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
     "env_info_file": "../env_info.json",
     "log_file": "run.log",
     "samples_file": "samples.jsonl",
     "notes": null,
-    "benchmark_start_time": "2026-05-18T09:53:19.928949+00:00",
-    "benchmark_end_time": "2026-05-18T10:03:39.512440+00:00",
-    "benchmark_elapsed_minutes": 10.3,
-    "model_load_seconds": 47.8
+    "benchmark_start_time": "2026-05-07T09:02:48.231664+00:00",
+    "benchmark_end_time": "2026-05-07T09:18:46.844554+00:00",
+    "benchmark_elapsed_minutes": 16.0,
+    "model_load_seconds": 141.6
   }
 }
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/sustained/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/sustained/result.json
new file mode 100644
index 00000000..3f3146d9
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd/sustained/result.json
@@ -0,0 +1,407 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T07:39:40.369666+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 350.8,
+          "tokens_out": 21065,
+          "tokens_in": 0,
+          "requests_completed": 112,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 8410.1
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 442.0,
+          "tokens_out": 26516,
+          "tokens_in": 0,
+          "requests_completed": 142,
+          "ttft_ms_p50": 49.7,
+          "ttft_ms_p99": 61.8
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 435.8,
+          "tokens_out": 26138,
+          "tokens_in": 0,
+          "requests_completed": 138,
+          "ttft_ms_p50": 49.7,
+          "ttft_ms_p99": 57.7
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 441.5,
+          "tokens_out": 26496,
+          "tokens_in": 0,
+          "requests_completed": 141,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 71.7
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.4,
+          "tokens_out": 26234,
+          "tokens_in": 0,
+          "requests_completed": 139,
+          "ttft_ms_p50": 49.8,
+          "ttft_ms_p99": 63.6
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 443.0,
+          "tokens_out": 26582,
+          "tokens_in": 0,
+          "requests_completed": 139,
+          "ttft_ms_p50": 50.0,
+          "ttft_ms_p99": 65.5
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 434.5,
+          "tokens_out": 26066,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 63.4
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 443.9,
+          "tokens_out": 26649,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 64.9
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 440.1,
+          "tokens_out": 26393,
+          "tokens_in": 0,
+          "requests_completed": 139,
+          "ttft_ms_p50": 49.8,
+          "ttft_ms_p99": 65.4
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.3,
+          "tokens_out": 26252,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 64.3
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 436.4,
+          "tokens_out": 26183,
+          "tokens_in": 0,
+          "requests_completed": 137,
+          "ttft_ms_p50": 50.0,
+          "ttft_ms_p99": 55.7
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 442.9,
+          "tokens_out": 26576,
+          "tokens_in": 0,
+          "requests_completed": 143,
+          "ttft_ms_p50": 49.8,
+          "ttft_ms_p99": 62.7
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 439.4,
+          "tokens_out": 26344,
+          "tokens_in": 0,
+          "requests_completed": 138,
+          "ttft_ms_p50": 50.0,
+          "ttft_ms_p99": 62.9
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 434.2,
+          "tokens_out": 26066,
+          "tokens_in": 0,
+          "requests_completed": 137,
+          "ttft_ms_p50": 50.3,
+          "ttft_ms_p99": 66.4
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 444.4,
+          "tokens_out": 26668,
+          "tokens_in": 0,
+          "requests_completed": 142,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 66.2
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 428.1,
+          "tokens_out": 25687,
+          "tokens_in": 0,
+          "requests_completed": 136,
+          "ttft_ms_p50": 49.9,
+          "ttft_ms_p99": 66.6
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 442.3,
+          "tokens_out": 26531,
+          "tokens_in": 0,
+          "requests_completed": 141,
+          "ttft_ms_p50": 49.8,
+          "ttft_ms_p99": 61.2
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 439.0,
+          "tokens_out": 26347,
+          "tokens_in": 0,
+          "requests_completed": 139,
+          "ttft_ms_p50": 50.0,
+          "ttft_ms_p99": 61.3
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.0,
+          "tokens_out": 26202,
+          "tokens_in": 0,
+          "requests_completed": 138,
+          "ttft_ms_p50": 49.9,
+          "ttft_ms_p99": 60.3
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 440.3,
+          "tokens_out": 26417,
+          "tokens_in": 0,
+          "requests_completed": 141,
+          "ttft_ms_p50": 50.0,
+          "ttft_ms_p99": 66.2
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 443.4,
+          "tokens_out": 26621,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 49.8,
+          "ttft_ms_p99": 63.3
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.7,
+          "tokens_out": 26254,
+          "tokens_in": 0,
+          "requests_completed": 139,
+          "ttft_ms_p50": 49.9,
+          "ttft_ms_p99": 62.3
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 434.6,
+          "tokens_out": 26070,
+          "tokens_in": 0,
+          "requests_completed": 138,
+          "ttft_ms_p50": 49.8,
+          "ttft_ms_p99": 61.4
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 447.5,
+          "tokens_out": 26855,
+          "tokens_in": 0,
+          "requests_completed": 141,
+          "ttft_ms_p50": 50.4,
+          "ttft_ms_p99": 63.2
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.5,
+          "tokens_out": 26252,
+          "tokens_in": 0,
+          "requests_completed": 141,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 58.9
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 435.9,
+          "tokens_out": 26161,
+          "tokens_in": 0,
+          "requests_completed": 137,
+          "ttft_ms_p50": 50.2,
+          "ttft_ms_p99": 63.0
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.5,
+          "tokens_out": 26231,
+          "tokens_in": 0,
+          "requests_completed": 139,
+          "ttft_ms_p50": 50.0,
+          "ttft_ms_p99": 63.3
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 440.4,
+          "tokens_out": 26443,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.5,
+          "ttft_ms_p99": 65.2
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.4,
+          "tokens_out": 26242,
+          "tokens_in": 0,
+          "requests_completed": 138,
+          "ttft_ms_p50": 50.3,
+          "ttft_ms_p99": 62.0
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 439.0,
+      "throttle_ratio": 0.957,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": 0.2
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "08:59:47",
+    "run_id": "9b2b01fd",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_A_nvidia_sglang_c43a8309_9b2b01fd",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T08:29:41.518630+00:00",
+    "benchmark_end_time": "2026-05-07T08:59:47.912999+00:00",
+    "benchmark_elapsed_minutes": 30.1,
+    "model_load_seconds": 153.1
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/accuracy/accuracy.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/accuracy/accuracy.json
new file mode 100644
index 00000000..37f4d824
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.55,
+  "baseline_delta": -0.01,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/offline/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/offline/result.json
new file mode 100644
index 00000000..2b8d274e
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/offline/result.json
@@ -0,0 +1,157 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 1986.44,
+          "throughput_tokens_per_sec_per_chip": 1986.44,
+          "elapsed_seconds_median": 18.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 1966.2,
+          "throughput_tokens_per_sec_per_chip": 1966.2,
+          "elapsed_seconds_median": 18.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 1933.26,
+          "throughput_tokens_per_sec_per_chip": 1933.26,
+          "elapsed_seconds_median": 18.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 1965.04,
+          "throughput_tokens_per_sec_per_chip": 1965.04,
+          "elapsed_seconds_median": 18.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "13:25:04",
+    "run_id": "4ce353c2",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T13:20:07.690461+00:00",
+    "benchmark_end_time": "2026-04-27T13:25:04.187964+00:00",
+    "benchmark_elapsed_minutes": 4.9,
+    "model_load_seconds": 81.4
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/online/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/online/result.json
new file mode 100644
index 00000000..040498d9
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/online/result.json
@@ -0,0 +1,159 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 10,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 68.99,
+          "ttft_ms_p90": 101.54,
+          "ttft_ms_p99": 1975.21,
+          "tpot_ms_p50": 23.2,
+          "tpot_ms_p90": 25.96,
+          "tpot_ms_p99": 29.33,
+          "elapsed_seconds_median": 67.1,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 77.79,
+          "ttft_ms_p90": 120.18,
+          "ttft_ms_p99": 155.35,
+          "tpot_ms_p50": 34.33,
+          "tpot_ms_p90": 37.28,
+          "tpot_ms_p99": 48.36,
+          "elapsed_seconds_median": 34.2,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 4302.15,
+          "ttft_ms_p90": 8634.91,
+          "ttft_ms_p99": 9469.27,
+          "tpot_ms_p50": 40.74,
+          "tpot_ms_p90": 44.2,
+          "tpot_ms_p99": 115.81,
+          "elapsed_seconds_median": 25.9,
+          "sla_met": false
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 7003.83,
+          "ttft_ms_p90": 13096.96,
+          "ttft_ms_p99": 14256.05,
+          "tpot_ms_p50": 35.52,
+          "tpot_ms_p90": 45.45,
+          "tpot_ms_p99": 831.83,
+          "elapsed_seconds_median": 24.8,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "13:34:34",
+    "run_id": "4ce353c2",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T13:26:59.027717+00:00",
+    "benchmark_end_time": "2026-04-27T13:34:34.420033+00:00",
+    "benchmark_elapsed_minutes": 7.6,
+    "model_load_seconds": 80.7
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/result.json
new file mode 100644
index 00000000..7317b5bf
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/result.json
@@ -0,0 +1,374 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 1986.44,
+          "throughput_tokens_per_sec_per_chip": 1986.44,
+          "elapsed_seconds_median": 18.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 1966.2,
+          "throughput_tokens_per_sec_per_chip": 1966.2,
+          "elapsed_seconds_median": 18.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 1933.26,
+          "throughput_tokens_per_sec_per_chip": 1933.26,
+          "elapsed_seconds_median": 18.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 1965.04,
+          "throughput_tokens_per_sec_per_chip": 1965.04,
+          "elapsed_seconds_median": 18.3,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 10,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 68.99,
+          "ttft_ms_p90": 101.54,
+          "ttft_ms_p99": 1975.21,
+          "tpot_ms_p50": 23.2,
+          "tpot_ms_p90": 25.96,
+          "tpot_ms_p99": 29.33,
+          "elapsed_seconds_median": 67.1,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 77.79,
+          "ttft_ms_p90": 120.18,
+          "ttft_ms_p99": 155.35,
+          "tpot_ms_p50": 34.33,
+          "tpot_ms_p90": 37.28,
+          "tpot_ms_p99": 48.36,
+          "elapsed_seconds_median": 34.2,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 4302.15,
+          "ttft_ms_p90": 8634.91,
+          "ttft_ms_p99": 9469.27,
+          "tpot_ms_p50": 40.74,
+          "tpot_ms_p90": 44.2,
+          "tpot_ms_p99": 115.81,
+          "elapsed_seconds_median": 25.9,
+          "sla_met": false
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 7003.83,
+          "ttft_ms_p90": 13096.96,
+          "ttft_ms_p99": 14256.05,
+          "tpot_ms_p50": 35.52,
+          "tpot_ms_p90": 45.45,
+          "tpot_ms_p99": 831.83,
+          "elapsed_seconds_median": 24.8,
+          "sla_met": false
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 393.4,
+          "tokens_out": 23625,
+          "tokens_in": 0,
+          "requests_completed": 131,
+          "ttft_ms_p50": 60.8,
+          "ttft_ms_p99": 3031.4
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 433.1,
+          "tokens_out": 25985,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 61.2
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 436.6,
+          "tokens_out": 26191,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.2,
+          "ttft_ms_p99": 62.6
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 446.9,
+          "tokens_out": 26811,
+          "tokens_in": 0,
+          "requests_completed": 148,
+          "ttft_ms_p50": 50.3,
+          "ttft_ms_p99": 59.6
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 440.6,
+          "tokens_out": 26429,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 62.1
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 436.3,
+          "tokens_out": 26190,
+          "tokens_in": 0,
+          "requests_completed": 144,
+          "ttft_ms_p50": 50.3,
+          "ttft_ms_p99": 61.5
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 436.4,
+          "tokens_out": 26187,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.2,
+          "ttft_ms_p99": 71.8
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 436.7,
+          "tokens_out": 26197,
+          "tokens_in": 0,
+          "requests_completed": 141,
+          "ttft_ms_p50": 50.3,
+          "ttft_ms_p99": 64.3
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 440.8,
+          "tokens_out": 26436,
+          "tokens_in": 0,
+          "requests_completed": 143,
+          "ttft_ms_p50": 50.3,
+          "ttft_ms_p99": 63.2
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.4,
+          "tokens_out": 26252,
+          "tokens_in": 0,
+          "requests_completed": 141,
+          "ttft_ms_p50": 50.5,
+          "ttft_ms_p99": 62.4
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.6,
+          "tokens_out": 26250,
+          "tokens_in": 0,
+          "requests_completed": 144,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 62.2
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 436.1,
+          "tokens_out": 26162,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.3,
+          "ttft_ms_p99": 58.0
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 435.8,
+          "tokens_out": 26158,
+          "tokens_in": 0,
+          "requests_completed": 145,
+          "ttft_ms_p50": 50.3,
+          "ttft_ms_p99": 62.4
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 438.7,
+          "tokens_out": 26320,
+          "tokens_in": 0,
+          "requests_completed": 139,
+          "ttft_ms_p50": 50.4,
+          "ttft_ms_p99": 62.8
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 434.7,
+      "throttle_ratio": 0.88,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -2968.6
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.55,
+    "baseline_delta": -0.01,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "13:25:04",
+    "run_id": "4ce353c2",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T13:20:07.690461+00:00",
+    "benchmark_end_time": "2026-04-27T13:25:04.187964+00:00",
+    "benchmark_elapsed_minutes": 27.6,
+    "model_load_seconds": 81.4,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/offline",
+      "online": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/online",
+      "sustained": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/sustained/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/sustained/result.json
new file mode 100644
index 00000000..468b0fda
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/sustained/result.json
@@ -0,0 +1,257 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 393.4,
+          "tokens_out": 23625,
+          "tokens_in": 0,
+          "requests_completed": 131,
+          "ttft_ms_p50": 60.8,
+          "ttft_ms_p99": 3031.4
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 433.1,
+          "tokens_out": 25985,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 61.2
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 436.6,
+          "tokens_out": 26191,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.2,
+          "ttft_ms_p99": 62.6
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 446.9,
+          "tokens_out": 26811,
+          "tokens_in": 0,
+          "requests_completed": 148,
+          "ttft_ms_p50": 50.3,
+          "ttft_ms_p99": 59.6
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 440.6,
+          "tokens_out": 26429,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 62.1
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 436.3,
+          "tokens_out": 26190,
+          "tokens_in": 0,
+          "requests_completed": 144,
+          "ttft_ms_p50": 50.3,
+          "ttft_ms_p99": 61.5
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 436.4,
+          "tokens_out": 26187,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.2,
+          "ttft_ms_p99": 71.8
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 436.7,
+          "tokens_out": 26197,
+          "tokens_in": 0,
+          "requests_completed": 141,
+          "ttft_ms_p50": 50.3,
+          "ttft_ms_p99": 64.3
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 440.8,
+          "tokens_out": 26436,
+          "tokens_in": 0,
+          "requests_completed": 143,
+          "ttft_ms_p50": 50.3,
+          "ttft_ms_p99": 63.2
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.4,
+          "tokens_out": 26252,
+          "tokens_in": 0,
+          "requests_completed": 141,
+          "ttft_ms_p50": 50.5,
+          "ttft_ms_p99": 62.4
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 437.6,
+          "tokens_out": 26250,
+          "tokens_in": 0,
+          "requests_completed": 144,
+          "ttft_ms_p50": 50.1,
+          "ttft_ms_p99": 62.2
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 436.1,
+          "tokens_out": 26162,
+          "tokens_in": 0,
+          "requests_completed": 140,
+          "ttft_ms_p50": 50.3,
+          "ttft_ms_p99": 58.0
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 435.8,
+          "tokens_out": 26158,
+          "tokens_in": 0,
+          "requests_completed": 145,
+          "ttft_ms_p50": 50.3,
+          "ttft_ms_p99": 62.4
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 438.7,
+          "tokens_out": 26320,
+          "tokens_in": 0,
+          "requests_completed": 139,
+          "ttft_ms_p50": 50.4,
+          "ttft_ms_p99": 62.8
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 434.7,
+      "throttle_ratio": 0.88,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -2968.6
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "13:51:48",
+    "run_id": "4ce353c2",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T13:36:40.827984+00:00",
+    "benchmark_end_time": "2026-04-27T13:51:48.094516+00:00",
+    "benchmark_elapsed_minutes": 15.1,
+    "model_load_seconds": 92.1
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/env_info.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/env_info.json
new file mode 100644
index 00000000..54f5ab5a
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/env_info.json
@@ -0,0 +1,32 @@
+{
+  "collected_at": "2026-04-27T13:14:17.420434+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA GeForce RTX 4090",
+      "vendor": "NVIDIA",
+      "memory_gb": 24.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.9",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "INTEL(R) XEON(R) GOLD 6530",
+    "physical_cores": 64,
+    "logical_cores": 128,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.6,
+  "pcie_generation": "PCIe Gen 1",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": null,
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.10.20",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 12.8",
+  "pytorch_version": "2.9.1+cu128"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/accuracy/accuracy.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/accuracy/accuracy.json
new file mode 100644
index 00000000..1129e80c
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.58,
+  "baseline_delta": 0.0,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "FP8",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/offline/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/offline/result.json
new file mode 100644
index 00000000..03dc80bc
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/offline/result.json
@@ -0,0 +1,157 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
+    "model_revision": "12fd6884d2585dd4d020373e7f39f74507b31866",
+    "model_name": null,
+    "model_note": "FP8 quantized by RedHatAI using llm-compressor. Weights and activations in FP8. Native speedup on H100/MI300X; computed in BF16 on A100.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 3870.35,
+          "throughput_tokens_per_sec_per_chip": 3870.35,
+          "elapsed_seconds_median": 9.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 3873.21,
+          "throughput_tokens_per_sec_per_chip": 3873.21,
+          "elapsed_seconds_median": 9.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 3965.79,
+          "throughput_tokens_per_sec_per_chip": 3965.79,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 3935.02,
+          "throughput_tokens_per_sec_per_chip": 3935.02,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "14:00:18",
+    "run_id": "c609f262",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_c609f262",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T13:57:50.323406+00:00",
+    "benchmark_end_time": "2026-04-27T14:00:18.468430+00:00",
+    "benchmark_elapsed_minutes": 2.5,
+    "model_load_seconds": 71.6
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/online/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/online/result.json
new file mode 100644
index 00000000..e70d046a
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/online/result.json
@@ -0,0 +1,159 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
+    "model_revision": "12fd6884d2585dd4d020373e7f39f74507b31866",
+    "model_name": null,
+    "model_note": "FP8 quantized by RedHatAI using llm-compressor. Weights and activations in FP8. Native speedup on H100/MI300X; computed in BF16 on A100.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 25,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 40.91,
+          "ttft_ms_p90": 58.78,
+          "ttft_ms_p99": 1717.74,
+          "tpot_ms_p50": 12.0,
+          "tpot_ms_p90": 12.87,
+          "tpot_ms_p99": 15.87,
+          "elapsed_seconds_median": 65.4,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 44.32,
+          "ttft_ms_p90": 53.51,
+          "ttft_ms_p99": 90.52,
+          "tpot_ms_p50": 18.34,
+          "tpot_ms_p90": 19.66,
+          "tpot_ms_p99": 22.26,
+          "elapsed_seconds_median": 32.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 50.71,
+          "ttft_ms_p90": 64.15,
+          "ttft_ms_p99": 109.33,
+          "tpot_ms_p50": 29.33,
+          "tpot_ms_p90": 36.05,
+          "tpot_ms_p99": 46.56,
+          "elapsed_seconds_median": 16.0,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 53.61,
+          "ttft_ms_p90": 1478.22,
+          "ttft_ms_p99": 4184.64,
+          "tpot_ms_p50": 38.21,
+          "tpot_ms_p90": 43.17,
+          "tpot_ms_p99": 64.28,
+          "elapsed_seconds_median": 13.5,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "14:08:19",
+    "run_id": "c609f262",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_c609f262",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T14:01:58.865277+00:00",
+    "benchmark_end_time": "2026-04-27T14:08:19.969799+00:00",
+    "benchmark_elapsed_minutes": 6.4,
+    "model_load_seconds": 67.5
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/result.json
new file mode 100644
index 00000000..cb9da1ce
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/result.json
@@ -0,0 +1,374 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
+    "model_revision": "12fd6884d2585dd4d020373e7f39f74507b31866",
+    "model_name": null,
+    "model_note": "FP8 quantized by RedHatAI using llm-compressor. Weights and activations in FP8. Native speedup on H100/MI300X; computed in BF16 on A100.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 3870.35,
+          "throughput_tokens_per_sec_per_chip": 3870.35,
+          "elapsed_seconds_median": 9.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 3873.21,
+          "throughput_tokens_per_sec_per_chip": 3873.21,
+          "elapsed_seconds_median": 9.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 3965.79,
+          "throughput_tokens_per_sec_per_chip": 3965.79,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 3935.02,
+          "throughput_tokens_per_sec_per_chip": 3935.02,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 25,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 40.91,
+          "ttft_ms_p90": 58.78,
+          "ttft_ms_p99": 1717.74,
+          "tpot_ms_p50": 12.0,
+          "tpot_ms_p90": 12.87,
+          "tpot_ms_p99": 15.87,
+          "elapsed_seconds_median": 65.4,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 44.32,
+          "ttft_ms_p90": 53.51,
+          "ttft_ms_p99": 90.52,
+          "tpot_ms_p50": 18.34,
+          "tpot_ms_p90": 19.66,
+          "tpot_ms_p99": 22.26,
+          "elapsed_seconds_median": 32.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 50.71,
+          "ttft_ms_p90": 64.15,
+          "ttft_ms_p99": 109.33,
+          "tpot_ms_p50": 29.33,
+          "tpot_ms_p90": 36.05,
+          "tpot_ms_p99": 46.56,
+          "elapsed_seconds_median": 16.0,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 53.61,
+          "ttft_ms_p90": 1478.22,
+          "ttft_ms_p99": 4184.64,
+          "tpot_ms_p50": 38.21,
+          "tpot_ms_p90": 43.17,
+          "tpot_ms_p99": 64.28,
+          "elapsed_seconds_median": 13.5,
+          "sla_met": false
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 645.5,
+          "tokens_out": 38742,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 38.2,
+          "ttft_ms_p99": 2750.8
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 702.3,
+          "tokens_out": 42131,
+          "tokens_in": 0,
+          "requests_completed": 234,
+          "ttft_ms_p50": 37.4,
+          "ttft_ms_p99": 83.8
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 695.8,
+          "tokens_out": 41779,
+          "tokens_in": 0,
+          "requests_completed": 228,
+          "ttft_ms_p50": 37.2,
+          "ttft_ms_p99": 56.1
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 698.5,
+          "tokens_out": 41888,
+          "tokens_in": 0,
+          "requests_completed": 229,
+          "ttft_ms_p50": 37.2,
+          "ttft_ms_p99": 55.8
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 700.5,
+          "tokens_out": 42050,
+          "tokens_in": 0,
+          "requests_completed": 231,
+          "ttft_ms_p50": 37.0,
+          "ttft_ms_p99": 54.2
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 695.7,
+          "tokens_out": 41730,
+          "tokens_in": 0,
+          "requests_completed": 229,
+          "ttft_ms_p50": 37.0,
+          "ttft_ms_p99": 54.4
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 691.3,
+          "tokens_out": 41466,
+          "tokens_in": 0,
+          "requests_completed": 227,
+          "ttft_ms_p50": 37.4,
+          "ttft_ms_p99": 56.3
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 704.3,
+          "tokens_out": 42253,
+          "tokens_in": 0,
+          "requests_completed": 229,
+          "ttft_ms_p50": 37.2,
+          "ttft_ms_p99": 54.0
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 693.3,
+          "tokens_out": 41616,
+          "tokens_in": 0,
+          "requests_completed": 231,
+          "ttft_ms_p50": 37.1,
+          "ttft_ms_p99": 51.9
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 693.2,
+          "tokens_out": 41595,
+          "tokens_in": 0,
+          "requests_completed": 226,
+          "ttft_ms_p50": 37.0,
+          "ttft_ms_p99": 58.1
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 698.0,
+          "tokens_out": 41865,
+          "tokens_in": 0,
+          "requests_completed": 228,
+          "ttft_ms_p50": 37.0,
+          "ttft_ms_p99": 54.5
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 701.1,
+          "tokens_out": 42061,
+          "tokens_in": 0,
+          "requests_completed": 234,
+          "ttft_ms_p50": 37.1,
+          "ttft_ms_p99": 55.2
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 695.7,
+          "tokens_out": 41764,
+          "tokens_in": 0,
+          "requests_completed": 226,
+          "ttft_ms_p50": 37.4,
+          "ttft_ms_p99": 59.0
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 701.8,
+          "tokens_out": 42097,
+          "tokens_in": 0,
+          "requests_completed": 229,
+          "ttft_ms_p50": 37.2,
+          "ttft_ms_p99": 57.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 694.1,
+      "throttle_ratio": 0.917,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -2693.1
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.58,
+    "baseline_delta": 0.0,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "FP8",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "14:00:18",
+    "run_id": "c609f262",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_c609f262",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T13:57:50.323406+00:00",
+    "benchmark_end_time": "2026-04-27T14:00:18.468430+00:00",
+    "benchmark_elapsed_minutes": 24.0,
+    "model_load_seconds": 71.6,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/offline",
+      "online": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/online",
+      "sustained": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/sustained/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/sustained/result.json
new file mode 100644
index 00000000..31643190
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/sustained/result.json
@@ -0,0 +1,257 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
+    "model_revision": "12fd6884d2585dd4d020373e7f39f74507b31866",
+    "model_name": null,
+    "model_note": "FP8 quantized by RedHatAI using llm-compressor. Weights and activations in FP8. Native speedup on H100/MI300X; computed in BF16 on A100.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 645.5,
+          "tokens_out": 38742,
+          "tokens_in": 0,
+          "requests_completed": 211,
+          "ttft_ms_p50": 38.2,
+          "ttft_ms_p99": 2750.8
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 702.3,
+          "tokens_out": 42131,
+          "tokens_in": 0,
+          "requests_completed": 234,
+          "ttft_ms_p50": 37.4,
+          "ttft_ms_p99": 83.8
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 695.8,
+          "tokens_out": 41779,
+          "tokens_in": 0,
+          "requests_completed": 228,
+          "ttft_ms_p50": 37.2,
+          "ttft_ms_p99": 56.1
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 698.5,
+          "tokens_out": 41888,
+          "tokens_in": 0,
+          "requests_completed": 229,
+          "ttft_ms_p50": 37.2,
+          "ttft_ms_p99": 55.8
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 700.5,
+          "tokens_out": 42050,
+          "tokens_in": 0,
+          "requests_completed": 231,
+          "ttft_ms_p50": 37.0,
+          "ttft_ms_p99": 54.2
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 695.7,
+          "tokens_out": 41730,
+          "tokens_in": 0,
+          "requests_completed": 229,
+          "ttft_ms_p50": 37.0,
+          "ttft_ms_p99": 54.4
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 691.3,
+          "tokens_out": 41466,
+          "tokens_in": 0,
+          "requests_completed": 227,
+          "ttft_ms_p50": 37.4,
+          "ttft_ms_p99": 56.3
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 704.3,
+          "tokens_out": 42253,
+          "tokens_in": 0,
+          "requests_completed": 229,
+          "ttft_ms_p50": 37.2,
+          "ttft_ms_p99": 54.0
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 693.3,
+          "tokens_out": 41616,
+          "tokens_in": 0,
+          "requests_completed": 231,
+          "ttft_ms_p50": 37.1,
+          "ttft_ms_p99": 51.9
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 693.2,
+          "tokens_out": 41595,
+          "tokens_in": 0,
+          "requests_completed": 226,
+          "ttft_ms_p50": 37.0,
+          "ttft_ms_p99": 58.1
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 698.0,
+          "tokens_out": 41865,
+          "tokens_in": 0,
+          "requests_completed": 228,
+          "ttft_ms_p50": 37.0,
+          "ttft_ms_p99": 54.5
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 701.1,
+          "tokens_out": 42061,
+          "tokens_in": 0,
+          "requests_completed": 234,
+          "ttft_ms_p50": 37.1,
+          "ttft_ms_p99": 55.2
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 695.7,
+          "tokens_out": 41764,
+          "tokens_in": 0,
+          "requests_completed": 226,
+          "ttft_ms_p50": 37.4,
+          "ttft_ms_p99": 59.0
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 701.8,
+          "tokens_out": 42097,
+          "tokens_in": 0,
+          "requests_completed": 229,
+          "ttft_ms_p50": 37.2,
+          "ttft_ms_p99": 57.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 694.1,
+      "throttle_ratio": 0.917,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -2693.1
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "14:25:07",
+    "run_id": "c609f262",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_c609f262",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T14:10:02.789377+00:00",
+    "benchmark_end_time": "2026-04-27T14:25:07.007784+00:00",
+    "benchmark_elapsed_minutes": 15.1,
+    "model_load_seconds": 68.8
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/result.json
new file mode 100644
index 00000000..3c31a113
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/result.json
@@ -0,0 +1,1499 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original",
+    "_note": "suite model_id. Each precision level uses its own quantized checkpoint."
+  },
+  "task": {
+    "scenarios_run": [
+      "accuracy",
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "precision_levels_run": [
+      "BF16",
+      "FP8",
+      "W8A8",
+      "W8A16",
+      "W4A16"
+    ],
+    "precision_levels_skipped": [
+      "FP16"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "quantization": {
+      "results_by_precision": [
+        {
+          "precision": "BF16",
+          "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+          "best_throughput_tokens_per_sec": 1986.44,
+          "accuracy_score": 0.55,
+          "accuracy_baseline_delta": -0.01,
+          "accuracy_valid": true,
+          "quality_efficiency": 1092.5,
+          "speedup_vs_bf16": 1.0,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 1986.44,
+              "throughput_tokens_per_sec_per_chip": 1986.44,
+              "elapsed_seconds_median": 18.1,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 1966.2,
+              "throughput_tokens_per_sec_per_chip": 1966.2,
+              "elapsed_seconds_median": 18.5,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 1933.26,
+              "throughput_tokens_per_sec_per_chip": 1933.26,
+              "elapsed_seconds_median": 18.9,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 1965.04,
+              "throughput_tokens_per_sec_per_chip": 1965.04,
+              "elapsed_seconds_median": 18.3,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "bf16",
+          "effective_dtype": "bfloat16",
+          "quantization_method": null
+        },
+        {
+          "precision": "FP8",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
+          "best_throughput_tokens_per_sec": 3965.79,
+          "accuracy_score": 0.58,
+          "accuracy_baseline_delta": 0.0,
+          "accuracy_valid": true,
+          "quality_efficiency": 2300.2,
+          "speedup_vs_bf16": 1.996,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 3870.35,
+              "throughput_tokens_per_sec_per_chip": 3870.35,
+              "elapsed_seconds_median": 9.1,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 3873.21,
+              "throughput_tokens_per_sec_per_chip": 3873.21,
+              "elapsed_seconds_median": 9.1,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 3965.79,
+              "throughput_tokens_per_sec_per_chip": 3965.79,
+              "elapsed_seconds_median": 8.9,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 3935.02,
+              "throughput_tokens_per_sec_per_chip": 3935.02,
+              "elapsed_seconds_median": 8.9,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "fp8",
+          "effective_dtype": "auto",
+          "quantization_method": "compressed-tensors"
+        },
+        {
+          "precision": "W8A8",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+          "best_throughput_tokens_per_sec": 4044.47,
+          "accuracy_score": 0.6,
+          "accuracy_baseline_delta": 0.01,
+          "accuracy_valid": true,
+          "quality_efficiency": 2426.7,
+          "speedup_vs_bf16": 2.036,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 3903.39,
+              "throughput_tokens_per_sec_per_chip": 3903.39,
+              "elapsed_seconds_median": 9.1,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 4044.47,
+              "throughput_tokens_per_sec_per_chip": 4044.47,
+              "elapsed_seconds_median": 8.9,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 4000.58,
+              "throughput_tokens_per_sec_per_chip": 4000.58,
+              "elapsed_seconds_median": 8.9,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 4027.83,
+              "throughput_tokens_per_sec_per_chip": 4027.83,
+              "elapsed_seconds_median": 8.8,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "w8a8",
+          "effective_dtype": "auto",
+          "quantization_method": "compressed-tensors"
+        },
+        {
+          "precision": "W8A16",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+          "best_throughput_tokens_per_sec": 2987.4,
+          "accuracy_score": 0.59,
+          "accuracy_baseline_delta": 0.0,
+          "accuracy_valid": true,
+          "quality_efficiency": 1762.6,
+          "speedup_vs_bf16": 1.504,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 2972.67,
+              "throughput_tokens_per_sec_per_chip": 2972.67,
+              "elapsed_seconds_median": 12.1,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 2969.3,
+              "throughput_tokens_per_sec_per_chip": 2969.3,
+              "elapsed_seconds_median": 12.1,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 2973.03,
+              "throughput_tokens_per_sec_per_chip": 2973.03,
+              "elapsed_seconds_median": 12.0,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 2987.4,
+              "throughput_tokens_per_sec_per_chip": 2987.4,
+              "elapsed_seconds_median": 12.0,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "w8a16",
+          "effective_dtype": "auto",
+          "quantization_method": "compressed-tensors"
+        },
+        {
+          "precision": "W4A16",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+          "best_throughput_tokens_per_sec": 1611.83,
+          "accuracy_score": 0.55,
+          "accuracy_baseline_delta": -0.02,
+          "accuracy_valid": true,
+          "quality_efficiency": 886.5,
+          "speedup_vs_bf16": 0.811,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 1611.42,
+              "throughput_tokens_per_sec_per_chip": 1611.42,
+              "elapsed_seconds_median": 21.5,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 1611.83,
+              "throughput_tokens_per_sec_per_chip": 1611.83,
+              "elapsed_seconds_median": 21.5,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 1609.86,
+              "throughput_tokens_per_sec_per_chip": 1609.86,
+              "elapsed_seconds_median": 21.5,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 1610.66,
+              "throughput_tokens_per_sec_per_chip": 1610.66,
+              "elapsed_seconds_median": 21.7,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "w4a16",
+          "effective_dtype": "auto",
+          "quantization_method": "gptq"
+        }
+      ]
+    },
+    "derived": {},
+    "quantization_online": {
+      "results_by_precision": [
+        {
+          "precision": "BF16",
+          "max_valid_qps": 10,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 68.99,
+              "ttft_ms_p90": 101.54,
+              "ttft_ms_p99": 1975.21,
+              "tpot_ms_p50": 23.2,
+              "tpot_ms_p90": 25.96,
+              "tpot_ms_p99": 29.33,
+              "elapsed_seconds_median": 67.1,
+              "sla_met": false
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 77.79,
+              "ttft_ms_p90": 120.18,
+              "ttft_ms_p99": 155.35,
+              "tpot_ms_p50": 34.33,
+              "tpot_ms_p90": 37.28,
+              "tpot_ms_p99": 48.36,
+              "elapsed_seconds_median": 34.2,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 4302.15,
+              "ttft_ms_p90": 8634.91,
+              "ttft_ms_p99": 9469.27,
+              "tpot_ms_p50": 40.74,
+              "tpot_ms_p90": 44.2,
+              "tpot_ms_p99": 115.81,
+              "elapsed_seconds_median": 25.9,
+              "sla_met": false
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 7003.83,
+              "ttft_ms_p90": 13096.96,
+              "ttft_ms_p99": 14256.05,
+              "tpot_ms_p50": 35.52,
+              "tpot_ms_p90": 45.45,
+              "tpot_ms_p99": 831.83,
+              "elapsed_seconds_median": 24.8,
+              "sla_met": false
+            }
+          ]
+        },
+        {
+          "precision": "FP8",
+          "max_valid_qps": 25,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 40.91,
+              "ttft_ms_p90": 58.78,
+              "ttft_ms_p99": 1717.74,
+              "tpot_ms_p50": 12.0,
+              "tpot_ms_p90": 12.87,
+              "tpot_ms_p99": 15.87,
+              "elapsed_seconds_median": 65.4,
+              "sla_met": false
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 44.32,
+              "ttft_ms_p90": 53.51,
+              "ttft_ms_p99": 90.52,
+              "tpot_ms_p50": 18.34,
+              "tpot_ms_p90": 19.66,
+              "tpot_ms_p99": 22.26,
+              "elapsed_seconds_median": 32.1,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 50.71,
+              "ttft_ms_p90": 64.15,
+              "ttft_ms_p99": 109.33,
+              "tpot_ms_p50": 29.33,
+              "tpot_ms_p90": 36.05,
+              "tpot_ms_p99": 46.56,
+              "elapsed_seconds_median": 16.0,
+              "sla_met": true
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 53.61,
+              "ttft_ms_p90": 1478.22,
+              "ttft_ms_p99": 4184.64,
+              "tpot_ms_p50": 38.21,
+              "tpot_ms_p90": 43.17,
+              "tpot_ms_p99": 64.28,
+              "elapsed_seconds_median": 13.5,
+              "sla_met": false
+            }
+          ]
+        },
+        {
+          "precision": "W8A8",
+          "max_valid_qps": 25,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 38.71,
+              "ttft_ms_p90": 53.38,
+              "ttft_ms_p99": 1589.47,
+              "tpot_ms_p50": 11.46,
+              "tpot_ms_p90": 12.3,
+              "tpot_ms_p99": 15.34,
+              "elapsed_seconds_median": 65.2,
+              "sla_met": false
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 44.87,
+              "ttft_ms_p90": 53.48,
+              "ttft_ms_p99": 65.3,
+              "tpot_ms_p50": 18.42,
+              "tpot_ms_p90": 19.56,
+              "tpot_ms_p99": 21.14,
+              "elapsed_seconds_median": 32.1,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 50.04,
+              "ttft_ms_p90": 61.38,
+              "ttft_ms_p99": 85.27,
+              "tpot_ms_p50": 28.19,
+              "tpot_ms_p90": 33.11,
+              "tpot_ms_p99": 44.66,
+              "elapsed_seconds_median": 15.7,
+              "sla_met": true
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 52.01,
+              "ttft_ms_p90": 1450.28,
+              "ttft_ms_p99": 3527.1,
+              "tpot_ms_p50": 34.6,
+              "tpot_ms_p90": 40.12,
+              "tpot_ms_p99": 58.13,
+              "elapsed_seconds_median": 12.6,
+              "sla_met": false
+            }
+          ]
+        },
+        {
+          "precision": "W8A16",
+          "max_valid_qps": 25,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 38.31,
+              "ttft_ms_p90": 74.84,
+              "ttft_ms_p99": 1847.63,
+              "tpot_ms_p50": 11.74,
+              "tpot_ms_p90": 13.92,
+              "tpot_ms_p99": 18.31,
+              "elapsed_seconds_median": 65.3,
+              "sla_met": false
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 39.48,
+              "ttft_ms_p90": 47.1,
+              "ttft_ms_p99": 61.58,
+              "tpot_ms_p50": 16.6,
+              "tpot_ms_p90": 17.88,
+              "tpot_ms_p99": 20.06,
+              "elapsed_seconds_median": 32.0,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 50.43,
+              "ttft_ms_p90": 78.15,
+              "ttft_ms_p99": 105.59,
+              "tpot_ms_p50": 40.03,
+              "tpot_ms_p90": 45.76,
+              "tpot_ms_p99": 61.97,
+              "elapsed_seconds_median": 18.1,
+              "sla_met": true
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 61.94,
+              "ttft_ms_p90": 3528.85,
+              "ttft_ms_p99": 7833.34,
+              "tpot_ms_p50": 53.7,
+              "tpot_ms_p90": 59.48,
+              "tpot_ms_p99": 83.56,
+              "elapsed_seconds_median": 17.8,
+              "sla_met": false
+            }
+          ]
+        },
+        {
+          "precision": "W4A16",
+          "max_valid_qps": 50,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 33.91,
+              "ttft_ms_p90": 103.24,
+              "ttft_ms_p99": 5457.75,
+              "tpot_ms_p50": 10.27,
+              "tpot_ms_p90": 21.22,
+              "tpot_ms_p99": 53.71,
+              "elapsed_seconds_median": 64.9,
+              "sla_met": false
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 54.88,
+              "ttft_ms_p90": 92.23,
+              "ttft_ms_p99": 103.95,
+              "tpot_ms_p50": 44.0,
+              "tpot_ms_p90": 49.52,
+              "tpot_ms_p99": 53.52,
+              "elapsed_seconds_median": 36.4,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 64.76,
+              "ttft_ms_p90": 102.45,
+              "ttft_ms_p99": 140.98,
+              "tpot_ms_p50": 57.46,
+              "tpot_ms_p90": 61.94,
+              "tpot_ms_p99": 74.79,
+              "elapsed_seconds_median": 22.2,
+              "sla_met": true
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 60.06,
+              "ttft_ms_p90": 102.77,
+              "ttft_ms_p99": 143.02,
+              "tpot_ms_p50": 61.0,
+              "tpot_ms_p90": 66.57,
+              "tpot_ms_p99": 87.33,
+              "elapsed_seconds_median": 17.9,
+              "sla_met": true
+            }
+          ]
+        }
+      ]
+    },
+    "quantization_sustained": {
+      "results_by_precision": [
+        {
+          "precision": "BF16",
+          "sustained_throughput_tokens_per_sec": 434.7,
+          "throttle_ratio": 0.88,
+          "throttle_onset_minute": 1.0,
+          "ttft_p99_drift_ms": -2968.6,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 393.4,
+              "tokens_out": 23625,
+              "tokens_in": 0,
+              "requests_completed": 131,
+              "ttft_ms_p50": 60.8,
+              "ttft_ms_p99": 3031.4
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 433.1,
+              "tokens_out": 25985,
+              "tokens_in": 0,
+              "requests_completed": 140,
+              "ttft_ms_p50": 50.1,
+              "ttft_ms_p99": 61.2
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 436.6,
+              "tokens_out": 26191,
+              "tokens_in": 0,
+              "requests_completed": 140,
+              "ttft_ms_p50": 50.2,
+              "ttft_ms_p99": 62.6
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 446.9,
+              "tokens_out": 26811,
+              "tokens_in": 0,
+              "requests_completed": 148,
+              "ttft_ms_p50": 50.3,
+              "ttft_ms_p99": 59.6
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 440.6,
+              "tokens_out": 26429,
+              "tokens_in": 0,
+              "requests_completed": 140,
+              "ttft_ms_p50": 50.1,
+              "ttft_ms_p99": 62.1
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 436.3,
+              "tokens_out": 26190,
+              "tokens_in": 0,
+              "requests_completed": 144,
+              "ttft_ms_p50": 50.3,
+              "ttft_ms_p99": 61.5
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 436.4,
+              "tokens_out": 26187,
+              "tokens_in": 0,
+              "requests_completed": 140,
+              "ttft_ms_p50": 50.2,
+              "ttft_ms_p99": 71.8
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 436.7,
+              "tokens_out": 26197,
+              "tokens_in": 0,
+              "requests_completed": 141,
+              "ttft_ms_p50": 50.3,
+              "ttft_ms_p99": 64.3
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 440.8,
+              "tokens_out": 26436,
+              "tokens_in": 0,
+              "requests_completed": 143,
+              "ttft_ms_p50": 50.3,
+              "ttft_ms_p99": 63.2
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 437.4,
+              "tokens_out": 26252,
+              "tokens_in": 0,
+              "requests_completed": 141,
+              "ttft_ms_p50": 50.5,
+              "ttft_ms_p99": 62.4
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 437.6,
+              "tokens_out": 26250,
+              "tokens_in": 0,
+              "requests_completed": 144,
+              "ttft_ms_p50": 50.1,
+              "ttft_ms_p99": 62.2
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 436.1,
+              "tokens_out": 26162,
+              "tokens_in": 0,
+              "requests_completed": 140,
+              "ttft_ms_p50": 50.3,
+              "ttft_ms_p99": 58.0
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 435.8,
+              "tokens_out": 26158,
+              "tokens_in": 0,
+              "requests_completed": 145,
+              "ttft_ms_p50": 50.3,
+              "ttft_ms_p99": 62.4
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 438.7,
+              "tokens_out": 26320,
+              "tokens_in": 0,
+              "requests_completed": 139,
+              "ttft_ms_p50": 50.4,
+              "ttft_ms_p99": 62.8
+            }
+          ]
+        },
+        {
+          "precision": "FP8",
+          "sustained_throughput_tokens_per_sec": 694.1,
+          "throttle_ratio": 0.917,
+          "throttle_onset_minute": null,
+          "ttft_p99_drift_ms": -2693.1,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 645.5,
+              "tokens_out": 38742,
+              "tokens_in": 0,
+              "requests_completed": 211,
+              "ttft_ms_p50": 38.2,
+              "ttft_ms_p99": 2750.8
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 702.3,
+              "tokens_out": 42131,
+              "tokens_in": 0,
+              "requests_completed": 234,
+              "ttft_ms_p50": 37.4,
+              "ttft_ms_p99": 83.8
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 695.8,
+              "tokens_out": 41779,
+              "tokens_in": 0,
+              "requests_completed": 228,
+              "ttft_ms_p50": 37.2,
+              "ttft_ms_p99": 56.1
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 698.5,
+              "tokens_out": 41888,
+              "tokens_in": 0,
+              "requests_completed": 229,
+              "ttft_ms_p50": 37.2,
+              "ttft_ms_p99": 55.8
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 700.5,
+              "tokens_out": 42050,
+              "tokens_in": 0,
+              "requests_completed": 231,
+              "ttft_ms_p50": 37.0,
+              "ttft_ms_p99": 54.2
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 695.7,
+              "tokens_out": 41730,
+              "tokens_in": 0,
+              "requests_completed": 229,
+              "ttft_ms_p50": 37.0,
+              "ttft_ms_p99": 54.4
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 691.3,
+              "tokens_out": 41466,
+              "tokens_in": 0,
+              "requests_completed": 227,
+              "ttft_ms_p50": 37.4,
+              "ttft_ms_p99": 56.3
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 704.3,
+              "tokens_out": 42253,
+              "tokens_in": 0,
+              "requests_completed": 229,
+              "ttft_ms_p50": 37.2,
+              "ttft_ms_p99": 54.0
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 693.3,
+              "tokens_out": 41616,
+              "tokens_in": 0,
+              "requests_completed": 231,
+              "ttft_ms_p50": 37.1,
+              "ttft_ms_p99": 51.9
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 693.2,
+              "tokens_out": 41595,
+              "tokens_in": 0,
+              "requests_completed": 226,
+              "ttft_ms_p50": 37.0,
+              "ttft_ms_p99": 58.1
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 698.0,
+              "tokens_out": 41865,
+              "tokens_in": 0,
+              "requests_completed": 228,
+              "ttft_ms_p50": 37.0,
+              "ttft_ms_p99": 54.5
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 701.1,
+              "tokens_out": 42061,
+              "tokens_in": 0,
+              "requests_completed": 234,
+              "ttft_ms_p50": 37.1,
+              "ttft_ms_p99": 55.2
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 695.7,
+              "tokens_out": 41764,
+              "tokens_in": 0,
+              "requests_completed": 226,
+              "ttft_ms_p50": 37.4,
+              "ttft_ms_p99": 59.0
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 701.8,
+              "tokens_out": 42097,
+              "tokens_in": 0,
+              "requests_completed": 229,
+              "ttft_ms_p50": 37.2,
+              "ttft_ms_p99": 57.7
+            }
+          ]
+        },
+        {
+          "precision": "W8A8",
+          "sustained_throughput_tokens_per_sec": 719.2,
+          "throttle_ratio": 0.92,
+          "throttle_onset_minute": null,
+          "ttft_p99_drift_ms": -2756.6,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 677.6,
+              "tokens_out": 40664,
+              "tokens_in": 0,
+              "requests_completed": 222,
+              "ttft_ms_p50": 36.6,
+              "ttft_ms_p99": 2809.1
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 714.9,
+              "tokens_out": 42916,
+              "tokens_in": 0,
+              "requests_completed": 232,
+              "ttft_ms_p50": 35.9,
+              "ttft_ms_p99": 52.5
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 726.3,
+              "tokens_out": 43580,
+              "tokens_in": 0,
+              "requests_completed": 236,
+              "ttft_ms_p50": 35.9,
+              "ttft_ms_p99": 46.2
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 721.1,
+              "tokens_out": 43251,
+              "tokens_in": 0,
+              "requests_completed": 233,
+              "ttft_ms_p50": 36.0,
+              "ttft_ms_p99": 53.7
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 724.4,
+              "tokens_out": 43460,
+              "tokens_in": 0,
+              "requests_completed": 235,
+              "ttft_ms_p50": 36.2,
+              "ttft_ms_p99": 60.5
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 721.4,
+              "tokens_out": 43288,
+              "tokens_in": 0,
+              "requests_completed": 234,
+              "ttft_ms_p50": 36.0,
+              "ttft_ms_p99": 45.1
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 728.3,
+              "tokens_out": 43696,
+              "tokens_in": 0,
+              "requests_completed": 238,
+              "ttft_ms_p50": 36.1,
+              "ttft_ms_p99": 52.2
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 715.3,
+              "tokens_out": 42941,
+              "tokens_in": 0,
+              "requests_completed": 233,
+              "ttft_ms_p50": 36.0,
+              "ttft_ms_p99": 54.6
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 722.2,
+              "tokens_out": 43310,
+              "tokens_in": 0,
+              "requests_completed": 232,
+              "ttft_ms_p50": 35.8,
+              "ttft_ms_p99": 50.5
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 722.3,
+              "tokens_out": 43356,
+              "tokens_in": 0,
+              "requests_completed": 235,
+              "ttft_ms_p50": 36.1,
+              "ttft_ms_p99": 66.9
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 714.5,
+              "tokens_out": 42869,
+              "tokens_in": 0,
+              "requests_completed": 233,
+              "ttft_ms_p50": 36.2,
+              "ttft_ms_p99": 53.5
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 736.2,
+              "tokens_out": 44141,
+              "tokens_in": 0,
+              "requests_completed": 237,
+              "ttft_ms_p50": 36.0,
+              "ttft_ms_p99": 51.9
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 715.5,
+              "tokens_out": 42963,
+              "tokens_in": 0,
+              "requests_completed": 236,
+              "ttft_ms_p50": 36.0,
+              "ttft_ms_p99": 52.4
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 728.6,
+              "tokens_out": 43711,
+              "tokens_in": 0,
+              "requests_completed": 233,
+              "ttft_ms_p50": 36.0,
+              "ttft_ms_p99": 52.5
+            }
+          ]
+        },
+        {
+          "precision": "W8A16",
+          "sustained_throughput_tokens_per_sec": 718.4,
+          "throttle_ratio": 0.871,
+          "throttle_onset_minute": 1.0,
+          "ttft_p99_drift_ms": -3856.6,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 639.5,
+              "tokens_out": 38376,
+              "tokens_in": 0,
+              "requests_completed": 208,
+              "ttft_ms_p50": 38.1,
+              "ttft_ms_p99": 3904.2
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 728.0,
+              "tokens_out": 43705,
+              "tokens_in": 0,
+              "requests_completed": 237,
+              "ttft_ms_p50": 33.3,
+              "ttft_ms_p99": 49.3
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 723.2,
+              "tokens_out": 43391,
+              "tokens_in": 0,
+              "requests_completed": 234,
+              "ttft_ms_p50": 33.2,
+              "ttft_ms_p99": 54.6
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 721.2,
+              "tokens_out": 43268,
+              "tokens_in": 0,
+              "requests_completed": 232,
+              "ttft_ms_p50": 33.2,
+              "ttft_ms_p99": 48.6
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 723.7,
+              "tokens_out": 43427,
+              "tokens_in": 0,
+              "requests_completed": 236,
+              "ttft_ms_p50": 33.4,
+              "ttft_ms_p99": 52.8
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 725.8,
+              "tokens_out": 43530,
+              "tokens_in": 0,
+              "requests_completed": 234,
+              "ttft_ms_p50": 33.2,
+              "ttft_ms_p99": 60.8
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 717.4,
+              "tokens_out": 43059,
+              "tokens_in": 0,
+              "requests_completed": 231,
+              "ttft_ms_p50": 33.0,
+              "ttft_ms_p99": 47.9
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 734.3,
+              "tokens_out": 44054,
+              "tokens_in": 0,
+              "requests_completed": 240,
+              "ttft_ms_p50": 33.4,
+              "ttft_ms_p99": 51.6
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 726.3,
+              "tokens_out": 43577,
+              "tokens_in": 0,
+              "requests_completed": 235,
+              "ttft_ms_p50": 33.3,
+              "ttft_ms_p99": 45.3
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 725.4,
+              "tokens_out": 43519,
+              "tokens_in": 0,
+              "requests_completed": 233,
+              "ttft_ms_p50": 33.3,
+              "ttft_ms_p99": 64.0
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 716.7,
+              "tokens_out": 42986,
+              "tokens_in": 0,
+              "requests_completed": 234,
+              "ttft_ms_p50": 33.3,
+              "ttft_ms_p99": 48.3
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 724.8,
+              "tokens_out": 43518,
+              "tokens_in": 0,
+              "requests_completed": 235,
+              "ttft_ms_p50": 33.2,
+              "ttft_ms_p99": 44.4
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 723.3,
+              "tokens_out": 43383,
+              "tokens_in": 0,
+              "requests_completed": 233,
+              "ttft_ms_p50": 33.2,
+              "ttft_ms_p99": 52.3
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 727.9,
+              "tokens_out": 43664,
+              "tokens_in": 0,
+              "requests_completed": 237,
+              "ttft_ms_p50": 33.3,
+              "ttft_ms_p99": 47.6
+            }
+          ]
+        },
+        {
+          "precision": "W4A16",
+          "sustained_throughput_tokens_per_sec": 970.9,
+          "throttle_ratio": 0.882,
+          "throttle_onset_minute": 1.0,
+          "ttft_p99_drift_ms": -3382.4,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 868.0,
+              "tokens_out": 52079,
+              "tokens_in": 0,
+              "requests_completed": 293,
+              "ttft_ms_p50": 31.6,
+              "ttft_ms_p99": 3422.2
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 984.5,
+              "tokens_out": 59080,
+              "tokens_in": 0,
+              "requests_completed": 331,
+              "ttft_ms_p50": 26.9,
+              "ttft_ms_p99": 50.9
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 980.5,
+              "tokens_out": 58862,
+              "tokens_in": 0,
+              "requests_completed": 330,
+              "ttft_ms_p50": 26.9,
+              "ttft_ms_p99": 42.5
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 982.3,
+              "tokens_out": 58913,
+              "tokens_in": 0,
+              "requests_completed": 330,
+              "ttft_ms_p50": 26.5,
+              "ttft_ms_p99": 40.7
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 979.3,
+              "tokens_out": 58764,
+              "tokens_in": 0,
+              "requests_completed": 327,
+              "ttft_ms_p50": 26.4,
+              "ttft_ms_p99": 39.4
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 973.9,
+              "tokens_out": 58410,
+              "tokens_in": 0,
+              "requests_completed": 331,
+              "ttft_ms_p50": 26.5,
+              "ttft_ms_p99": 44.5
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 974.7,
+              "tokens_out": 58492,
+              "tokens_in": 0,
+              "requests_completed": 325,
+              "ttft_ms_p50": 26.6,
+              "ttft_ms_p99": 41.7
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 977.1,
+              "tokens_out": 58653,
+              "tokens_in": 0,
+              "requests_completed": 328,
+              "ttft_ms_p50": 26.6,
+              "ttft_ms_p99": 45.7
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 980.2,
+              "tokens_out": 58825,
+              "tokens_in": 0,
+              "requests_completed": 331,
+              "ttft_ms_p50": 26.6,
+              "ttft_ms_p99": 39.3
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 976.1,
+              "tokens_out": 58525,
+              "tokens_in": 0,
+              "requests_completed": 328,
+              "ttft_ms_p50": 26.4,
+              "ttft_ms_p99": 44.7
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 981.0,
+              "tokens_out": 58888,
+              "tokens_in": 0,
+              "requests_completed": 330,
+              "ttft_ms_p50": 26.7,
+              "ttft_ms_p99": 41.3
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 979.3,
+              "tokens_out": 58736,
+              "tokens_in": 0,
+              "requests_completed": 327,
+              "ttft_ms_p50": 26.5,
+              "ttft_ms_p99": 42.5
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 971.5,
+              "tokens_out": 58325,
+              "tokens_in": 0,
+              "requests_completed": 331,
+              "ttft_ms_p50": 26.9,
+              "ttft_ms_p99": 46.5
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 984.4,
+              "tokens_out": 59028,
+              "tokens_in": 0,
+              "requests_completed": 327,
+              "ttft_ms_p50": 26.3,
+              "ttft_ms_p99": 39.8
+            }
+          ]
+        }
+      ]
+    }
+  },
+  "accuracy": null,
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "13:25:04",
+    "run_id": "4ce353c2",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T13:20:07.690461+00:00",
+    "benchmark_end_time": "2026-04-27T13:25:04.187964+00:00",
+    "benchmark_elapsed_minutes": 128.5,
+    "model_load_seconds": 81.4,
+    "benchmark_elapsed_minutes_note": "Sum of per-precision benchmark_elapsed_minutes (excludes sleep gaps and orchestrator overhead).",
+    "scenario_dirs": {
+      "bf16/offline": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/offline",
+      "bf16/online": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/online",
+      "bf16/sustained": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/bf16/sustained",
+      "fp8/offline": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/offline",
+      "fp8/online": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/online",
+      "fp8/sustained": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/fp8/sustained",
+      "w8a8/offline": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/offline",
+      "w8a8/online": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/online",
+      "w8a8/sustained": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/sustained",
+      "w8a16/offline": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/offline",
+      "w8a16/online": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/online",
+      "w8a16/sustained": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/sustained",
+      "w4a16/offline": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/offline",
+      "w4a16/online": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/online",
+      "w4a16/sustained": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/sustained"
+    },
+    "precision_dirs": {
+      "BF16": "bf16",
+      "FP8": "fp8",
+      "W8A8": "w8a8",
+      "W8A16": "w8a16",
+      "W4A16": "w4a16"
+    },
+    "precision_model_map": {
+      "BF16": {
+        "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+        "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+        "dtype_override": "bfloat16"
+      },
+      "FP8": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
+        "model_revision": "12fd6884d2585dd4d020373e7f39f74507b31866",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "Static per-tensor FP8 (weights + activations). Requires Ampere+ (A100, A800, H20). Skipped automatically on FP16-only hardware."
+      },
+      "W8A8": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+        "model_revision": "e2bfb7d92784ad7d1b606c2f9644d3cefb2ec708",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "INT8 weights + INT8 activations via compressed-tensors. Exercises native int8 tensor cores."
+      },
+      "W8A16": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+        "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "INT8 weights, FP16 activations. Weight-only quantization — reduces memory bandwidth, not compute dtype."
+      },
+      "W4A16": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+        "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+        "engine_kwargs": {
+          "quantization": "gptq"
+        },
+        "_note": "INT4 weights, FP16 activations via GPTQ Marlin kernels. Weight-only quantization — larger memory saving than W8A16."
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/accuracy/accuracy.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/accuracy/accuracy.json
new file mode 100644
index 00000000..68a15e84
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.55,
+  "baseline_delta": -0.02,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "W4A16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/offline/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/offline/result.json
new file mode 100644
index 00000000..914af955
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/offline/result.json
@@ -0,0 +1,157 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using GPTQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "auto",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 1611.42,
+          "throughput_tokens_per_sec_per_chip": 1611.42,
+          "elapsed_seconds_median": 21.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 1611.83,
+          "throughput_tokens_per_sec_per_chip": 1611.83,
+          "elapsed_seconds_median": 21.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 1609.86,
+          "throughput_tokens_per_sec_per_chip": 1609.86,
+          "elapsed_seconds_median": 21.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 1610.66,
+          "throughput_tokens_per_sec_per_chip": 1610.66,
+          "elapsed_seconds_median": 21.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "15:48:10",
+    "run_id": "98b818e7",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_98b818e7",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T15:42:22.200967+00:00",
+    "benchmark_end_time": "2026-04-27T15:48:10.359781+00:00",
+    "benchmark_elapsed_minutes": 5.8,
+    "model_load_seconds": 80.7
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/online/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/online/result.json
new file mode 100644
index 00000000..188df7d2
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/online/result.json
@@ -0,0 +1,159 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using GPTQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "auto",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 33.91,
+          "ttft_ms_p90": 103.24,
+          "ttft_ms_p99": 5457.75,
+          "tpot_ms_p50": 10.27,
+          "tpot_ms_p90": 21.22,
+          "tpot_ms_p99": 53.71,
+          "elapsed_seconds_median": 64.9,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 54.88,
+          "ttft_ms_p90": 92.23,
+          "ttft_ms_p99": 103.95,
+          "tpot_ms_p50": 44.0,
+          "tpot_ms_p90": 49.52,
+          "tpot_ms_p99": 53.52,
+          "elapsed_seconds_median": 36.4,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 64.76,
+          "ttft_ms_p90": 102.45,
+          "ttft_ms_p99": 140.98,
+          "tpot_ms_p50": 57.46,
+          "tpot_ms_p90": 61.94,
+          "tpot_ms_p99": 74.79,
+          "elapsed_seconds_median": 22.2,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 60.06,
+          "ttft_ms_p90": 102.77,
+          "ttft_ms_p99": 143.02,
+          "tpot_ms_p50": 61.0,
+          "tpot_ms_p90": 66.57,
+          "tpot_ms_p99": 87.33,
+          "elapsed_seconds_median": 17.9,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "15:58:41",
+    "run_id": "98b818e7",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_98b818e7",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T15:51:40.707094+00:00",
+    "benchmark_end_time": "2026-04-27T15:58:41.603670+00:00",
+    "benchmark_elapsed_minutes": 7.0,
+    "model_load_seconds": 152.9
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/result.json
new file mode 100644
index 00000000..19fad6ca
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/result.json
@@ -0,0 +1,374 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using GPTQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "auto",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 1611.42,
+          "throughput_tokens_per_sec_per_chip": 1611.42,
+          "elapsed_seconds_median": 21.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 1611.83,
+          "throughput_tokens_per_sec_per_chip": 1611.83,
+          "elapsed_seconds_median": 21.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 1609.86,
+          "throughput_tokens_per_sec_per_chip": 1609.86,
+          "elapsed_seconds_median": 21.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 1610.66,
+          "throughput_tokens_per_sec_per_chip": 1610.66,
+          "elapsed_seconds_median": 21.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 33.91,
+          "ttft_ms_p90": 103.24,
+          "ttft_ms_p99": 5457.75,
+          "tpot_ms_p50": 10.27,
+          "tpot_ms_p90": 21.22,
+          "tpot_ms_p99": 53.71,
+          "elapsed_seconds_median": 64.9,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 54.88,
+          "ttft_ms_p90": 92.23,
+          "ttft_ms_p99": 103.95,
+          "tpot_ms_p50": 44.0,
+          "tpot_ms_p90": 49.52,
+          "tpot_ms_p99": 53.52,
+          "elapsed_seconds_median": 36.4,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 64.76,
+          "ttft_ms_p90": 102.45,
+          "ttft_ms_p99": 140.98,
+          "tpot_ms_p50": 57.46,
+          "tpot_ms_p90": 61.94,
+          "tpot_ms_p99": 74.79,
+          "elapsed_seconds_median": 22.2,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 60.06,
+          "ttft_ms_p90": 102.77,
+          "ttft_ms_p99": 143.02,
+          "tpot_ms_p50": 61.0,
+          "tpot_ms_p90": 66.57,
+          "tpot_ms_p99": 87.33,
+          "elapsed_seconds_median": 17.9,
+          "sla_met": true
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 868.0,
+          "tokens_out": 52079,
+          "tokens_in": 0,
+          "requests_completed": 293,
+          "ttft_ms_p50": 31.6,
+          "ttft_ms_p99": 3422.2
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 984.5,
+          "tokens_out": 59080,
+          "tokens_in": 0,
+          "requests_completed": 331,
+          "ttft_ms_p50": 26.9,
+          "ttft_ms_p99": 50.9
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 980.5,
+          "tokens_out": 58862,
+          "tokens_in": 0,
+          "requests_completed": 330,
+          "ttft_ms_p50": 26.9,
+          "ttft_ms_p99": 42.5
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 982.3,
+          "tokens_out": 58913,
+          "tokens_in": 0,
+          "requests_completed": 330,
+          "ttft_ms_p50": 26.5,
+          "ttft_ms_p99": 40.7
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 979.3,
+          "tokens_out": 58764,
+          "tokens_in": 0,
+          "requests_completed": 327,
+          "ttft_ms_p50": 26.4,
+          "ttft_ms_p99": 39.4
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 973.9,
+          "tokens_out": 58410,
+          "tokens_in": 0,
+          "requests_completed": 331,
+          "ttft_ms_p50": 26.5,
+          "ttft_ms_p99": 44.5
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 974.7,
+          "tokens_out": 58492,
+          "tokens_in": 0,
+          "requests_completed": 325,
+          "ttft_ms_p50": 26.6,
+          "ttft_ms_p99": 41.7
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 977.1,
+          "tokens_out": 58653,
+          "tokens_in": 0,
+          "requests_completed": 328,
+          "ttft_ms_p50": 26.6,
+          "ttft_ms_p99": 45.7
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 980.2,
+          "tokens_out": 58825,
+          "tokens_in": 0,
+          "requests_completed": 331,
+          "ttft_ms_p50": 26.6,
+          "ttft_ms_p99": 39.3
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 976.1,
+          "tokens_out": 58525,
+          "tokens_in": 0,
+          "requests_completed": 328,
+          "ttft_ms_p50": 26.4,
+          "ttft_ms_p99": 44.7
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 981.0,
+          "tokens_out": 58888,
+          "tokens_in": 0,
+          "requests_completed": 330,
+          "ttft_ms_p50": 26.7,
+          "ttft_ms_p99": 41.3
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 979.3,
+          "tokens_out": 58736,
+          "tokens_in": 0,
+          "requests_completed": 327,
+          "ttft_ms_p50": 26.5,
+          "ttft_ms_p99": 42.5
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 971.5,
+          "tokens_out": 58325,
+          "tokens_in": 0,
+          "requests_completed": 331,
+          "ttft_ms_p50": 26.9,
+          "ttft_ms_p99": 46.5
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 984.4,
+          "tokens_out": 59028,
+          "tokens_in": 0,
+          "requests_completed": 327,
+          "ttft_ms_p50": 26.3,
+          "ttft_ms_p99": 39.8
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 970.9,
+      "throttle_ratio": 0.882,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -3382.4
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.55,
+    "baseline_delta": -0.02,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "W4A16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "15:48:10",
+    "run_id": "98b818e7",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_98b818e7",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T15:42:22.200967+00:00",
+    "benchmark_end_time": "2026-04-27T15:48:10.359781+00:00",
+    "benchmark_elapsed_minutes": 27.9,
+    "model_load_seconds": 80.7,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/offline",
+      "online": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/online",
+      "sustained": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/sustained/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/sustained/result.json
new file mode 100644
index 00000000..1f0f3d66
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w4a16/sustained/result.json
@@ -0,0 +1,257 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using GPTQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "auto",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 868.0,
+          "tokens_out": 52079,
+          "tokens_in": 0,
+          "requests_completed": 293,
+          "ttft_ms_p50": 31.6,
+          "ttft_ms_p99": 3422.2
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 984.5,
+          "tokens_out": 59080,
+          "tokens_in": 0,
+          "requests_completed": 331,
+          "ttft_ms_p50": 26.9,
+          "ttft_ms_p99": 50.9
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 980.5,
+          "tokens_out": 58862,
+          "tokens_in": 0,
+          "requests_completed": 330,
+          "ttft_ms_p50": 26.9,
+          "ttft_ms_p99": 42.5
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 982.3,
+          "tokens_out": 58913,
+          "tokens_in": 0,
+          "requests_completed": 330,
+          "ttft_ms_p50": 26.5,
+          "ttft_ms_p99": 40.7
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 979.3,
+          "tokens_out": 58764,
+          "tokens_in": 0,
+          "requests_completed": 327,
+          "ttft_ms_p50": 26.4,
+          "ttft_ms_p99": 39.4
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 973.9,
+          "tokens_out": 58410,
+          "tokens_in": 0,
+          "requests_completed": 331,
+          "ttft_ms_p50": 26.5,
+          "ttft_ms_p99": 44.5
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 974.7,
+          "tokens_out": 58492,
+          "tokens_in": 0,
+          "requests_completed": 325,
+          "ttft_ms_p50": 26.6,
+          "ttft_ms_p99": 41.7
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 977.1,
+          "tokens_out": 58653,
+          "tokens_in": 0,
+          "requests_completed": 328,
+          "ttft_ms_p50": 26.6,
+          "ttft_ms_p99": 45.7
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 980.2,
+          "tokens_out": 58825,
+          "tokens_in": 0,
+          "requests_completed": 331,
+          "ttft_ms_p50": 26.6,
+          "ttft_ms_p99": 39.3
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 976.1,
+          "tokens_out": 58525,
+          "tokens_in": 0,
+          "requests_completed": 328,
+          "ttft_ms_p50": 26.4,
+          "ttft_ms_p99": 44.7
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 981.0,
+          "tokens_out": 58888,
+          "tokens_in": 0,
+          "requests_completed": 330,
+          "ttft_ms_p50": 26.7,
+          "ttft_ms_p99": 41.3
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 979.3,
+          "tokens_out": 58736,
+          "tokens_in": 0,
+          "requests_completed": 327,
+          "ttft_ms_p50": 26.5,
+          "ttft_ms_p99": 42.5
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 971.5,
+          "tokens_out": 58325,
+          "tokens_in": 0,
+          "requests_completed": 331,
+          "ttft_ms_p50": 26.9,
+          "ttft_ms_p99": 46.5
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 984.4,
+          "tokens_out": 59028,
+          "tokens_in": 0,
+          "requests_completed": 327,
+          "ttft_ms_p50": 26.3,
+          "ttft_ms_p99": 39.8
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 970.9,
+      "throttle_ratio": 0.882,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -3382.4
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "16:15:57",
+    "run_id": "98b818e7",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_98b818e7",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T16:00:54.374341+00:00",
+    "benchmark_end_time": "2026-04-27T16:15:57.425199+00:00",
+    "benchmark_elapsed_minutes": 15.1,
+    "model_load_seconds": 97.0
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/accuracy/accuracy.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/accuracy/accuracy.json
new file mode 100644
index 00000000..60c0d5fb
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.59,
+  "baseline_delta": 0.0,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "W8A16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/offline/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/offline/result.json
new file mode 100644
index 00000000..b409ebea
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/offline/result.json
@@ -0,0 +1,157 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 2972.67,
+          "throughput_tokens_per_sec_per_chip": 2972.67,
+          "elapsed_seconds_median": 12.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 2969.3,
+          "throughput_tokens_per_sec_per_chip": 2969.3,
+          "elapsed_seconds_median": 12.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 2973.03,
+          "throughput_tokens_per_sec_per_chip": 2973.03,
+          "elapsed_seconds_median": 12.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 2987.4,
+          "throughput_tokens_per_sec_per_chip": 2987.4,
+          "elapsed_seconds_median": 12.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "15:08:30",
+    "run_id": "637ce9ca",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_637ce9ca",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T15:05:13.757784+00:00",
+    "benchmark_end_time": "2026-04-27T15:08:30.679669+00:00",
+    "benchmark_elapsed_minutes": 3.3,
+    "model_load_seconds": 90.0
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/online/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/online/result.json
new file mode 100644
index 00000000..123c452f
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/online/result.json
@@ -0,0 +1,159 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 25,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 38.31,
+          "ttft_ms_p90": 74.84,
+          "ttft_ms_p99": 1847.63,
+          "tpot_ms_p50": 11.74,
+          "tpot_ms_p90": 13.92,
+          "tpot_ms_p99": 18.31,
+          "elapsed_seconds_median": 65.3,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 39.48,
+          "ttft_ms_p90": 47.1,
+          "ttft_ms_p99": 61.58,
+          "tpot_ms_p50": 16.6,
+          "tpot_ms_p90": 17.88,
+          "tpot_ms_p99": 20.06,
+          "elapsed_seconds_median": 32.0,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 50.43,
+          "ttft_ms_p90": 78.15,
+          "ttft_ms_p99": 105.59,
+          "tpot_ms_p50": 40.03,
+          "tpot_ms_p90": 45.76,
+          "tpot_ms_p99": 61.97,
+          "elapsed_seconds_median": 18.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 61.94,
+          "ttft_ms_p90": 3528.85,
+          "ttft_ms_p99": 7833.34,
+          "tpot_ms_p50": 53.7,
+          "tpot_ms_p90": 59.48,
+          "tpot_ms_p99": 83.56,
+          "elapsed_seconds_median": 17.8,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "15:17:09",
+    "run_id": "637ce9ca",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_637ce9ca",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T15:10:30.734651+00:00",
+    "benchmark_end_time": "2026-04-27T15:17:09.889423+00:00",
+    "benchmark_elapsed_minutes": 6.7,
+    "model_load_seconds": 87.0
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/result.json
new file mode 100644
index 00000000..e953270e
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/result.json
@@ -0,0 +1,374 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 2972.67,
+          "throughput_tokens_per_sec_per_chip": 2972.67,
+          "elapsed_seconds_median": 12.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 2969.3,
+          "throughput_tokens_per_sec_per_chip": 2969.3,
+          "elapsed_seconds_median": 12.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 2973.03,
+          "throughput_tokens_per_sec_per_chip": 2973.03,
+          "elapsed_seconds_median": 12.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 2987.4,
+          "throughput_tokens_per_sec_per_chip": 2987.4,
+          "elapsed_seconds_median": 12.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 25,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 38.31,
+          "ttft_ms_p90": 74.84,
+          "ttft_ms_p99": 1847.63,
+          "tpot_ms_p50": 11.74,
+          "tpot_ms_p90": 13.92,
+          "tpot_ms_p99": 18.31,
+          "elapsed_seconds_median": 65.3,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 39.48,
+          "ttft_ms_p90": 47.1,
+          "ttft_ms_p99": 61.58,
+          "tpot_ms_p50": 16.6,
+          "tpot_ms_p90": 17.88,
+          "tpot_ms_p99": 20.06,
+          "elapsed_seconds_median": 32.0,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 50.43,
+          "ttft_ms_p90": 78.15,
+          "ttft_ms_p99": 105.59,
+          "tpot_ms_p50": 40.03,
+          "tpot_ms_p90": 45.76,
+          "tpot_ms_p99": 61.97,
+          "elapsed_seconds_median": 18.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 61.94,
+          "ttft_ms_p90": 3528.85,
+          "ttft_ms_p99": 7833.34,
+          "tpot_ms_p50": 53.7,
+          "tpot_ms_p90": 59.48,
+          "tpot_ms_p99": 83.56,
+          "elapsed_seconds_median": 17.8,
+          "sla_met": false
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 639.5,
+          "tokens_out": 38376,
+          "tokens_in": 0,
+          "requests_completed": 208,
+          "ttft_ms_p50": 38.1,
+          "ttft_ms_p99": 3904.2
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 728.0,
+          "tokens_out": 43705,
+          "tokens_in": 0,
+          "requests_completed": 237,
+          "ttft_ms_p50": 33.3,
+          "ttft_ms_p99": 49.3
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 723.2,
+          "tokens_out": 43391,
+          "tokens_in": 0,
+          "requests_completed": 234,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 54.6
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 721.2,
+          "tokens_out": 43268,
+          "tokens_in": 0,
+          "requests_completed": 232,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 48.6
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 723.7,
+          "tokens_out": 43427,
+          "tokens_in": 0,
+          "requests_completed": 236,
+          "ttft_ms_p50": 33.4,
+          "ttft_ms_p99": 52.8
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 725.8,
+          "tokens_out": 43530,
+          "tokens_in": 0,
+          "requests_completed": 234,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 60.8
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 717.4,
+          "tokens_out": 43059,
+          "tokens_in": 0,
+          "requests_completed": 231,
+          "ttft_ms_p50": 33.0,
+          "ttft_ms_p99": 47.9
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 734.3,
+          "tokens_out": 44054,
+          "tokens_in": 0,
+          "requests_completed": 240,
+          "ttft_ms_p50": 33.4,
+          "ttft_ms_p99": 51.6
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 726.3,
+          "tokens_out": 43577,
+          "tokens_in": 0,
+          "requests_completed": 235,
+          "ttft_ms_p50": 33.3,
+          "ttft_ms_p99": 45.3
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 725.4,
+          "tokens_out": 43519,
+          "tokens_in": 0,
+          "requests_completed": 233,
+          "ttft_ms_p50": 33.3,
+          "ttft_ms_p99": 64.0
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 716.7,
+          "tokens_out": 42986,
+          "tokens_in": 0,
+          "requests_completed": 234,
+          "ttft_ms_p50": 33.3,
+          "ttft_ms_p99": 48.3
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 724.8,
+          "tokens_out": 43518,
+          "tokens_in": 0,
+          "requests_completed": 235,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 44.4
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 723.3,
+          "tokens_out": 43383,
+          "tokens_in": 0,
+          "requests_completed": 233,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 52.3
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 727.9,
+          "tokens_out": 43664,
+          "tokens_in": 0,
+          "requests_completed": 237,
+          "ttft_ms_p50": 33.3,
+          "ttft_ms_p99": 47.6
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 718.4,
+      "throttle_ratio": 0.871,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -3856.6
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.59,
+    "baseline_delta": 0.0,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "W8A16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "15:08:30",
+    "run_id": "637ce9ca",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_637ce9ca",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T15:05:13.757784+00:00",
+    "benchmark_end_time": "2026-04-27T15:08:30.679669+00:00",
+    "benchmark_elapsed_minutes": 25.1,
+    "model_load_seconds": 90.0,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/offline",
+      "online": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/online",
+      "sustained": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/sustained/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/sustained/result.json
new file mode 100644
index 00000000..234affa9
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a16/sustained/result.json
@@ -0,0 +1,257 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 639.5,
+          "tokens_out": 38376,
+          "tokens_in": 0,
+          "requests_completed": 208,
+          "ttft_ms_p50": 38.1,
+          "ttft_ms_p99": 3904.2
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 728.0,
+          "tokens_out": 43705,
+          "tokens_in": 0,
+          "requests_completed": 237,
+          "ttft_ms_p50": 33.3,
+          "ttft_ms_p99": 49.3
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 723.2,
+          "tokens_out": 43391,
+          "tokens_in": 0,
+          "requests_completed": 234,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 54.6
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 721.2,
+          "tokens_out": 43268,
+          "tokens_in": 0,
+          "requests_completed": 232,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 48.6
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 723.7,
+          "tokens_out": 43427,
+          "tokens_in": 0,
+          "requests_completed": 236,
+          "ttft_ms_p50": 33.4,
+          "ttft_ms_p99": 52.8
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 725.8,
+          "tokens_out": 43530,
+          "tokens_in": 0,
+          "requests_completed": 234,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 60.8
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 717.4,
+          "tokens_out": 43059,
+          "tokens_in": 0,
+          "requests_completed": 231,
+          "ttft_ms_p50": 33.0,
+          "ttft_ms_p99": 47.9
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 734.3,
+          "tokens_out": 44054,
+          "tokens_in": 0,
+          "requests_completed": 240,
+          "ttft_ms_p50": 33.4,
+          "ttft_ms_p99": 51.6
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 726.3,
+          "tokens_out": 43577,
+          "tokens_in": 0,
+          "requests_completed": 235,
+          "ttft_ms_p50": 33.3,
+          "ttft_ms_p99": 45.3
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 725.4,
+          "tokens_out": 43519,
+          "tokens_in": 0,
+          "requests_completed": 233,
+          "ttft_ms_p50": 33.3,
+          "ttft_ms_p99": 64.0
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 716.7,
+          "tokens_out": 42986,
+          "tokens_in": 0,
+          "requests_completed": 234,
+          "ttft_ms_p50": 33.3,
+          "ttft_ms_p99": 48.3
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 724.8,
+          "tokens_out": 43518,
+          "tokens_in": 0,
+          "requests_completed": 235,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 44.4
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 723.3,
+          "tokens_out": 43383,
+          "tokens_in": 0,
+          "requests_completed": 233,
+          "ttft_ms_p50": 33.2,
+          "ttft_ms_p99": 52.3
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 727.9,
+          "tokens_out": 43664,
+          "tokens_in": 0,
+          "requests_completed": 237,
+          "ttft_ms_p50": 33.3,
+          "ttft_ms_p99": 47.6
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 718.4,
+      "throttle_ratio": 0.871,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -3856.6
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "15:35:26",
+    "run_id": "637ce9ca",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_637ce9ca",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T15:20:22.197972+00:00",
+    "benchmark_end_time": "2026-04-27T15:35:26.482828+00:00",
+    "benchmark_elapsed_minutes": 15.1,
+    "model_load_seconds": 149.5
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/accuracy/accuracy.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/accuracy/accuracy.json
new file mode 100644
index 00000000..5835a80c
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.6,
+  "baseline_delta": 0.01,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "W8A8",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/offline/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/offline/result.json
new file mode 100644
index 00000000..33a4a933
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/offline/result.json
@@ -0,0 +1,157 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+    "model_revision": "e2bfb7d92784ad7d1b606c2f9644d3cefb2ec708",
+    "model_name": null,
+    "model_note": "INT8 quantized by RedHatAI using llm-compressor (compressed-tensors). Both weights and activations quantized to INT8.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 3903.39,
+          "throughput_tokens_per_sec_per_chip": 3903.39,
+          "elapsed_seconds_median": 9.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 4044.47,
+          "throughput_tokens_per_sec_per_chip": 4044.47,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 4000.58,
+          "throughput_tokens_per_sec_per_chip": 4000.58,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 4027.83,
+          "throughput_tokens_per_sec_per_chip": 4027.83,
+          "elapsed_seconds_median": 8.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "14:33:14",
+    "run_id": "30a46687",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_30a46687",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T14:30:47.317448+00:00",
+    "benchmark_end_time": "2026-04-27T14:33:14.431323+00:00",
+    "benchmark_elapsed_minutes": 2.5,
+    "model_load_seconds": 71.0
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/online/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/online/result.json
new file mode 100644
index 00000000..b02c0244
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/online/result.json
@@ -0,0 +1,159 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+    "model_revision": "e2bfb7d92784ad7d1b606c2f9644d3cefb2ec708",
+    "model_name": null,
+    "model_note": "INT8 quantized by RedHatAI using llm-compressor (compressed-tensors). Both weights and activations quantized to INT8.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 25,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 38.71,
+          "ttft_ms_p90": 53.38,
+          "ttft_ms_p99": 1589.47,
+          "tpot_ms_p50": 11.46,
+          "tpot_ms_p90": 12.3,
+          "tpot_ms_p99": 15.34,
+          "elapsed_seconds_median": 65.2,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 44.87,
+          "ttft_ms_p90": 53.48,
+          "ttft_ms_p99": 65.3,
+          "tpot_ms_p50": 18.42,
+          "tpot_ms_p90": 19.56,
+          "tpot_ms_p99": 21.14,
+          "elapsed_seconds_median": 32.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 50.04,
+          "ttft_ms_p90": 61.38,
+          "ttft_ms_p99": 85.27,
+          "tpot_ms_p50": 28.19,
+          "tpot_ms_p90": 33.11,
+          "tpot_ms_p99": 44.66,
+          "elapsed_seconds_median": 15.7,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 52.01,
+          "ttft_ms_p90": 1450.28,
+          "ttft_ms_p99": 3527.1,
+          "tpot_ms_p50": 34.6,
+          "tpot_ms_p90": 40.12,
+          "tpot_ms_p99": 58.13,
+          "elapsed_seconds_median": 12.6,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "14:41:14",
+    "run_id": "30a46687",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_30a46687",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T14:34:57.279473+00:00",
+    "benchmark_end_time": "2026-04-27T14:41:14.157626+00:00",
+    "benchmark_elapsed_minutes": 6.3,
+    "model_load_seconds": 69.5
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/result.json
new file mode 100644
index 00000000..f4210cac
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/result.json
@@ -0,0 +1,374 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+    "model_revision": "e2bfb7d92784ad7d1b606c2f9644d3cefb2ec708",
+    "model_name": null,
+    "model_note": "INT8 quantized by RedHatAI using llm-compressor (compressed-tensors). Both weights and activations quantized to INT8.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 3903.39,
+          "throughput_tokens_per_sec_per_chip": 3903.39,
+          "elapsed_seconds_median": 9.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 4044.47,
+          "throughput_tokens_per_sec_per_chip": 4044.47,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 4000.58,
+          "throughput_tokens_per_sec_per_chip": 4000.58,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 4027.83,
+          "throughput_tokens_per_sec_per_chip": 4027.83,
+          "elapsed_seconds_median": 8.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 25,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 38.71,
+          "ttft_ms_p90": 53.38,
+          "ttft_ms_p99": 1589.47,
+          "tpot_ms_p50": 11.46,
+          "tpot_ms_p90": 12.3,
+          "tpot_ms_p99": 15.34,
+          "elapsed_seconds_median": 65.2,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 44.87,
+          "ttft_ms_p90": 53.48,
+          "ttft_ms_p99": 65.3,
+          "tpot_ms_p50": 18.42,
+          "tpot_ms_p90": 19.56,
+          "tpot_ms_p99": 21.14,
+          "elapsed_seconds_median": 32.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 50.04,
+          "ttft_ms_p90": 61.38,
+          "ttft_ms_p99": 85.27,
+          "tpot_ms_p50": 28.19,
+          "tpot_ms_p90": 33.11,
+          "tpot_ms_p99": 44.66,
+          "elapsed_seconds_median": 15.7,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 52.01,
+          "ttft_ms_p90": 1450.28,
+          "ttft_ms_p99": 3527.1,
+          "tpot_ms_p50": 34.6,
+          "tpot_ms_p90": 40.12,
+          "tpot_ms_p99": 58.13,
+          "elapsed_seconds_median": 12.6,
+          "sla_met": false
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 677.6,
+          "tokens_out": 40664,
+          "tokens_in": 0,
+          "requests_completed": 222,
+          "ttft_ms_p50": 36.6,
+          "ttft_ms_p99": 2809.1
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 714.9,
+          "tokens_out": 42916,
+          "tokens_in": 0,
+          "requests_completed": 232,
+          "ttft_ms_p50": 35.9,
+          "ttft_ms_p99": 52.5
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 726.3,
+          "tokens_out": 43580,
+          "tokens_in": 0,
+          "requests_completed": 236,
+          "ttft_ms_p50": 35.9,
+          "ttft_ms_p99": 46.2
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 721.1,
+          "tokens_out": 43251,
+          "tokens_in": 0,
+          "requests_completed": 233,
+          "ttft_ms_p50": 36.0,
+          "ttft_ms_p99": 53.7
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 724.4,
+          "tokens_out": 43460,
+          "tokens_in": 0,
+          "requests_completed": 235,
+          "ttft_ms_p50": 36.2,
+          "ttft_ms_p99": 60.5
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 721.4,
+          "tokens_out": 43288,
+          "tokens_in": 0,
+          "requests_completed": 234,
+          "ttft_ms_p50": 36.0,
+          "ttft_ms_p99": 45.1
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 728.3,
+          "tokens_out": 43696,
+          "tokens_in": 0,
+          "requests_completed": 238,
+          "ttft_ms_p50": 36.1,
+          "ttft_ms_p99": 52.2
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 715.3,
+          "tokens_out": 42941,
+          "tokens_in": 0,
+          "requests_completed": 233,
+          "ttft_ms_p50": 36.0,
+          "ttft_ms_p99": 54.6
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 722.2,
+          "tokens_out": 43310,
+          "tokens_in": 0,
+          "requests_completed": 232,
+          "ttft_ms_p50": 35.8,
+          "ttft_ms_p99": 50.5
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 722.3,
+          "tokens_out": 43356,
+          "tokens_in": 0,
+          "requests_completed": 235,
+          "ttft_ms_p50": 36.1,
+          "ttft_ms_p99": 66.9
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 714.5,
+          "tokens_out": 42869,
+          "tokens_in": 0,
+          "requests_completed": 233,
+          "ttft_ms_p50": 36.2,
+          "ttft_ms_p99": 53.5
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 736.2,
+          "tokens_out": 44141,
+          "tokens_in": 0,
+          "requests_completed": 237,
+          "ttft_ms_p50": 36.0,
+          "ttft_ms_p99": 51.9
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 715.5,
+          "tokens_out": 42963,
+          "tokens_in": 0,
+          "requests_completed": 236,
+          "ttft_ms_p50": 36.0,
+          "ttft_ms_p99": 52.4
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 728.6,
+          "tokens_out": 43711,
+          "tokens_in": 0,
+          "requests_completed": 233,
+          "ttft_ms_p50": 36.0,
+          "ttft_ms_p99": 52.5
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 719.2,
+      "throttle_ratio": 0.92,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -2756.6
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.6,
+    "baseline_delta": 0.01,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "W8A8",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "14:33:14",
+    "run_id": "30a46687",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_30a46687",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T14:30:47.317448+00:00",
+    "benchmark_end_time": "2026-04-27T14:33:14.431323+00:00",
+    "benchmark_elapsed_minutes": 23.9,
+    "model_load_seconds": 71.0,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/offline",
+      "online": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/online",
+      "sustained": "results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/sustained/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/sustained/result.json
new file mode 100644
index 00000000..03d17a6b
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_4ce353c2/w8a8/sustained/result.json
@@ -0,0 +1,257 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-27T13:14:17.420434+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+    "model_revision": "e2bfb7d92784ad7d1b606c2f9644d3cefb2ec708",
+    "model_name": null,
+    "model_note": "INT8 quantized by RedHatAI using llm-compressor (compressed-tensors). Both weights and activations quantized to INT8.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 677.6,
+          "tokens_out": 40664,
+          "tokens_in": 0,
+          "requests_completed": 222,
+          "ttft_ms_p50": 36.6,
+          "ttft_ms_p99": 2809.1
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 714.9,
+          "tokens_out": 42916,
+          "tokens_in": 0,
+          "requests_completed": 232,
+          "ttft_ms_p50": 35.9,
+          "ttft_ms_p99": 52.5
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 726.3,
+          "tokens_out": 43580,
+          "tokens_in": 0,
+          "requests_completed": 236,
+          "ttft_ms_p50": 35.9,
+          "ttft_ms_p99": 46.2
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 721.1,
+          "tokens_out": 43251,
+          "tokens_in": 0,
+          "requests_completed": 233,
+          "ttft_ms_p50": 36.0,
+          "ttft_ms_p99": 53.7
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 724.4,
+          "tokens_out": 43460,
+          "tokens_in": 0,
+          "requests_completed": 235,
+          "ttft_ms_p50": 36.2,
+          "ttft_ms_p99": 60.5
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 721.4,
+          "tokens_out": 43288,
+          "tokens_in": 0,
+          "requests_completed": 234,
+          "ttft_ms_p50": 36.0,
+          "ttft_ms_p99": 45.1
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 728.3,
+          "tokens_out": 43696,
+          "tokens_in": 0,
+          "requests_completed": 238,
+          "ttft_ms_p50": 36.1,
+          "ttft_ms_p99": 52.2
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 715.3,
+          "tokens_out": 42941,
+          "tokens_in": 0,
+          "requests_completed": 233,
+          "ttft_ms_p50": 36.0,
+          "ttft_ms_p99": 54.6
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 722.2,
+          "tokens_out": 43310,
+          "tokens_in": 0,
+          "requests_completed": 232,
+          "ttft_ms_p50": 35.8,
+          "ttft_ms_p99": 50.5
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 722.3,
+          "tokens_out": 43356,
+          "tokens_in": 0,
+          "requests_completed": 235,
+          "ttft_ms_p50": 36.1,
+          "ttft_ms_p99": 66.9
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 714.5,
+          "tokens_out": 42869,
+          "tokens_in": 0,
+          "requests_completed": 233,
+          "ttft_ms_p50": 36.2,
+          "ttft_ms_p99": 53.5
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 736.2,
+          "tokens_out": 44141,
+          "tokens_in": 0,
+          "requests_completed": 237,
+          "ttft_ms_p50": 36.0,
+          "ttft_ms_p99": 51.9
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 715.5,
+          "tokens_out": 42963,
+          "tokens_in": 0,
+          "requests_completed": 236,
+          "ttft_ms_p50": 36.0,
+          "ttft_ms_p99": 52.4
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 728.6,
+          "tokens_out": 43711,
+          "tokens_in": 0,
+          "requests_completed": 233,
+          "ttft_ms_p50": 36.0,
+          "ttft_ms_p99": 52.5
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 719.2,
+      "throttle_ratio": 0.92,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -2756.6
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-27",
+    "time": "14:58:07",
+    "run_id": "30a46687",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_C_nvidia_sglang_c43a8309_30a46687",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-27T14:43:03.698180+00:00",
+    "benchmark_end_time": "2026-04-27T14:58:07.654245+00:00",
+    "benchmark_elapsed_minutes": 15.1,
+    "model_load_seconds": 75.1
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/accuracy/accuracy.json b/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/accuracy/accuracy.json
new file mode 100644
index 00000000..37f4d824
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.55,
+  "baseline_delta": -0.01,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/env_info.json b/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/env_info.json
new file mode 100644
index 00000000..89e8ceb3
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/env_info.json
@@ -0,0 +1,32 @@
+{
+  "collected_at": "2026-05-07T09:31:39.284738+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA GeForce RTX 4090",
+      "vendor": "NVIDIA",
+      "memory_gb": 24.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.9",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "INTEL(R) XEON(R) GOLD 6530",
+    "physical_cores": 64,
+    "logical_cores": 128,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.6,
+  "pcie_generation": "PCIe Gen 1",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": null,
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.10.20",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 12.8",
+  "pytorch_version": "2.9.1+cu128"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/interactive/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/interactive/result.json
new file mode 100644
index 00000000..c9462eb8
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/interactive/result.json
@@ -0,0 +1,115 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T09:31:39.284738+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 4305.61,
+      "ttft_ms_p90": 4494.03,
+      "ttft_ms_p99": 4587.16,
+      "tpot_ms_p50": 20.59,
+      "tpot_ms_p90": 20.72,
+      "tpot_ms_p99": 20.96,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 896.1
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "10:56:58",
+    "run_id": "3f838de7",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T10:27:06.729869+00:00",
+    "benchmark_end_time": "2026-05-07T10:56:58.928536+00:00",
+    "benchmark_elapsed_minutes": 29.9,
+    "model_load_seconds": 160.8
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/offline/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/offline/result.json
new file mode 100644
index 00000000..a739f138
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/offline/result.json
@@ -0,0 +1,133 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T09:31:39.284738+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 27.14,
+          "throughput_tokens_per_sec_per_chip": 27.14,
+          "elapsed_seconds_median": 473.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 27.07,
+          "throughput_tokens_per_sec_per_chip": 27.07,
+          "elapsed_seconds_median": 474.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "10:23:05",
+    "run_id": "3f838de7",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T09:35:39.694912+00:00",
+    "benchmark_end_time": "2026-05-07T10:23:05.704936+00:00",
+    "benchmark_elapsed_minutes": 47.4,
+    "model_load_seconds": 81.3
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/online/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/online/result.json
new file mode 100644
index 00000000..d12fdeb7
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/online/result.json
@@ -0,0 +1,147 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T09:31:39.284738+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 5000,
+      "max_valid_qps": 0.0,
+      "results_by_qps": [
+        {
+          "target_qps": 0.5,
+          "achieved_qps": 0.5,
+          "ttft_ms_p50": 351298.63,
+          "ttft_ms_p90": 625325.33,
+          "ttft_ms_p99": 680302.66,
+          "tpot_ms_p50": 20.59,
+          "tpot_ms_p90": 20.69,
+          "tpot_ms_p99": 20.74,
+          "elapsed_seconds_median": 884.9,
+          "sla_met": false
+        },
+        {
+          "target_qps": 1,
+          "achieved_qps": 1.0,
+          "ttft_ms_p50": 381686.19,
+          "ttft_ms_p90": 687285.75,
+          "ttft_ms_p99": 759388.6,
+          "tpot_ms_p50": 20.59,
+          "tpot_ms_p90": 20.7,
+          "tpot_ms_p99": 20.75,
+          "elapsed_seconds_median": 883.3,
+          "sla_met": false
+        },
+        {
+          "target_qps": 2,
+          "achieved_qps": 2.0,
+          "ttft_ms_p50": 415347.43,
+          "ttft_ms_p90": 744515.16,
+          "ttft_ms_p99": 822031.46,
+          "tpot_ms_p50": 20.61,
+          "tpot_ms_p90": 20.71,
+          "tpot_ms_p99": 20.86,
+          "elapsed_seconds_median": 880.7,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "13:02:24",
+    "run_id": "3f838de7",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T11:34:06.635917+00:00",
+    "benchmark_end_time": "2026-05-07T13:02:24.424539+00:00",
+    "benchmark_elapsed_minutes": 88.3,
+    "model_load_seconds": 162.3
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/result.json
new file mode 100644
index 00000000..5e822930
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/result.json
@@ -0,0 +1,500 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T09:31:39.284738+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "interactive",
+      "sustained",
+      "online"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 2,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 27.14,
+          "throughput_tokens_per_sec_per_chip": 27.14,
+          "elapsed_seconds_median": 473.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 27.07,
+          "throughput_tokens_per_sec_per_chip": 27.07,
+          "elapsed_seconds_median": 474.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 4305.61,
+      "ttft_ms_p90": 4494.03,
+      "ttft_ms_p99": 4587.16,
+      "tpot_ms_p50": 20.59,
+      "tpot_ms_p90": 20.72,
+      "tpot_ms_p99": 20.96,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 896.1
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 22.5,
+          "tokens_out": 1350,
+          "tokens_in": 0,
+          "requests_completed": 6,
+          "ttft_ms_p50": 29474.9,
+          "ttft_ms_p99": 50800.0
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65800.9,
+          "ttft_ms_p99": 68681.1
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 22.5,
+          "tokens_out": 1350,
+          "tokens_in": 0,
+          "requests_completed": 6,
+          "ttft_ms_p50": 66378.2,
+          "ttft_ms_p99": 66953.3
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66799.4,
+          "ttft_ms_p99": 67336.4
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65978.9,
+          "ttft_ms_p99": 66173.8
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65047.4,
+          "ttft_ms_p99": 66056.9
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65985.2,
+          "ttft_ms_p99": 66517.4
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65430.6,
+          "ttft_ms_p99": 65984.7
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 22.5,
+          "tokens_out": 1350,
+          "tokens_in": 0,
+          "requests_completed": 6,
+          "ttft_ms_p50": 66326.7,
+          "ttft_ms_p99": 66529.3
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66475.0,
+          "ttft_ms_p99": 66630.4
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66933.2,
+          "ttft_ms_p99": 67325.9
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66210.0,
+          "ttft_ms_p99": 66866.3
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 22.5,
+          "tokens_out": 1350,
+          "tokens_in": 0,
+          "requests_completed": 6,
+          "ttft_ms_p50": 65003.8,
+          "ttft_ms_p99": 65056.2
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65985.2,
+          "ttft_ms_p99": 66384.5
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65385.6,
+          "ttft_ms_p99": 65743.1
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66150.9,
+          "ttft_ms_p99": 66460.8
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66230.9,
+          "ttft_ms_p99": 66732.4
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 22.5,
+          "tokens_out": 1350,
+          "tokens_in": 0,
+          "requests_completed": 6,
+          "ttft_ms_p50": 67157.7,
+          "ttft_ms_p99": 67520.3
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66791.7,
+          "ttft_ms_p99": 66894.9
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65156.8,
+          "ttft_ms_p99": 66151.1
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65935.1,
+          "ttft_ms_p99": 66244.4
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65618.1,
+          "ttft_ms_p99": 66477.5
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 22.5,
+          "tokens_out": 1350,
+          "tokens_in": 0,
+          "requests_completed": 6,
+          "ttft_ms_p50": 65725.5,
+          "ttft_ms_p99": 66185.8
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66058.3,
+          "ttft_ms_p99": 66293.7
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66477.1,
+          "ttft_ms_p99": 67254.9
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66985.0,
+          "ttft_ms_p99": 67472.7
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 22.5,
+          "tokens_out": 1350,
+          "tokens_in": 0,
+          "requests_completed": 6,
+          "ttft_ms_p50": 66160.4,
+          "ttft_ms_p99": 66370.1
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 64984.1,
+          "ttft_ms_p99": 66016.4
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66045.7,
+          "ttft_ms_p99": 66588.0
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 25.4,
+      "throttle_ratio": 0.856,
+      "throttle_onset_minute": 3.0,
+      "ttft_p99_drift_ms": -2093.1
+    },
+    "online": {
+      "sla_ttft_ms": 5000,
+      "max_valid_qps": 0.0,
+      "results_by_qps": [
+        {
+          "target_qps": 0.5,
+          "achieved_qps": 0.5,
+          "ttft_ms_p50": 351298.63,
+          "ttft_ms_p90": 625325.33,
+          "ttft_ms_p99": 680302.66,
+          "tpot_ms_p50": 20.59,
+          "tpot_ms_p90": 20.69,
+          "tpot_ms_p99": 20.74,
+          "elapsed_seconds_median": 884.9,
+          "sla_met": false
+        },
+        {
+          "target_qps": 1,
+          "achieved_qps": 1.0,
+          "ttft_ms_p50": 381686.19,
+          "ttft_ms_p90": 687285.75,
+          "ttft_ms_p99": 759388.6,
+          "tpot_ms_p50": 20.59,
+          "tpot_ms_p90": 20.7,
+          "tpot_ms_p99": 20.75,
+          "elapsed_seconds_median": 883.3,
+          "sla_met": false
+        },
+        {
+          "target_qps": 2,
+          "achieved_qps": 2.0,
+          "ttft_ms_p50": 415347.43,
+          "ttft_ms_p90": 744515.16,
+          "ttft_ms_p99": 822031.46,
+          "tpot_ms_p50": 20.61,
+          "tpot_ms_p90": 20.71,
+          "tpot_ms_p99": 20.86,
+          "elapsed_seconds_median": 880.7,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.55,
+    "baseline_delta": -0.01,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "10:23:05",
+    "run_id": "3f838de7",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": "Partial run: ['offline', 'interactive', 'sustained', 'online'] succeeded, ['speculative'] failed.",
+    "benchmark_start_time": "2026-05-07T09:35:39.694912+00:00",
+    "benchmark_end_time": "2026-05-07T10:23:05.704936+00:00",
+    "benchmark_elapsed_minutes": 196.6,
+    "model_load_seconds": 81.3,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'interactive', 'sustained', 'online'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/offline",
+      "interactive": "results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/interactive",
+      "sustained": "results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/sustained",
+      "online": "results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/online"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/sustained/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/sustained/result.json
new file mode 100644
index 00000000..480026c0
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7/sustained/result.json
@@ -0,0 +1,407 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-07T09:31:39.284738+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 22.5,
+          "tokens_out": 1350,
+          "tokens_in": 0,
+          "requests_completed": 6,
+          "ttft_ms_p50": 29474.9,
+          "ttft_ms_p99": 50800.0
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65800.9,
+          "ttft_ms_p99": 68681.1
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 22.5,
+          "tokens_out": 1350,
+          "tokens_in": 0,
+          "requests_completed": 6,
+          "ttft_ms_p50": 66378.2,
+          "ttft_ms_p99": 66953.3
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66799.4,
+          "ttft_ms_p99": 67336.4
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65978.9,
+          "ttft_ms_p99": 66173.8
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65047.4,
+          "ttft_ms_p99": 66056.9
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65985.2,
+          "ttft_ms_p99": 66517.4
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65430.6,
+          "ttft_ms_p99": 65984.7
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 22.5,
+          "tokens_out": 1350,
+          "tokens_in": 0,
+          "requests_completed": 6,
+          "ttft_ms_p50": 66326.7,
+          "ttft_ms_p99": 66529.3
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66475.0,
+          "ttft_ms_p99": 66630.4
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66933.2,
+          "ttft_ms_p99": 67325.9
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66210.0,
+          "ttft_ms_p99": 66866.3
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 22.5,
+          "tokens_out": 1350,
+          "tokens_in": 0,
+          "requests_completed": 6,
+          "ttft_ms_p50": 65003.8,
+          "ttft_ms_p99": 65056.2
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65985.2,
+          "ttft_ms_p99": 66384.5
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65385.6,
+          "ttft_ms_p99": 65743.1
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66150.9,
+          "ttft_ms_p99": 66460.8
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66230.9,
+          "ttft_ms_p99": 66732.4
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 22.5,
+          "tokens_out": 1350,
+          "tokens_in": 0,
+          "requests_completed": 6,
+          "ttft_ms_p50": 67157.7,
+          "ttft_ms_p99": 67520.3
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66791.7,
+          "ttft_ms_p99": 66894.9
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65156.8,
+          "ttft_ms_p99": 66151.1
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65935.1,
+          "ttft_ms_p99": 66244.4
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 65618.1,
+          "ttft_ms_p99": 66477.5
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 22.5,
+          "tokens_out": 1350,
+          "tokens_in": 0,
+          "requests_completed": 6,
+          "ttft_ms_p50": 65725.5,
+          "ttft_ms_p99": 66185.8
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66058.3,
+          "ttft_ms_p99": 66293.7
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66477.1,
+          "ttft_ms_p99": 67254.9
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66985.0,
+          "ttft_ms_p99": 67472.7
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 22.5,
+          "tokens_out": 1350,
+          "tokens_in": 0,
+          "requests_completed": 6,
+          "ttft_ms_p50": 66160.4,
+          "ttft_ms_p99": 66370.1
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 64984.1,
+          "ttft_ms_p99": 66016.4
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 66045.7,
+          "ttft_ms_p99": 66588.0
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 25.4,
+      "throttle_ratio": 0.856,
+      "throttle_onset_minute": 3.0,
+      "ttft_p99_drift_ms": -2093.1
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-07",
+    "time": "11:30:07",
+    "run_id": "3f838de7",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_D_nvidia_sglang_c43a8309_3f838de7",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-07T10:59:07.695088+00:00",
+    "benchmark_end_time": "2026-05-07T11:30:07.732770+00:00",
+    "benchmark_elapsed_minutes": 31.0,
+    "model_load_seconds": 88.7
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/accuracy/accuracy.json b/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/accuracy/accuracy.json
new file mode 100644
index 00000000..0dd1af70
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.42,
+  "baseline_delta": 0.04,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/env_info.json b/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/env_info.json
new file mode 100644
index 00000000..5ad0f9ef
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/env_info.json
@@ -0,0 +1,32 @@
+{
+  "collected_at": "2026-05-14T03:54:50.748183+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA GeForce RTX 4090",
+      "vendor": "NVIDIA",
+      "memory_gb": 24.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.9",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "INTEL(R) XEON(R) GOLD 6530",
+    "physical_cores": 64,
+    "logical_cores": 128,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.6,
+  "pcie_generation": "PCIe Gen 1",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": null,
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.10.20",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 12.8",
+  "pytorch_version": "2.9.1+cu128"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/interactive/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/interactive/result.json
new file mode 100644
index 00000000..1f04d8f0
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/interactive/result.json
@@ -0,0 +1,115 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-14T03:54:50.748183+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 13.54,
+      "ttft_ms_p90": 14.88,
+      "ttft_ms_p99": 22.4,
+      "tpot_ms_p50": 1.62,
+      "tpot_ms_p90": 1.66,
+      "tpot_ms_p99": 1.78,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 48.5
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-14",
+    "time": "04:07:10",
+    "run_id": "12d28697",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-14T04:04:43.073670+00:00",
+    "benchmark_end_time": "2026-05-14T04:07:10.437796+00:00",
+    "benchmark_elapsed_minutes": 2.5,
+    "model_load_seconds": 49.4
+  }
+}
\ No newline at end of file
diff --git a/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/offline/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/offline/result.json
similarity index 51%
rename from results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/offline/result.json
rename to results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/offline/result.json
index da8126bc..c346d66f 100644
--- a/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/offline/result.json
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/offline/result.json
@@ -1,55 +1,54 @@
 {
   "schema_version": "1.0",
   "suite_id": "suite_F",
-  "implementation_id": "nvidia_onecat_vllm_12a253c2",
+  "implementation_id": "nvidia_sglang_c43a8309",
   "chip": {
-    "name": "Tesla V100-PCIE-32GB",
+    "name": "NVIDIA GeForce RTX 4090",
     "vendor": "NVIDIA",
     "count": 1,
-    "memory_gb": 32.0,
+    "memory_gb": 24.0,
     "interconnect_intra_node": null,
     "interconnect_inter_node": null
   },
   "environment": {
-    "collected_at": "2026-05-18T12:26:03.593928+00:00",
+    "collected_at": "2026-05-14T03:54:50.748183+00:00",
     "accelerators": [
       {
         "index": 0,
-        "name": "Tesla V100-PCIE-32GB",
+        "name": "NVIDIA GeForce RTX 4090",
         "vendor": "NVIDIA",
-        "memory_gb": 32.0,
-        "driver_version": "580.82.07",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
         "firmware_version": null,
-        "compute_capability": "7.0",
-        "supports_bf16": false
+        "compute_capability": "8.9",
+        "supports_bf16": true
       }
     ],
-    "accelerator_platform": "nvidia",
-    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-25\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
     "intra_node_interconnect": null,
     "cpu": {
-      "model": "Intel(R) Xeon(R) Gold 6230 CPU @ 2.10GHz",
-      "physical_cores": 26,
-      "logical_cores": 52,
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
       "numa_nodes": 2
     },
-    "system_memory_gb": 214.5,
-    "pcie_generation": "PCIe Gen 3",
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
     "cpu_accelerator_bandwidth_gbs": null,
     "network_interfaces": null,
-    "os": "Ubuntu 22.04.5 LTS",
-    "python_version": "3.12.13",
-    "kernel_version": "5.4.0-149-generic",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
     "runtime_version": "CUDA 12.8",
     "pytorch_version": "2.9.1+cu128"
   },
   "software": {
-    "framework": "1Cat-vLLM",
-    "framework_version": "1.0.0+flash_attn_v100-1.0.0",
-    "driver_version": "580.82.07",
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
     "runtime_version": "CUDA 12.8",
-    "os": "Ubuntu 22.04.5 LTS",
-    "python_version": "3.12.13"
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
   },
   "model": {
     "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
@@ -59,8 +58,8 @@
     "model_source": "local",
     "architecture": "dense",
     "parameter_count_b": 0.5,
-    "precision": "FP16",
-    "effective_dtype": "float16",
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
     "quantization_method": null,
     "model_format": "HuggingFace original"
   },
@@ -74,17 +73,7 @@
       "expert_parallel_size": 1,
       "data_parallel_size": 1
     },
-    "extra_config": {
-      "tensor_parallel_size": 1,
-      "enforce_eager": false,
-      "max_num_seqs": 512,
-      "gpu_memory_utilization": 0.9,
-      "engine_kwargs": {
-        "enable_prefix_caching": false,
-        "enable_chunked_prefill": false,
-        "kv_cache_auto_trim_ratio": 0.0
-      }
-    },
+    "extra_config": null,
     "runtime_metrics": null
   },
   "metrics": {
@@ -92,10 +81,10 @@
       "results_by_concurrency": [
         {
           "client_concurrency": 4,
-          "throughput_tokens_per_sec": 6234.82,
-          "throughput_tokens_per_sec_per_chip": 6234.82,
-          "throughput_tokens_per_sec_total": 9303.11,
-          "elapsed_seconds_median": 6.8,
+          "throughput_tokens_per_sec": 14832.92,
+          "throughput_tokens_per_sec_per_chip": 14832.92,
+          "throughput_tokens_per_sec_total": 20031.5,
+          "elapsed_seconds_median": 2.9,
           "peak_memory_gb": null,
           "power_watts_avg": null,
           "power_watts_peak": null,
@@ -105,10 +94,10 @@
         },
         {
           "client_concurrency": 16,
-          "throughput_tokens_per_sec": 6292.79,
-          "throughput_tokens_per_sec_per_chip": 6292.79,
-          "throughput_tokens_per_sec_total": 9356.18,
-          "elapsed_seconds_median": 6.7,
+          "throughput_tokens_per_sec": 14771.95,
+          "throughput_tokens_per_sec_per_chip": 14771.95,
+          "throughput_tokens_per_sec_total": 19959.08,
+          "elapsed_seconds_median": 2.9,
           "peak_memory_gb": null,
           "power_watts_avg": null,
           "power_watts_peak": null,
@@ -118,10 +107,10 @@
         },
         {
           "client_concurrency": 64,
-          "throughput_tokens_per_sec": 6243.51,
-          "throughput_tokens_per_sec_per_chip": 6243.51,
-          "throughput_tokens_per_sec_total": 9267.55,
-          "elapsed_seconds_median": 6.8,
+          "throughput_tokens_per_sec": 14824.27,
+          "throughput_tokens_per_sec_per_chip": 14824.27,
+          "throughput_tokens_per_sec_total": 20046.95,
+          "elapsed_seconds_median": 2.8,
           "peak_memory_gb": null,
           "power_watts_avg": null,
           "power_watts_peak": null,
@@ -139,21 +128,21 @@
     "notes": "Run --scenario accuracy to check model accuracy."
   },
   "meta": {
-    "submitted_by": "JuhaoLiang1997",
+    "submitted_by": "Gong-K",
     "submission_type": "individual",
-    "date": "2026-05-18",
-    "time": "20:28:55",
-    "run_id": "419b138c",
-    "run_name": "tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c",
+    "date": "2026-05-14",
+    "time": "04:00:02",
+    "run_id": "12d28697",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697",
     "flagged": null,
-    "reproduce_script": "runners/nvidia_onecat_vllm_12a253c2/runner.py",
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
     "env_info_file": "../env_info.json",
     "log_file": "run.log",
     "samples_file": "samples.jsonl",
     "notes": null,
-    "benchmark_start_time": "2026-05-18T12:27:34.502139+00:00",
-    "benchmark_end_time": "2026-05-18T12:28:55.745031+00:00",
-    "benchmark_elapsed_minutes": 1.4,
-    "model_load_seconds": 31.7
+    "benchmark_start_time": "2026-05-14T03:59:22.196953+00:00",
+    "benchmark_end_time": "2026-05-14T04:00:02.171314+00:00",
+    "benchmark_elapsed_minutes": 0.7,
+    "model_load_seconds": 54.9
   }
 }
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/online/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/online/result.json
new file mode 100644
index 00000000..3d472d35
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/online/result.json
@@ -0,0 +1,135 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-14T03:54:50.748183+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 40,
+      "results_by_qps": [
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 15.66,
+          "ttft_ms_p90": 26.95,
+          "ttft_ms_p99": 1894.51,
+          "tpot_ms_p50": 2.07,
+          "tpot_ms_p90": 2.47,
+          "tpot_ms_p99": 13.98,
+          "elapsed_seconds_median": 31.9,
+          "sla_met": false
+        },
+        {
+          "target_qps": 40,
+          "achieved_qps": 40.0,
+          "ttft_ms_p50": 27.64,
+          "ttft_ms_p90": 32.89,
+          "ttft_ms_p99": 41.81,
+          "tpot_ms_p50": 14.31,
+          "tpot_ms_p90": 15.93,
+          "tpot_ms_p99": 21.71,
+          "elapsed_seconds_median": 9.1,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-14",
+    "time": "04:03:23",
+    "run_id": "12d28697",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-14T04:01:21.418175+00:00",
+    "benchmark_end_time": "2026-05-14T04:03:23.369790+00:00",
+    "benchmark_elapsed_minutes": 2.0,
+    "model_load_seconds": 49.1
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/result.json
new file mode 100644
index 00000000..c87f4c41
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/result.json
@@ -0,0 +1,353 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-14T03:54:50.748183+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "interactive",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 14832.92,
+          "throughput_tokens_per_sec_per_chip": 14832.92,
+          "throughput_tokens_per_sec_total": 20031.5,
+          "elapsed_seconds_median": 2.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 14771.95,
+          "throughput_tokens_per_sec_per_chip": 14771.95,
+          "throughput_tokens_per_sec_total": 19959.08,
+          "elapsed_seconds_median": 2.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 14824.27,
+          "throughput_tokens_per_sec_per_chip": 14824.27,
+          "throughput_tokens_per_sec_total": 20046.95,
+          "elapsed_seconds_median": 2.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 40,
+      "results_by_qps": [
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 15.66,
+          "ttft_ms_p90": 26.95,
+          "ttft_ms_p99": 1894.51,
+          "tpot_ms_p50": 2.07,
+          "tpot_ms_p90": 2.47,
+          "tpot_ms_p99": 13.98,
+          "elapsed_seconds_median": 31.9,
+          "sla_met": false
+        },
+        {
+          "target_qps": 40,
+          "achieved_qps": 40.0,
+          "ttft_ms_p50": 27.64,
+          "ttft_ms_p90": 32.89,
+          "ttft_ms_p99": 41.81,
+          "tpot_ms_p50": 14.31,
+          "tpot_ms_p90": 15.93,
+          "tpot_ms_p99": 21.71,
+          "elapsed_seconds_median": 9.1,
+          "sla_met": true
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 13.54,
+      "ttft_ms_p90": 14.88,
+      "ttft_ms_p99": 22.4,
+      "tpot_ms_p50": 1.62,
+      "tpot_ms_p90": 1.66,
+      "tpot_ms_p99": 1.78,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 48.5
+    },
+    "sustained": {
+      "sustained_concurrency": 32,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2928.3,
+          "tokens_out": 175824,
+          "tokens_in": 0,
+          "requests_completed": 946,
+          "ttft_ms_p50": 24.2,
+          "ttft_ms_p99": 2551.5
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3143.7,
+          "tokens_out": 188505,
+          "tokens_in": 0,
+          "requests_completed": 1013,
+          "ttft_ms_p50": 23.5,
+          "ttft_ms_p99": 32.5
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3116.7,
+          "tokens_out": 187133,
+          "tokens_in": 0,
+          "requests_completed": 1004,
+          "ttft_ms_p50": 23.4,
+          "ttft_ms_p99": 32.6
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3064.3,
+          "tokens_out": 183809,
+          "tokens_in": 0,
+          "requests_completed": 985,
+          "ttft_ms_p50": 23.7,
+          "ttft_ms_p99": 36.0
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3101.3,
+          "tokens_out": 186067,
+          "tokens_in": 0,
+          "requests_completed": 997,
+          "ttft_ms_p50": 23.6,
+          "ttft_ms_p99": 30.8
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3086.1,
+          "tokens_out": 185226,
+          "tokens_in": 0,
+          "requests_completed": 993,
+          "ttft_ms_p50": 23.6,
+          "ttft_ms_p99": 35.3
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3134.9,
+          "tokens_out": 188082,
+          "tokens_in": 0,
+          "requests_completed": 1007,
+          "ttft_ms_p50": 23.5,
+          "ttft_ms_p99": 32.8
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3063.3,
+          "tokens_out": 183696,
+          "tokens_in": 0,
+          "requests_completed": 986,
+          "ttft_ms_p50": 23.6,
+          "ttft_ms_p99": 32.5
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3108.4,
+          "tokens_out": 186640,
+          "tokens_in": 0,
+          "requests_completed": 1000,
+          "ttft_ms_p50": 23.6,
+          "ttft_ms_p99": 31.6
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3101.4,
+          "tokens_out": 186059,
+          "tokens_in": 0,
+          "requests_completed": 997,
+          "ttft_ms_p50": 23.5,
+          "ttft_ms_p99": 36.4
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3080.3,
+          "tokens_out": 184804,
+          "tokens_in": 0,
+          "requests_completed": 991,
+          "ttft_ms_p50": 23.9,
+          "ttft_ms_p99": 35.6
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3084.6,
+          "tokens_out": 184961,
+          "tokens_in": 0,
+          "requests_completed": 994,
+          "ttft_ms_p50": 23.7,
+          "ttft_ms_p99": 33.3
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3134.2,
+          "tokens_out": 188117,
+          "tokens_in": 0,
+          "requests_completed": 1009,
+          "ttft_ms_p50": 23.5,
+          "ttft_ms_p99": 32.7
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3112.2,
+          "tokens_out": 186781,
+          "tokens_in": 0,
+          "requests_completed": 1001,
+          "ttft_ms_p50": 23.5,
+          "ttft_ms_p99": 32.6
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 3090.0,
+      "throttle_ratio": 0.931,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -2518.9
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.42,
+    "baseline_delta": 0.04,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-14",
+    "time": "04:00:02",
+    "run_id": "12d28697",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-14T03:59:22.196953+00:00",
+    "benchmark_end_time": "2026-05-14T04:00:02.171314+00:00",
+    "benchmark_elapsed_minutes": 20.2,
+    "model_load_seconds": 54.9,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'interactive', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/offline",
+      "online": "results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/online",
+      "interactive": "results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/interactive",
+      "sustained": "results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/sustained/result.json b/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/sustained/result.json
new file mode 100644
index 00000000..073ea2d4
--- /dev/null
+++ b/results/community/nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697/sustained/result.json
@@ -0,0 +1,257 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA GeForce RTX 4090",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 24.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-14T03:54:50.748183+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA GeForce RTX 4090",
+        "vendor": "NVIDIA",
+        "memory_gb": 24.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.9",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-31,64-95\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "INTEL(R) XEON(R) GOLD 6530",
+      "physical_cores": 64,
+      "logical_cores": 128,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.6,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": null,
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 32,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 2928.3,
+          "tokens_out": 175824,
+          "tokens_in": 0,
+          "requests_completed": 946,
+          "ttft_ms_p50": 24.2,
+          "ttft_ms_p99": 2551.5
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3143.7,
+          "tokens_out": 188505,
+          "tokens_in": 0,
+          "requests_completed": 1013,
+          "ttft_ms_p50": 23.5,
+          "ttft_ms_p99": 32.5
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3116.7,
+          "tokens_out": 187133,
+          "tokens_in": 0,
+          "requests_completed": 1004,
+          "ttft_ms_p50": 23.4,
+          "ttft_ms_p99": 32.6
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3064.3,
+          "tokens_out": 183809,
+          "tokens_in": 0,
+          "requests_completed": 985,
+          "ttft_ms_p50": 23.7,
+          "ttft_ms_p99": 36.0
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3101.3,
+          "tokens_out": 186067,
+          "tokens_in": 0,
+          "requests_completed": 997,
+          "ttft_ms_p50": 23.6,
+          "ttft_ms_p99": 30.8
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3086.1,
+          "tokens_out": 185226,
+          "tokens_in": 0,
+          "requests_completed": 993,
+          "ttft_ms_p50": 23.6,
+          "ttft_ms_p99": 35.3
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3134.9,
+          "tokens_out": 188082,
+          "tokens_in": 0,
+          "requests_completed": 1007,
+          "ttft_ms_p50": 23.5,
+          "ttft_ms_p99": 32.8
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3063.3,
+          "tokens_out": 183696,
+          "tokens_in": 0,
+          "requests_completed": 986,
+          "ttft_ms_p50": 23.6,
+          "ttft_ms_p99": 32.5
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3108.4,
+          "tokens_out": 186640,
+          "tokens_in": 0,
+          "requests_completed": 1000,
+          "ttft_ms_p50": 23.6,
+          "ttft_ms_p99": 31.6
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3101.4,
+          "tokens_out": 186059,
+          "tokens_in": 0,
+          "requests_completed": 997,
+          "ttft_ms_p50": 23.5,
+          "ttft_ms_p99": 36.4
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3080.3,
+          "tokens_out": 184804,
+          "tokens_in": 0,
+          "requests_completed": 991,
+          "ttft_ms_p50": 23.9,
+          "ttft_ms_p99": 35.6
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3084.6,
+          "tokens_out": 184961,
+          "tokens_in": 0,
+          "requests_completed": 994,
+          "ttft_ms_p50": 23.7,
+          "ttft_ms_p99": 33.3
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3134.2,
+          "tokens_out": 188117,
+          "tokens_in": 0,
+          "requests_completed": 1009,
+          "ttft_ms_p50": 23.5,
+          "ttft_ms_p99": 32.7
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 3112.2,
+          "tokens_out": 186781,
+          "tokens_in": 0,
+          "requests_completed": 1001,
+          "ttft_ms_p50": 23.5,
+          "ttft_ms_p99": 32.6
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 3090.0,
+      "throttle_ratio": 0.931,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -2518.9
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-14",
+    "time": "04:23:31",
+    "run_id": "12d28697",
+    "run_name": "nvidia_geforce_rtx_4090x1_suite_F_nvidia_sglang_c43a8309_12d28697",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-14T04:08:29.029603+00:00",
+    "benchmark_end_time": "2026-05-14T04:23:31.519887+00:00",
+    "benchmark_elapsed_minutes": 15.0,
+    "model_load_seconds": 48.7
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/accuracy/accuracy.json b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/accuracy/accuracy.json
new file mode 100644
index 00000000..5b260195
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.61,
+  "baseline_delta": 0.01,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/burst/result.json b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/burst/result.json
new file mode 100644
index 00000000..ac6da65e
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/burst/result.json
@@ -0,0 +1,164 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T07:36:07.207290+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "burst",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "burst": {
+      "sla_ttft_ms": 500,
+      "burst_steady_qps": 5,
+      "burst_peak_qps": 25,
+      "burst_duration_seconds": 30,
+      "burst_interval_seconds": 120,
+      "steady_requests_total": 1812,
+      "burst_requests_total": 2245,
+      "steady_ttft_p50_ms": 71.76,
+      "steady_ttft_p99_ms": 5316.14,
+      "burst_ttft_p50_ms": 91.93,
+      "burst_ttft_p99_ms": 361.51,
+      "sla_met_during_burst": true,
+      "burst_degradation_ratio": 0.068,
+      "results_by_cycle": [
+        {
+          "cycle": 1,
+          "steady_requests": 581,
+          "burst_requests": 760,
+          "steady_ttft_p99_ms": 6204.77,
+          "burst_ttft_p99_ms": 439.74
+        },
+        {
+          "cycle": 2,
+          "steady_requests": 595,
+          "burst_requests": 734,
+          "steady_ttft_p99_ms": 94.27,
+          "burst_ttft_p99_ms": 305.42
+        },
+        {
+          "cycle": 3,
+          "steady_requests": 636,
+          "burst_requests": 751,
+          "steady_ttft_p99_ms": 94.55,
+          "burst_ttft_p99_ms": 360.12
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "09:32:37",
+    "run_id": "9c6920b5",
+    "run_name": "nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T09:23:55.580940+00:00",
+    "benchmark_end_time": "2026-04-29T09:32:37.748022+00:00",
+    "benchmark_elapsed_minutes": 8.7,
+    "model_load_seconds": 125.3
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/env_info.json b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/env_info.json
similarity index 63%
rename from results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/env_info.json
rename to results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/env_info.json
index cfd770eb..e9877c8b 100644
--- a/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/env_info.json
+++ b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/env_info.json
@@ -1,27 +1,27 @@
 {
-  "collected_at": "2026-05-07T10:52:35.700123+00:00",
+  "collected_at": "2026-04-29T07:36:07.207290+00:00",
   "accelerators": [
     {
       "index": 0,
-      "name": "NVIDIA A100-SXM4-40GB",
+      "name": "NVIDIA RTX A6000",
       "vendor": "NVIDIA",
-      "memory_gb": 40,
+      "memory_gb": 48.0,
       "driver_version": "565.57.01",
       "firmware_version": null,
-      "compute_capability": "8.0",
+      "compute_capability": "8.6",
       "supports_bf16": true
     }
   ],
-  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
   "intra_node_interconnect": null,
   "cpu": {
-    "model": "AMD EPYC 7532 32-Core Processor",
-    "physical_cores": 64,
-    "logical_cores": 128,
+    "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+    "physical_cores": 76,
+    "logical_cores": 152,
     "numa_nodes": 2
   },
-  "system_memory_gb": 1007.7,
-  "pcie_generation": "PCIe Gen 4",
+  "system_memory_gb": 1007.5,
+  "pcie_generation": "PCIe Gen 1",
   "cpu_accelerator_bandwidth_gbs": null,
   "network_interfaces": [
     {
@@ -38,6 +38,11 @@
       "name": "mlx5_2",
       "type": "InfiniBand/RoCE",
       "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_3",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
     }
   ],
   "os": "Ubuntu 22.04.4 LTS",
diff --git a/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/interactive/result.json b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/interactive/result.json
new file mode 100644
index 00000000..f9238a36
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/interactive/result.json
@@ -0,0 +1,136 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T07:36:07.207290+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 47.37,
+      "ttft_ms_p90": 81.71,
+      "ttft_ms_p99": 111.29,
+      "tpot_ms_p50": 23.0,
+      "tpot_ms_p90": 23.08,
+      "tpot_ms_p99": 23.19,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 676.7
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "08:26:48",
+    "run_id": "9c6920b5",
+    "run_name": "nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T07:52:53.926105+00:00",
+    "benchmark_end_time": "2026-04-29T08:26:48.699149+00:00",
+    "benchmark_elapsed_minutes": 33.9,
+    "model_load_seconds": 80.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/offline/result.json b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/offline/result.json
new file mode 100644
index 00000000..9f2ab0e3
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/offline/result.json
@@ -0,0 +1,166 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T07:36:07.207290+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 2039.11,
+          "throughput_tokens_per_sec_per_chip": 2039.11,
+          "elapsed_seconds_median": 17.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 2024.26,
+          "throughput_tokens_per_sec_per_chip": 2024.26,
+          "elapsed_seconds_median": 17.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 2029.11,
+          "throughput_tokens_per_sec_per_chip": 2029.11,
+          "elapsed_seconds_median": 17.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "07:44:11",
+    "run_id": "9c6920b5",
+    "run_name": "nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T07:40:39.895067+00:00",
+    "benchmark_end_time": "2026-04-29T07:44:11.019231+00:00",
+    "benchmark_elapsed_minutes": 3.5,
+    "model_load_seconds": 58.9
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/online/result.json b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/online/result.json
new file mode 100644
index 00000000..57011166
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/online/result.json
@@ -0,0 +1,168 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T07:36:07.207290+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 100,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 73.61,
+          "ttft_ms_p90": 126.19,
+          "ttft_ms_p99": 1989.06,
+          "tpot_ms_p50": 30.47,
+          "tpot_ms_p90": 35.48,
+          "tpot_ms_p99": 38.38,
+          "elapsed_seconds_median": 68.7,
+          "sla_met": false
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 74.09,
+          "ttft_ms_p90": 108.59,
+          "ttft_ms_p99": 152.02,
+          "tpot_ms_p50": 68.29,
+          "tpot_ms_p90": 75.68,
+          "tpot_ms_p99": 117.11,
+          "elapsed_seconds_median": 23.6,
+          "sla_met": true
+        },
+        {
+          "target_qps": 100,
+          "achieved_qps": 100.0,
+          "ttft_ms_p50": 68.38,
+          "ttft_ms_p90": 80.76,
+          "ttft_ms_p99": 109.03,
+          "tpot_ms_p50": 75.85,
+          "tpot_ms_p90": 91.71,
+          "tpot_ms_p99": 361.36,
+          "elapsed_seconds_median": 18.4,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "07:50:57",
+    "run_id": "9c6920b5",
+    "run_name": "nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T07:45:27.699512+00:00",
+    "benchmark_end_time": "2026-04-29T07:50:57.659935+00:00",
+    "benchmark_elapsed_minutes": 5.5,
+    "model_load_seconds": 51.5
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/result.json b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/result.json
new file mode 100644
index 00000000..0eb4d2a9
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/result.json
@@ -0,0 +1,615 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T07:36:07.207290+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "interactive",
+      "sustained",
+      "speculative",
+      "burst"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 2039.11,
+          "throughput_tokens_per_sec_per_chip": 2039.11,
+          "elapsed_seconds_median": 17.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 2024.26,
+          "throughput_tokens_per_sec_per_chip": 2024.26,
+          "elapsed_seconds_median": 17.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 2029.11,
+          "throughput_tokens_per_sec_per_chip": 2029.11,
+          "elapsed_seconds_median": 17.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 100,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 73.61,
+          "ttft_ms_p90": 126.19,
+          "ttft_ms_p99": 1989.06,
+          "tpot_ms_p50": 30.47,
+          "tpot_ms_p90": 35.48,
+          "tpot_ms_p99": 38.38,
+          "elapsed_seconds_median": 68.7,
+          "sla_met": false
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 74.09,
+          "ttft_ms_p90": 108.59,
+          "ttft_ms_p99": 152.02,
+          "tpot_ms_p50": 68.29,
+          "tpot_ms_p90": 75.68,
+          "tpot_ms_p99": 117.11,
+          "elapsed_seconds_median": 23.6,
+          "sla_met": true
+        },
+        {
+          "target_qps": 100,
+          "achieved_qps": 100.0,
+          "ttft_ms_p50": 68.38,
+          "ttft_ms_p90": 80.76,
+          "ttft_ms_p99": 109.03,
+          "tpot_ms_p50": 75.85,
+          "tpot_ms_p90": 91.71,
+          "tpot_ms_p99": 361.36,
+          "elapsed_seconds_median": 18.4,
+          "sla_met": true
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 47.37,
+      "ttft_ms_p90": 81.71,
+      "ttft_ms_p99": 111.29,
+      "tpot_ms_p50": 23.0,
+      "tpot_ms_p90": 23.08,
+      "tpot_ms_p99": 23.19,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 676.7
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 255.3,
+          "tokens_out": 15327,
+          "tokens_in": 0,
+          "requests_completed": 83,
+          "ttft_ms_p50": 111.7,
+          "ttft_ms_p99": 7156.3
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 314.8,
+          "tokens_out": 18877,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 134.7
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 314.9,
+          "tokens_out": 18900,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 85.6
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 314.8,
+          "tokens_out": 18885,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 85.7
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 312.5,
+          "tokens_out": 18740,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 87.1
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 320.4,
+          "tokens_out": 19229,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.1,
+          "ttft_ms_p99": 86.3
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 315.0,
+          "tokens_out": 18898,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.0,
+          "ttft_ms_p99": 86.3
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 318.8,
+          "tokens_out": 19132,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 69.9,
+          "ttft_ms_p99": 86.1
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 316.1,
+          "tokens_out": 18971,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 69.8,
+          "ttft_ms_p99": 85.0
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 312.2,
+          "tokens_out": 18733,
+          "tokens_in": 0,
+          "requests_completed": 99,
+          "ttft_ms_p50": 69.9,
+          "ttft_ms_p99": 71.7
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 310.6,
+          "tokens_out": 18627,
+          "tokens_in": 0,
+          "requests_completed": 99,
+          "ttft_ms_p50": 70.1,
+          "ttft_ms_p99": 86.1
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 316.2,
+          "tokens_out": 18971,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 85.8
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 318.0,
+          "tokens_out": 19083,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 86.5
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 319.1,
+          "tokens_out": 19143,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 86.3
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 313.8,
+          "tokens_out": 18826,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.0,
+          "ttft_ms_p99": 72.7
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 318.5,
+          "tokens_out": 19119,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.1,
+          "ttft_ms_p99": 85.9
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 315.4,
+          "tokens_out": 18923,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.1,
+          "ttft_ms_p99": 85.9
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 314.6,
+          "tokens_out": 18867,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 85.8
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 314.8,
+          "tokens_out": 18894,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.1,
+          "ttft_ms_p99": 85.9
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 319.1,
+          "tokens_out": 19135,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 87.1
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 314.8,
+          "tokens_out": 18897,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 86.1
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 315.4,
+          "tokens_out": 18919,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 71.8
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 316.3,
+          "tokens_out": 18980,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 86.0
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 310.9,
+          "tokens_out": 18658,
+          "tokens_in": 0,
+          "requests_completed": 99,
+          "ttft_ms_p50": 70.0,
+          "ttft_ms_p99": 85.9
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 315.2,
+          "tokens_out": 18905,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.1,
+          "ttft_ms_p99": 85.7
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 318.8,
+          "tokens_out": 19135,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 85.6
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 315.0,
+          "tokens_out": 18898,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 85.8
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 313.6,
+          "tokens_out": 18815,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 85.8
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 315.1,
+          "tokens_out": 18908,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.1,
+          "ttft_ms_p99": 86.1
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 315.5,
+      "throttle_ratio": 0.969,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -48.6
+    },
+    "speculative": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 421.15,
+          "throughput_tokens_per_sec_per_chip": 421.15,
+          "elapsed_seconds_median": 82.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 421.71,
+          "throughput_tokens_per_sec_per_chip": 421.71,
+          "elapsed_seconds_median": 82.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 421.73,
+          "throughput_tokens_per_sec_per_chip": 421.73,
+          "elapsed_seconds_median": 82.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "burst": {
+      "sla_ttft_ms": 500,
+      "burst_steady_qps": 5,
+      "burst_peak_qps": 25,
+      "burst_duration_seconds": 30,
+      "burst_interval_seconds": 120,
+      "steady_requests_total": 1812,
+      "burst_requests_total": 2245,
+      "steady_ttft_p50_ms": 71.76,
+      "steady_ttft_p99_ms": 5316.14,
+      "burst_ttft_p50_ms": 91.93,
+      "burst_ttft_p99_ms": 361.51,
+      "sla_met_during_burst": true,
+      "burst_degradation_ratio": 0.068,
+      "results_by_cycle": [
+        {
+          "cycle": 1,
+          "steady_requests": 581,
+          "burst_requests": 760,
+          "steady_ttft_p99_ms": 6204.77,
+          "burst_ttft_p99_ms": 439.74
+        },
+        {
+          "cycle": 2,
+          "steady_requests": 595,
+          "burst_requests": 734,
+          "steady_ttft_p99_ms": 94.27,
+          "burst_ttft_p99_ms": 305.42
+        },
+        {
+          "cycle": 3,
+          "steady_requests": 636,
+          "burst_requests": 751,
+          "steady_ttft_p99_ms": 94.55,
+          "burst_ttft_p99_ms": 360.12
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.61,
+    "baseline_delta": 0.01,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "07:44:11",
+    "run_id": "9c6920b5",
+    "run_name": "nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T07:40:39.895067+00:00",
+    "benchmark_end_time": "2026-04-29T07:44:11.019231+00:00",
+    "benchmark_elapsed_minutes": 98.5,
+    "model_load_seconds": 58.9,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'interactive', 'sustained', 'speculative', 'burst'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/offline",
+      "online": "results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/online",
+      "interactive": "results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/interactive",
+      "sustained": "results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/sustained",
+      "speculative": "results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/speculative",
+      "burst": "results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/burst"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/speculative/result.json b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/speculative/result.json
new file mode 100644
index 00000000..581494c9
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/speculative/result.json
@@ -0,0 +1,166 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T07:36:07.207290+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "speculative",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "speculative": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 421.15,
+          "throughput_tokens_per_sec_per_chip": 421.15,
+          "elapsed_seconds_median": 82.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 421.71,
+          "throughput_tokens_per_sec_per_chip": 421.71,
+          "elapsed_seconds_median": 82.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 421.73,
+          "throughput_tokens_per_sec_per_chip": 421.73,
+          "elapsed_seconds_median": 82.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "09:20:48",
+    "run_id": "9c6920b5",
+    "run_name": "nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T09:04:08.250654+00:00",
+    "benchmark_end_time": "2026-04-29T09:20:48.246844+00:00",
+    "benchmark_elapsed_minutes": 16.7,
+    "model_load_seconds": 170.7
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/sustained/result.json b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/sustained/result.json
new file mode 100644
index 00000000..a3bc39e3
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5/sustained/result.json
@@ -0,0 +1,428 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T07:36:07.207290+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 255.3,
+          "tokens_out": 15327,
+          "tokens_in": 0,
+          "requests_completed": 83,
+          "ttft_ms_p50": 111.7,
+          "ttft_ms_p99": 7156.3
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 314.8,
+          "tokens_out": 18877,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 134.7
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 314.9,
+          "tokens_out": 18900,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 85.6
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 314.8,
+          "tokens_out": 18885,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 85.7
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 312.5,
+          "tokens_out": 18740,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 87.1
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 320.4,
+          "tokens_out": 19229,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.1,
+          "ttft_ms_p99": 86.3
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 315.0,
+          "tokens_out": 18898,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.0,
+          "ttft_ms_p99": 86.3
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 318.8,
+          "tokens_out": 19132,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 69.9,
+          "ttft_ms_p99": 86.1
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 316.1,
+          "tokens_out": 18971,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 69.8,
+          "ttft_ms_p99": 85.0
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 312.2,
+          "tokens_out": 18733,
+          "tokens_in": 0,
+          "requests_completed": 99,
+          "ttft_ms_p50": 69.9,
+          "ttft_ms_p99": 71.7
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 310.6,
+          "tokens_out": 18627,
+          "tokens_in": 0,
+          "requests_completed": 99,
+          "ttft_ms_p50": 70.1,
+          "ttft_ms_p99": 86.1
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 316.2,
+          "tokens_out": 18971,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 85.8
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 318.0,
+          "tokens_out": 19083,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 86.5
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 319.1,
+          "tokens_out": 19143,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 86.3
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 313.8,
+          "tokens_out": 18826,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.0,
+          "ttft_ms_p99": 72.7
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 318.5,
+          "tokens_out": 19119,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.1,
+          "ttft_ms_p99": 85.9
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 315.4,
+          "tokens_out": 18923,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.1,
+          "ttft_ms_p99": 85.9
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 314.6,
+          "tokens_out": 18867,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 85.8
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 314.8,
+          "tokens_out": 18894,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.1,
+          "ttft_ms_p99": 85.9
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 319.1,
+          "tokens_out": 19135,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 87.1
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 314.8,
+          "tokens_out": 18897,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 86.1
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 315.4,
+          "tokens_out": 18919,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 71.8
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 316.3,
+          "tokens_out": 18980,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 86.0
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 310.9,
+          "tokens_out": 18658,
+          "tokens_in": 0,
+          "requests_completed": 99,
+          "ttft_ms_p50": 70.0,
+          "ttft_ms_p99": 85.9
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 315.2,
+          "tokens_out": 18905,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.1,
+          "ttft_ms_p99": 85.7
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 318.8,
+          "tokens_out": 19135,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 85.6
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 315.0,
+          "tokens_out": 18898,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 85.8
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 313.6,
+          "tokens_out": 18815,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 85.8
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 315.1,
+          "tokens_out": 18908,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.1,
+          "ttft_ms_p99": 86.1
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 315.5,
+      "throttle_ratio": 0.969,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -48.6
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "09:00:11",
+    "run_id": "9c6920b5",
+    "run_name": "nvidia_rtx_a6000x1_suite_A_nvidia_sglang_c43a8309_9c6920b5",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T08:30:01.272008+00:00",
+    "benchmark_end_time": "2026-04-29T09:00:11.931324+00:00",
+    "benchmark_elapsed_minutes": 30.2,
+    "model_load_seconds": 128.5
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/accuracy/accuracy.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/accuracy/accuracy.json
similarity index 100%
rename from results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/accuracy/accuracy.json
rename to results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/accuracy/accuracy.json
diff --git a/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/offline/result.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/offline/result.json
new file mode 100644
index 00000000..3280c33f
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/offline/result.json
@@ -0,0 +1,178 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T10:07:24.359391+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 2044.08,
+          "throughput_tokens_per_sec_per_chip": 2044.08,
+          "elapsed_seconds_median": 17.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 2043.12,
+          "throughput_tokens_per_sec_per_chip": 2043.12,
+          "elapsed_seconds_median": 17.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 2043.77,
+          "throughput_tokens_per_sec_per_chip": 2043.77,
+          "elapsed_seconds_median": 17.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 2045.83,
+          "throughput_tokens_per_sec_per_chip": 2045.83,
+          "elapsed_seconds_median": 17.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "10:16:44",
+    "run_id": "4c65fcfb",
+    "run_name": "nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T10:11:59.573174+00:00",
+    "benchmark_end_time": "2026-04-29T10:16:44.166472+00:00",
+    "benchmark_elapsed_minutes": 4.7,
+    "model_load_seconds": 63.0
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/online/result.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/online/result.json
new file mode 100644
index 00000000..57f5f4c9
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/online/result.json
@@ -0,0 +1,180 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T10:07:24.359391+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 74.15,
+          "ttft_ms_p90": 125.13,
+          "ttft_ms_p99": 1721.49,
+          "tpot_ms_p50": 30.47,
+          "tpot_ms_p90": 35.85,
+          "tpot_ms_p99": 40.25,
+          "elapsed_seconds_median": 68.7,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 71.26,
+          "ttft_ms_p90": 85.73,
+          "ttft_ms_p99": 93.62,
+          "tpot_ms_p50": 40.73,
+          "tpot_ms_p90": 42.27,
+          "tpot_ms_p99": 46.79,
+          "elapsed_seconds_median": 36.0,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 74.68,
+          "ttft_ms_p90": 110.47,
+          "ttft_ms_p99": 158.24,
+          "tpot_ms_p50": 67.21,
+          "tpot_ms_p90": 76.53,
+          "tpot_ms_p99": 102.14,
+          "elapsed_seconds_median": 23.4,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 71.87,
+          "ttft_ms_p90": 100.76,
+          "ttft_ms_p99": 152.6,
+          "tpot_ms_p50": 76.16,
+          "tpot_ms_p90": 89.69,
+          "tpot_ms_p99": 138.89,
+          "elapsed_seconds_median": 20.4,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "10:25:34",
+    "run_id": "4c65fcfb",
+    "run_name": "nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T10:18:09.895393+00:00",
+    "benchmark_end_time": "2026-04-29T10:25:34.882647+00:00",
+    "benchmark_elapsed_minutes": 7.4,
+    "model_load_seconds": 58.4
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/result.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/result.json
new file mode 100644
index 00000000..e2272bca
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/result.json
@@ -0,0 +1,395 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T10:07:24.359391+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 2044.08,
+          "throughput_tokens_per_sec_per_chip": 2044.08,
+          "elapsed_seconds_median": 17.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 2043.12,
+          "throughput_tokens_per_sec_per_chip": 2043.12,
+          "elapsed_seconds_median": 17.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 2043.77,
+          "throughput_tokens_per_sec_per_chip": 2043.77,
+          "elapsed_seconds_median": 17.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 2045.83,
+          "throughput_tokens_per_sec_per_chip": 2045.83,
+          "elapsed_seconds_median": 17.4,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 74.15,
+          "ttft_ms_p90": 125.13,
+          "ttft_ms_p99": 1721.49,
+          "tpot_ms_p50": 30.47,
+          "tpot_ms_p90": 35.85,
+          "tpot_ms_p99": 40.25,
+          "elapsed_seconds_median": 68.7,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 71.26,
+          "ttft_ms_p90": 85.73,
+          "ttft_ms_p99": 93.62,
+          "tpot_ms_p50": 40.73,
+          "tpot_ms_p90": 42.27,
+          "tpot_ms_p99": 46.79,
+          "elapsed_seconds_median": 36.0,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 74.68,
+          "ttft_ms_p90": 110.47,
+          "ttft_ms_p99": 158.24,
+          "tpot_ms_p50": 67.21,
+          "tpot_ms_p90": 76.53,
+          "tpot_ms_p99": 102.14,
+          "elapsed_seconds_median": 23.4,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 71.87,
+          "ttft_ms_p90": 100.76,
+          "ttft_ms_p99": 152.6,
+          "tpot_ms_p50": 76.16,
+          "tpot_ms_p90": 89.69,
+          "tpot_ms_p99": 138.89,
+          "elapsed_seconds_median": 20.4,
+          "sla_met": true
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 273.8,
+          "tokens_out": 16436,
+          "tokens_in": 0,
+          "requests_completed": 92,
+          "ttft_ms_p50": 104.0,
+          "ttft_ms_p99": 2684.5
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 318.4,
+          "tokens_out": 19101,
+          "tokens_in": 0,
+          "requests_completed": 104,
+          "ttft_ms_p50": 70.4,
+          "ttft_ms_p99": 154.0
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 321.5,
+          "tokens_out": 19280,
+          "tokens_in": 0,
+          "requests_completed": 104,
+          "ttft_ms_p50": 70.5,
+          "ttft_ms_p99": 85.4
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 309.0,
+          "tokens_out": 18541,
+          "tokens_in": 0,
+          "requests_completed": 102,
+          "ttft_ms_p50": 70.4,
+          "ttft_ms_p99": 86.2
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 307.8,
+          "tokens_out": 18477,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.4,
+          "ttft_ms_p99": 85.4
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 315.5,
+          "tokens_out": 18930,
+          "tokens_in": 0,
+          "requests_completed": 104,
+          "ttft_ms_p50": 70.4,
+          "ttft_ms_p99": 72.0
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 320.6,
+          "tokens_out": 19232,
+          "tokens_in": 0,
+          "requests_completed": 104,
+          "ttft_ms_p50": 70.4,
+          "ttft_ms_p99": 85.9
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 306.6,
+          "tokens_out": 18403,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.6,
+          "ttft_ms_p99": 72.0
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 308.1,
+          "tokens_out": 18483,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 72.6
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 327.8,
+          "tokens_out": 19673,
+          "tokens_in": 0,
+          "requests_completed": 108,
+          "ttft_ms_p50": 70.4,
+          "ttft_ms_p99": 85.7
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 310.3,
+          "tokens_out": 18617,
+          "tokens_in": 0,
+          "requests_completed": 102,
+          "ttft_ms_p50": 70.5,
+          "ttft_ms_p99": 86.0
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 312.6,
+          "tokens_out": 18753,
+          "tokens_in": 0,
+          "requests_completed": 102,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 71.8
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 321.0,
+          "tokens_out": 19255,
+          "tokens_in": 0,
+          "requests_completed": 105,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 72.0
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 315.5,
+          "tokens_out": 18930,
+          "tokens_in": 0,
+          "requests_completed": 106,
+          "ttft_ms_p50": 70.4,
+          "ttft_ms_p99": 71.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 312.0,
+      "throttle_ratio": 0.835,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -2612.8
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.57,
+    "baseline_delta": 0.01,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "10:16:44",
+    "run_id": "4c65fcfb",
+    "run_name": "nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T10:11:59.573174+00:00",
+    "benchmark_end_time": "2026-04-29T10:16:44.166472+00:00",
+    "benchmark_elapsed_minutes": 27.3,
+    "model_load_seconds": 63.0,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/offline",
+      "online": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/online",
+      "sustained": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/sustained/result.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/sustained/result.json
new file mode 100644
index 00000000..c447bccf
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/sustained/result.json
@@ -0,0 +1,278 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T10:07:24.359391+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 273.8,
+          "tokens_out": 16436,
+          "tokens_in": 0,
+          "requests_completed": 92,
+          "ttft_ms_p50": 104.0,
+          "ttft_ms_p99": 2684.5
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 318.4,
+          "tokens_out": 19101,
+          "tokens_in": 0,
+          "requests_completed": 104,
+          "ttft_ms_p50": 70.4,
+          "ttft_ms_p99": 154.0
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 321.5,
+          "tokens_out": 19280,
+          "tokens_in": 0,
+          "requests_completed": 104,
+          "ttft_ms_p50": 70.5,
+          "ttft_ms_p99": 85.4
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 309.0,
+          "tokens_out": 18541,
+          "tokens_in": 0,
+          "requests_completed": 102,
+          "ttft_ms_p50": 70.4,
+          "ttft_ms_p99": 86.2
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 307.8,
+          "tokens_out": 18477,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.4,
+          "ttft_ms_p99": 85.4
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 315.5,
+          "tokens_out": 18930,
+          "tokens_in": 0,
+          "requests_completed": 104,
+          "ttft_ms_p50": 70.4,
+          "ttft_ms_p99": 72.0
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 320.6,
+          "tokens_out": 19232,
+          "tokens_in": 0,
+          "requests_completed": 104,
+          "ttft_ms_p50": 70.4,
+          "ttft_ms_p99": 85.9
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 306.6,
+          "tokens_out": 18403,
+          "tokens_in": 0,
+          "requests_completed": 100,
+          "ttft_ms_p50": 70.6,
+          "ttft_ms_p99": 72.0
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 308.1,
+          "tokens_out": 18483,
+          "tokens_in": 0,
+          "requests_completed": 101,
+          "ttft_ms_p50": 70.2,
+          "ttft_ms_p99": 72.6
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 327.8,
+          "tokens_out": 19673,
+          "tokens_in": 0,
+          "requests_completed": 108,
+          "ttft_ms_p50": 70.4,
+          "ttft_ms_p99": 85.7
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 310.3,
+          "tokens_out": 18617,
+          "tokens_in": 0,
+          "requests_completed": 102,
+          "ttft_ms_p50": 70.5,
+          "ttft_ms_p99": 86.0
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 312.6,
+          "tokens_out": 18753,
+          "tokens_in": 0,
+          "requests_completed": 102,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 71.8
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 321.0,
+          "tokens_out": 19255,
+          "tokens_in": 0,
+          "requests_completed": 105,
+          "ttft_ms_p50": 70.3,
+          "ttft_ms_p99": 72.0
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 315.5,
+          "tokens_out": 18930,
+          "tokens_in": 0,
+          "requests_completed": 106,
+          "ttft_ms_p50": 70.4,
+          "ttft_ms_p99": 71.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 312.0,
+      "throttle_ratio": 0.835,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -2612.8
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "10:42:09",
+    "run_id": "4c65fcfb",
+    "run_name": "nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T10:27:00.056331+00:00",
+    "benchmark_end_time": "2026-04-29T10:42:09.480677+00:00",
+    "benchmark_elapsed_minutes": 15.2,
+    "model_load_seconds": 56.1
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/env_info.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/env_info.json
similarity index 63%
rename from results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/env_info.json
rename to results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/env_info.json
index 5690fb1d..f726f580 100644
--- a/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/env_info.json
+++ b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/env_info.json
@@ -1,27 +1,27 @@
 {
-  "collected_at": "2026-05-07T06:55:48.459765+00:00",
+  "collected_at": "2026-04-29T10:07:24.359391+00:00",
   "accelerators": [
     {
       "index": 0,
-      "name": "NVIDIA A100-SXM4-40GB",
+      "name": "NVIDIA RTX A6000",
       "vendor": "NVIDIA",
-      "memory_gb": 40,
+      "memory_gb": 48.0,
       "driver_version": "565.57.01",
       "firmware_version": null,
-      "compute_capability": "8.0",
+      "compute_capability": "8.6",
       "supports_bf16": true
     }
   ],
-  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tSYS\tSYS\tSYS\t32-63,96-127\t1\t\tN/A\nNIC0\tSYS\t X \tPIX\tNODE\t\t\t\t\nNIC1\tSYS\tPIX\t X \tNODE\t\t\t\t\nNIC2\tSYS\tNODE\tNODE\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
   "intra_node_interconnect": null,
   "cpu": {
-    "model": "AMD EPYC 7532 32-Core Processor",
-    "physical_cores": 64,
-    "logical_cores": 128,
+    "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+    "physical_cores": 76,
+    "logical_cores": 152,
     "numa_nodes": 2
   },
-  "system_memory_gb": 1007.7,
-  "pcie_generation": "PCIe Gen 4",
+  "system_memory_gb": 1007.5,
+  "pcie_generation": "PCIe Gen 1",
   "cpu_accelerator_bandwidth_gbs": null,
   "network_interfaces": [
     {
@@ -38,6 +38,11 @@
       "name": "mlx5_2",
       "type": "InfiniBand/RoCE",
       "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_3",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
     }
   ],
   "os": "Ubuntu 22.04.4 LTS",
diff --git a/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/result.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/result.json
new file mode 100644
index 00000000..1cb7fc0a
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/result.json
@@ -0,0 +1,963 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original",
+    "_note": "suite model_id. Each precision level uses its own quantized checkpoint."
+  },
+  "task": {
+    "scenarios_run": [
+      "accuracy",
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "precision_levels_run": [
+      "BF16",
+      "FP8",
+      "W8A8",
+      "W8A16",
+      "W4A16"
+    ],
+    "precision_levels_skipped": [
+      "FP16"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "quantization": {
+      "results_by_precision": [
+        {
+          "precision": "BF16",
+          "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+          "best_throughput_tokens_per_sec": 2045.83,
+          "accuracy_score": 0.57,
+          "accuracy_baseline_delta": 0.01,
+          "accuracy_valid": true,
+          "quality_efficiency": 1166.1,
+          "speedup_vs_bf16": 1.0,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 2044.08,
+              "throughput_tokens_per_sec_per_chip": 2044.08,
+              "elapsed_seconds_median": 17.4,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 2043.12,
+              "throughput_tokens_per_sec_per_chip": 2043.12,
+              "elapsed_seconds_median": 17.4,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 2043.77,
+              "throughput_tokens_per_sec_per_chip": 2043.77,
+              "elapsed_seconds_median": 17.4,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 2045.83,
+              "throughput_tokens_per_sec_per_chip": 2045.83,
+              "elapsed_seconds_median": 17.4,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "bf16",
+          "effective_dtype": "bfloat16",
+          "quantization_method": null
+        },
+        {
+          "precision": "W8A16",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+          "best_throughput_tokens_per_sec": 2231.0,
+          "accuracy_score": 0.58,
+          "accuracy_baseline_delta": -0.01,
+          "accuracy_valid": true,
+          "quality_efficiency": 1294.0,
+          "speedup_vs_bf16": 1.091,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 2231.0,
+              "throughput_tokens_per_sec_per_chip": 2231.0,
+              "elapsed_seconds_median": 15.9,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 2228.35,
+              "throughput_tokens_per_sec_per_chip": 2228.35,
+              "elapsed_seconds_median": 15.9,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 2225.95,
+              "throughput_tokens_per_sec_per_chip": 2225.95,
+              "elapsed_seconds_median": 15.9,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 2221.05,
+              "throughput_tokens_per_sec_per_chip": 2221.05,
+              "elapsed_seconds_median": 16.0,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "w8a16",
+          "effective_dtype": "auto",
+          "quantization_method": "compressed-tensors"
+        },
+        {
+          "precision": "W4A16",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+          "best_throughput_tokens_per_sec": 1120.82,
+          "accuracy_score": 0.57,
+          "accuracy_baseline_delta": 0.0,
+          "accuracy_valid": true,
+          "quality_efficiency": 638.9,
+          "speedup_vs_bf16": 0.548,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 1116.9,
+              "throughput_tokens_per_sec_per_chip": 1116.9,
+              "elapsed_seconds_median": 31.0,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 1115.69,
+              "throughput_tokens_per_sec_per_chip": 1115.69,
+              "elapsed_seconds_median": 31.1,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 1120.82,
+              "throughput_tokens_per_sec_per_chip": 1120.82,
+              "elapsed_seconds_median": 31.1,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 1117.8,
+              "throughput_tokens_per_sec_per_chip": 1117.8,
+              "elapsed_seconds_median": 31.0,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "w4a16",
+          "effective_dtype": "auto",
+          "quantization_method": "gptq"
+        }
+      ]
+    },
+    "derived": {},
+    "quantization_online": {
+      "results_by_precision": [
+        {
+          "precision": "BF16",
+          "max_valid_qps": 50,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 74.15,
+              "ttft_ms_p90": 125.13,
+              "ttft_ms_p99": 1721.49,
+              "tpot_ms_p50": 30.47,
+              "tpot_ms_p90": 35.85,
+              "tpot_ms_p99": 40.25,
+              "elapsed_seconds_median": 68.7,
+              "sla_met": false
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 71.26,
+              "ttft_ms_p90": 85.73,
+              "ttft_ms_p99": 93.62,
+              "tpot_ms_p50": 40.73,
+              "tpot_ms_p90": 42.27,
+              "tpot_ms_p99": 46.79,
+              "elapsed_seconds_median": 36.0,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 74.68,
+              "ttft_ms_p90": 110.47,
+              "ttft_ms_p99": 158.24,
+              "tpot_ms_p50": 67.21,
+              "tpot_ms_p90": 76.53,
+              "tpot_ms_p99": 102.14,
+              "elapsed_seconds_median": 23.4,
+              "sla_met": true
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 71.87,
+              "ttft_ms_p90": 100.76,
+              "ttft_ms_p99": 152.6,
+              "tpot_ms_p50": 76.16,
+              "tpot_ms_p90": 89.69,
+              "tpot_ms_p99": 138.89,
+              "elapsed_seconds_median": 20.4,
+              "sla_met": true
+            }
+          ]
+        },
+        {
+          "precision": "W8A16",
+          "max_valid_qps": 50,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 49.64,
+              "ttft_ms_p90": 136.86,
+              "ttft_ms_p99": 2169.34,
+              "tpot_ms_p50": 17.11,
+              "tpot_ms_p90": 23.7,
+              "tpot_ms_p99": 30.35,
+              "elapsed_seconds_median": 66.4,
+              "sla_met": false
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 49.95,
+              "ttft_ms_p90": 60.03,
+              "ttft_ms_p99": 69.32,
+              "tpot_ms_p50": 23.82,
+              "tpot_ms_p90": 26.2,
+              "tpot_ms_p99": 32.79,
+              "elapsed_seconds_median": 33.9,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 63.29,
+              "ttft_ms_p90": 112.98,
+              "ttft_ms_p99": 155.72,
+              "tpot_ms_p50": 64.65,
+              "tpot_ms_p90": 72.61,
+              "tpot_ms_p99": 94.42,
+              "elapsed_seconds_median": 23.3,
+              "sla_met": true
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 58.06,
+              "ttft_ms_p90": 115.18,
+              "ttft_ms_p99": 187.15,
+              "tpot_ms_p50": 78.68,
+              "tpot_ms_p90": 89.12,
+              "tpot_ms_p99": 118.6,
+              "elapsed_seconds_median": 21.5,
+              "sla_met": true
+            }
+          ]
+        },
+        {
+          "precision": "W4A16",
+          "max_valid_qps": 50,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 92.08,
+              "ttft_ms_p90": 210.3,
+              "ttft_ms_p99": 1668.21,
+              "tpot_ms_p50": 53.34,
+              "tpot_ms_p90": 82.0,
+              "tpot_ms_p99": 90.25,
+              "elapsed_seconds_median": 69.4,
+              "sla_met": false
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 91.46,
+              "ttft_ms_p90": 135.97,
+              "ttft_ms_p99": 154.85,
+              "tpot_ms_p50": 71.91,
+              "tpot_ms_p90": 77.39,
+              "tpot_ms_p99": 87.33,
+              "elapsed_seconds_median": 41.6,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 83.34,
+              "ttft_ms_p90": 140.37,
+              "ttft_ms_p99": 190.61,
+              "tpot_ms_p50": 85.26,
+              "tpot_ms_p90": 91.49,
+              "tpot_ms_p99": 109.69,
+              "elapsed_seconds_median": 28.4,
+              "sla_met": true
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 75.89,
+              "ttft_ms_p90": 132.81,
+              "ttft_ms_p99": 204.46,
+              "tpot_ms_p50": 89.48,
+              "tpot_ms_p90": 98.79,
+              "tpot_ms_p99": 119.35,
+              "elapsed_seconds_median": 24.4,
+              "sla_met": true
+            }
+          ]
+        }
+      ]
+    },
+    "quantization_sustained": {
+      "results_by_precision": [
+        {
+          "precision": "BF16",
+          "sustained_throughput_tokens_per_sec": 312.0,
+          "throttle_ratio": 0.835,
+          "throttle_onset_minute": 1.0,
+          "ttft_p99_drift_ms": -2612.8,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 273.8,
+              "tokens_out": 16436,
+              "tokens_in": 0,
+              "requests_completed": 92,
+              "ttft_ms_p50": 104.0,
+              "ttft_ms_p99": 2684.5
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 318.4,
+              "tokens_out": 19101,
+              "tokens_in": 0,
+              "requests_completed": 104,
+              "ttft_ms_p50": 70.4,
+              "ttft_ms_p99": 154.0
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 321.5,
+              "tokens_out": 19280,
+              "tokens_in": 0,
+              "requests_completed": 104,
+              "ttft_ms_p50": 70.5,
+              "ttft_ms_p99": 85.4
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 309.0,
+              "tokens_out": 18541,
+              "tokens_in": 0,
+              "requests_completed": 102,
+              "ttft_ms_p50": 70.4,
+              "ttft_ms_p99": 86.2
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 307.8,
+              "tokens_out": 18477,
+              "tokens_in": 0,
+              "requests_completed": 101,
+              "ttft_ms_p50": 70.4,
+              "ttft_ms_p99": 85.4
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 315.5,
+              "tokens_out": 18930,
+              "tokens_in": 0,
+              "requests_completed": 104,
+              "ttft_ms_p50": 70.4,
+              "ttft_ms_p99": 72.0
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 320.6,
+              "tokens_out": 19232,
+              "tokens_in": 0,
+              "requests_completed": 104,
+              "ttft_ms_p50": 70.4,
+              "ttft_ms_p99": 85.9
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 306.6,
+              "tokens_out": 18403,
+              "tokens_in": 0,
+              "requests_completed": 100,
+              "ttft_ms_p50": 70.6,
+              "ttft_ms_p99": 72.0
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 308.1,
+              "tokens_out": 18483,
+              "tokens_in": 0,
+              "requests_completed": 101,
+              "ttft_ms_p50": 70.2,
+              "ttft_ms_p99": 72.6
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 327.8,
+              "tokens_out": 19673,
+              "tokens_in": 0,
+              "requests_completed": 108,
+              "ttft_ms_p50": 70.4,
+              "ttft_ms_p99": 85.7
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 310.3,
+              "tokens_out": 18617,
+              "tokens_in": 0,
+              "requests_completed": 102,
+              "ttft_ms_p50": 70.5,
+              "ttft_ms_p99": 86.0
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 312.6,
+              "tokens_out": 18753,
+              "tokens_in": 0,
+              "requests_completed": 102,
+              "ttft_ms_p50": 70.3,
+              "ttft_ms_p99": 71.8
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 321.0,
+              "tokens_out": 19255,
+              "tokens_in": 0,
+              "requests_completed": 105,
+              "ttft_ms_p50": 70.3,
+              "ttft_ms_p99": 72.0
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 315.5,
+              "tokens_out": 18930,
+              "tokens_in": 0,
+              "requests_completed": 106,
+              "ttft_ms_p50": 70.4,
+              "ttft_ms_p99": 71.7
+            }
+          ]
+        },
+        {
+          "precision": "W8A16",
+          "sustained_throughput_tokens_per_sec": 526.2,
+          "throttle_ratio": 0.855,
+          "throttle_onset_minute": 1.0,
+          "ttft_p99_drift_ms": -2654.1,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 461.1,
+              "tokens_out": 27681,
+              "tokens_in": 0,
+              "requests_completed": 153,
+              "ttft_ms_p50": 69.6,
+              "ttft_ms_p99": 2715.1
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 528.2,
+              "tokens_out": 31687,
+              "tokens_in": 0,
+              "requests_completed": 171,
+              "ttft_ms_p50": 43.8,
+              "ttft_ms_p99": 60.4
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 533.2,
+              "tokens_out": 31998,
+              "tokens_in": 0,
+              "requests_completed": 173,
+              "ttft_ms_p50": 43.8,
+              "ttft_ms_p99": 57.7
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 534.2,
+              "tokens_out": 32045,
+              "tokens_in": 0,
+              "requests_completed": 174,
+              "ttft_ms_p50": 43.7,
+              "ttft_ms_p99": 60.7
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 532.3,
+              "tokens_out": 31952,
+              "tokens_in": 0,
+              "requests_completed": 174,
+              "ttft_ms_p50": 43.6,
+              "ttft_ms_p99": 60.9
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 524.2,
+              "tokens_out": 31451,
+              "tokens_in": 0,
+              "requests_completed": 168,
+              "ttft_ms_p50": 43.5,
+              "ttft_ms_p99": 62.5
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 531.8,
+              "tokens_out": 31893,
+              "tokens_in": 0,
+              "requests_completed": 176,
+              "ttft_ms_p50": 43.8,
+              "ttft_ms_p99": 61.1
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 534.3,
+              "tokens_out": 32076,
+              "tokens_in": 0,
+              "requests_completed": 174,
+              "ttft_ms_p50": 43.8,
+              "ttft_ms_p99": 59.9
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 524.9,
+              "tokens_out": 31483,
+              "tokens_in": 0,
+              "requests_completed": 173,
+              "ttft_ms_p50": 43.8,
+              "ttft_ms_p99": 52.6
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 528.5,
+              "tokens_out": 31707,
+              "tokens_in": 0,
+              "requests_completed": 169,
+              "ttft_ms_p50": 43.4,
+              "ttft_ms_p99": 49.5
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 539.0,
+              "tokens_out": 32351,
+              "tokens_in": 0,
+              "requests_completed": 177,
+              "ttft_ms_p50": 43.8,
+              "ttft_ms_p99": 60.5
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 529.6,
+              "tokens_out": 31772,
+              "tokens_in": 0,
+              "requests_completed": 172,
+              "ttft_ms_p50": 43.4,
+              "ttft_ms_p99": 53.7
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 531.3,
+              "tokens_out": 31875,
+              "tokens_in": 0,
+              "requests_completed": 174,
+              "ttft_ms_p50": 43.8,
+              "ttft_ms_p99": 61.4
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 533.6,
+              "tokens_out": 32030,
+              "tokens_in": 0,
+              "requests_completed": 174,
+              "ttft_ms_p50": 43.8,
+              "ttft_ms_p99": 61.0
+            }
+          ]
+        },
+        {
+          "precision": "W4A16",
+          "sustained_throughput_tokens_per_sec": 653.7,
+          "throttle_ratio": 0.85,
+          "throttle_onset_minute": 1.0,
+          "ttft_p99_drift_ms": -2556.3,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 564.4,
+              "tokens_out": 33886,
+              "tokens_in": 0,
+              "requests_completed": 192,
+              "ttft_ms_p50": 75.0,
+              "ttft_ms_p99": 2609.7
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 660.3,
+              "tokens_out": 39619,
+              "tokens_in": 0,
+              "requests_completed": 220,
+              "ttft_ms_p50": 36.6,
+              "ttft_ms_p99": 49.9
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 661.9,
+              "tokens_out": 39693,
+              "tokens_in": 0,
+              "requests_completed": 227,
+              "ttft_ms_p50": 37.1,
+              "ttft_ms_p99": 53.5
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 660.9,
+              "tokens_out": 39668,
+              "tokens_in": 0,
+              "requests_completed": 221,
+              "ttft_ms_p50": 36.6,
+              "ttft_ms_p99": 52.1
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 655.8,
+              "tokens_out": 39353,
+              "tokens_in": 0,
+              "requests_completed": 220,
+              "ttft_ms_p50": 37.1,
+              "ttft_ms_p99": 54.3
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 659.7,
+              "tokens_out": 39582,
+              "tokens_in": 0,
+              "requests_completed": 220,
+              "ttft_ms_p50": 37.0,
+              "ttft_ms_p99": 51.8
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 662.5,
+              "tokens_out": 39746,
+              "tokens_in": 0,
+              "requests_completed": 223,
+              "ttft_ms_p50": 36.6,
+              "ttft_ms_p99": 51.8
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 662.2,
+              "tokens_out": 39737,
+              "tokens_in": 0,
+              "requests_completed": 225,
+              "ttft_ms_p50": 36.7,
+              "ttft_ms_p99": 52.9
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 660.2,
+              "tokens_out": 39589,
+              "tokens_in": 0,
+              "requests_completed": 219,
+              "ttft_ms_p50": 36.7,
+              "ttft_ms_p99": 54.6
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 655.2,
+              "tokens_out": 39332,
+              "tokens_in": 0,
+              "requests_completed": 222,
+              "ttft_ms_p50": 36.6,
+              "ttft_ms_p99": 50.8
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 660.2,
+              "tokens_out": 39602,
+              "tokens_in": 0,
+              "requests_completed": 220,
+              "ttft_ms_p50": 36.5,
+              "ttft_ms_p99": 52.5
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 662.3,
+              "tokens_out": 39733,
+              "tokens_in": 0,
+              "requests_completed": 228,
+              "ttft_ms_p50": 36.6,
+              "ttft_ms_p99": 48.4
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 663.9,
+              "tokens_out": 39837,
+              "tokens_in": 0,
+              "requests_completed": 220,
+              "ttft_ms_p50": 37.0,
+              "ttft_ms_p99": 52.1
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 662.6,
+              "tokens_out": 39752,
+              "tokens_in": 0,
+              "requests_completed": 223,
+              "ttft_ms_p50": 36.5,
+              "ttft_ms_p99": 53.4
+            }
+          ]
+        }
+      ]
+    }
+  },
+  "accuracy": null,
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "10:16:44",
+    "run_id": "4c65fcfb",
+    "run_name": "nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T10:11:59.573174+00:00",
+    "benchmark_end_time": "2026-04-29T10:16:44.166472+00:00",
+    "benchmark_elapsed_minutes": 85.7,
+    "model_load_seconds": 63.0,
+    "benchmark_elapsed_minutes_note": "Sum of per-precision benchmark_elapsed_minutes (excludes sleep gaps and orchestrator overhead).",
+    "scenario_dirs": {
+      "bf16/offline": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/offline",
+      "bf16/online": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/online",
+      "bf16/sustained": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/bf16/sustained",
+      "fp8/offline": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/fp8/offline",
+      "fp8/online": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/fp8/online",
+      "fp8/sustained": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/fp8/sustained",
+      "w8a8/offline": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a8/offline",
+      "w8a8/online": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a8/online",
+      "w8a8/sustained": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a8/sustained",
+      "w8a16/offline": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/offline",
+      "w8a16/online": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/online",
+      "w8a16/sustained": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/sustained",
+      "w4a16/offline": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/offline",
+      "w4a16/online": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/online",
+      "w4a16/sustained": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/sustained"
+    },
+    "precision_dirs": {
+      "BF16": "bf16",
+      "FP8": "fp8",
+      "W8A8": "w8a8",
+      "W8A16": "w8a16",
+      "W4A16": "w4a16"
+    },
+    "precision_model_map": {
+      "BF16": {
+        "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+        "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+        "dtype_override": "bfloat16"
+      },
+      "FP8": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
+        "model_revision": "12fd6884d2585dd4d020373e7f39f74507b31866",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "Static per-tensor FP8 (weights + activations). Requires Ampere+ (A100, A800, H20). Skipped automatically on FP16-only hardware."
+      },
+      "W8A8": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+        "model_revision": "e2bfb7d92784ad7d1b606c2f9644d3cefb2ec708",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "INT8 weights + INT8 activations via compressed-tensors. Exercises native int8 tensor cores."
+      },
+      "W8A16": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+        "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "INT8 weights, FP16 activations. Weight-only quantization — reduces memory bandwidth, not compute dtype."
+      },
+      "W4A16": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+        "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+        "engine_kwargs": {
+          "quantization": "gptq"
+        },
+        "_note": "INT4 weights, FP16 activations via GPTQ Marlin kernels. Weight-only quantization — larger memory saving than W8A16."
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/accuracy/accuracy.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/accuracy/accuracy.json
new file mode 100644
index 00000000..e2c86fd4
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.57,
+  "baseline_delta": 0.0,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "W4A16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/offline/result.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/offline/result.json
new file mode 100644
index 00000000..bb8d938c
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/offline/result.json
@@ -0,0 +1,178 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T10:07:24.359391+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using GPTQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "auto",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 1116.9,
+          "throughput_tokens_per_sec_per_chip": 1116.9,
+          "elapsed_seconds_median": 31.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 1115.69,
+          "throughput_tokens_per_sec_per_chip": 1115.69,
+          "elapsed_seconds_median": 31.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 1120.82,
+          "throughput_tokens_per_sec_per_chip": 1120.82,
+          "elapsed_seconds_median": 31.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 1117.8,
+          "throughput_tokens_per_sec_per_chip": 1117.8,
+          "elapsed_seconds_median": 31.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "11:39:27",
+    "run_id": "5daf2609",
+    "run_name": "nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_5daf2609",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T11:31:05.380698+00:00",
+    "benchmark_end_time": "2026-04-29T11:39:27.782384+00:00",
+    "benchmark_elapsed_minutes": 8.4,
+    "model_load_seconds": 47.3
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/online/result.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/online/result.json
new file mode 100644
index 00000000..59f17301
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/online/result.json
@@ -0,0 +1,180 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T10:07:24.359391+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using GPTQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "auto",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 92.08,
+          "ttft_ms_p90": 210.3,
+          "ttft_ms_p99": 1668.21,
+          "tpot_ms_p50": 53.34,
+          "tpot_ms_p90": 82.0,
+          "tpot_ms_p99": 90.25,
+          "elapsed_seconds_median": 69.4,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 91.46,
+          "ttft_ms_p90": 135.97,
+          "ttft_ms_p99": 154.85,
+          "tpot_ms_p50": 71.91,
+          "tpot_ms_p90": 77.39,
+          "tpot_ms_p99": 87.33,
+          "elapsed_seconds_median": 41.6,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 83.34,
+          "ttft_ms_p90": 140.37,
+          "ttft_ms_p99": 190.61,
+          "tpot_ms_p50": 85.26,
+          "tpot_ms_p90": 91.49,
+          "tpot_ms_p99": 109.69,
+          "elapsed_seconds_median": 28.4,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 75.89,
+          "ttft_ms_p90": 132.81,
+          "ttft_ms_p99": 204.46,
+          "tpot_ms_p50": 89.48,
+          "tpot_ms_p90": 98.79,
+          "tpot_ms_p99": 119.35,
+          "elapsed_seconds_median": 24.4,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "11:48:47",
+    "run_id": "5daf2609",
+    "run_name": "nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_5daf2609",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T11:40:35.967057+00:00",
+    "benchmark_end_time": "2026-04-29T11:48:47.468382+00:00",
+    "benchmark_elapsed_minutes": 8.2,
+    "model_load_seconds": 42.8
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/result.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/result.json
new file mode 100644
index 00000000..4948dcc4
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/result.json
@@ -0,0 +1,395 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T10:07:24.359391+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using GPTQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "auto",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 1116.9,
+          "throughput_tokens_per_sec_per_chip": 1116.9,
+          "elapsed_seconds_median": 31.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 1115.69,
+          "throughput_tokens_per_sec_per_chip": 1115.69,
+          "elapsed_seconds_median": 31.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 1120.82,
+          "throughput_tokens_per_sec_per_chip": 1120.82,
+          "elapsed_seconds_median": 31.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 1117.8,
+          "throughput_tokens_per_sec_per_chip": 1117.8,
+          "elapsed_seconds_median": 31.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 92.08,
+          "ttft_ms_p90": 210.3,
+          "ttft_ms_p99": 1668.21,
+          "tpot_ms_p50": 53.34,
+          "tpot_ms_p90": 82.0,
+          "tpot_ms_p99": 90.25,
+          "elapsed_seconds_median": 69.4,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 91.46,
+          "ttft_ms_p90": 135.97,
+          "ttft_ms_p99": 154.85,
+          "tpot_ms_p50": 71.91,
+          "tpot_ms_p90": 77.39,
+          "tpot_ms_p99": 87.33,
+          "elapsed_seconds_median": 41.6,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 83.34,
+          "ttft_ms_p90": 140.37,
+          "ttft_ms_p99": 190.61,
+          "tpot_ms_p50": 85.26,
+          "tpot_ms_p90": 91.49,
+          "tpot_ms_p99": 109.69,
+          "elapsed_seconds_median": 28.4,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 75.89,
+          "ttft_ms_p90": 132.81,
+          "ttft_ms_p99": 204.46,
+          "tpot_ms_p50": 89.48,
+          "tpot_ms_p90": 98.79,
+          "tpot_ms_p99": 119.35,
+          "elapsed_seconds_median": 24.4,
+          "sla_met": true
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 564.4,
+          "tokens_out": 33886,
+          "tokens_in": 0,
+          "requests_completed": 192,
+          "ttft_ms_p50": 75.0,
+          "ttft_ms_p99": 2609.7
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.3,
+          "tokens_out": 39619,
+          "tokens_in": 0,
+          "requests_completed": 220,
+          "ttft_ms_p50": 36.6,
+          "ttft_ms_p99": 49.9
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 661.9,
+          "tokens_out": 39693,
+          "tokens_in": 0,
+          "requests_completed": 227,
+          "ttft_ms_p50": 37.1,
+          "ttft_ms_p99": 53.5
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.9,
+          "tokens_out": 39668,
+          "tokens_in": 0,
+          "requests_completed": 221,
+          "ttft_ms_p50": 36.6,
+          "ttft_ms_p99": 52.1
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 655.8,
+          "tokens_out": 39353,
+          "tokens_in": 0,
+          "requests_completed": 220,
+          "ttft_ms_p50": 37.1,
+          "ttft_ms_p99": 54.3
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 659.7,
+          "tokens_out": 39582,
+          "tokens_in": 0,
+          "requests_completed": 220,
+          "ttft_ms_p50": 37.0,
+          "ttft_ms_p99": 51.8
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.5,
+          "tokens_out": 39746,
+          "tokens_in": 0,
+          "requests_completed": 223,
+          "ttft_ms_p50": 36.6,
+          "ttft_ms_p99": 51.8
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.2,
+          "tokens_out": 39737,
+          "tokens_in": 0,
+          "requests_completed": 225,
+          "ttft_ms_p50": 36.7,
+          "ttft_ms_p99": 52.9
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.2,
+          "tokens_out": 39589,
+          "tokens_in": 0,
+          "requests_completed": 219,
+          "ttft_ms_p50": 36.7,
+          "ttft_ms_p99": 54.6
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 655.2,
+          "tokens_out": 39332,
+          "tokens_in": 0,
+          "requests_completed": 222,
+          "ttft_ms_p50": 36.6,
+          "ttft_ms_p99": 50.8
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.2,
+          "tokens_out": 39602,
+          "tokens_in": 0,
+          "requests_completed": 220,
+          "ttft_ms_p50": 36.5,
+          "ttft_ms_p99": 52.5
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.3,
+          "tokens_out": 39733,
+          "tokens_in": 0,
+          "requests_completed": 228,
+          "ttft_ms_p50": 36.6,
+          "ttft_ms_p99": 48.4
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 663.9,
+          "tokens_out": 39837,
+          "tokens_in": 0,
+          "requests_completed": 220,
+          "ttft_ms_p50": 37.0,
+          "ttft_ms_p99": 52.1
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.6,
+          "tokens_out": 39752,
+          "tokens_in": 0,
+          "requests_completed": 223,
+          "ttft_ms_p50": 36.5,
+          "ttft_ms_p99": 53.4
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 653.7,
+      "throttle_ratio": 0.85,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -2556.3
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.57,
+    "baseline_delta": 0.0,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "W4A16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "11:39:27",
+    "run_id": "5daf2609",
+    "run_name": "nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_5daf2609",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T11:31:05.380698+00:00",
+    "benchmark_end_time": "2026-04-29T11:39:27.782384+00:00",
+    "benchmark_elapsed_minutes": 31.7,
+    "model_load_seconds": 47.3,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/offline",
+      "online": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/online",
+      "sustained": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/sustained/result.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/sustained/result.json
new file mode 100644
index 00000000..fb371e29
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w4a16/sustained/result.json
@@ -0,0 +1,278 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T10:07:24.359391+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using AWQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "auto",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 564.4,
+          "tokens_out": 33886,
+          "tokens_in": 0,
+          "requests_completed": 192,
+          "ttft_ms_p50": 75.0,
+          "ttft_ms_p99": 2609.7
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.3,
+          "tokens_out": 39619,
+          "tokens_in": 0,
+          "requests_completed": 220,
+          "ttft_ms_p50": 36.6,
+          "ttft_ms_p99": 49.9
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 661.9,
+          "tokens_out": 39693,
+          "tokens_in": 0,
+          "requests_completed": 227,
+          "ttft_ms_p50": 37.1,
+          "ttft_ms_p99": 53.5
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.9,
+          "tokens_out": 39668,
+          "tokens_in": 0,
+          "requests_completed": 221,
+          "ttft_ms_p50": 36.6,
+          "ttft_ms_p99": 52.1
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 655.8,
+          "tokens_out": 39353,
+          "tokens_in": 0,
+          "requests_completed": 220,
+          "ttft_ms_p50": 37.1,
+          "ttft_ms_p99": 54.3
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 659.7,
+          "tokens_out": 39582,
+          "tokens_in": 0,
+          "requests_completed": 220,
+          "ttft_ms_p50": 37.0,
+          "ttft_ms_p99": 51.8
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.5,
+          "tokens_out": 39746,
+          "tokens_in": 0,
+          "requests_completed": 223,
+          "ttft_ms_p50": 36.6,
+          "ttft_ms_p99": 51.8
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.2,
+          "tokens_out": 39737,
+          "tokens_in": 0,
+          "requests_completed": 225,
+          "ttft_ms_p50": 36.7,
+          "ttft_ms_p99": 52.9
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.2,
+          "tokens_out": 39589,
+          "tokens_in": 0,
+          "requests_completed": 219,
+          "ttft_ms_p50": 36.7,
+          "ttft_ms_p99": 54.6
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 655.2,
+          "tokens_out": 39332,
+          "tokens_in": 0,
+          "requests_completed": 222,
+          "ttft_ms_p50": 36.6,
+          "ttft_ms_p99": 50.8
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 660.2,
+          "tokens_out": 39602,
+          "tokens_in": 0,
+          "requests_completed": 220,
+          "ttft_ms_p50": 36.5,
+          "ttft_ms_p99": 52.5
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.3,
+          "tokens_out": 39733,
+          "tokens_in": 0,
+          "requests_completed": 228,
+          "ttft_ms_p50": 36.6,
+          "ttft_ms_p99": 48.4
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 663.9,
+          "tokens_out": 39837,
+          "tokens_in": 0,
+          "requests_completed": 220,
+          "ttft_ms_p50": 37.0,
+          "ttft_ms_p99": 52.1
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 662.6,
+          "tokens_out": 39752,
+          "tokens_in": 0,
+          "requests_completed": 223,
+          "ttft_ms_p50": 36.5,
+          "ttft_ms_p99": 53.4
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 653.7,
+      "throttle_ratio": 0.85,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -2556.3
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "12:05:00",
+    "run_id": "5daf2609",
+    "run_name": "nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_5daf2609",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T11:49:56.190663+00:00",
+    "benchmark_end_time": "2026-04-29T12:05:00.544279+00:00",
+    "benchmark_elapsed_minutes": 15.1,
+    "model_load_seconds": 43.7
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/accuracy/accuracy.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/accuracy/accuracy.json
new file mode 100644
index 00000000..19c9f93b
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.58,
+  "baseline_delta": -0.01,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "W8A16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/offline/result.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/offline/result.json
new file mode 100644
index 00000000..7df3d894
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/offline/result.json
@@ -0,0 +1,178 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T10:07:24.359391+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 2231.0,
+          "throughput_tokens_per_sec_per_chip": 2231.0,
+          "elapsed_seconds_median": 15.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 2228.35,
+          "throughput_tokens_per_sec_per_chip": 2228.35,
+          "elapsed_seconds_median": 15.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 2225.95,
+          "throughput_tokens_per_sec_per_chip": 2225.95,
+          "elapsed_seconds_median": 15.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 2221.05,
+          "throughput_tokens_per_sec_per_chip": 2221.05,
+          "elapsed_seconds_median": 16.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "11:01:14",
+    "run_id": "0ada63b0",
+    "run_name": "nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_0ada63b0",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T10:56:51.187586+00:00",
+    "benchmark_end_time": "2026-04-29T11:01:14.359571+00:00",
+    "benchmark_elapsed_minutes": 4.4,
+    "model_load_seconds": 45.7
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/online/result.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/online/result.json
new file mode 100644
index 00000000..4d99f5f0
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/online/result.json
@@ -0,0 +1,180 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T10:07:24.359391+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 49.64,
+          "ttft_ms_p90": 136.86,
+          "ttft_ms_p99": 2169.34,
+          "tpot_ms_p50": 17.11,
+          "tpot_ms_p90": 23.7,
+          "tpot_ms_p99": 30.35,
+          "elapsed_seconds_median": 66.4,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 49.95,
+          "ttft_ms_p90": 60.03,
+          "ttft_ms_p99": 69.32,
+          "tpot_ms_p50": 23.82,
+          "tpot_ms_p90": 26.2,
+          "tpot_ms_p99": 32.79,
+          "elapsed_seconds_median": 33.9,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 63.29,
+          "ttft_ms_p90": 112.98,
+          "ttft_ms_p99": 155.72,
+          "tpot_ms_p50": 64.65,
+          "tpot_ms_p90": 72.61,
+          "tpot_ms_p99": 94.42,
+          "elapsed_seconds_median": 23.3,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 58.06,
+          "ttft_ms_p90": 115.18,
+          "ttft_ms_p99": 187.15,
+          "tpot_ms_p50": 78.68,
+          "tpot_ms_p90": 89.12,
+          "tpot_ms_p99": 118.6,
+          "elapsed_seconds_median": 21.5,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "11:09:40",
+    "run_id": "0ada63b0",
+    "run_name": "nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_0ada63b0",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T11:02:28.295508+00:00",
+    "benchmark_end_time": "2026-04-29T11:09:40.916415+00:00",
+    "benchmark_elapsed_minutes": 7.2,
+    "model_load_seconds": 46.9
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/result.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/result.json
new file mode 100644
index 00000000..2f8dfa86
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/result.json
@@ -0,0 +1,395 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T10:07:24.359391+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 2231.0,
+          "throughput_tokens_per_sec_per_chip": 2231.0,
+          "elapsed_seconds_median": 15.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 2228.35,
+          "throughput_tokens_per_sec_per_chip": 2228.35,
+          "elapsed_seconds_median": 15.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 2225.95,
+          "throughput_tokens_per_sec_per_chip": 2225.95,
+          "elapsed_seconds_median": 15.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 2221.05,
+          "throughput_tokens_per_sec_per_chip": 2221.05,
+          "elapsed_seconds_median": 16.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 50,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 49.64,
+          "ttft_ms_p90": 136.86,
+          "ttft_ms_p99": 2169.34,
+          "tpot_ms_p50": 17.11,
+          "tpot_ms_p90": 23.7,
+          "tpot_ms_p99": 30.35,
+          "elapsed_seconds_median": 66.4,
+          "sla_met": false
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 49.95,
+          "ttft_ms_p90": 60.03,
+          "ttft_ms_p99": 69.32,
+          "tpot_ms_p50": 23.82,
+          "tpot_ms_p90": 26.2,
+          "tpot_ms_p99": 32.79,
+          "elapsed_seconds_median": 33.9,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 63.29,
+          "ttft_ms_p90": 112.98,
+          "ttft_ms_p99": 155.72,
+          "tpot_ms_p50": 64.65,
+          "tpot_ms_p90": 72.61,
+          "tpot_ms_p99": 94.42,
+          "elapsed_seconds_median": 23.3,
+          "sla_met": true
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 58.06,
+          "ttft_ms_p90": 115.18,
+          "ttft_ms_p99": 187.15,
+          "tpot_ms_p50": 78.68,
+          "tpot_ms_p90": 89.12,
+          "tpot_ms_p99": 118.6,
+          "elapsed_seconds_median": 21.5,
+          "sla_met": true
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 461.1,
+          "tokens_out": 27681,
+          "tokens_in": 0,
+          "requests_completed": 153,
+          "ttft_ms_p50": 69.6,
+          "ttft_ms_p99": 2715.1
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 528.2,
+          "tokens_out": 31687,
+          "tokens_in": 0,
+          "requests_completed": 171,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 60.4
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 533.2,
+          "tokens_out": 31998,
+          "tokens_in": 0,
+          "requests_completed": 173,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 57.7
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 534.2,
+          "tokens_out": 32045,
+          "tokens_in": 0,
+          "requests_completed": 174,
+          "ttft_ms_p50": 43.7,
+          "ttft_ms_p99": 60.7
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 532.3,
+          "tokens_out": 31952,
+          "tokens_in": 0,
+          "requests_completed": 174,
+          "ttft_ms_p50": 43.6,
+          "ttft_ms_p99": 60.9
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 524.2,
+          "tokens_out": 31451,
+          "tokens_in": 0,
+          "requests_completed": 168,
+          "ttft_ms_p50": 43.5,
+          "ttft_ms_p99": 62.5
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 531.8,
+          "tokens_out": 31893,
+          "tokens_in": 0,
+          "requests_completed": 176,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 61.1
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 534.3,
+          "tokens_out": 32076,
+          "tokens_in": 0,
+          "requests_completed": 174,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 59.9
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 524.9,
+          "tokens_out": 31483,
+          "tokens_in": 0,
+          "requests_completed": 173,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 52.6
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 528.5,
+          "tokens_out": 31707,
+          "tokens_in": 0,
+          "requests_completed": 169,
+          "ttft_ms_p50": 43.4,
+          "ttft_ms_p99": 49.5
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 539.0,
+          "tokens_out": 32351,
+          "tokens_in": 0,
+          "requests_completed": 177,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 60.5
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 529.6,
+          "tokens_out": 31772,
+          "tokens_in": 0,
+          "requests_completed": 172,
+          "ttft_ms_p50": 43.4,
+          "ttft_ms_p99": 53.7
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 531.3,
+          "tokens_out": 31875,
+          "tokens_in": 0,
+          "requests_completed": 174,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 61.4
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 533.6,
+          "tokens_out": 32030,
+          "tokens_in": 0,
+          "requests_completed": 174,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 61.0
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 526.2,
+      "throttle_ratio": 0.855,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -2654.1
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.58,
+    "baseline_delta": -0.01,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "W8A16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "11:01:14",
+    "run_id": "0ada63b0",
+    "run_name": "nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_0ada63b0",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T10:56:51.187586+00:00",
+    "benchmark_end_time": "2026-04-29T11:01:14.359571+00:00",
+    "benchmark_elapsed_minutes": 26.7,
+    "model_load_seconds": 45.7,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/offline",
+      "online": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/online",
+      "sustained": "results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/sustained/result.json b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/sustained/result.json
new file mode 100644
index 00000000..b3c62f33
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_4c65fcfb/w8a16/sustained/result.json
@@ -0,0 +1,278 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-29T10:07:24.359391+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "auto",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 461.1,
+          "tokens_out": 27681,
+          "tokens_in": 0,
+          "requests_completed": 153,
+          "ttft_ms_p50": 69.6,
+          "ttft_ms_p99": 2715.1
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 528.2,
+          "tokens_out": 31687,
+          "tokens_in": 0,
+          "requests_completed": 171,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 60.4
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 533.2,
+          "tokens_out": 31998,
+          "tokens_in": 0,
+          "requests_completed": 173,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 57.7
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 534.2,
+          "tokens_out": 32045,
+          "tokens_in": 0,
+          "requests_completed": 174,
+          "ttft_ms_p50": 43.7,
+          "ttft_ms_p99": 60.7
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 532.3,
+          "tokens_out": 31952,
+          "tokens_in": 0,
+          "requests_completed": 174,
+          "ttft_ms_p50": 43.6,
+          "ttft_ms_p99": 60.9
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 524.2,
+          "tokens_out": 31451,
+          "tokens_in": 0,
+          "requests_completed": 168,
+          "ttft_ms_p50": 43.5,
+          "ttft_ms_p99": 62.5
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 531.8,
+          "tokens_out": 31893,
+          "tokens_in": 0,
+          "requests_completed": 176,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 61.1
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 534.3,
+          "tokens_out": 32076,
+          "tokens_in": 0,
+          "requests_completed": 174,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 59.9
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 524.9,
+          "tokens_out": 31483,
+          "tokens_in": 0,
+          "requests_completed": 173,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 52.6
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 528.5,
+          "tokens_out": 31707,
+          "tokens_in": 0,
+          "requests_completed": 169,
+          "ttft_ms_p50": 43.4,
+          "ttft_ms_p99": 49.5
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 539.0,
+          "tokens_out": 32351,
+          "tokens_in": 0,
+          "requests_completed": 177,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 60.5
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 529.6,
+          "tokens_out": 31772,
+          "tokens_in": 0,
+          "requests_completed": 172,
+          "ttft_ms_p50": 43.4,
+          "ttft_ms_p99": 53.7
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 531.3,
+          "tokens_out": 31875,
+          "tokens_in": 0,
+          "requests_completed": 174,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 61.4
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 533.6,
+          "tokens_out": 32030,
+          "tokens_in": 0,
+          "requests_completed": 174,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 61.0
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 526.2,
+      "throttle_ratio": 0.855,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -2654.1
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-29",
+    "time": "11:26:04",
+    "run_id": "0ada63b0",
+    "run_name": "nvidia_rtx_a6000x1_suite_C_nvidia_sglang_c43a8309_0ada63b0",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-29T11:10:58.792403+00:00",
+    "benchmark_end_time": "2026-04-29T11:26:04.147313+00:00",
+    "benchmark_elapsed_minutes": 15.1,
+    "model_load_seconds": 50.4
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/accuracy/accuracy.json b/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/accuracy/accuracy.json
new file mode 100644
index 00000000..fae398ca
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.57,
+  "baseline_delta": 0.01,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/env_info.json b/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/env_info.json
new file mode 100644
index 00000000..3e4ec07e
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/env_info.json
@@ -0,0 +1,53 @@
+{
+  "collected_at": "2026-04-30T07:51:16.080658+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA RTX A6000",
+      "vendor": "NVIDIA",
+      "memory_gb": 48.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.6",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPIX\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPIX\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+    "physical_cores": 76,
+    "logical_cores": 152,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.5,
+  "pcie_generation": "PCIe Gen 1",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_2",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_3",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.10.20",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 12.8",
+  "pytorch_version": "2.9.1+cu128"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/interactive/result.json b/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/interactive/result.json
new file mode 100644
index 00000000..491039ca
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/interactive/result.json
@@ -0,0 +1,136 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T07:51:16.080658+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPIX\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPIX\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 5946.25,
+      "ttft_ms_p90": 6219.82,
+      "ttft_ms_p99": 6310.43,
+      "tpot_ms_p50": 28.33,
+      "tpot_ms_p90": 28.52,
+      "tpot_ms_p99": 28.56,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 1230.5
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "09:19:14",
+    "run_id": "4974e840",
+    "run_name": "nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T08:38:13.612044+00:00",
+    "benchmark_end_time": "2026-04-30T09:19:14.591030+00:00",
+    "benchmark_elapsed_minutes": 41.0,
+    "model_load_seconds": 56.8
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/offline/result.json b/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/offline/result.json
new file mode 100644
index 00000000..73cebf97
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/offline/result.json
@@ -0,0 +1,154 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T07:51:16.080658+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPIX\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPIX\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 31.41,
+          "throughput_tokens_per_sec_per_chip": 31.41,
+          "elapsed_seconds_median": 409.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 31.38,
+          "throughput_tokens_per_sec_per_chip": 31.38,
+          "elapsed_seconds_median": 409.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "08:36:51",
+    "run_id": "4974e840",
+    "run_name": "nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T07:55:51.379978+00:00",
+    "benchmark_end_time": "2026-04-30T08:36:51.414672+00:00",
+    "benchmark_elapsed_minutes": 41.0,
+    "model_load_seconds": 54.5
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/online/result.json b/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/online/result.json
new file mode 100644
index 00000000..a60d7b8c
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/online/result.json
@@ -0,0 +1,168 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T07:51:16.080658+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPIX\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPIX\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 5000,
+      "max_valid_qps": 0.0,
+      "results_by_qps": [
+        {
+          "target_qps": 0.5,
+          "achieved_qps": 0.5,
+          "ttft_ms_p50": 293244.79,
+          "ttft_ms_p90": 524501.19,
+          "ttft_ms_p99": 574243.71,
+          "tpot_ms_p50": 138.81,
+          "tpot_ms_p90": 212.31,
+          "tpot_ms_p99": 222.29,
+          "elapsed_seconds_median": 785.5,
+          "sla_met": false
+        },
+        {
+          "target_qps": 1,
+          "achieved_qps": 1.0,
+          "ttft_ms_p50": 332319.49,
+          "ttft_ms_p90": 588621.66,
+          "ttft_ms_p99": 661976.7,
+          "tpot_ms_p50": 138.95,
+          "tpot_ms_p90": 212.28,
+          "tpot_ms_p99": 222.24,
+          "elapsed_seconds_median": 786.2,
+          "sla_met": false
+        },
+        {
+          "target_qps": 2,
+          "achieved_qps": 2.0,
+          "ttft_ms_p50": 366180.69,
+          "ttft_ms_p90": 646640.94,
+          "ttft_ms_p99": 724071.12,
+          "tpot_ms_p50": 138.98,
+          "tpot_ms_p90": 212.36,
+          "tpot_ms_p99": 222.33,
+          "elapsed_seconds_median": 784.0,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "11:11:24",
+    "run_id": "4974e840",
+    "run_name": "nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T09:52:53.121061+00:00",
+    "benchmark_end_time": "2026-04-30T11:11:24.421043+00:00",
+    "benchmark_elapsed_minutes": 78.5,
+    "model_load_seconds": 52.9
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/result.json b/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/result.json
new file mode 100644
index 00000000..d0761ba6
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/result.json
@@ -0,0 +1,551 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T07:51:16.080658+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPIX\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPIX\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "interactive",
+      "sustained",
+      "online",
+      "speculative"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 2,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 31.41,
+          "throughput_tokens_per_sec_per_chip": 31.41,
+          "elapsed_seconds_median": 409.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 31.38,
+          "throughput_tokens_per_sec_per_chip": 31.38,
+          "elapsed_seconds_median": 409.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 5946.25,
+      "ttft_ms_p90": 6219.82,
+      "ttft_ms_p99": 6310.43,
+      "tpot_ms_p50": 28.33,
+      "tpot_ms_p90": 28.52,
+      "tpot_ms_p99": 28.56,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 1230.5
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 26412.4,
+          "ttft_ms_p99": 43394.0
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29065.7,
+          "ttft_ms_p99": 61856.6
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 30309.3,
+          "ttft_ms_p99": 59762.3
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29560.5,
+          "ttft_ms_p99": 60419.2
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 28043.7,
+          "ttft_ms_p99": 59182.7
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29340.5,
+          "ttft_ms_p99": 57627.4
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 28189.4,
+          "ttft_ms_p99": 59639.5
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29448.8,
+          "ttft_ms_p99": 57746.5
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29401.0,
+          "ttft_ms_p99": 59412.3
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 30195.0,
+          "ttft_ms_p99": 59654.7
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 52.5,
+          "tokens_out": 3150,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 28708.4,
+          "ttft_ms_p99": 61710.6
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29261.0,
+          "ttft_ms_p99": 57521.0
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 28671.7,
+          "ttft_ms_p99": 59379.4
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29328.2,
+          "ttft_ms_p99": 58135.7
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29288.6,
+          "ttft_ms_p99": 59155.2
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 30034.4,
+          "ttft_ms_p99": 59361.4
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29549.3,
+          "ttft_ms_p99": 60892.2
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 27805.1,
+          "ttft_ms_p99": 59434.7
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29057.9,
+          "ttft_ms_p99": 57483.4
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 28687.2,
+          "ttft_ms_p99": 58861.7
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29461.9,
+          "ttft_ms_p99": 58274.5
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 52.5,
+          "tokens_out": 3150,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 29594.7,
+          "ttft_ms_p99": 60361.7
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29759.0,
+          "ttft_ms_p99": 60532.0
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 28981.0,
+          "ttft_ms_p99": 59814.7
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 28908.4,
+          "ttft_ms_p99": 57867.1
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29645.0,
+          "ttft_ms_p99": 58891.7
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 28959.1,
+          "ttft_ms_p99": 58359.7
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29113.1,
+          "ttft_ms_p99": 59004.5
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29754.0,
+          "ttft_ms_p99": 59176.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 28.1,
+      "throttle_ratio": 0.499,
+      "throttle_onset_minute": 2.0,
+      "ttft_p99_drift_ms": -2679.9
+    },
+    "online": {
+      "sla_ttft_ms": 5000,
+      "max_valid_qps": 0.0,
+      "results_by_qps": [
+        {
+          "target_qps": 0.5,
+          "achieved_qps": 0.5,
+          "ttft_ms_p50": 293244.79,
+          "ttft_ms_p90": 524501.19,
+          "ttft_ms_p99": 574243.71,
+          "tpot_ms_p50": 138.81,
+          "tpot_ms_p90": 212.31,
+          "tpot_ms_p99": 222.29,
+          "elapsed_seconds_median": 785.5,
+          "sla_met": false
+        },
+        {
+          "target_qps": 1,
+          "achieved_qps": 1.0,
+          "ttft_ms_p50": 332319.49,
+          "ttft_ms_p90": 588621.66,
+          "ttft_ms_p99": 661976.7,
+          "tpot_ms_p50": 138.95,
+          "tpot_ms_p90": 212.28,
+          "tpot_ms_p99": 222.24,
+          "elapsed_seconds_median": 786.2,
+          "sla_met": false
+        },
+        {
+          "target_qps": 2,
+          "achieved_qps": 2.0,
+          "ttft_ms_p50": 366180.69,
+          "ttft_ms_p90": 646640.94,
+          "ttft_ms_p99": 724071.12,
+          "tpot_ms_p50": 138.98,
+          "tpot_ms_p90": 212.36,
+          "tpot_ms_p99": 222.33,
+          "elapsed_seconds_median": 784.0,
+          "sla_met": false
+        }
+      ]
+    },
+    "speculative": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 24.63,
+          "throughput_tokens_per_sec_per_chip": 24.63,
+          "elapsed_seconds_median": 521.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 24.62,
+          "throughput_tokens_per_sec_per_chip": 24.62,
+          "elapsed_seconds_median": 521.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.57,
+    "baseline_delta": 0.01,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "08:36:51",
+    "run_id": "4974e840",
+    "run_name": "nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T07:55:51.379978+00:00",
+    "benchmark_end_time": "2026-04-30T08:36:51.414672+00:00",
+    "benchmark_elapsed_minutes": 243.7,
+    "model_load_seconds": 54.5,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'interactive', 'sustained', 'online', 'speculative'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/offline",
+      "interactive": "results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/interactive",
+      "sustained": "results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/sustained",
+      "online": "results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/online",
+      "speculative": "results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/speculative"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/speculative/result.json b/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/speculative/result.json
new file mode 100644
index 00000000..18217c34
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/speculative/result.json
@@ -0,0 +1,154 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T07:51:16.080658+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPIX\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPIX\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "speculative",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "speculative": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 24.63,
+          "throughput_tokens_per_sec_per_chip": 24.63,
+          "elapsed_seconds_median": 521.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 24.62,
+          "throughput_tokens_per_sec_per_chip": 24.62,
+          "elapsed_seconds_median": 521.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "12:07:30",
+    "run_id": "4974e840",
+    "run_name": "nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T11:15:15.959930+00:00",
+    "benchmark_end_time": "2026-04-30T12:07:30.303239+00:00",
+    "benchmark_elapsed_minutes": 52.2,
+    "model_load_seconds": 167.3
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/sustained/result.json b/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/sustained/result.json
new file mode 100644
index 00000000..3afc6054
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840/sustained/result.json
@@ -0,0 +1,428 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-04-30T07:51:16.080658+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPIX\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPIX\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 26412.4,
+          "ttft_ms_p99": 43394.0
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29065.7,
+          "ttft_ms_p99": 61856.6
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 30309.3,
+          "ttft_ms_p99": 59762.3
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29560.5,
+          "ttft_ms_p99": 60419.2
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 28043.7,
+          "ttft_ms_p99": 59182.7
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29340.5,
+          "ttft_ms_p99": 57627.4
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 28189.4,
+          "ttft_ms_p99": 59639.5
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29448.8,
+          "ttft_ms_p99": 57746.5
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29401.0,
+          "ttft_ms_p99": 59412.3
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 30195.0,
+          "ttft_ms_p99": 59654.7
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 52.5,
+          "tokens_out": 3150,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 28708.4,
+          "ttft_ms_p99": 61710.6
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29261.0,
+          "ttft_ms_p99": 57521.0
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 28671.7,
+          "ttft_ms_p99": 59379.4
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29328.2,
+          "ttft_ms_p99": 58135.7
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29288.6,
+          "ttft_ms_p99": 59155.2
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 30034.4,
+          "ttft_ms_p99": 59361.4
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29549.3,
+          "ttft_ms_p99": 60892.2
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 27805.1,
+          "ttft_ms_p99": 59434.7
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29057.9,
+          "ttft_ms_p99": 57483.4
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 28687.2,
+          "ttft_ms_p99": 58861.7
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29461.9,
+          "ttft_ms_p99": 58274.5
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 52.5,
+          "tokens_out": 3150,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 29594.7,
+          "ttft_ms_p99": 60361.7
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29759.0,
+          "ttft_ms_p99": 60532.0
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 28981.0,
+          "ttft_ms_p99": 59814.7
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 28908.4,
+          "ttft_ms_p99": 57867.1
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29645.0,
+          "ttft_ms_p99": 58891.7
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 28959.1,
+          "ttft_ms_p99": 58359.7
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.3,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29113.1,
+          "ttft_ms_p99": 59004.5
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 26.2,
+          "tokens_out": 1575,
+          "tokens_in": 0,
+          "requests_completed": 7,
+          "ttft_ms_p50": 29754.0,
+          "ttft_ms_p99": 59176.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 28.1,
+      "throttle_ratio": 0.499,
+      "throttle_onset_minute": 2.0,
+      "ttft_p99_drift_ms": -2679.9
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-04-30",
+    "time": "09:51:34",
+    "run_id": "4974e840",
+    "run_name": "nvidia_rtx_a6000x1_suite_D_nvidia_sglang_c43a8309_4974e840",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-04-30T09:20:34.605248+00:00",
+    "benchmark_end_time": "2026-04-30T09:51:34.624450+00:00",
+    "benchmark_elapsed_minutes": 31.0,
+    "model_load_seconds": 53.7
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/accuracy/accuracy.json b/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/accuracy/accuracy.json
new file mode 100644
index 00000000..8825ca96
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.38,
+  "baseline_delta": 0.0,
+  "valid": true,
+  "framework": "SGLang",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same SGLang instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/env_info.json b/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/env_info.json
new file mode 100644
index 00000000..a6bc323d
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/env_info.json
@@ -0,0 +1,53 @@
+{
+  "collected_at": "2026-05-06T11:15:40.197436+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA RTX A6000",
+      "vendor": "NVIDIA",
+      "memory_gb": 48.0,
+      "driver_version": "565.57.01",
+      "firmware_version": null,
+      "compute_capability": "8.6",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+    "physical_cores": 76,
+    "logical_cores": 152,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.5,
+  "pcie_generation": "PCIe Gen 1",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_2",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_3",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.10.20",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 12.8",
+  "pytorch_version": "2.9.1+cu128"
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/interactive/result.json b/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/interactive/result.json
new file mode 100644
index 00000000..b9400a20
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/interactive/result.json
@@ -0,0 +1,136 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-06T11:15:40.197436+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 17.75,
+      "ttft_ms_p90": 19.4,
+      "ttft_ms_p99": 25.25,
+      "tpot_ms_p50": 2.22,
+      "tpot_ms_p90": 2.25,
+      "tpot_ms_p99": 2.33,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 65.7
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-06",
+    "time": "11:27:14",
+    "run_id": "68ab5b50",
+    "run_name": "nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-06T11:23:54.684627+00:00",
+    "benchmark_end_time": "2026-05-06T11:27:14.782883+00:00",
+    "benchmark_elapsed_minutes": 3.3,
+    "model_load_seconds": 38.5
+  }
+}
\ No newline at end of file
diff --git a/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/offline/result.json b/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/offline/result.json
similarity index 53%
rename from results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/offline/result.json
rename to results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/offline/result.json
index 24981672..81aa678f 100644
--- a/results/community/mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d/offline/result.json
+++ b/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/offline/result.json
@@ -1,39 +1,39 @@
 {
   "schema_version": "1.0",
   "suite_id": "suite_F",
-  "implementation_id": "moorethreads_vllm_musa_f2f6f965",
+  "implementation_id": "nvidia_sglang_c43a8309",
   "chip": {
-    "name": "MTT S4000",
-    "vendor": "Moore Threads",
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
     "count": 1,
     "memory_gb": 48.0,
     "interconnect_intra_node": null,
     "interconnect_inter_node": null
   },
   "environment": {
-    "collected_at": "2026-05-18T08:40:55.208034+00:00",
+    "collected_at": "2026-05-06T11:15:40.197436+00:00",
     "accelerators": [
       {
         "index": 0,
-        "name": "MTT S4000",
-        "vendor": "Moore Threads",
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
         "memory_gb": 48.0,
-        "driver_version": "2.7.0",
+        "driver_version": "565.57.01",
         "firmware_version": null,
+        "compute_capability": "8.6",
         "supports_bf16": true
       }
     ],
-    "accelerator_platform": "moorethreads",
-    "accelerator_topology": null,
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
     "intra_node_interconnect": null,
     "cpu": {
-      "model": "Intel(R) Xeon(R) Gold 6430",
-      "physical_cores": 64,
-      "logical_cores": 128,
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
       "numa_nodes": 2
     },
     "system_memory_gb": 1007.5,
-    "pcie_generation": "PCIe 16x/16x",
+    "pcie_generation": "PCIe Gen 1",
     "cpu_accelerator_bandwidth_gbs": null,
     "network_interfaces": [
       {
@@ -47,24 +47,29 @@
         "bandwidth_gbps": null
       },
       {
-        "name": "mlx5_bond_0",
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
         "type": "InfiniBand/RoCE",
         "bandwidth_gbps": null
       }
     ],
-    "os": "Ubuntu Jammy Jellyfish (development branch)",
-    "python_version": "3.10.8",
-    "kernel_version": "5.15.0-105-generic",
-    "runtime_version": "Moore Threads Driver 2.7.0",
-    "pytorch_version": "2.2.0"
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
   },
   "software": {
-    "framework": "vllm-musa",
-    "framework_version": "0.4.2",
-    "driver_version": "2.7.0",
-    "runtime_version": "Moore Threads Driver 2.7.0",
-    "os": "Ubuntu Jammy Jellyfish (development branch)",
-    "python_version": "3.10.8"
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
   },
   "model": {
     "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
@@ -75,7 +80,7 @@
     "architecture": "dense",
     "parameter_count_b": 0.5,
     "precision": "BF16",
-    "effective_dtype": "float16",
+    "effective_dtype": "bfloat16",
     "quantization_method": null,
     "model_format": "HuggingFace original"
   },
@@ -97,10 +102,9 @@
       "results_by_concurrency": [
         {
           "client_concurrency": 4,
-          "throughput_tokens_per_sec": 1994.51,
-          "throughput_tokens_per_sec_per_chip": 1994.51,
-          "throughput_tokens_per_sec_total": 3642.41,
-          "elapsed_seconds_median": 12.5,
+          "throughput_tokens_per_sec": 10477.62,
+          "throughput_tokens_per_sec_per_chip": 10477.62,
+          "elapsed_seconds_median": 4.0,
           "peak_memory_gb": null,
           "power_watts_avg": null,
           "power_watts_peak": null,
@@ -110,10 +114,9 @@
         },
         {
           "client_concurrency": 16,
-          "throughput_tokens_per_sec": 1998.44,
-          "throughput_tokens_per_sec_per_chip": 1998.44,
-          "throughput_tokens_per_sec_total": 3649.59,
-          "elapsed_seconds_median": 12.5,
+          "throughput_tokens_per_sec": 11554.68,
+          "throughput_tokens_per_sec_per_chip": 11554.68,
+          "elapsed_seconds_median": 3.6,
           "peak_memory_gb": null,
           "power_watts_avg": null,
           "power_watts_peak": null,
@@ -123,10 +126,9 @@
         },
         {
           "client_concurrency": 64,
-          "throughput_tokens_per_sec": 2004.02,
-          "throughput_tokens_per_sec_per_chip": 2004.02,
-          "throughput_tokens_per_sec_total": 3659.77,
-          "elapsed_seconds_median": 12.5,
+          "throughput_tokens_per_sec": 11509.83,
+          "throughput_tokens_per_sec_per_chip": 11509.83,
+          "elapsed_seconds_median": 3.6,
           "peak_memory_gb": null,
           "power_watts_avg": null,
           "power_watts_peak": null,
@@ -144,21 +146,21 @@
     "notes": "Run --scenario accuracy to check model accuracy."
   },
   "meta": {
-    "submitted_by": "JuhaoLiang1997",
+    "submitted_by": "Gong-K",
     "submission_type": "individual",
-    "date": "2026-05-18",
-    "time": "16:48:27",
-    "run_id": "4f66d29d",
-    "run_name": "mtt_s4000x1_suite_F_moorethreads_vllm_musa_f2f6f965_4f66d29d",
+    "date": "2026-05-06",
+    "time": "11:19:35",
+    "run_id": "68ab5b50",
+    "run_name": "nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50",
     "flagged": null,
-    "reproduce_script": "runners/moorethreads_vllm_musa_f2f6f965/runner.py",
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
     "env_info_file": "../env_info.json",
     "log_file": "run.log",
     "samples_file": "samples.jsonl",
     "notes": null,
-    "benchmark_start_time": "2026-05-18T08:45:57.373367+00:00",
-    "benchmark_end_time": "2026-05-18T08:48:27.423209+00:00",
-    "benchmark_elapsed_minutes": 2.5,
-    "model_load_seconds": 146.8
+    "benchmark_start_time": "2026-05-06T11:18:46.260573+00:00",
+    "benchmark_end_time": "2026-05-06T11:19:35.049014+00:00",
+    "benchmark_elapsed_minutes": 0.8,
+    "model_load_seconds": 41.3
   }
 }
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/online/result.json b/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/online/result.json
new file mode 100644
index 00000000..e69e1438
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/online/result.json
@@ -0,0 +1,156 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-06T11:15:40.197436+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 40,
+      "results_by_qps": [
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 21.59,
+          "ttft_ms_p90": 34.08,
+          "ttft_ms_p99": 1763.06,
+          "tpot_ms_p50": 3.15,
+          "tpot_ms_p90": 3.63,
+          "tpot_ms_p99": 6.7,
+          "elapsed_seconds_median": 32.0,
+          "sla_met": false
+        },
+        {
+          "target_qps": 40,
+          "achieved_qps": 40.0,
+          "ttft_ms_p50": 38.68,
+          "ttft_ms_p90": 45.67,
+          "ttft_ms_p99": 51.86,
+          "tpot_ms_p50": 21.19,
+          "tpot_ms_p90": 25.29,
+          "tpot_ms_p99": 33.55,
+          "elapsed_seconds_median": 10.0,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-06",
+    "time": "11:22:46",
+    "run_id": "68ab5b50",
+    "run_name": "nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-06T11:20:41.202166+00:00",
+    "benchmark_end_time": "2026-05-06T11:22:46.287116+00:00",
+    "benchmark_elapsed_minutes": 2.1,
+    "model_load_seconds": 37.6
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/result.json b/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/result.json
new file mode 100644
index 00000000..4c34f2be
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/result.json
@@ -0,0 +1,371 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-06T11:15:40.197436+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "interactive",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 10477.62,
+          "throughput_tokens_per_sec_per_chip": 10477.62,
+          "elapsed_seconds_median": 4.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 11554.68,
+          "throughput_tokens_per_sec_per_chip": 11554.68,
+          "elapsed_seconds_median": 3.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 11509.83,
+          "throughput_tokens_per_sec_per_chip": 11509.83,
+          "elapsed_seconds_median": 3.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 40,
+      "results_by_qps": [
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 21.59,
+          "ttft_ms_p90": 34.08,
+          "ttft_ms_p99": 1763.06,
+          "tpot_ms_p50": 3.15,
+          "tpot_ms_p90": 3.63,
+          "tpot_ms_p99": 6.7,
+          "elapsed_seconds_median": 32.0,
+          "sla_met": false
+        },
+        {
+          "target_qps": 40,
+          "achieved_qps": 40.0,
+          "ttft_ms_p50": 38.68,
+          "ttft_ms_p90": 45.67,
+          "ttft_ms_p99": 51.86,
+          "tpot_ms_p50": 21.19,
+          "tpot_ms_p90": 25.29,
+          "tpot_ms_p99": 33.55,
+          "elapsed_seconds_median": 10.0,
+          "sla_met": true
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 17.75,
+      "ttft_ms_p90": 19.4,
+      "ttft_ms_p99": 25.25,
+      "tpot_ms_p50": 2.22,
+      "tpot_ms_p90": 2.25,
+      "tpot_ms_p99": 2.33,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 65.7
+    },
+    "sustained": {
+      "sustained_concurrency": 32,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 5992.8,
+          "tokens_out": 359668,
+          "tokens_in": 0,
+          "requests_completed": 1933,
+          "ttft_ms_p50": 28.4,
+          "ttft_ms_p99": 2491.1
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6297.3,
+          "tokens_out": 378010,
+          "tokens_in": 0,
+          "requests_completed": 2026,
+          "ttft_ms_p50": 28.1,
+          "ttft_ms_p99": 44.2
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6346.4,
+          "tokens_out": 380716,
+          "tokens_in": 0,
+          "requests_completed": 2040,
+          "ttft_ms_p50": 28.2,
+          "ttft_ms_p99": 44.9
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6281.5,
+          "tokens_out": 376749,
+          "tokens_in": 0,
+          "requests_completed": 2020,
+          "ttft_ms_p50": 27.0,
+          "ttft_ms_p99": 44.6
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6323.2,
+          "tokens_out": 379405,
+          "tokens_in": 0,
+          "requests_completed": 2035,
+          "ttft_ms_p50": 28.2,
+          "ttft_ms_p99": 43.3
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6294.6,
+          "tokens_out": 377666,
+          "tokens_in": 0,
+          "requests_completed": 2028,
+          "ttft_ms_p50": 27.9,
+          "ttft_ms_p99": 43.3
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6314.8,
+          "tokens_out": 379013,
+          "tokens_in": 0,
+          "requests_completed": 2037,
+          "ttft_ms_p50": 27.6,
+          "ttft_ms_p99": 45.0
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6355.4,
+          "tokens_out": 381323,
+          "tokens_in": 0,
+          "requests_completed": 2042,
+          "ttft_ms_p50": 27.8,
+          "ttft_ms_p99": 45.2
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6282.5,
+          "tokens_out": 377022,
+          "tokens_in": 0,
+          "requests_completed": 2016,
+          "ttft_ms_p50": 27.6,
+          "ttft_ms_p99": 45.2
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6287.5,
+          "tokens_out": 377177,
+          "tokens_in": 0,
+          "requests_completed": 2026,
+          "ttft_ms_p50": 27.0,
+          "ttft_ms_p99": 43.0
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6325.7,
+          "tokens_out": 379662,
+          "tokens_in": 0,
+          "requests_completed": 2039,
+          "ttft_ms_p50": 28.2,
+          "ttft_ms_p99": 44.3
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6366.5,
+          "tokens_out": 381732,
+          "tokens_in": 0,
+          "requests_completed": 2046,
+          "ttft_ms_p50": 27.1,
+          "ttft_ms_p99": 44.5
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6358.8,
+          "tokens_out": 381608,
+          "tokens_in": 0,
+          "requests_completed": 2043,
+          "ttft_ms_p50": 27.8,
+          "ttft_ms_p99": 43.4
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6361.9,
+          "tokens_out": 381743,
+          "tokens_in": 0,
+          "requests_completed": 2048,
+          "ttft_ms_p50": 27.1,
+          "ttft_ms_p99": 43.4
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 6299.2,
+      "throttle_ratio": 0.941,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -2447.7
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.38,
+    "baseline_delta": 0.0,
+    "valid": true,
+    "framework": "SGLang",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check — used same SGLang instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-06",
+    "time": "11:19:35",
+    "run_id": "68ab5b50",
+    "run_name": "nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-06T11:18:46.260573+00:00",
+    "benchmark_end_time": "2026-05-06T11:19:35.049014+00:00",
+    "benchmark_elapsed_minutes": 21.2,
+    "model_load_seconds": 41.3,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'interactive', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/offline",
+      "online": "results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/online",
+      "interactive": "results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/interactive",
+      "sustained": "results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/sustained/result.json b/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/sustained/result.json
new file mode 100644
index 00000000..23e00da2
--- /dev/null
+++ b/results/community/nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50/sustained/result.json
@@ -0,0 +1,278 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_sglang_c43a8309",
+  "chip": {
+    "name": "NVIDIA RTX A6000",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 48.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-06T11:15:40.197436+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA RTX A6000",
+        "vendor": "NVIDIA",
+        "memory_gb": 48.0,
+        "driver_version": "565.57.01",
+        "firmware_version": null,
+        "compute_capability": "8.6",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tNIC3\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tNODE\tPXB\tSYS\tSYS\t0-37,76-113\t0\t\tN/A\nNIC0\tNODE\t X \tNODE\tSYS\tSYS\t\t\t\t\nNIC1\tPXB\tNODE\t X \tSYS\tSYS\t\t\t\t\nNIC2\tSYS\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC3\tSYS\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n  NIC3: mlx5_3\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
+      "physical_cores": 76,
+      "logical_cores": 152,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.5,
+    "pcie_generation": "PCIe Gen 1",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_3",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 12.8",
+    "pytorch_version": "2.9.1+cu128"
+  },
+  "software": {
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "driver_version": "565.57.01",
+    "runtime_version": "CUDA 12.8",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.10.20"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 32,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 5992.8,
+          "tokens_out": 359668,
+          "tokens_in": 0,
+          "requests_completed": 1933,
+          "ttft_ms_p50": 28.4,
+          "ttft_ms_p99": 2491.1
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6297.3,
+          "tokens_out": 378010,
+          "tokens_in": 0,
+          "requests_completed": 2026,
+          "ttft_ms_p50": 28.1,
+          "ttft_ms_p99": 44.2
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6346.4,
+          "tokens_out": 380716,
+          "tokens_in": 0,
+          "requests_completed": 2040,
+          "ttft_ms_p50": 28.2,
+          "ttft_ms_p99": 44.9
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6281.5,
+          "tokens_out": 376749,
+          "tokens_in": 0,
+          "requests_completed": 2020,
+          "ttft_ms_p50": 27.0,
+          "ttft_ms_p99": 44.6
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6323.2,
+          "tokens_out": 379405,
+          "tokens_in": 0,
+          "requests_completed": 2035,
+          "ttft_ms_p50": 28.2,
+          "ttft_ms_p99": 43.3
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6294.6,
+          "tokens_out": 377666,
+          "tokens_in": 0,
+          "requests_completed": 2028,
+          "ttft_ms_p50": 27.9,
+          "ttft_ms_p99": 43.3
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6314.8,
+          "tokens_out": 379013,
+          "tokens_in": 0,
+          "requests_completed": 2037,
+          "ttft_ms_p50": 27.6,
+          "ttft_ms_p99": 45.0
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6355.4,
+          "tokens_out": 381323,
+          "tokens_in": 0,
+          "requests_completed": 2042,
+          "ttft_ms_p50": 27.8,
+          "ttft_ms_p99": 45.2
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6282.5,
+          "tokens_out": 377022,
+          "tokens_in": 0,
+          "requests_completed": 2016,
+          "ttft_ms_p50": 27.6,
+          "ttft_ms_p99": 45.2
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6287.5,
+          "tokens_out": 377177,
+          "tokens_in": 0,
+          "requests_completed": 2026,
+          "ttft_ms_p50": 27.0,
+          "ttft_ms_p99": 43.0
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6325.7,
+          "tokens_out": 379662,
+          "tokens_in": 0,
+          "requests_completed": 2039,
+          "ttft_ms_p50": 28.2,
+          "ttft_ms_p99": 44.3
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6366.5,
+          "tokens_out": 381732,
+          "tokens_in": 0,
+          "requests_completed": 2046,
+          "ttft_ms_p50": 27.1,
+          "ttft_ms_p99": 44.5
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6358.8,
+          "tokens_out": 381608,
+          "tokens_in": 0,
+          "requests_completed": 2043,
+          "ttft_ms_p50": 27.8,
+          "ttft_ms_p99": 43.4
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 6361.9,
+          "tokens_out": 381743,
+          "tokens_in": 0,
+          "requests_completed": 2048,
+          "ttft_ms_p50": 27.1,
+          "ttft_ms_p99": 43.4
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 6299.2,
+      "throttle_ratio": 0.941,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -2447.7
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "Gong-K",
+    "submission_type": "individual",
+    "date": "2026-05-06",
+    "time": "11:43:22",
+    "run_id": "68ab5b50",
+    "run_name": "nvidia_rtx_a6000x1_suite_F_nvidia_sglang_c43a8309_68ab5b50",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-06T11:28:21.385518+00:00",
+    "benchmark_end_time": "2026-05-06T11:43:22.981588+00:00",
+    "benchmark_elapsed_minutes": 15.0,
+    "model_load_seconds": 39.2
+  }
+}
\ No newline at end of file
diff --git a/results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/accuracy/accuracy.json b/results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/accuracy/accuracy.json
deleted file mode 100644
index 304c3db9..00000000
--- a/results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/accuracy/accuracy.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "subset_score": 0.61,
-  "baseline_delta": null,
-  "valid": true,
-  "framework": "1Cat-vLLM",
-  "precision": "FP16",
-  "notes": "Integrated accuracy check \u2014 used same 1Cat-vLLM instance as benchmark."
-}
\ No newline at end of file
diff --git a/results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/env_info.json b/results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/env_info.json
deleted file mode 100644
index 52c2fdcb..00000000
--- a/results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/env_info.json
+++ /dev/null
@@ -1,33 +0,0 @@
-{
-  "collected_at": "2026-05-18T09:38:50.346241+00:00",
-  "accelerators": [
-    {
-      "index": 0,
-      "name": "Tesla V100-PCIE-32GB",
-      "vendor": "NVIDIA",
-      "memory_gb": 32.0,
-      "driver_version": "580.82.07",
-      "firmware_version": null,
-      "compute_capability": "7.0",
-      "supports_bf16": false
-    }
-  ],
-  "accelerator_platform": "nvidia",
-  "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-25\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
-  "intra_node_interconnect": null,
-  "cpu": {
-    "model": "Intel(R) Xeon(R) Gold 6230 CPU @ 2.10GHz",
-    "physical_cores": 26,
-    "logical_cores": 52,
-    "numa_nodes": 2
-  },
-  "system_memory_gb": 214.5,
-  "pcie_generation": "PCIe Gen 3",
-  "cpu_accelerator_bandwidth_gbs": null,
-  "network_interfaces": null,
-  "os": "Ubuntu 22.04.5 LTS",
-  "python_version": "3.12.13",
-  "kernel_version": "5.4.0-149-generic",
-  "runtime_version": "CUDA 12.8",
-  "pytorch_version": "2.9.1+cu128"
-}
\ No newline at end of file
diff --git a/results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/online/result.json b/results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/online/result.json
deleted file mode 100644
index 66aeb486..00000000
--- a/results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/online/result.json
+++ /dev/null
@@ -1,158 +0,0 @@
-{
-  "schema_version": "1.0",
-  "suite_id": "suite_A",
-  "implementation_id": "nvidia_onecat_vllm_12a253c2",
-  "chip": {
-    "name": "Tesla V100-PCIE-32GB",
-    "vendor": "NVIDIA",
-    "count": 1,
-    "memory_gb": 32.0,
-    "interconnect_intra_node": null,
-    "interconnect_inter_node": null
-  },
-  "environment": {
-    "collected_at": "2026-05-18T09:38:50.346241+00:00",
-    "accelerators": [
-      {
-        "index": 0,
-        "name": "Tesla V100-PCIE-32GB",
-        "vendor": "NVIDIA",
-        "memory_gb": 32.0,
-        "driver_version": "580.82.07",
-        "firmware_version": null,
-        "compute_capability": "7.0",
-        "supports_bf16": false
-      }
-    ],
-    "accelerator_platform": "nvidia",
-    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-25\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
-    "intra_node_interconnect": null,
-    "cpu": {
-      "model": "Intel(R) Xeon(R) Gold 6230 CPU @ 2.10GHz",
-      "physical_cores": 26,
-      "logical_cores": 52,
-      "numa_nodes": 2
-    },
-    "system_memory_gb": 214.5,
-    "pcie_generation": "PCIe Gen 3",
-    "cpu_accelerator_bandwidth_gbs": null,
-    "network_interfaces": null,
-    "os": "Ubuntu 22.04.5 LTS",
-    "python_version": "3.12.13",
-    "kernel_version": "5.4.0-149-generic",
-    "runtime_version": "CUDA 12.8",
-    "pytorch_version": "2.9.1+cu128"
-  },
-  "software": {
-    "framework": "1Cat-vLLM",
-    "framework_version": "1.0.0+flash_attn_v100-1.0.0",
-    "driver_version": "580.82.07",
-    "runtime_version": "CUDA 12.8",
-    "os": "Ubuntu 22.04.5 LTS",
-    "python_version": "3.12.13"
-  },
-  "model": {
-    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
-    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
-    "model_name": null,
-    "model_note": null,
-    "model_source": "local",
-    "architecture": "dense",
-    "parameter_count_b": 8.0,
-    "precision": "FP16",
-    "effective_dtype": null,
-    "quantization_method": null,
-    "model_format": "HuggingFace original"
-  },
-  "task": {
-    "scenario": "online",
-    "num_runs": 3,
-    "warmup_runs": 1,
-    "parallelism": {
-      "tensor_parallel_size": 1,
-      "pipeline_parallel_size": 1,
-      "expert_parallel_size": 1,
-      "data_parallel_size": 1
-    },
-    "extra_config": {
-      "tensor_parallel_size": 1,
-      "enforce_eager": false,
-      "max_num_seqs": 512,
-      "gpu_memory_utilization": 0.9,
-      "engine_kwargs": {
-        "enable_prefix_caching": false,
-        "enable_chunked_prefill": false,
-        "kv_cache_auto_trim_ratio": 0.0
-      }
-    },
-    "runtime_metrics": null
-  },
-  "metrics": {
-    "online": {
-      "sla_ttft_ms": 500,
-      "max_valid_qps": 0.0,
-      "results_by_qps": [
-        {
-          "target_qps": 5,
-          "achieved_qps": 5.0,
-          "ttft_ms_p50": 113119.0,
-          "ttft_ms_p90": 832380.28,
-          "ttft_ms_p99": 872316.46,
-          "tpot_ms_p50": 1274.2,
-          "tpot_ms_p90": 1801.34,
-          "tpot_ms_p99": 4289.09,
-          "elapsed_seconds_median": 968.7,
-          "sla_met": false
-        },
-        {
-          "target_qps": 25,
-          "achieved_qps": 25.0,
-          "ttft_ms_p50": 130646.03,
-          "ttft_ms_p90": 865522.04,
-          "ttft_ms_p99": 901339.26,
-          "tpot_ms_p50": 1262.15,
-          "tpot_ms_p90": 1785.02,
-          "tpot_ms_p99": 4287.18,
-          "elapsed_seconds_median": 936.5,
-          "sla_met": false
-        },
-        {
-          "target_qps": 100,
-          "achieved_qps": 100.0,
-          "ttft_ms_p50": 132710.0,
-          "ttft_ms_p90": 863880.66,
-          "ttft_ms_p99": 888527.06,
-          "tpot_ms_p50": 1248.86,
-          "tpot_ms_p90": 1740.58,
-          "tpot_ms_p99": 4225.34,
-          "elapsed_seconds_median": 921.5,
-          "sla_met": false
-        }
-      ]
-    }
-  },
-  "accuracy": {
-    "subset_score": null,
-    "baseline_delta": null,
-    "valid": false,
-    "notes": "Run --scenario accuracy to check model accuracy."
-  },
-  "meta": {
-    "submitted_by": "JuhaoLiang1997",
-    "submission_type": "individual",
-    "date": "2026-05-18",
-    "time": "20:25:39",
-    "run_id": "4e0e6eba",
-    "run_name": "tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba",
-    "flagged": null,
-    "reproduce_script": "runners/nvidia_onecat_vllm_12a253c2/runner.py",
-    "env_info_file": "../env_info.json",
-    "log_file": "run.log",
-    "samples_file": "samples.jsonl",
-    "notes": null,
-    "benchmark_start_time": "2026-05-18T10:04:46.235502+00:00",
-    "benchmark_end_time": "2026-05-18T12:25:39.450279+00:00",
-    "benchmark_elapsed_minutes": 140.9,
-    "model_load_seconds": 45.2
-  }
-}
\ No newline at end of file
diff --git a/results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/result.json b/results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/result.json
deleted file mode 100644
index 07930da0..00000000
--- a/results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/result.json
+++ /dev/null
@@ -1,210 +0,0 @@
-{
-  "schema_version": "1.0",
-  "suite_id": "suite_A",
-  "implementation_id": "nvidia_onecat_vllm_12a253c2",
-  "chip": {
-    "name": "Tesla V100-PCIE-32GB",
-    "vendor": "NVIDIA",
-    "count": 1,
-    "memory_gb": 32.0,
-    "interconnect_intra_node": null,
-    "interconnect_inter_node": null
-  },
-  "environment": {
-    "collected_at": "2026-05-18T09:38:50.346241+00:00",
-    "accelerators": [
-      {
-        "index": 0,
-        "name": "Tesla V100-PCIE-32GB",
-        "vendor": "NVIDIA",
-        "memory_gb": 32.0,
-        "driver_version": "580.82.07",
-        "firmware_version": null,
-        "compute_capability": "7.0",
-        "supports_bf16": false
-      }
-    ],
-    "accelerator_platform": "nvidia",
-    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-25\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
-    "intra_node_interconnect": null,
-    "cpu": {
-      "model": "Intel(R) Xeon(R) Gold 6230 CPU @ 2.10GHz",
-      "physical_cores": 26,
-      "logical_cores": 52,
-      "numa_nodes": 2
-    },
-    "system_memory_gb": 214.5,
-    "pcie_generation": "PCIe Gen 3",
-    "cpu_accelerator_bandwidth_gbs": null,
-    "network_interfaces": null,
-    "os": "Ubuntu 22.04.5 LTS",
-    "python_version": "3.12.13",
-    "kernel_version": "5.4.0-149-generic",
-    "runtime_version": "CUDA 12.8",
-    "pytorch_version": "2.9.1+cu128"
-  },
-  "software": {
-    "framework": "1Cat-vLLM",
-    "framework_version": "1.0.0+flash_attn_v100-1.0.0",
-    "driver_version": "580.82.07",
-    "runtime_version": "CUDA 12.8",
-    "os": "Ubuntu 22.04.5 LTS",
-    "python_version": "3.12.13"
-  },
-  "model": {
-    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
-    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
-    "model_name": null,
-    "model_note": null,
-    "model_source": "local",
-    "architecture": "dense",
-    "parameter_count_b": 8.0,
-    "precision": "FP16",
-    "effective_dtype": "float16",
-    "quantization_method": null,
-    "model_format": "HuggingFace original"
-  },
-  "task": {
-    "scenarios_run": [
-      "offline",
-      "online"
-    ],
-    "parallelism": {
-      "tensor_parallel_size": 1,
-      "pipeline_parallel_size": 1,
-      "expert_parallel_size": 1,
-      "data_parallel_size": 1
-    },
-    "num_runs": 3,
-    "extra_config": {
-      "tensor_parallel_size": 1,
-      "enforce_eager": false,
-      "max_num_seqs": 512,
-      "gpu_memory_utilization": 0.9,
-      "engine_kwargs": {
-        "enable_prefix_caching": false,
-        "enable_chunked_prefill": false,
-        "kv_cache_auto_trim_ratio": 0.0
-      }
-    }
-  },
-  "metrics": {
-    "derived": {},
-    "offline": {
-      "results_by_concurrency": [
-        {
-          "client_concurrency": 8,
-          "throughput_tokens_per_sec": 671.32,
-          "throughput_tokens_per_sec_per_chip": 671.32,
-          "throughput_tokens_per_sec_total": 1168.67,
-          "elapsed_seconds_median": 51.6,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        },
-        {
-          "client_concurrency": 32,
-          "throughput_tokens_per_sec": 670.99,
-          "throughput_tokens_per_sec_per_chip": 670.99,
-          "throughput_tokens_per_sec_total": 1168.09,
-          "elapsed_seconds_median": 51.6,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        },
-        {
-          "client_concurrency": 128,
-          "throughput_tokens_per_sec": 671.43,
-          "throughput_tokens_per_sec_per_chip": 671.43,
-          "throughput_tokens_per_sec_total": 1168.44,
-          "elapsed_seconds_median": 51.6,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        }
-      ]
-    },
-    "online": {
-      "sla_ttft_ms": 500,
-      "max_valid_qps": 0.0,
-      "results_by_qps": [
-        {
-          "target_qps": 5,
-          "achieved_qps": 5.0,
-          "ttft_ms_p50": 113119.0,
-          "ttft_ms_p90": 832380.28,
-          "ttft_ms_p99": 872316.46,
-          "tpot_ms_p50": 1274.2,
-          "tpot_ms_p90": 1801.34,
-          "tpot_ms_p99": 4289.09,
-          "elapsed_seconds_median": 968.7,
-          "sla_met": false
-        },
-        {
-          "target_qps": 25,
-          "achieved_qps": 25.0,
-          "ttft_ms_p50": 130646.03,
-          "ttft_ms_p90": 865522.04,
-          "ttft_ms_p99": 901339.26,
-          "tpot_ms_p50": 1262.15,
-          "tpot_ms_p90": 1785.02,
-          "tpot_ms_p99": 4287.18,
-          "elapsed_seconds_median": 936.5,
-          "sla_met": false
-        },
-        {
-          "target_qps": 100,
-          "achieved_qps": 100.0,
-          "ttft_ms_p50": 132710.0,
-          "ttft_ms_p90": 863880.66,
-          "ttft_ms_p99": 888527.06,
-          "tpot_ms_p50": 1248.86,
-          "tpot_ms_p90": 1740.58,
-          "tpot_ms_p99": 4225.34,
-          "elapsed_seconds_median": 921.5,
-          "sla_met": false
-        }
-      ]
-    }
-  },
-  "accuracy": {
-    "subset_score": 0.61,
-    "baseline_delta": null,
-    "valid": true,
-    "framework": "1Cat-vLLM",
-    "precision": "FP16",
-    "notes": "Integrated accuracy check \u2014 used same 1Cat-vLLM instance as benchmark."
-  },
-  "meta": {
-    "submitted_by": "JuhaoLiang1997",
-    "submission_type": "individual",
-    "date": "2026-05-18",
-    "time": "18:03:39",
-    "run_id": "4e0e6eba",
-    "run_name": "tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba",
-    "flagged": null,
-    "reproduce_script": "runners/nvidia_onecat_vllm_12a253c2/runner.py",
-    "env_info_file": "../env_info.json",
-    "log_file": "run.log",
-    "samples_file": "samples.jsonl",
-    "notes": null,
-    "benchmark_start_time": "2026-05-18T09:53:19.928949+00:00",
-    "benchmark_end_time": "2026-05-18T10:03:39.512440+00:00",
-    "benchmark_elapsed_minutes": 151.2,
-    "model_load_seconds": 47.8,
-    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online'] scenarios.",
-    "scenario_dirs": {
-      "offline": "results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/offline",
-      "online": "results/community/tesla_v100_pcie_32gbx1_suite_A_nvidia_onecat_vllm_12a253c2_4e0e6eba/online"
-    }
-  }
-}
\ No newline at end of file
diff --git a/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/accuracy/accuracy.json b/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/accuracy/accuracy.json
deleted file mode 100644
index 94e55472..00000000
--- a/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/accuracy/accuracy.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "subset_score": 0.37,
-  "baseline_delta": 0.0,
-  "valid": true,
-  "framework": "1Cat-vLLM",
-  "precision": "FP16",
-  "notes": "Integrated accuracy check \u2014 used same 1Cat-vLLM instance as benchmark."
-}
\ No newline at end of file
diff --git a/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/env_info.json b/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/env_info.json
deleted file mode 100644
index 1f8b6bd5..00000000
--- a/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/env_info.json
+++ /dev/null
@@ -1,33 +0,0 @@
-{
-  "collected_at": "2026-05-18T12:26:03.593928+00:00",
-  "accelerators": [
-    {
-      "index": 0,
-      "name": "Tesla V100-PCIE-32GB",
-      "vendor": "NVIDIA",
-      "memory_gb": 32.0,
-      "driver_version": "580.82.07",
-      "firmware_version": null,
-      "compute_capability": "7.0",
-      "supports_bf16": false
-    }
-  ],
-  "accelerator_platform": "nvidia",
-  "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-25\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
-  "intra_node_interconnect": null,
-  "cpu": {
-    "model": "Intel(R) Xeon(R) Gold 6230 CPU @ 2.10GHz",
-    "physical_cores": 26,
-    "logical_cores": 52,
-    "numa_nodes": 2
-  },
-  "system_memory_gb": 214.5,
-  "pcie_generation": "PCIe Gen 3",
-  "cpu_accelerator_bandwidth_gbs": null,
-  "network_interfaces": null,
-  "os": "Ubuntu 22.04.5 LTS",
-  "python_version": "3.12.13",
-  "kernel_version": "5.4.0-149-generic",
-  "runtime_version": "CUDA 12.8",
-  "pytorch_version": "2.9.1+cu128"
-}
\ No newline at end of file
diff --git a/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/interactive/result.json b/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/interactive/result.json
deleted file mode 100644
index f017bc27..00000000
--- a/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/interactive/result.json
+++ /dev/null
@@ -1,126 +0,0 @@
-{
-  "schema_version": "1.0",
-  "suite_id": "suite_F",
-  "implementation_id": "nvidia_onecat_vllm_12a253c2",
-  "chip": {
-    "name": "Tesla V100-PCIE-32GB",
-    "vendor": "NVIDIA",
-    "count": 1,
-    "memory_gb": 32.0,
-    "interconnect_intra_node": null,
-    "interconnect_inter_node": null
-  },
-  "environment": {
-    "collected_at": "2026-05-18T12:26:03.593928+00:00",
-    "accelerators": [
-      {
-        "index": 0,
-        "name": "Tesla V100-PCIE-32GB",
-        "vendor": "NVIDIA",
-        "memory_gb": 32.0,
-        "driver_version": "580.82.07",
-        "firmware_version": null,
-        "compute_capability": "7.0",
-        "supports_bf16": false
-      }
-    ],
-    "accelerator_platform": "nvidia",
-    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-25\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
-    "intra_node_interconnect": null,
-    "cpu": {
-      "model": "Intel(R) Xeon(R) Gold 6230 CPU @ 2.10GHz",
-      "physical_cores": 26,
-      "logical_cores": 52,
-      "numa_nodes": 2
-    },
-    "system_memory_gb": 214.5,
-    "pcie_generation": "PCIe Gen 3",
-    "cpu_accelerator_bandwidth_gbs": null,
-    "network_interfaces": null,
-    "os": "Ubuntu 22.04.5 LTS",
-    "python_version": "3.12.13",
-    "kernel_version": "5.4.0-149-generic",
-    "runtime_version": "CUDA 12.8",
-    "pytorch_version": "2.9.1+cu128"
-  },
-  "software": {
-    "framework": "1Cat-vLLM",
-    "framework_version": "1.0.0+flash_attn_v100-1.0.0",
-    "driver_version": "580.82.07",
-    "runtime_version": "CUDA 12.8",
-    "os": "Ubuntu 22.04.5 LTS",
-    "python_version": "3.12.13"
-  },
-  "model": {
-    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
-    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
-    "model_name": null,
-    "model_note": null,
-    "model_source": "local",
-    "architecture": "dense",
-    "parameter_count_b": 0.5,
-    "precision": "FP16",
-    "effective_dtype": null,
-    "quantization_method": null,
-    "model_format": "HuggingFace original"
-  },
-  "task": {
-    "scenario": "interactive",
-    "num_runs": 3,
-    "warmup_runs": 1,
-    "parallelism": {
-      "tensor_parallel_size": 1,
-      "pipeline_parallel_size": 1,
-      "expert_parallel_size": 1,
-      "data_parallel_size": 1
-    },
-    "extra_config": {
-      "tensor_parallel_size": 1,
-      "enforce_eager": false,
-      "max_num_seqs": 512,
-      "gpu_memory_utilization": 0.9,
-      "engine_kwargs": {
-        "enable_prefix_caching": false,
-        "enable_chunked_prefill": false,
-        "kv_cache_auto_trim_ratio": 0.0
-      }
-    },
-    "runtime_metrics": null
-  },
-  "metrics": {
-    "interactive": {
-      "ttft_ms_p50": 26.76,
-      "ttft_ms_p90": 29.57,
-      "ttft_ms_p99": 40.69,
-      "tpot_ms_p50": 3.51,
-      "tpot_ms_p90": 3.76,
-      "tpot_ms_p99": 3.81,
-      "peak_memory_gb": null,
-      "elapsed_seconds_median": 116.9
-    }
-  },
-  "accuracy": {
-    "subset_score": null,
-    "baseline_delta": null,
-    "valid": false,
-    "notes": "Run --scenario accuracy to check model accuracy."
-  },
-  "meta": {
-    "submitted_by": "JuhaoLiang1997",
-    "submission_type": "individual",
-    "date": "2026-05-18",
-    "time": "20:45:36",
-    "run_id": "419b138c",
-    "run_name": "tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c",
-    "flagged": null,
-    "reproduce_script": "runners/nvidia_onecat_vllm_12a253c2/runner.py",
-    "env_info_file": "../env_info.json",
-    "log_file": "run.log",
-    "samples_file": "samples.jsonl",
-    "notes": null,
-    "benchmark_start_time": "2026-05-18T12:39:46.224469+00:00",
-    "benchmark_end_time": "2026-05-18T12:45:36.498231+00:00",
-    "benchmark_elapsed_minutes": 5.8,
-    "model_load_seconds": 27.8
-  }
-}
\ No newline at end of file
diff --git a/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/online/result.json b/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/online/result.json
deleted file mode 100644
index 170f9d0d..00000000
--- a/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/online/result.json
+++ /dev/null
@@ -1,146 +0,0 @@
-{
-  "schema_version": "1.0",
-  "suite_id": "suite_F",
-  "implementation_id": "nvidia_onecat_vllm_12a253c2",
-  "chip": {
-    "name": "Tesla V100-PCIE-32GB",
-    "vendor": "NVIDIA",
-    "count": 1,
-    "memory_gb": 32.0,
-    "interconnect_intra_node": null,
-    "interconnect_inter_node": null
-  },
-  "environment": {
-    "collected_at": "2026-05-18T12:26:03.593928+00:00",
-    "accelerators": [
-      {
-        "index": 0,
-        "name": "Tesla V100-PCIE-32GB",
-        "vendor": "NVIDIA",
-        "memory_gb": 32.0,
-        "driver_version": "580.82.07",
-        "firmware_version": null,
-        "compute_capability": "7.0",
-        "supports_bf16": false
-      }
-    ],
-    "accelerator_platform": "nvidia",
-    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-25\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
-    "intra_node_interconnect": null,
-    "cpu": {
-      "model": "Intel(R) Xeon(R) Gold 6230 CPU @ 2.10GHz",
-      "physical_cores": 26,
-      "logical_cores": 52,
-      "numa_nodes": 2
-    },
-    "system_memory_gb": 214.5,
-    "pcie_generation": "PCIe Gen 3",
-    "cpu_accelerator_bandwidth_gbs": null,
-    "network_interfaces": null,
-    "os": "Ubuntu 22.04.5 LTS",
-    "python_version": "3.12.13",
-    "kernel_version": "5.4.0-149-generic",
-    "runtime_version": "CUDA 12.8",
-    "pytorch_version": "2.9.1+cu128"
-  },
-  "software": {
-    "framework": "1Cat-vLLM",
-    "framework_version": "1.0.0+flash_attn_v100-1.0.0",
-    "driver_version": "580.82.07",
-    "runtime_version": "CUDA 12.8",
-    "os": "Ubuntu 22.04.5 LTS",
-    "python_version": "3.12.13"
-  },
-  "model": {
-    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
-    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
-    "model_name": null,
-    "model_note": null,
-    "model_source": "local",
-    "architecture": "dense",
-    "parameter_count_b": 0.5,
-    "precision": "FP16",
-    "effective_dtype": null,
-    "quantization_method": null,
-    "model_format": "HuggingFace original"
-  },
-  "task": {
-    "scenario": "online",
-    "num_runs": 3,
-    "warmup_runs": 1,
-    "parallelism": {
-      "tensor_parallel_size": 1,
-      "pipeline_parallel_size": 1,
-      "expert_parallel_size": 1,
-      "data_parallel_size": 1
-    },
-    "extra_config": {
-      "tensor_parallel_size": 1,
-      "enforce_eager": false,
-      "max_num_seqs": 512,
-      "gpu_memory_utilization": 0.9,
-      "engine_kwargs": {
-        "enable_prefix_caching": false,
-        "enable_chunked_prefill": false,
-        "kv_cache_auto_trim_ratio": 0.0
-      }
-    },
-    "runtime_metrics": null
-  },
-  "metrics": {
-    "online": {
-      "sla_ttft_ms": 500,
-      "max_valid_qps": 0.0,
-      "results_by_qps": [
-        {
-          "target_qps": 10,
-          "achieved_qps": 10.0,
-          "ttft_ms_p50": 6316.13,
-          "ttft_ms_p90": 53409.43,
-          "ttft_ms_p99": 67932.56,
-          "tpot_ms_p50": 206.23,
-          "tpot_ms_p90": 291.3,
-          "tpot_ms_p99": 636.32,
-          "elapsed_seconds_median": 103.3,
-          "sla_met": false
-        },
-        {
-          "target_qps": 40,
-          "achieved_qps": 40.0,
-          "ttft_ms_p50": 19238.78,
-          "ttft_ms_p90": 56898.27,
-          "ttft_ms_p99": 75398.9,
-          "tpot_ms_p50": 189.24,
-          "tpot_ms_p90": 300.17,
-          "tpot_ms_p99": 582.22,
-          "elapsed_seconds_median": 86.3,
-          "sla_met": false
-        }
-      ]
-    }
-  },
-  "accuracy": {
-    "subset_score": null,
-    "baseline_delta": null,
-    "valid": false,
-    "notes": "Run --scenario accuracy to check model accuracy."
-  },
-  "meta": {
-    "submitted_by": "JuhaoLiang1997",
-    "submission_type": "individual",
-    "date": "2026-05-18",
-    "time": "20:38:56",
-    "run_id": "419b138c",
-    "run_name": "tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c",
-    "flagged": null,
-    "reproduce_script": "runners/nvidia_onecat_vllm_12a253c2/runner.py",
-    "env_info_file": "../env_info.json",
-    "log_file": "run.log",
-    "samples_file": "samples.jsonl",
-    "notes": null,
-    "benchmark_start_time": "2026-05-18T12:29:46.673625+00:00",
-    "benchmark_end_time": "2026-05-18T12:38:56.798553+00:00",
-    "benchmark_elapsed_minutes": 9.2,
-    "model_load_seconds": 28.7
-  }
-}
\ No newline at end of file
diff --git a/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/result.json b/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/result.json
deleted file mode 100644
index 12baab45..00000000
--- a/results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/result.json
+++ /dev/null
@@ -1,210 +0,0 @@
-{
-  "schema_version": "1.0",
-  "suite_id": "suite_F",
-  "implementation_id": "nvidia_onecat_vllm_12a253c2",
-  "chip": {
-    "name": "Tesla V100-PCIE-32GB",
-    "vendor": "NVIDIA",
-    "count": 1,
-    "memory_gb": 32.0,
-    "interconnect_intra_node": null,
-    "interconnect_inter_node": null
-  },
-  "environment": {
-    "collected_at": "2026-05-18T12:26:03.593928+00:00",
-    "accelerators": [
-      {
-        "index": 0,
-        "name": "Tesla V100-PCIE-32GB",
-        "vendor": "NVIDIA",
-        "memory_gb": 32.0,
-        "driver_version": "580.82.07",
-        "firmware_version": null,
-        "compute_capability": "7.0",
-        "supports_bf16": false
-      }
-    ],
-    "accelerator_platform": "nvidia",
-    "accelerator_topology": "\tGPU0\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \t0-25\t0\t\tN/A\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n",
-    "intra_node_interconnect": null,
-    "cpu": {
-      "model": "Intel(R) Xeon(R) Gold 6230 CPU @ 2.10GHz",
-      "physical_cores": 26,
-      "logical_cores": 52,
-      "numa_nodes": 2
-    },
-    "system_memory_gb": 214.5,
-    "pcie_generation": "PCIe Gen 3",
-    "cpu_accelerator_bandwidth_gbs": null,
-    "network_interfaces": null,
-    "os": "Ubuntu 22.04.5 LTS",
-    "python_version": "3.12.13",
-    "kernel_version": "5.4.0-149-generic",
-    "runtime_version": "CUDA 12.8",
-    "pytorch_version": "2.9.1+cu128"
-  },
-  "software": {
-    "framework": "1Cat-vLLM",
-    "framework_version": "1.0.0+flash_attn_v100-1.0.0",
-    "driver_version": "580.82.07",
-    "runtime_version": "CUDA 12.8",
-    "os": "Ubuntu 22.04.5 LTS",
-    "python_version": "3.12.13"
-  },
-  "model": {
-    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
-    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
-    "model_name": null,
-    "model_note": null,
-    "model_source": "local",
-    "architecture": "dense",
-    "parameter_count_b": 0.5,
-    "precision": "FP16",
-    "effective_dtype": "float16",
-    "quantization_method": null,
-    "model_format": "HuggingFace original"
-  },
-  "task": {
-    "scenarios_run": [
-      "offline",
-      "online",
-      "interactive"
-    ],
-    "parallelism": {
-      "tensor_parallel_size": 1,
-      "pipeline_parallel_size": 1,
-      "expert_parallel_size": 1,
-      "data_parallel_size": 1
-    },
-    "num_runs": 3,
-    "extra_config": {
-      "tensor_parallel_size": 1,
-      "enforce_eager": false,
-      "max_num_seqs": 512,
-      "gpu_memory_utilization": 0.9,
-      "engine_kwargs": {
-        "enable_prefix_caching": false,
-        "enable_chunked_prefill": false,
-        "kv_cache_auto_trim_ratio": 0.0
-      }
-    }
-  },
-  "metrics": {
-    "derived": {},
-    "offline": {
-      "results_by_concurrency": [
-        {
-          "client_concurrency": 4,
-          "throughput_tokens_per_sec": 6234.82,
-          "throughput_tokens_per_sec_per_chip": 6234.82,
-          "throughput_tokens_per_sec_total": 9303.11,
-          "elapsed_seconds_median": 6.8,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        },
-        {
-          "client_concurrency": 16,
-          "throughput_tokens_per_sec": 6292.79,
-          "throughput_tokens_per_sec_per_chip": 6292.79,
-          "throughput_tokens_per_sec_total": 9356.18,
-          "elapsed_seconds_median": 6.7,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        },
-        {
-          "client_concurrency": 64,
-          "throughput_tokens_per_sec": 6243.51,
-          "throughput_tokens_per_sec_per_chip": 6243.51,
-          "throughput_tokens_per_sec_total": 9267.55,
-          "elapsed_seconds_median": 6.8,
-          "peak_memory_gb": null,
-          "power_watts_avg": null,
-          "power_watts_peak": null,
-          "oom": false,
-          "_throughput_note": "output_only",
-          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
-        }
-      ]
-    },
-    "online": {
-      "sla_ttft_ms": 500,
-      "max_valid_qps": 0.0,
-      "results_by_qps": [
-        {
-          "target_qps": 10,
-          "achieved_qps": 10.0,
-          "ttft_ms_p50": 6316.13,
-          "ttft_ms_p90": 53409.43,
-          "ttft_ms_p99": 67932.56,
-          "tpot_ms_p50": 206.23,
-          "tpot_ms_p90": 291.3,
-          "tpot_ms_p99": 636.32,
-          "elapsed_seconds_median": 103.3,
-          "sla_met": false
-        },
-        {
-          "target_qps": 40,
-          "achieved_qps": 40.0,
-          "ttft_ms_p50": 19238.78,
-          "ttft_ms_p90": 56898.27,
-          "ttft_ms_p99": 75398.9,
-          "tpot_ms_p50": 189.24,
-          "tpot_ms_p90": 300.17,
-          "tpot_ms_p99": 582.22,
-          "elapsed_seconds_median": 86.3,
-          "sla_met": false
-        }
-      ]
-    },
-    "interactive": {
-      "ttft_ms_p50": 26.76,
-      "ttft_ms_p90": 29.57,
-      "ttft_ms_p99": 40.69,
-      "tpot_ms_p50": 3.51,
-      "tpot_ms_p90": 3.76,
-      "tpot_ms_p99": 3.81,
-      "peak_memory_gb": null,
-      "elapsed_seconds_median": 116.9
-    }
-  },
-  "accuracy": {
-    "subset_score": 0.37,
-    "baseline_delta": 0.0,
-    "valid": true,
-    "framework": "1Cat-vLLM",
-    "precision": "FP16",
-    "notes": "Integrated accuracy check \u2014 used same 1Cat-vLLM instance as benchmark."
-  },
-  "meta": {
-    "submitted_by": "JuhaoLiang1997",
-    "submission_type": "individual",
-    "date": "2026-05-18",
-    "time": "20:28:55",
-    "run_id": "419b138c",
-    "run_name": "tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c",
-    "flagged": null,
-    "reproduce_script": "runners/nvidia_onecat_vllm_12a253c2/runner.py",
-    "env_info_file": "../env_info.json",
-    "log_file": "run.log",
-    "samples_file": "samples.jsonl",
-    "notes": null,
-    "benchmark_start_time": "2026-05-18T12:27:34.502139+00:00",
-    "benchmark_end_time": "2026-05-18T12:28:55.745031+00:00",
-    "benchmark_elapsed_minutes": 16.4,
-    "model_load_seconds": 31.7,
-    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'interactive'] scenarios.",
-    "scenario_dirs": {
-      "offline": "results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/offline",
-      "online": "results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/online",
-      "interactive": "results/community/tesla_v100_pcie_32gbx1_suite_F_nvidia_onecat_vllm_12a253c2_419b138c/interactive"
-    }
-  }
-}
\ No newline at end of file