From c9d84cff649328e39265efe067ffc08407a82425 Mon Sep 17 00:00:00 2001 From: Pascal Date: Sun, 1 Mar 2026 08:12:24 +0100 Subject: [PATCH] ggml: add BF16/F16 support to snake and col2im_1d ops --- build.sh | 1 - buildtermux.sh | 8 ++ buildvulkan.sh | 8 ++ examples/full.json | 14 ++-- ggml | 2 +- src/vae.h | 26 +++++- tests/BF16.log | 8 +- tests/CPU_BF16.log | 166 ++++++++++++++++++------------------ tests/CPU_Q4_K_M.log | 178 ++++++++++++++++++++------------------- tests/CPU_Q5_K_M.log | 178 ++++++++++++++++++++------------------- tests/CPU_Q6_K.log | 180 ++++++++++++++++++++-------------------- tests/CPU_Q8_0.log | 172 +++++++++++++++++++------------------- tests/Q4_K_M.log | 8 +- tests/Q5_K_M.log | 8 +- tests/Q6_K.log | 8 +- tests/Q8_0.log | 8 +- tests/Vulkan_BF16.log | 90 ++++++++++---------- tests/Vulkan_Q4_K_M.log | 132 ++++++++++++++--------------- tests/Vulkan_Q5_K_M.log | 138 +++++++++++++++--------------- tests/Vulkan_Q6_K.log | 108 ++++++++++++------------ tests/Vulkan_Q8_0.log | 126 ++++++++++++++-------------- 21 files changed, 794 insertions(+), 773 deletions(-) create mode 100755 buildtermux.sh create mode 100755 buildvulkan.sh diff --git a/build.sh b/build.sh index 55a6ce4..67f711f 100755 --- a/build.sh +++ b/build.sh @@ -5,5 +5,4 @@ mkdir build cd build cmake .. -DGGML_CUDA=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -#cmake .. -DGGML_VULKAN=ON cmake --build . --config Release -j "$(nproc)" diff --git a/buildtermux.sh b/buildtermux.sh new file mode 100755 index 0000000..711d9e7 --- /dev/null +++ b/buildtermux.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +rm -rf build +mkdir build +cd build + +cmake .. -DGGML_BLAS=ON -DBLAS_INCLUDE_DIRS=$PREFIX/include/openblas +cmake --build . --config Release -j "$(nproc)" diff --git a/buildvulkan.sh b/buildvulkan.sh new file mode 100755 index 0000000..307e912 --- /dev/null +++ b/buildvulkan.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +rm -rf build +mkdir build +cd build + +cmake .. -DGGML_VULKAN=ON +cmake --build . --config Release -j "$(nproc)" diff --git a/examples/full.json b/examples/full.json index 2e91d11..670fffd 100644 --- a/examples/full.json +++ b/examples/full.json @@ -1,11 +1,9 @@ { - "caption": "Vibrant French house meets tech-house fusion track featuring filtered disco samples, driving funky basslines, and classic four-on-the-floor beats with signature Bob Sinclar vocal chops. Analytical yet euphoric mood blending advanced AI technical vocabulary with dancefloor energy. Instruments include talkbox lead vocals, analog Moog bass synths, glitchy arpeggiated sequencers, punchy TR-808 drum machine, and shimmering high-hat rolls.", - "lyrics": "[Intro - Filtered Disco Sample & Synth Arp]\n\n[Verse 1]\nOptimisation des poids synaptiques en temps réel\nRéseaux neuronaux convolutifs, profondeur idéale\nBackpropagation stochastique, gradient ajusté\nModèles GAN génératifs, data-set finalisé\n\n[Pre-Chorus]\nLatence zéro, flux continu\nL'IA évolue, circuit virtuel\n\n[Chorus - Talkbox Vocals]\nC'est l'ère de l'intelligence artificielle\nAlgorithmes de backpropagation dansent sur le beat\nDeep learning en action, réseau fully connected\nProcessing en temps réel, le futur est lancé !\n\n[Bridge - Synth Pad Build-Up]\nStochastic gradient descent, optimise le modèle\nEarly stopping activé, le réseau se stabilise\n\n[Outro - Vinyl Crackles & Fade Out]\nIA... Intelligence Artificielle...\nRéseaux neuronaux... dansent... forever...", + "caption": "Upbeat French house with infectious disco-inspired bassline, crisp four-on-the-floor kick pattern, wah-wah filtered guitar riffs, retro synth stabs, soulful male lead vocals with gospel-style backing harmonies, smooth saxophone accents, warm vinyl crackle texture, bright summer vibe, polished modern mix with vintage analog warmth, driving yet laid-back energy perfect for rooftop parties and sunset drives", + "lyrics": "[Intro - Ligne de Basse Funk & Beat House]\n\n[Verse 1]\nSous le soleil de Paris, on danse sans fin\nLa nuit s'allume, le beat nous guide\nLes étoiles scintillent au rythme du kick\nUn sourire léger, tout est si vivant\n\n[Pre-Chorus]\nLaisse-toi porter par la musique qui chante\n\n[Chorus]\nOn danse sous le ciel étoilé\nLe monde s'arrête, on s'envole\nAvec ce groove qui nous emporte\nJusqu'au matin, on ne s'arrête pas\n\n[Verse 2]\nLa ville respire au son des cuivres légers\nLes mains en l'air, on oublie le temps\nLa basse funk nous secoue les pieds\nUn été éternel, rien ne peut nous briser\n\n[Chorus]\nOn danse sous le ciel étoilé\nLe monde s'arrête, on s'envole\nAvec ce groove qui nous emporte\nJusqu'au matin, on ne s'arrête pas\n\n[Guitar Solo - Wah-Wah Funk]\n\n[Bridge - Saxophone & Cordes]\nRespire profondément, l'univers t'appelle\n\n[Outro - Synth Fade avec Craquement Vinyle]", + "duration": 240, "bpm": 124, - "duration": 220, - "keyscale": "F# minor", - "timesignature": "4", "vocal_language": "fr", - "inference_steps": 8, - "shift": 3.0 -} \ No newline at end of file + "keyscale": "F# major", + "timesignature": "4" +} diff --git a/ggml b/ggml index 20675b0..c04770a 160000 --- a/ggml +++ b/ggml @@ -1 +1 @@ -Subproject commit 20675b097204068916d88c6d0e2d5b12ee285c9e +Subproject commit c04770a7056267bf0264b7c96d34cd84b24b04e8 diff --git a/src/vae.h b/src/vae.h index 1211b09..bed531a 100644 --- a/src/vae.h +++ b/src/vae.h @@ -248,19 +248,27 @@ static void vae_ggml_load(VAEGGML * m, const char * path) { vae_load_snake_inv(m->sb, gf, "decoder.snake1.beta"); vae_fuse_wn(m->c2w, gf, "decoder.conv2"); - fprintf(stderr, "[VAE] Loaded: 5 blocks, upsample=1920x\n"); + fprintf(stderr, "[VAE] Loaded: 5 blocks, upsample=1920x, BF16 activations\n"); gf_close(&gf); } // Graph building // Snake activation (fused): y = x + sin^2(a * x) * inv_b // x: [T, C], exp_a: [1, C], inv_b: [1, C] (pre-computed at load) +// Casts to BF16 before snake, back to F32 after. static struct ggml_tensor * vae_snake( struct ggml_context * ctx, struct ggml_tensor * x, struct ggml_tensor * exp_a, struct ggml_tensor * inv_b) { - return ggml_snake(ctx, x, exp_a, inv_b); + if (x->type == GGML_TYPE_F32) { + x = ggml_cast(ctx, x, GGML_TYPE_BF16); + } + x = ggml_snake(ctx, x, exp_a, inv_b); + if (x->type != GGML_TYPE_F32) { + x = ggml_cast(ctx, x, GGML_TYPE_F32); + } + return x; } // Conv1d + bias: data [T, IC] -> [T_out, OC] @@ -298,11 +306,21 @@ static struct ggml_tensor * vae_conv_t1d( // w: [IC, K*OC] xt: [IC, T_in] -> col: [K*OC, T_in] struct ggml_tensor * col = ggml_mul_mat(ctx, w, xt); - // Step 3: col2im - scatter-add columns to signal, fused padding crop + // Step 3: cast to BF16 before col2im_1d + if (col->type == GGML_TYPE_F32) { + col = ggml_cast(ctx, col, GGML_TYPE_BF16); + } + + // Step 4: col2im - scatter-add columns to signal, fused padding crop // [K*OC, T_in] -> [T_out, OC] where T_out = (T_in-1)*stride + K - 2*padding struct ggml_tensor * y = ggml_col2im_1d(ctx, col, stride, oc, padding); - // Step 4: Add bias + // Step 5: cast back to F32 + if (y->type != GGML_TYPE_F32) { + y = ggml_cast(ctx, y, GGML_TYPE_F32); + } + + // Step 6: Add bias if (b) { struct ggml_tensor * b2d = ggml_reshape_2d(ctx, b, 1, b->ne[0]); y = ggml_add(ctx, y, b2d); diff --git a/tests/BF16.log b/tests/BF16.log index 88ede30..7ea7d57 100644 --- a/tests/BF16.log +++ b/tests/BF16.log @@ -41,8 +41,8 @@ Using precomputed LM hints dit_step6_xt 0.988188 dit_step7_vt 0.969375 dit_x0 0.979213 - vae_audio 0.901411 - vae_audio (STFT cosine) 0.975533 + vae_audio 0.901377 + vae_audio (STFT cosine) 0.975525 [Turbo] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999945 0.135628 0.006709 -0.002312 0.972932 -0.002342 0.972003 @@ -114,8 +114,8 @@ Using precomputed LM hints dit_step49_vt_cond 0.983553 dit_step49_vt 0.924041 dit_x0 0.990243 - vae_audio 0.956365 - vae_audio (STFT cosine) 0.981932 + vae_audio 0.956370 + vae_audio (STFT cosine) 0.981929 [SFT] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999998 0.038950 0.002063 -0.001725 0.980009 -0.001741 0.980402 diff --git a/tests/CPU_BF16.log b/tests/CPU_BF16.log index e378790..fcae074 100644 --- a/tests/CPU_BF16.log +++ b/tests/CPU_BF16.log @@ -1,6 +1,4 @@ [Request] Loaded request0.json -[Noise] Generating T=2200 (duration=88.0s)... -[Noise] Generated rng_philox_seed42.bf16: [2200, 64] bf16 (281600 bytes) [Turbo] steps=8, shift=3.0 | acestep-v15-turbo-BF16.gguf [GGML] Running acestep-v15-turbo-BF16.gguf... [GGML] Done, 47 dump files @@ -12,48 +10,48 @@ Using precomputed LM hints [Python] Done, 40 dump files [Turbo] Cosine similarities GGML vs Python stage GGML vs Python - text_hidden 0.999815 + text_hidden 0.999816 lyric_embed 1.000000 - enc_hidden 0.999809 + enc_hidden 0.999841 detok_output 0.999995 context 0.999997 noise 1.000000 temb_t 0.999999 hidden_after_proj_in 0.999988 - enc_after_cond_emb 0.999791 + enc_after_cond_emb 0.999832 layer0_sa_output 0.999960 - hidden_after_layer0 0.999981 + hidden_after_layer0 0.999982 hidden_after_layer6 0.999924 - hidden_after_layer12 0.999220 - hidden_after_layer18 0.996570 - hidden_after_layer23 0.993544 - dit_step0_vt 0.974748 - dit_step0_xt 0.999944 - dit_step1_vt 0.978754 - dit_step1_xt 0.999828 - dit_step2_vt 0.980585 - dit_step2_xt 0.999534 - dit_step3_vt 0.982107 - dit_step3_xt 0.998886 - dit_step4_vt 0.981219 - dit_step4_xt 0.997453 - dit_step5_vt 0.978640 - dit_step5_xt 0.994290 - dit_step6_vt 0.975385 - dit_step6_xt 0.988279 - dit_step7_vt 0.969594 - dit_x0 0.979396 - vae_audio 0.900831 - vae_audio (log spectral) 0.999827 + hidden_after_layer12 0.999332 + hidden_after_layer18 0.996692 + hidden_after_layer23 0.993786 + dit_step0_vt 0.975712 + dit_step0_xt 0.999946 + dit_step1_vt 0.979525 + dit_step1_xt 0.999833 + dit_step2_vt 0.981808 + dit_step2_xt 0.999552 + dit_step3_vt 0.982382 + dit_step3_xt 0.998917 + dit_step4_vt 0.980777 + dit_step4_xt 0.997480 + dit_step5_vt 0.978078 + dit_step5_xt 0.994264 + dit_step6_vt 0.974849 + dit_step6_xt 0.988142 + dit_step7_vt 0.969102 + dit_x0 0.979106 + vae_audio 0.901370 + vae_audio (STFT cosine) 0.975816 [Turbo] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B - dit_step0_xt 0.999944 0.139834 0.006735 -0.002329 0.972955 -0.002342 0.972003 - dit_step1_xt 0.999828 0.271987 0.011491 -0.005329 0.942705 -0.005313 0.941730 - dit_step2_xt 0.999534 0.461474 0.017839 -0.009366 0.909252 -0.009311 0.908527 - dit_step3_xt 0.998886 0.660416 0.026355 -0.014729 0.873915 -0.014577 0.873624 - dit_step4_xt 0.997453 0.836992 0.038155 -0.021772 0.842138 -0.021660 0.841995 - dit_step5_xt 0.994290 1.265321 0.055698 -0.031831 0.825532 -0.032109 0.824593 - dit_step6_xt 0.988279 2.080415 0.082441 -0.046092 0.856505 -0.046482 0.855546 + dit_step0_xt 0.999946 0.136541 0.006626 -0.002312 0.972951 -0.002342 0.972003 + dit_step1_xt 0.999833 0.265486 0.011288 -0.005309 0.942692 -0.005313 0.941730 + dit_step2_xt 0.999552 0.451896 0.017477 -0.009347 0.909217 -0.009311 0.908527 + dit_step3_xt 0.998917 0.642624 0.025957 -0.014710 0.873863 -0.014577 0.873624 + dit_step4_xt 0.997480 0.778374 0.037868 -0.021751 0.842047 -0.021660 0.841995 + dit_step5_xt 0.994264 1.244624 0.055630 -0.031814 0.825360 -0.032109 0.824593 + dit_step6_xt 0.988142 2.080976 0.082605 -0.046091 0.856212 -0.046482 0.855546 [SFT] steps=50, shift=1.0, CFG=7.0 | acestep-v15-sft-BF16.gguf [GGML] Running acestep-v15-sft-BF16.gguf... [GGML] Done, 233 dump files @@ -65,68 +63,68 @@ Using precomputed LM hints [Python] Done, 218 dump files [SFT] Cosine similarities GGML vs Python stage GGML vs Python - text_hidden 0.999815 + text_hidden 0.999816 lyric_embed 1.000000 - enc_hidden 0.999809 + enc_hidden 0.999841 detok_output 0.999995 context 0.999997 noise 1.000000 temb_t 0.999998 hidden_after_proj_in 0.999988 - enc_after_cond_emb 0.999794 + enc_after_cond_emb 0.999834 layer0_sa_output 0.999959 hidden_after_layer0 0.999984 - hidden_after_layer6 0.999852 + hidden_after_layer6 0.999851 hidden_after_layer12 0.999471 - hidden_after_layer18 0.998743 - hidden_after_layer23 0.998998 + hidden_after_layer18 0.998749 + hidden_after_layer23 0.998994 null_condition_emb 1.000000 null_enc_hidden 1.000000 - dit_step0_vt_cond 0.998968 - dit_step0_vt_uncond 0.998728 - dit_step0_vt 0.995811 + dit_step0_vt_cond 0.998963 + dit_step0_vt_uncond 0.998717 + dit_step0_vt 0.995766 dit_step0_xt 0.999998 - dit_step5_vt_cond 0.999488 - dit_step5_vt 0.993679 + dit_step5_vt_cond 0.999507 + dit_step5_vt 0.993884 dit_step5_xt 0.999963 - dit_step10_vt_cond 0.998829 - dit_step10_vt 0.993532 - dit_step10_xt 0.999888 - dit_step15_vt_cond 0.996990 - dit_step15_vt 0.985156 - dit_step15_xt 0.999658 - dit_step20_vt_cond 0.994347 - dit_step20_vt 0.979694 - dit_step20_xt 0.999039 - dit_step25_vt_cond 0.989302 - dit_step25_vt 0.969289 - dit_step25_xt 0.997768 - dit_step30_vt_cond 0.984077 - dit_step30_vt 0.964410 - dit_step30_xt 0.995793 - dit_step35_vt_cond 0.979244 - dit_step35_vt 0.955834 - dit_step35_xt 0.993373 - dit_step40_vt_cond 0.976554 - dit_step40_vt 0.950866 - dit_step40_xt 0.991062 - dit_step45_vt_cond 0.979301 - dit_step45_vt 0.952689 - dit_step45_xt 0.989446 - dit_step49_vt_cond 0.981868 - dit_step49_vt 0.921025 - dit_x0 0.988916 - vae_audio 0.947636 - vae_audio (log spectral) 0.999651 + dit_step10_vt_cond 0.998797 + dit_step10_vt 0.993423 + dit_step10_xt 0.999887 + dit_step15_vt_cond 0.997670 + dit_step15_vt 0.988372 + dit_step15_xt 0.999682 + dit_step20_vt_cond 0.995498 + dit_step20_vt 0.982137 + dit_step20_xt 0.999190 + dit_step25_vt_cond 0.991181 + dit_step25_vt 0.972161 + dit_step25_xt 0.998167 + dit_step30_vt_cond 0.986183 + dit_step30_vt 0.967394 + dit_step30_xt 0.996519 + dit_step35_vt_cond 0.981815 + dit_step35_vt 0.959696 + dit_step35_xt 0.994436 + dit_step40_vt_cond 0.979298 + dit_step40_vt 0.954151 + dit_step40_xt 0.992400 + dit_step45_vt_cond 0.981642 + dit_step45_vt 0.955459 + dit_step45_xt 0.990953 + dit_step49_vt_cond 0.982680 + dit_step49_vt 0.941788 + dit_x0 0.990427 + vae_audio 0.960778 + vae_audio (STFT cosine) 0.984703 [SFT] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B - dit_step0_xt 0.999998 0.038570 0.002031 -0.001742 0.980032 -0.001741 0.980402 - dit_step5_xt 0.999963 0.128356 0.005832 -0.006967 0.888993 -0.007143 0.887999 - dit_step10_xt 0.999888 0.221095 0.008891 -0.012449 0.810442 -0.012603 0.811299 - dit_step15_xt 0.999658 0.362084 0.013808 -0.017776 0.745381 -0.018114 0.745268 - dit_step20_xt 0.999039 0.559352 0.021038 -0.023205 0.699913 -0.023808 0.699582 - dit_step25_xt 0.997768 0.856427 0.030573 -0.028769 0.678624 -0.029311 0.679278 - dit_step30_xt 0.995793 1.124315 0.042139 -0.034362 0.684716 -0.035027 0.685262 - dit_step35_xt 0.993373 1.609566 0.055461 -0.039908 0.716931 -0.040716 0.717195 - dit_step40_xt 0.991062 2.075305 0.069778 -0.045420 0.770802 -0.046462 0.771853 - dit_step45_xt 0.989446 2.361056 0.083301 -0.051419 0.842083 -0.052475 0.843036 + dit_step0_xt 0.999998 0.038465 0.002037 -0.001739 0.980023 -0.001741 0.980402 + dit_step5_xt 0.999963 0.130767 0.005794 -0.006951 0.888986 -0.007143 0.887999 + dit_step10_xt 0.999887 0.230145 0.008907 -0.012421 0.810420 -0.012603 0.811299 + dit_step15_xt 0.999682 0.369882 0.013468 -0.017757 0.745283 -0.018114 0.745268 + dit_step20_xt 0.999190 0.439784 0.019899 -0.023189 0.699688 -0.023808 0.699582 + dit_step25_xt 0.998167 0.657918 0.028642 -0.028736 0.678283 -0.029311 0.679278 + dit_step30_xt 0.996519 1.070616 0.039415 -0.034342 0.684394 -0.035027 0.685262 + dit_step35_xt 0.994436 1.684599 0.051968 -0.039891 0.716568 -0.040716 0.717195 + dit_step40_xt 0.992400 2.115248 0.065570 -0.045402 0.770424 -0.046462 0.771853 + dit_step45_xt 0.990953 2.369087 0.078496 -0.051406 0.841668 -0.052475 0.843036 diff --git a/tests/CPU_Q4_K_M.log b/tests/CPU_Q4_K_M.log index 9181941..44fd5b2 100644 --- a/tests/CPU_Q4_K_M.log +++ b/tests/CPU_Q4_K_M.log @@ -1,6 +1,4 @@ [Request] Loaded request0.json -[Noise] Generating T=2200 (duration=88.0s)... -[Noise] Generated rng_philox_seed42.bf16: [2200, 64] bf16 (281600 bytes) [Turbo] steps=8, shift=3.0 | acestep-v15-turbo-Q4_K_M.gguf [GGML] Running acestep-v15-turbo-Q4_K_M.gguf... [GGML] Done, 47 dump files @@ -12,48 +10,48 @@ Using precomputed LM hints [Python] Done, 40 dump files [Turbo] Cosine similarities GGML vs Python stage GGML vs Python - text_hidden 0.999815 + text_hidden 0.999816 lyric_embed 1.000000 - enc_hidden 0.997063 + enc_hidden 0.997095 detok_output 0.999577 context 0.999730 noise 1.000000 temb_t 0.999896 - hidden_after_proj_in 0.999902 - enc_after_cond_emb 0.997533 - layer0_sa_output 0.998371 - hidden_after_layer0 0.999615 - hidden_after_layer6 0.999233 - hidden_after_layer12 0.995253 - hidden_after_layer18 0.991409 - hidden_after_layer23 0.985132 - dit_step0_vt 0.946072 - dit_step0_xt 0.999882 - dit_step1_vt 0.948670 - dit_step1_xt 0.999617 - dit_step2_vt 0.959875 - dit_step2_xt 0.999031 - dit_step3_vt 0.963067 - dit_step3_xt 0.997805 - dit_step4_vt 0.961818 - dit_step4_xt 0.995068 - dit_step5_vt 0.959624 - dit_step5_xt 0.989091 - dit_step6_vt 0.954394 - dit_step6_xt 0.977947 - dit_step7_vt 0.943006 - dit_x0 0.961532 - vae_audio 0.844614 - vae_audio (log spectral) 0.999454 + hidden_after_proj_in 0.999903 + enc_after_cond_emb 0.997571 + layer0_sa_output 0.998370 + hidden_after_layer0 0.999619 + hidden_after_layer6 0.999177 + hidden_after_layer12 0.995111 + hidden_after_layer18 0.991459 + hidden_after_layer23 0.985217 + dit_step0_vt 0.946613 + dit_step0_xt 0.999883 + dit_step1_vt 0.947613 + dit_step1_xt 0.999611 + dit_step2_vt 0.958491 + dit_step2_xt 0.999010 + dit_step3_vt 0.962965 + dit_step3_xt 0.997773 + dit_step4_vt 0.960997 + dit_step4_xt 0.994989 + dit_step5_vt 0.957636 + dit_step5_xt 0.988832 + dit_step6_vt 0.952016 + dit_step6_xt 0.977196 + dit_step7_vt 0.939970 + dit_x0 0.959881 + vae_audio 0.834993 + vae_audio (STFT cosine) 0.955098 [Turbo] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B - dit_step0_xt 0.999882 0.167055 0.010319 -0.002261 0.973189 -0.002342 0.972003 - dit_step1_xt 0.999617 0.260431 0.018015 -0.005142 0.943228 -0.005313 0.941730 - dit_step2_xt 0.999031 0.426887 0.027445 -0.009057 0.910254 -0.009311 0.908527 - dit_step3_xt 0.997805 0.600273 0.039583 -0.014366 0.875296 -0.014577 0.873624 - dit_step4_xt 0.995068 0.864324 0.056698 -0.021350 0.843715 -0.021660 0.841995 - dit_step5_xt 0.989091 1.350770 0.082194 -0.031344 0.827236 -0.032109 0.824593 - dit_step6_xt 0.977947 2.001190 0.121137 -0.045717 0.858307 -0.046482 0.855546 + dit_step0_xt 0.999883 0.167680 0.010319 -0.002256 0.973185 -0.002342 0.972003 + dit_step1_xt 0.999611 0.268237 0.018204 -0.005104 0.943179 -0.005313 0.941730 + dit_step2_xt 0.999010 0.434671 0.027774 -0.009029 0.910147 -0.009311 0.908527 + dit_step3_xt 0.997773 0.601206 0.039926 -0.014325 0.875171 -0.014577 0.873624 + dit_step4_xt 0.994989 0.892883 0.057385 -0.021274 0.843615 -0.021660 0.841995 + dit_step5_xt 0.988832 1.381146 0.083605 -0.031218 0.827061 -0.032109 0.824593 + dit_step6_xt 0.977196 2.021005 0.123750 -0.045473 0.858175 -0.046482 0.855546 [SFT] steps=50, shift=1.0, CFG=7.0 | acestep-v15-sft-Q4_K_M.gguf [GGML] Running acestep-v15-sft-Q4_K_M.gguf... [GGML] Done, 233 dump files @@ -65,68 +63,68 @@ Using precomputed LM hints [Python] Done, 218 dump files [SFT] Cosine similarities GGML vs Python stage GGML vs Python - text_hidden 0.999815 + text_hidden 0.999816 lyric_embed 1.000000 - enc_hidden 0.997063 + enc_hidden 0.997095 detok_output 0.999577 context 0.999730 noise 1.000000 temb_t 0.999645 hidden_after_proj_in 0.999904 - enc_after_cond_emb 0.997523 - layer0_sa_output 0.998514 - hidden_after_layer0 0.999613 - hidden_after_layer6 0.999089 - hidden_after_layer12 0.997602 - hidden_after_layer18 0.996736 - hidden_after_layer23 0.996899 + enc_after_cond_emb 0.997560 + layer0_sa_output 0.998513 + hidden_after_layer0 0.999624 + hidden_after_layer6 0.999091 + hidden_after_layer12 0.997675 + hidden_after_layer18 0.996682 + hidden_after_layer23 0.996897 null_condition_emb 1.000000 null_enc_hidden 1.000000 - dit_step0_vt_cond 0.996878 - dit_step0_vt_uncond 0.996136 - dit_step0_vt 0.990281 + dit_step0_vt_cond 0.996806 + dit_step0_vt_uncond 0.996163 + dit_step0_vt 0.990085 dit_step0_xt 0.999995 - dit_step5_vt_cond 0.995376 - dit_step5_vt 0.978180 + dit_step5_vt_cond 0.995410 + dit_step5_vt 0.978964 dit_step5_xt 0.999822 - dit_step10_vt_cond 0.991455 - dit_step10_vt 0.969954 - dit_step10_xt 0.999213 - dit_step15_vt_cond 0.981764 - dit_step15_vt 0.944797 - dit_step15_xt 0.997466 - dit_step20_vt_cond 0.967532 - dit_step20_vt 0.920176 - dit_step20_xt 0.993398 - dit_step25_vt_cond 0.950347 - dit_step25_vt 0.894558 - dit_step25_xt 0.986352 - dit_step30_vt_cond 0.930725 - dit_step30_vt 0.871103 - dit_step30_xt 0.976342 - dit_step35_vt_cond 0.910505 - dit_step35_vt 0.846070 - dit_step35_xt 0.965219 - dit_step40_vt_cond 0.898264 - dit_step40_vt 0.828032 - dit_step40_xt 0.955205 - dit_step45_vt_cond 0.909317 - dit_step45_vt 0.842699 - dit_step45_xt 0.948495 - dit_step49_vt_cond 0.927921 - dit_step49_vt 0.868889 - dit_x0 0.946312 - vae_audio 0.821255 - vae_audio (log spectral) 0.999479 + dit_step10_vt_cond 0.991521 + dit_step10_vt 0.970202 + dit_step10_xt 0.999221 + dit_step15_vt_cond 0.981975 + dit_step15_vt 0.945173 + dit_step15_xt 0.997485 + dit_step20_vt_cond 0.967221 + dit_step20_vt 0.918272 + dit_step20_xt 0.993402 + dit_step25_vt_cond 0.950021 + dit_step25_vt 0.894843 + dit_step25_xt 0.986289 + dit_step30_vt_cond 0.929833 + dit_step30_vt 0.870341 + dit_step30_xt 0.976182 + dit_step35_vt_cond 0.909548 + dit_step35_vt 0.845635 + dit_step35_xt 0.964963 + dit_step40_vt_cond 0.897534 + dit_step40_vt 0.827777 + dit_step40_xt 0.954875 + dit_step45_vt_cond 0.908619 + dit_step45_vt 0.841100 + dit_step45_xt 0.948114 + dit_step49_vt_cond 0.927278 + dit_step49_vt 0.867932 + dit_x0 0.945906 + vae_audio 0.825297 + vae_audio (STFT cosine) 0.924406 [SFT] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B - dit_step0_xt 0.999995 0.035463 0.002863 -0.001832 0.980372 -0.001741 0.980402 - dit_step5_xt 0.999822 0.191374 0.013093 -0.007283 0.890630 -0.007143 0.887999 - dit_step10_xt 0.999213 0.533874 0.024278 -0.012967 0.812531 -0.012603 0.811299 - dit_step15_xt 0.997466 0.841167 0.039325 -0.018588 0.747859 -0.018114 0.745268 - dit_step20_xt 0.993398 1.144428 0.058918 -0.024345 0.702154 -0.023808 0.699582 - dit_step25_xt 0.986352 1.529792 0.081732 -0.030250 0.679756 -0.029311 0.679278 - dit_step30_xt 0.976342 1.901214 0.108310 -0.036321 0.684271 -0.035027 0.685262 - dit_step35_xt 0.965219 2.233485 0.137544 -0.042375 0.714577 -0.040716 0.717195 - dit_step40_xt 0.955205 2.559541 0.168362 -0.048429 0.766980 -0.046462 0.771853 - dit_step45_xt 0.948495 2.888254 0.197775 -0.054749 0.837482 -0.052475 0.843036 + dit_step0_xt 0.999995 0.035570 0.002883 -0.001844 0.980345 -0.001741 0.980402 + dit_step5_xt 0.999822 0.188835 0.013032 -0.007303 0.890510 -0.007143 0.887999 + dit_step10_xt 0.999221 0.527206 0.024125 -0.012987 0.812393 -0.012603 0.811299 + dit_step15_xt 0.997485 0.839391 0.039117 -0.018648 0.747696 -0.018114 0.745268 + dit_step20_xt 0.993402 1.146206 0.058860 -0.024311 0.701939 -0.023808 0.699582 + dit_step25_xt 0.986289 1.528936 0.081899 -0.030231 0.679540 -0.029311 0.679278 + dit_step30_xt 0.976182 1.891257 0.108598 -0.036282 0.684111 -0.035027 0.685262 + dit_step35_xt 0.964963 2.208873 0.137902 -0.042366 0.714637 -0.040716 0.717195 + dit_step40_xt 0.954875 2.494038 0.168832 -0.048453 0.767102 -0.046462 0.771853 + dit_step45_xt 0.948114 2.800970 0.198350 -0.054785 0.837697 -0.052475 0.843036 diff --git a/tests/CPU_Q5_K_M.log b/tests/CPU_Q5_K_M.log index a4ee305..4732362 100644 --- a/tests/CPU_Q5_K_M.log +++ b/tests/CPU_Q5_K_M.log @@ -1,6 +1,4 @@ [Request] Loaded request0.json -[Noise] Generating T=2200 (duration=88.0s)... -[Noise] Generated rng_philox_seed42.bf16: [2200, 64] bf16 (281600 bytes) [Turbo] steps=8, shift=3.0 | acestep-v15-turbo-Q5_K_M.gguf [GGML] Running acestep-v15-turbo-Q5_K_M.gguf... [GGML] Done, 47 dump files @@ -12,48 +10,48 @@ Using precomputed LM hints [Python] Done, 40 dump files [Turbo] Cosine similarities GGML vs Python stage GGML vs Python - text_hidden 0.999815 + text_hidden 0.999816 lyric_embed 1.000000 - enc_hidden 0.999059 + enc_hidden 0.999099 detok_output 0.999843 context 0.999900 noise 1.000000 temb_t 0.999968 hidden_after_proj_in 0.999954 - enc_after_cond_emb 0.999148 - layer0_sa_output 0.999387 - hidden_after_layer0 0.999763 - hidden_after_layer6 0.999705 - hidden_after_layer12 0.998661 - hidden_after_layer18 0.995364 - hidden_after_layer23 0.991292 - dit_step0_vt 0.967619 - dit_step0_xt 0.999929 - dit_step1_vt 0.972993 - dit_step1_xt 0.999788 - dit_step2_vt 0.970423 - dit_step2_xt 0.999381 - dit_step3_vt 0.972983 - dit_step3_xt 0.998515 - dit_step4_vt 0.971443 - dit_step4_xt 0.996580 - dit_step5_vt 0.967329 - dit_step5_xt 0.992110 - dit_step6_vt 0.962824 - dit_step6_xt 0.983364 - dit_step7_vt 0.954026 - dit_x0 0.970155 - vae_audio 0.873919 - vae_audio (log spectral) 0.999804 + enc_after_cond_emb 0.999196 + layer0_sa_output 0.999388 + hidden_after_layer0 0.999773 + hidden_after_layer6 0.999687 + hidden_after_layer12 0.998560 + hidden_after_layer18 0.995178 + hidden_after_layer23 0.990907 + dit_step0_vt 0.966084 + dit_step0_xt 0.999926 + dit_step1_vt 0.972329 + dit_step1_xt 0.999780 + dit_step2_vt 0.971107 + dit_step2_xt 0.999383 + dit_step3_vt 0.973886 + dit_step3_xt 0.998543 + dit_step4_vt 0.971976 + dit_step4_xt 0.996642 + dit_step5_vt 0.967575 + dit_step5_xt 0.992211 + dit_step6_vt 0.962964 + dit_step6_xt 0.983513 + dit_step7_vt 0.954349 + dit_x0 0.970379 + vae_audio 0.874800 + vae_audio (STFT cosine) 0.967703 [Turbo] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B - dit_step0_xt 0.999929 0.140089 0.007891 -0.002298 0.973057 -0.002342 0.972003 - dit_step1_xt 0.999788 0.266546 0.013296 -0.005302 0.942929 -0.005313 0.941730 - dit_step2_xt 0.999381 0.463225 0.021255 -0.009318 0.909586 -0.009311 0.908527 - dit_step3_xt 0.998515 0.708636 0.031731 -0.014737 0.874334 -0.014577 0.873624 - dit_step4_xt 0.996580 0.901781 0.046168 -0.021971 0.842638 -0.021660 0.841995 - dit_step5_xt 0.992110 1.343460 0.068203 -0.032360 0.826198 -0.032109 0.824593 - dit_step6_xt 0.983364 2.208066 0.102056 -0.047009 0.857381 -0.046482 0.855546 + dit_step0_xt 0.999926 0.135378 0.008030 -0.002303 0.973012 -0.002342 0.972003 + dit_step1_xt 0.999780 0.276712 0.013491 -0.005310 0.942849 -0.005313 0.941730 + dit_step2_xt 0.999383 0.460420 0.021261 -0.009337 0.909465 -0.009311 0.908527 + dit_step3_xt 0.998543 0.681684 0.031463 -0.014739 0.874175 -0.014577 0.873624 + dit_step4_xt 0.996642 0.853164 0.045737 -0.021967 0.842445 -0.021660 0.841995 + dit_step5_xt 0.992211 1.314129 0.067657 -0.032346 0.825989 -0.032109 0.824593 + dit_step6_xt 0.983513 2.191432 0.101363 -0.046949 0.857195 -0.046482 0.855546 [SFT] steps=50, shift=1.0, CFG=7.0 | acestep-v15-sft-Q5_K_M.gguf [GGML] Running acestep-v15-sft-Q5_K_M.gguf... [GGML] Done, 233 dump files @@ -65,68 +63,68 @@ Using precomputed LM hints [Python] Done, 218 dump files [SFT] Cosine similarities GGML vs Python stage GGML vs Python - text_hidden 0.999815 + text_hidden 0.999816 lyric_embed 1.000000 - enc_hidden 0.999059 + enc_hidden 0.999099 detok_output 0.999843 context 0.999900 noise 1.000000 temb_t 0.999877 hidden_after_proj_in 0.999954 - enc_after_cond_emb 0.999150 - layer0_sa_output 0.999444 - hidden_after_layer0 0.999821 - hidden_after_layer6 0.999570 - hidden_after_layer12 0.998941 - hidden_after_layer18 0.997948 - hidden_after_layer23 0.998430 + enc_after_cond_emb 0.999196 + layer0_sa_output 0.999446 + hidden_after_layer0 0.999823 + hidden_after_layer6 0.999554 + hidden_after_layer12 0.998967 + hidden_after_layer18 0.997974 + hidden_after_layer23 0.998436 null_condition_emb 1.000000 null_enc_hidden 1.000000 - dit_step0_vt_cond 0.998358 - dit_step0_vt_uncond 0.998280 - dit_step0_vt 0.994319 + dit_step0_vt_cond 0.998372 + dit_step0_vt_uncond 0.998354 + dit_step0_vt 0.994379 dit_step0_xt 0.999997 - dit_step5_vt_cond 0.998638 - dit_step5_vt 0.988043 - dit_step5_xt 0.999934 - dit_step10_vt_cond 0.996725 - dit_step10_vt 0.984761 - dit_step10_xt 0.999747 - dit_step15_vt_cond 0.991458 - dit_step15_vt 0.967695 - dit_step15_xt 0.999090 - dit_step20_vt_cond 0.983977 - dit_step20_vt 0.952867 - dit_step20_xt 0.997434 - dit_step25_vt_cond 0.974558 - dit_step25_vt 0.937891 - dit_step25_xt 0.994385 - dit_step30_vt_cond 0.964275 - dit_step30_vt 0.923952 - dit_step30_xt 0.989835 - dit_step35_vt_cond 0.954146 - dit_step35_vt 0.908242 - dit_step35_xt 0.984618 - dit_step40_vt_cond 0.948368 - dit_step40_vt 0.894170 - dit_step40_xt 0.979771 - dit_step45_vt_cond 0.955139 - dit_step45_vt 0.899362 - dit_step45_xt 0.976424 - dit_step49_vt_cond 0.965193 - dit_step49_vt 0.911812 - dit_x0 0.975271 - vae_audio 0.881985 - vae_audio (log spectral) 0.999561 + dit_step5_vt_cond 0.998658 + dit_step5_vt 0.988358 + dit_step5_xt 0.999933 + dit_step10_vt_cond 0.997095 + dit_step10_vt 0.985993 + dit_step10_xt 0.999758 + dit_step15_vt_cond 0.993108 + dit_step15_vt 0.970538 + dit_step15_xt 0.999209 + dit_step20_vt_cond 0.985753 + dit_step20_vt 0.954524 + dit_step20_xt 0.997715 + dit_step25_vt_cond 0.976423 + dit_step25_vt 0.938088 + dit_step25_xt 0.994906 + dit_step30_vt_cond 0.965769 + dit_step30_vt 0.925268 + dit_step30_xt 0.990600 + dit_step35_vt_cond 0.955274 + dit_step35_vt 0.909442 + dit_step35_xt 0.985533 + dit_step40_vt_cond 0.949378 + dit_step40_vt 0.894016 + dit_step40_xt 0.980757 + dit_step45_vt_cond 0.956168 + dit_step45_vt 0.901535 + dit_step45_xt 0.977447 + dit_step49_vt_cond 0.966288 + dit_step49_vt 0.914297 + dit_x0 0.976302 + vae_audio 0.889659 + vae_audio (STFT cosine) 0.945409 [SFT] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B - dit_step0_xt 0.999997 0.038320 0.002303 -0.001772 0.980121 -0.001741 0.980402 - dit_step5_xt 0.999934 0.101929 0.007972 -0.006958 0.889606 -0.007143 0.887999 - dit_step10_xt 0.999747 0.206345 0.013583 -0.012531 0.811047 -0.012603 0.811299 - dit_step15_xt 0.999090 0.420523 0.022754 -0.018021 0.745676 -0.018114 0.745268 - dit_step20_xt 0.997434 0.626724 0.034904 -0.023739 0.699521 -0.023808 0.699582 - dit_step25_xt 0.994385 0.885344 0.049534 -0.029511 0.676977 -0.029311 0.679278 - dit_step30_xt 0.989835 1.169288 0.067080 -0.035341 0.681209 -0.035027 0.685262 - dit_step35_xt 0.984618 1.446514 0.086575 -0.041125 0.711396 -0.040716 0.717195 - dit_step40_xt 0.979771 1.780856 0.107412 -0.046934 0.763967 -0.046462 0.771853 - dit_step45_xt 0.976424 2.129282 0.127304 -0.053123 0.834743 -0.052475 0.843036 + dit_step0_xt 0.999997 0.037808 0.002296 -0.001776 0.980078 -0.001741 0.980402 + dit_step5_xt 0.999933 0.104447 0.007971 -0.006973 0.889460 -0.007143 0.887999 + dit_step10_xt 0.999758 0.210002 0.013370 -0.012530 0.810881 -0.012603 0.811299 + dit_step15_xt 0.999209 0.418503 0.021538 -0.017971 0.745622 -0.018114 0.745268 + dit_step20_xt 0.997715 0.623172 0.033317 -0.023695 0.699368 -0.023808 0.699582 + dit_step25_xt 0.994906 0.874752 0.047642 -0.029485 0.676770 -0.029311 0.679278 + dit_step30_xt 0.990600 1.161649 0.065018 -0.035311 0.680992 -0.035027 0.685262 + dit_step35_xt 0.985533 1.453686 0.084547 -0.041122 0.711332 -0.040716 0.717195 + dit_step40_xt 0.980757 1.810532 0.105436 -0.046941 0.764001 -0.046462 0.771853 + dit_step45_xt 0.977447 2.167346 0.125231 -0.053123 0.834843 -0.052475 0.843036 diff --git a/tests/CPU_Q6_K.log b/tests/CPU_Q6_K.log index 3293fa9..93d1e05 100644 --- a/tests/CPU_Q6_K.log +++ b/tests/CPU_Q6_K.log @@ -1,6 +1,4 @@ [Request] Loaded request0.json -[Noise] Generating T=2200 (duration=88.0s)... -[Noise] Generated rng_philox_seed42.bf16: [2200, 64] bf16 (281600 bytes) [Turbo] steps=8, shift=3.0 | acestep-v15-turbo-Q6_K.gguf [GGML] Running acestep-v15-turbo-Q6_K.gguf... [GGML] Done, 47 dump files @@ -12,48 +10,48 @@ Using precomputed LM hints [Python] Done, 40 dump files [Turbo] Cosine similarities GGML vs Python stage GGML vs Python - text_hidden 0.999815 + text_hidden 0.999816 lyric_embed 1.000000 - enc_hidden 0.999594 + enc_hidden 0.999634 detok_output 0.999927 context 0.999954 noise 1.000000 temb_t 0.999986 hidden_after_proj_in 0.999975 - enc_after_cond_emb 0.999574 - layer0_sa_output 0.999717 - hidden_after_layer0 0.999819 - hidden_after_layer6 0.999796 - hidden_after_layer12 0.998673 - hidden_after_layer18 0.995619 - hidden_after_layer23 0.991734 - dit_step0_vt 0.969206 - dit_step0_xt 0.999932 - dit_step1_vt 0.973064 - dit_step1_xt 0.999789 - dit_step2_vt 0.974843 - dit_step2_xt 0.999431 - dit_step3_vt 0.974760 - dit_step3_xt 0.998601 - dit_step4_vt 0.971372 - dit_step4_xt 0.996642 - dit_step5_vt 0.966576 - dit_step5_xt 0.992020 - dit_step6_vt 0.962352 - dit_step6_xt 0.982974 - dit_step7_vt 0.954566 - dit_x0 0.969537 - vae_audio 0.884646 - vae_audio (log spectral) 0.999800 + enc_after_cond_emb 0.999619 + layer0_sa_output 0.999718 + hidden_after_layer0 0.999827 + hidden_after_layer6 0.999788 + hidden_after_layer12 0.998843 + hidden_after_layer18 0.995848 + hidden_after_layer23 0.992196 + dit_step0_vt 0.971124 + dit_step0_xt 0.999936 + dit_step1_vt 0.975111 + dit_step1_xt 0.999802 + dit_step2_vt 0.978218 + dit_step2_xt 0.999477 + dit_step3_vt 0.977576 + dit_step3_xt 0.998723 + dit_step4_vt 0.973938 + dit_step4_xt 0.996945 + dit_step5_vt 0.969356 + dit_step5_xt 0.992753 + dit_step6_vt 0.965671 + dit_step6_xt 0.984569 + dit_step7_vt 0.958147 + dit_x0 0.972312 + vae_audio 0.891761 + vae_audio (STFT cosine) 0.969080 [Turbo] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B - dit_step0_xt 0.999932 0.151740 0.007444 -0.002272 0.972929 -0.002342 0.972003 - dit_step1_xt 0.999789 0.296944 0.012894 -0.005217 0.942698 -0.005313 0.941730 - dit_step2_xt 0.999431 0.480226 0.020058 -0.009169 0.909198 -0.009311 0.908527 - dit_step3_xt 0.998601 0.735558 0.029990 -0.014527 0.873715 -0.014577 0.873624 - dit_step4_xt 0.996642 1.052198 0.044521 -0.021723 0.841752 -0.021660 0.841995 - dit_step5_xt 0.992020 1.529275 0.066809 -0.032045 0.824936 -0.032109 0.824593 - dit_step6_xt 0.982974 2.218505 0.100581 -0.046647 0.855983 -0.046482 0.855546 + dit_step0_xt 0.999936 0.151952 0.007283 -0.002271 0.972870 -0.002342 0.972003 + dit_step1_xt 0.999802 0.296519 0.012516 -0.005212 0.942575 -0.005313 0.941730 + dit_step2_xt 0.999477 0.478400 0.019283 -0.009184 0.908992 -0.009311 0.908527 + dit_step3_xt 0.998723 0.734609 0.028810 -0.014535 0.873457 -0.014577 0.873624 + dit_step4_xt 0.996945 1.045720 0.042804 -0.021712 0.841447 -0.021660 0.841995 + dit_step5_xt 0.992753 1.512605 0.064324 -0.032020 0.824620 -0.032109 0.824593 + dit_step6_xt 0.984569 2.166596 0.096699 -0.046604 0.855715 -0.046482 0.855546 [SFT] steps=50, shift=1.0, CFG=7.0 | acestep-v15-sft-Q6_K.gguf [GGML] Running acestep-v15-sft-Q6_K.gguf... [GGML] Done, 233 dump files @@ -65,68 +63,68 @@ Using precomputed LM hints [Python] Done, 218 dump files [SFT] Cosine similarities GGML vs Python stage GGML vs Python - text_hidden 0.999815 + text_hidden 0.999816 lyric_embed 1.000000 - enc_hidden 0.999594 + enc_hidden 0.999634 detok_output 0.999927 context 0.999954 noise 1.000000 temb_t 0.999952 - hidden_after_proj_in 0.999975 - enc_after_cond_emb 0.999578 - layer0_sa_output 0.999729 - hidden_after_layer0 0.999850 - hidden_after_layer6 0.999739 - hidden_after_layer12 0.999297 - hidden_after_layer18 0.998408 - hidden_after_layer23 0.998695 + hidden_after_proj_in 0.999974 + enc_after_cond_emb 0.999624 + layer0_sa_output 0.999731 + hidden_after_layer0 0.999858 + hidden_after_layer6 0.999745 + hidden_after_layer12 0.999282 + hidden_after_layer18 0.998391 + hidden_after_layer23 0.998703 null_condition_emb 1.000000 null_enc_hidden 1.000000 - dit_step0_vt_cond 0.998599 - dit_step0_vt_uncond 0.998145 - dit_step0_vt 0.994450 + dit_step0_vt_cond 0.998624 + dit_step0_vt_uncond 0.998134 + dit_step0_vt 0.994531 dit_step0_xt 0.999997 - dit_step5_vt_cond 0.999073 - dit_step5_vt 0.990884 - dit_step5_xt 0.999949 - dit_step10_vt_cond 0.997877 - dit_step10_vt 0.988107 - dit_step10_xt 0.999824 - dit_step15_vt_cond 0.995617 - dit_step15_vt 0.978185 - dit_step15_xt 0.999461 - dit_step20_vt_cond 0.992383 - dit_step20_vt 0.966489 - dit_step20_xt 0.998586 - dit_step25_vt_cond 0.986529 - dit_step25_vt 0.953404 - dit_step25_xt 0.996971 - dit_step30_vt_cond 0.980144 - dit_step30_vt 0.943520 - dit_step30_xt 0.994482 - dit_step35_vt_cond 0.973693 - dit_step35_vt 0.931521 - dit_step35_xt 0.991507 - dit_step40_vt_cond 0.969689 - dit_step40_vt 0.917785 - dit_step40_xt 0.988595 - dit_step45_vt_cond 0.972971 - dit_step45_vt 0.917770 - dit_step45_xt 0.986498 - dit_step49_vt_cond 0.976321 - dit_step49_vt 0.906986 - dit_x0 0.985726 - vae_audio 0.941740 - vae_audio (log spectral) 0.999656 + dit_step5_vt_cond 0.999105 + dit_step5_vt 0.991049 + dit_step5_xt 0.999950 + dit_step10_vt_cond 0.997890 + dit_step10_vt 0.988681 + dit_step10_xt 0.999825 + dit_step15_vt_cond 0.995763 + dit_step15_vt 0.978576 + dit_step15_xt 0.999458 + dit_step20_vt_cond 0.991824 + dit_step20_vt 0.966730 + dit_step20_xt 0.998566 + dit_step25_vt_cond 0.986001 + dit_step25_vt 0.952775 + dit_step25_xt 0.996897 + dit_step30_vt_cond 0.979821 + dit_step30_vt 0.943526 + dit_step30_xt 0.994344 + dit_step35_vt_cond 0.973662 + dit_step35_vt 0.929345 + dit_step35_xt 0.991309 + dit_step40_vt_cond 0.969585 + dit_step40_vt 0.918968 + dit_step40_xt 0.988416 + dit_step45_vt_cond 0.972816 + dit_step45_vt 0.918164 + dit_step45_xt 0.986334 + dit_step49_vt_cond 0.976204 + dit_step49_vt 0.909094 + dit_x0 0.985561 + vae_audio 0.940827 + vae_audio (STFT cosine) 0.976287 [SFT] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B - dit_step0_xt 0.999997 0.038008 0.002247 -0.001751 0.980202 -0.001741 0.980402 - dit_step5_xt 0.999949 0.135739 0.006985 -0.006980 0.889900 -0.007143 0.887999 - dit_step10_xt 0.999824 0.193634 0.011359 -0.012388 0.811396 -0.012603 0.811299 - dit_step15_xt 0.999461 0.312492 0.017994 -0.017664 0.745799 -0.018114 0.745268 - dit_step20_xt 0.998586 0.525532 0.026868 -0.022959 0.699406 -0.023808 0.699582 - dit_step25_xt 0.996971 0.780155 0.037686 -0.028371 0.677020 -0.029311 0.679278 - dit_step30_xt 0.994482 1.081808 0.050865 -0.033803 0.681274 -0.035027 0.685262 - dit_step35_xt 0.991507 1.668724 0.065783 -0.039118 0.711362 -0.040716 0.717195 - dit_step40_xt 0.988595 2.092443 0.082225 -0.044398 0.763266 -0.046462 0.771853 - dit_step45_xt 0.986498 2.350159 0.098049 -0.050127 0.833073 -0.052475 0.843036 + dit_step0_xt 0.999997 0.037619 0.002240 -0.001750 0.980170 -0.001741 0.980402 + dit_step5_xt 0.999950 0.129572 0.006928 -0.006971 0.889777 -0.007143 0.887999 + dit_step10_xt 0.999825 0.192490 0.011325 -0.012410 0.811294 -0.012603 0.811299 + dit_step15_xt 0.999458 0.319211 0.017944 -0.017698 0.745779 -0.018114 0.745268 + dit_step20_xt 0.998566 0.553748 0.026838 -0.023098 0.699443 -0.023808 0.699582 + dit_step25_xt 0.996897 0.760972 0.037747 -0.028532 0.677161 -0.029311 0.679278 + dit_step30_xt 0.994344 1.235259 0.050893 -0.033936 0.681526 -0.035027 0.685262 + dit_step35_xt 0.991309 1.863492 0.065806 -0.039291 0.711899 -0.040716 0.717195 + dit_step40_xt 0.988416 2.112072 0.082079 -0.044606 0.764056 -0.046462 0.771853 + dit_step45_xt 0.986334 2.338981 0.097741 -0.050358 0.834033 -0.052475 0.843036 diff --git a/tests/CPU_Q8_0.log b/tests/CPU_Q8_0.log index a9b056b..f4a9086 100644 --- a/tests/CPU_Q8_0.log +++ b/tests/CPU_Q8_0.log @@ -1,6 +1,4 @@ [Request] Loaded request0.json -[Noise] Generating T=2200 (duration=88.0s)... -[Noise] Generated rng_philox_seed42.bf16: [2200, 64] bf16 (281600 bytes) [Turbo] steps=8, shift=3.0 | acestep-v15-turbo-Q8_0.gguf [GGML] Running acestep-v15-turbo-Q8_0.gguf... [GGML] Done, 47 dump files @@ -12,48 +10,48 @@ Using precomputed LM hints [Python] Done, 40 dump files [Turbo] Cosine similarities GGML vs Python stage GGML vs Python - text_hidden 0.999815 + text_hidden 0.999816 lyric_embed 1.000000 - enc_hidden 0.999778 + enc_hidden 0.999814 detok_output 0.999983 context 0.999990 noise 1.000000 temb_t 0.999997 hidden_after_proj_in 0.999985 - enc_after_cond_emb 0.999748 - layer0_sa_output 0.999924 - hidden_after_layer0 0.999954 - hidden_after_layer6 0.999897 - hidden_after_layer12 0.999310 - hidden_after_layer18 0.996631 - hidden_after_layer23 0.993566 - dit_step0_vt 0.974984 - dit_step0_xt 0.999945 - dit_step1_vt 0.977784 - dit_step1_xt 0.999824 - dit_step2_vt 0.979755 - dit_step2_xt 0.999524 - dit_step3_vt 0.981904 - dit_step3_xt 0.998870 - dit_step4_vt 0.980595 - dit_step4_xt 0.997413 - dit_step5_vt 0.978234 - dit_step5_xt 0.994201 - dit_step6_vt 0.976122 - dit_step6_xt 0.988245 - dit_step7_vt 0.970430 - dit_x0 0.979612 - vae_audio 0.901765 - vae_audio (log spectral) 0.999815 + enc_after_cond_emb 0.999791 + layer0_sa_output 0.999925 + hidden_after_layer0 0.999955 + hidden_after_layer6 0.999892 + hidden_after_layer12 0.999219 + hidden_after_layer18 0.996644 + hidden_after_layer23 0.993707 + dit_step0_vt 0.975605 + dit_step0_xt 0.999946 + dit_step1_vt 0.978928 + dit_step1_xt 0.999831 + dit_step2_vt 0.981129 + dit_step2_xt 0.999551 + dit_step3_vt 0.982813 + dit_step3_xt 0.998932 + dit_step4_vt 0.981292 + dit_step4_xt 0.997544 + dit_step5_vt 0.979091 + dit_step5_xt 0.994467 + dit_step6_vt 0.976152 + dit_step6_xt 0.988647 + dit_step7_vt 0.970238 + dit_x0 0.980014 + vae_audio 0.903408 + vae_audio (STFT cosine) 0.976427 [Turbo] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B - dit_step0_xt 0.999945 0.139953 0.006750 -0.002330 0.972900 -0.002342 0.972003 - dit_step1_xt 0.999824 0.274569 0.011648 -0.005344 0.942624 -0.005313 0.941730 - dit_step2_xt 0.999524 0.463327 0.018106 -0.009387 0.909150 -0.009311 0.908527 - dit_step3_xt 0.998870 0.655081 0.026657 -0.014750 0.873791 -0.014577 0.873624 - dit_step4_xt 0.997413 0.812823 0.038603 -0.021809 0.842020 -0.021660 0.841995 - dit_step5_xt 0.994201 1.259496 0.056254 -0.031893 0.825376 -0.032109 0.824593 - dit_step6_xt 0.988245 2.060305 0.082686 -0.046224 0.856386 -0.046482 0.855546 + dit_step0_xt 0.999946 0.139652 0.006645 -0.002330 0.972930 -0.002342 0.972003 + dit_step1_xt 0.999831 0.267117 0.011368 -0.005325 0.942659 -0.005313 0.941730 + dit_step2_xt 0.999551 0.452101 0.017578 -0.009369 0.909163 -0.009311 0.908527 + dit_step3_xt 0.998932 0.629880 0.025911 -0.014735 0.873792 -0.014577 0.873624 + dit_step4_xt 0.997544 0.759572 0.037583 -0.021796 0.841987 -0.021660 0.841995 + dit_step5_xt 0.994467 1.235701 0.054893 -0.031886 0.825306 -0.032109 0.824593 + dit_step6_xt 0.988647 2.096131 0.081207 -0.046181 0.856264 -0.046482 0.855546 [SFT] steps=50, shift=1.0, CFG=7.0 | acestep-v15-sft-Q8_0.gguf [GGML] Running acestep-v15-sft-Q8_0.gguf... [GGML] Done, 233 dump files @@ -65,68 +63,68 @@ Using precomputed LM hints [Python] Done, 218 dump files [SFT] Cosine similarities GGML vs Python stage GGML vs Python - text_hidden 0.999815 + text_hidden 0.999816 lyric_embed 1.000000 - enc_hidden 0.999778 + enc_hidden 0.999814 detok_output 0.999983 context 0.999990 noise 1.000000 temb_t 0.999991 hidden_after_proj_in 0.999986 - enc_after_cond_emb 0.999751 + enc_after_cond_emb 0.999795 layer0_sa_output 0.999912 - hidden_after_layer0 0.999959 - hidden_after_layer6 0.999822 + hidden_after_layer0 0.999958 + hidden_after_layer6 0.999824 hidden_after_layer12 0.999445 - hidden_after_layer18 0.998681 - hidden_after_layer23 0.998954 + hidden_after_layer18 0.998719 + hidden_after_layer23 0.998974 null_condition_emb 1.000000 null_enc_hidden 1.000000 - dit_step0_vt_cond 0.998904 - dit_step0_vt_uncond 0.998464 - dit_step0_vt 0.995434 + dit_step0_vt_cond 0.998922 + dit_step0_vt_uncond 0.998427 + dit_step0_vt 0.995455 dit_step0_xt 0.999998 - dit_step5_vt_cond 0.999451 - dit_step5_vt 0.993036 + dit_step5_vt_cond 0.999446 + dit_step5_vt 0.993188 dit_step5_xt 0.999961 - dit_step10_vt_cond 0.998730 - dit_step10_vt 0.992810 - dit_step10_xt 0.999882 - dit_step15_vt_cond 0.996688 - dit_step15_vt 0.983019 - dit_step15_xt 0.999634 - dit_step20_vt_cond 0.993629 - dit_step20_vt 0.975715 - dit_step20_xt 0.998965 - dit_step25_vt_cond 0.988889 - dit_step25_vt 0.965091 - dit_step25_xt 0.997640 - dit_step30_vt_cond 0.983564 - dit_step30_vt 0.958811 - dit_step30_xt 0.995606 - dit_step35_vt_cond 0.978723 - dit_step35_vt 0.949661 - dit_step35_xt 0.993168 - dit_step40_vt_cond 0.976039 - dit_step40_vt 0.940546 - dit_step40_xt 0.990824 - dit_step45_vt_cond 0.978812 - dit_step45_vt 0.941556 - dit_step45_xt 0.989176 - dit_step49_vt_cond 0.980998 - dit_step49_vt 0.921771 - dit_x0 0.988563 - vae_audio 0.945658 - vae_audio (log spectral) 0.999656 + dit_step10_vt_cond 0.998529 + dit_step10_vt 0.992281 + dit_step10_xt 0.999875 + dit_step15_vt_cond 0.996311 + dit_step15_vt 0.982856 + dit_step15_xt 0.999609 + dit_step20_vt_cond 0.992095 + dit_step20_vt 0.974098 + dit_step20_xt 0.998863 + dit_step25_vt_cond 0.986516 + dit_step25_vt 0.962299 + dit_step25_xt 0.997338 + dit_step30_vt_cond 0.980702 + dit_step30_vt 0.955880 + dit_step30_xt 0.995005 + dit_step35_vt_cond 0.975404 + dit_step35_vt 0.945189 + dit_step35_xt 0.992202 + dit_step40_vt_cond 0.972588 + dit_step40_vt 0.935722 + dit_step40_xt 0.989533 + dit_step45_vt_cond 0.975984 + dit_step45_vt 0.937094 + dit_step45_xt 0.987666 + dit_step49_vt_cond 0.978734 + dit_step49_vt 0.917631 + dit_x0 0.986993 + vae_audio 0.937093 + vae_audio (STFT cosine) 0.971416 [SFT] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B - dit_step0_xt 0.999998 0.038723 0.002101 -0.001709 0.980024 -0.001741 0.980402 - dit_step5_xt 0.999961 0.132626 0.006031 -0.006898 0.889062 -0.007143 0.887999 - dit_step10_xt 0.999882 0.210852 0.009250 -0.012324 0.810419 -0.012603 0.811299 - dit_step15_xt 0.999634 0.352374 0.014434 -0.017540 0.745234 -0.018114 0.745268 - dit_step20_xt 0.998965 0.572398 0.021951 -0.022850 0.699512 -0.023808 0.699582 - dit_step25_xt 0.997640 0.869255 0.031495 -0.028278 0.677795 -0.029311 0.679278 - dit_step30_xt 0.995606 1.124816 0.042978 -0.033776 0.683657 -0.035027 0.685262 - dit_step35_xt 0.993168 1.517812 0.056123 -0.039231 0.715343 -0.040716 0.717195 - dit_step40_xt 0.990824 1.969018 0.070364 -0.044647 0.768849 -0.046462 0.771853 - dit_step45_xt 0.989176 2.337864 0.084066 -0.050524 0.839939 -0.052475 0.843036 + dit_step0_xt 0.999998 0.038134 0.002096 -0.001710 0.980019 -0.001741 0.980402 + dit_step5_xt 0.999961 0.137689 0.005996 -0.006894 0.889095 -0.007143 0.887999 + dit_step10_xt 0.999875 0.219306 0.009469 -0.012337 0.810457 -0.012603 0.811299 + dit_step15_xt 0.999609 0.356501 0.014905 -0.017570 0.745282 -0.018114 0.745268 + dit_step20_xt 0.998863 0.570726 0.023002 -0.022897 0.699575 -0.023808 0.699582 + dit_step25_xt 0.997338 0.870836 0.033418 -0.028306 0.678021 -0.029311 0.679278 + dit_step30_xt 0.995005 1.126647 0.045749 -0.033772 0.683965 -0.035027 0.685262 + dit_step35_xt 0.992202 1.561250 0.059823 -0.039172 0.715848 -0.040716 0.717195 + dit_step40_xt 0.989533 1.985042 0.074909 -0.044584 0.769539 -0.046462 0.771853 + dit_step45_xt 0.987666 2.384698 0.089346 -0.050474 0.840839 -0.052475 0.843036 diff --git a/tests/Q4_K_M.log b/tests/Q4_K_M.log index 5d8df67..d76238c 100644 --- a/tests/Q4_K_M.log +++ b/tests/Q4_K_M.log @@ -41,8 +41,8 @@ Using precomputed LM hints dit_step6_xt 0.976494 dit_step7_vt 0.938658 dit_x0 0.958725 - vae_audio 0.837780 - vae_audio (STFT cosine) 0.954457 + vae_audio 0.837763 + vae_audio (STFT cosine) 0.954448 [Turbo] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999885 0.165835 0.010206 -0.002260 0.973133 -0.002342 0.972003 @@ -114,8 +114,8 @@ Using precomputed LM hints dit_step49_vt_cond 0.928566 dit_step49_vt 0.867519 dit_x0 0.947240 - vae_audio 0.830952 - vae_audio (STFT cosine) 0.926934 + vae_audio 0.830949 + vae_audio (STFT cosine) 0.926924 [SFT] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999995 0.037971 0.002890 -0.001839 0.980350 -0.001741 0.980402 diff --git a/tests/Q5_K_M.log b/tests/Q5_K_M.log index 101eda3..5989b97 100644 --- a/tests/Q5_K_M.log +++ b/tests/Q5_K_M.log @@ -41,8 +41,8 @@ Using precomputed LM hints dit_step6_xt 0.983446 dit_step7_vt 0.953383 dit_x0 0.970119 - vae_audio 0.883245 - vae_audio (STFT cosine) 0.968470 + vae_audio 0.883226 + vae_audio (STFT cosine) 0.968463 [Turbo] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999930 0.139407 0.007818 -0.002306 0.973025 -0.002342 0.972003 @@ -114,8 +114,8 @@ Using precomputed LM hints dit_step49_vt_cond 0.967216 dit_step49_vt 0.914978 dit_x0 0.977077 - vae_audio 0.891876 - vae_audio (STFT cosine) 0.946063 + vae_audio 0.891856 + vae_audio (STFT cosine) 0.946058 [SFT] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999997 0.038463 0.002320 -0.001770 0.980102 -0.001741 0.980402 diff --git a/tests/Q6_K.log b/tests/Q6_K.log index 6947437..6cd4c1c 100644 --- a/tests/Q6_K.log +++ b/tests/Q6_K.log @@ -41,8 +41,8 @@ Using precomputed LM hints dit_step6_xt 0.985862 dit_step7_vt 0.962454 dit_x0 0.974866 - vae_audio 0.893720 - vae_audio (STFT cosine) 0.969672 + vae_audio 0.893678 + vae_audio (STFT cosine) 0.969663 [Turbo] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999937 0.147590 0.007252 -0.002265 0.972930 -0.002342 0.972003 @@ -114,8 +114,8 @@ Using precomputed LM hints dit_step49_vt_cond 0.978000 dit_step49_vt 0.949509 dit_x0 0.984147 - vae_audio 0.935408 - vae_audio (STFT cosine) 0.974488 + vae_audio 0.935392 + vae_audio (STFT cosine) 0.974483 [SFT] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999997 0.038602 0.002180 -0.001744 0.980167 -0.001741 0.980402 diff --git a/tests/Q8_0.log b/tests/Q8_0.log index f9ee663..27ba118 100644 --- a/tests/Q8_0.log +++ b/tests/Q8_0.log @@ -41,8 +41,8 @@ Using precomputed LM hints dit_step6_xt 0.988641 dit_step7_vt 0.970144 dit_x0 0.979969 - vae_audio 0.905563 - vae_audio (STFT cosine) 0.976538 + vae_audio 0.905525 + vae_audio (STFT cosine) 0.976530 [Turbo] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999948 0.134961 0.006551 -0.002307 0.972901 -0.002342 0.972003 @@ -114,8 +114,8 @@ Using precomputed LM hints dit_step49_vt_cond 0.980854 dit_step49_vt 0.920647 dit_x0 0.988696 - vae_audio 0.944456 - vae_audio (STFT cosine) 0.974763 + vae_audio 0.944426 + vae_audio (STFT cosine) 0.974764 [SFT] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999998 0.038422 0.002097 -0.001714 0.980004 -0.001741 0.980402 diff --git a/tests/Vulkan_BF16.log b/tests/Vulkan_BF16.log index fd6e4cc..bd5f26b 100644 --- a/tests/Vulkan_BF16.log +++ b/tests/Vulkan_BF16.log @@ -39,10 +39,10 @@ Using precomputed LM hints dit_step5_xt 0.965846 dit_step6_vt 0.840298 dit_step6_xt 0.925771 - dit_step7_vt 0.818271 - dit_x0 0.867399 - vae_audio 0.680421 - vae_audio (STFT cosine) 0.855363 + dit_step7_vt 0.818300 + dit_x0 0.867401 + vae_audio 0.680429 + vae_audio (STFT cosine) 0.855382 [Turbo] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999946 0.135811 0.006633 -0.002316 0.972919 -0.002342 0.972003 @@ -84,47 +84,47 @@ Using precomputed LM hints dit_step0_vt_uncond 0.973704 dit_step0_vt 0.986492 dit_step0_xt 0.999992 - dit_step5_vt_cond 0.978322 - dit_step5_vt 0.907487 - dit_step5_xt 0.999306 - dit_step10_vt_cond 0.961168 - dit_step10_vt 0.886437 - dit_step10_xt 0.996597 - dit_step15_vt_cond 0.935890 - dit_step15_vt 0.834541 - dit_step15_xt 0.989364 - dit_step20_vt_cond 0.898859 - dit_step20_vt 0.796763 - dit_step20_xt 0.975134 - dit_step25_vt_cond 0.851665 - dit_step25_vt 0.742613 - dit_step25_xt 0.951668 - dit_step30_vt_cond 0.798515 - dit_step30_vt 0.683872 - dit_step30_xt 0.917806 - dit_step35_vt_cond 0.745660 - dit_step35_vt 0.634947 - dit_step35_xt 0.881355 - dit_step40_vt_cond 0.706539 - dit_step40_vt 0.590506 - dit_step40_xt 0.849819 - dit_step45_vt_cond 0.728151 - dit_step45_vt 0.601299 - dit_step45_xt 0.829105 - dit_step49_vt_cond 0.773523 - dit_step49_vt 0.651069 - dit_x0 0.821683 - vae_audio 0.531927 - vae_audio (STFT cosine) 0.733118 + dit_step5_vt_cond 0.978980 + dit_step5_vt 0.906055 + dit_step5_xt 0.999319 + dit_step10_vt_cond 0.961518 + dit_step10_vt 0.898737 + dit_step10_xt 0.996347 + dit_step15_vt_cond 0.933830 + dit_step15_vt 0.840233 + dit_step15_xt 0.988073 + dit_step20_vt_cond 0.894620 + dit_step20_vt 0.796873 + dit_step20_xt 0.970961 + dit_step25_vt_cond 0.845710 + dit_step25_vt 0.737589 + dit_step25_xt 0.943356 + dit_step30_vt_cond 0.791700 + dit_step30_vt 0.686150 + dit_step30_xt 0.906182 + dit_step35_vt_cond 0.734800 + dit_step35_vt 0.627091 + dit_step35_xt 0.866844 + dit_step40_vt_cond 0.692744 + dit_step40_vt 0.579983 + dit_step40_xt 0.832660 + dit_step45_vt_cond 0.707766 + dit_step45_vt 0.576903 + dit_step45_xt 0.809828 + dit_step49_vt_cond 0.753038 + dit_step49_vt 0.625137 + dit_x0 0.801669 + vae_audio 0.494694 + vae_audio (STFT cosine) 0.706773 [SFT] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999992 0.064200 0.003294 -0.001888 0.980082 -0.001741 0.980402 - dit_step5_xt 0.999306 0.582577 0.024216 -0.006626 0.887832 -0.007143 0.887999 - dit_step10_xt 0.996597 1.283863 0.049282 -0.010938 0.808039 -0.012603 0.811299 - dit_step15_xt 0.989364 1.034479 0.080662 -0.015519 0.732355 -0.018114 0.745268 - dit_step20_xt 0.975134 1.052949 0.116255 -0.019967 0.671876 -0.023808 0.699582 - dit_step25_xt 0.951668 1.447092 0.157337 -0.024174 0.630995 -0.029311 0.679278 - dit_step30_xt 0.917806 1.890517 0.206712 -0.028566 0.609127 -0.035027 0.685262 - dit_step35_xt 0.881355 2.346080 0.258694 -0.032873 0.611288 -0.040716 0.717195 - dit_step40_xt 0.849819 2.813365 0.311538 -0.037189 0.636890 -0.046462 0.771853 - dit_step45_xt 0.829105 3.188671 0.361659 -0.041781 0.681869 -0.052475 0.843036 + dit_step5_xt 0.999319 0.557092 0.024040 -0.006621 0.887864 -0.007143 0.887999 + dit_step10_xt 0.996347 0.965268 0.050926 -0.011718 0.806420 -0.012603 0.811299 + dit_step15_xt 0.988073 0.861492 0.085157 -0.016277 0.731584 -0.018114 0.745268 + dit_step20_xt 0.970961 1.278730 0.125264 -0.020700 0.671902 -0.023808 0.699582 + dit_step25_xt 0.943356 1.796219 0.169586 -0.025074 0.633808 -0.029311 0.679278 + dit_step30_xt 0.906182 2.190889 0.219620 -0.029769 0.614453 -0.035027 0.685262 + dit_step35_xt 0.866844 2.605400 0.272383 -0.034410 0.619164 -0.040716 0.717195 + dit_step40_xt 0.832660 3.030330 0.326889 -0.039011 0.646487 -0.046462 0.771853 + dit_step45_xt 0.809828 3.411977 0.379136 -0.043945 0.692545 -0.052475 0.843036 diff --git a/tests/Vulkan_Q4_K_M.log b/tests/Vulkan_Q4_K_M.log index 065da54..2c1b7e2 100644 --- a/tests/Vulkan_Q4_K_M.log +++ b/tests/Vulkan_Q4_K_M.log @@ -25,33 +25,33 @@ Using precomputed LM hints hidden_after_layer12 0.969957 hidden_after_layer18 0.964919 hidden_after_layer23 0.947132 - dit_step0_vt 0.790630 - dit_step0_xt 0.999550 - dit_step1_vt 0.812267 - dit_step1_xt 0.998316 - dit_step2_vt 0.797831 - dit_step2_xt 0.994983 - dit_step3_vt 0.785511 - dit_step3_xt 0.987162 - dit_step4_vt 0.777729 - dit_step4_xt 0.969915 - dit_step5_vt 0.765534 - dit_step5_xt 0.933311 - dit_step6_vt 0.748340 - dit_step6_xt 0.865701 - dit_step7_vt 0.704709 - dit_x0 0.768869 - vae_audio 0.377669 - vae_audio (STFT cosine) 0.670090 + dit_step0_vt 0.790633 + dit_step0_xt 0.999549 + dit_step1_vt 0.812278 + dit_step1_xt 0.998317 + dit_step2_vt 0.797899 + dit_step2_xt 0.994987 + dit_step3_vt 0.785709 + dit_step3_xt 0.987168 + dit_step4_vt 0.777756 + dit_step4_xt 0.969910 + dit_step5_vt 0.739552 + dit_step5_xt 0.933874 + dit_step6_vt 0.745520 + dit_step6_xt 0.867311 + dit_step7_vt 0.704124 + dit_x0 0.770712 + vae_audio 0.383362 + vae_audio (STFT cosine) 0.669931 [Turbo] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B - dit_step0_xt 0.999550 0.201120 0.022082 -0.002496 0.972768 -0.002342 0.972003 - dit_step1_xt 0.998316 0.415084 0.041258 -0.005641 0.942202 -0.005313 0.941730 - dit_step2_xt 0.994983 0.709212 0.068497 -0.010237 0.907721 -0.009311 0.908527 - dit_step3_xt 0.987162 1.070859 0.105297 -0.016408 0.870168 -0.014577 0.873624 - dit_step4_xt 0.969915 1.455907 0.155264 -0.024586 0.833838 -0.021660 0.841995 - dit_step5_xt 0.933311 1.994973 0.225859 -0.035923 0.808908 -0.032109 0.824593 - dit_step6_xt 0.865701 3.020382 0.331394 -0.051659 0.828884 -0.046482 0.855546 + dit_step0_xt 0.999549 0.201087 0.022082 -0.002495 0.972767 -0.002342 0.972003 + dit_step1_xt 0.998317 0.415437 0.041246 -0.005636 0.942205 -0.005313 0.941730 + dit_step2_xt 0.994987 0.709212 0.068458 -0.010217 0.907730 -0.009311 0.908527 + dit_step3_xt 0.987168 1.068925 0.105239 -0.016380 0.870170 -0.014577 0.873624 + dit_step4_xt 0.969910 1.456167 0.155261 -0.024550 0.833831 -0.021660 0.841995 + dit_step5_xt 0.933874 2.028250 0.225222 -0.035727 0.809987 -0.032109 0.824593 + dit_step6_xt 0.867311 3.033199 0.329427 -0.051895 0.826478 -0.046482 0.855546 [SFT] steps=50, shift=1.0, CFG=7.0 | acestep-v15-sft-Q4_K_M.gguf [GGML] Running acestep-v15-sft-Q4_K_M.gguf... [GGML] Done, 233 dump files @@ -84,47 +84,47 @@ Using precomputed LM hints dit_step0_vt_uncond 0.980925 dit_step0_vt 0.934226 dit_step0_xt 0.999962 - dit_step5_vt_cond 0.967782 - dit_step5_vt 0.910691 - dit_step5_xt 0.998853 - dit_step10_vt_cond 0.946657 - dit_step10_vt 0.867419 - dit_step10_xt 0.994730 - dit_step15_vt_cond 0.908855 - dit_step15_vt 0.812130 - dit_step15_xt 0.984236 - dit_step20_vt_cond 0.810747 - dit_step20_vt 0.648045 - dit_step20_xt 0.963921 - dit_step25_vt_cond 0.808923 - dit_step25_vt 0.710620 - dit_step25_xt 0.933694 - dit_step30_vt_cond 0.751136 - dit_step30_vt 0.650217 - dit_step30_xt 0.893337 - dit_step35_vt_cond 0.696450 - dit_step35_vt 0.597934 - dit_step35_xt 0.853079 - dit_step40_vt_cond 0.667379 - dit_step40_vt 0.573352 - dit_step40_xt 0.819961 - dit_step45_vt_cond 0.698704 - dit_step45_vt 0.604049 - dit_step45_xt 0.799520 - dit_step49_vt_cond 0.746222 - dit_step49_vt 0.659191 - dit_x0 0.793317 - vae_audio 0.446954 - vae_audio (STFT cosine) 0.722110 + dit_step5_vt_cond 0.967427 + dit_step5_vt 0.910792 + dit_step5_xt 0.998806 + dit_step10_vt_cond 0.948369 + dit_step10_vt 0.866632 + dit_step10_xt 0.994857 + dit_step15_vt_cond 0.909778 + dit_step15_vt 0.814508 + dit_step15_xt 0.984920 + dit_step20_vt_cond 0.863625 + dit_step20_vt 0.764052 + dit_step20_xt 0.965868 + dit_step25_vt_cond 0.811103 + dit_step25_vt 0.700861 + dit_step25_xt 0.937051 + dit_step30_vt_cond 0.753305 + dit_step30_vt 0.655816 + dit_step30_xt 0.899063 + dit_step35_vt_cond 0.699261 + dit_step35_vt 0.599863 + dit_step35_xt 0.859178 + dit_step40_vt_cond 0.670103 + dit_step40_vt 0.573321 + dit_step40_xt 0.825435 + dit_step45_vt_cond 0.701869 + dit_step45_vt 0.600028 + dit_step45_xt 0.803747 + dit_step49_vt_cond 0.749100 + dit_step49_vt 0.652063 + dit_x0 0.796334 + vae_audio 0.454343 + vae_audio (STFT cosine) 0.718386 [SFT] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999962 0.072923 0.006527 -0.001861 0.980234 -0.001741 0.980402 - dit_step5_xt 0.998853 0.367353 0.031474 -0.007168 0.889950 -0.007143 0.887999 - dit_step10_xt 0.994730 0.725936 0.061110 -0.013432 0.812173 -0.012603 0.811299 - dit_step15_xt 0.984236 1.160907 0.097021 -0.019428 0.748175 -0.018114 0.745268 - dit_step20_xt 0.963921 1.676288 0.138477 -0.025527 0.703600 -0.023808 0.699582 - dit_step25_xt 0.933694 2.173347 0.182544 -0.031809 0.678179 -0.029311 0.679278 - dit_step30_xt 0.893337 2.750668 0.234019 -0.038175 0.679913 -0.035027 0.685262 - dit_step35_xt 0.853079 3.282243 0.287744 -0.044405 0.706473 -0.040716 0.717195 - dit_step40_xt 0.819961 3.812806 0.342967 -0.050578 0.754828 -0.046462 0.771853 - dit_step45_xt 0.799520 4.382803 0.395693 -0.056828 0.822238 -0.052475 0.843036 + dit_step5_xt 0.998806 0.371089 0.032132 -0.007108 0.889710 -0.007143 0.887999 + dit_step10_xt 0.994857 0.721153 0.060355 -0.013425 0.811244 -0.012603 0.811299 + dit_step15_xt 0.984920 1.170655 0.094867 -0.019480 0.745370 -0.018114 0.745268 + dit_step20_xt 0.965868 1.624943 0.135007 -0.025812 0.700521 -0.023808 0.699582 + dit_step25_xt 0.937051 2.025275 0.178318 -0.032528 0.673256 -0.029311 0.679278 + dit_step30_xt 0.899063 2.555359 0.227638 -0.038874 0.670375 -0.035027 0.685262 + dit_step35_xt 0.859178 3.109559 0.281450 -0.045209 0.695123 -0.040716 0.717195 + dit_step40_xt 0.825435 3.695475 0.337125 -0.051359 0.742071 -0.046462 0.771853 + dit_step45_xt 0.803747 4.263174 0.390511 -0.057731 0.807748 -0.052475 0.843036 diff --git a/tests/Vulkan_Q5_K_M.log b/tests/Vulkan_Q5_K_M.log index 79191f9..e6ff2d6 100644 --- a/tests/Vulkan_Q5_K_M.log +++ b/tests/Vulkan_Q5_K_M.log @@ -24,34 +24,34 @@ Using precomputed LM hints hidden_after_layer6 0.996691 hidden_after_layer12 0.982345 hidden_after_layer18 0.974400 - hidden_after_layer23 0.959738 - dit_step0_vt 0.838705 + hidden_after_layer23 0.959734 + dit_step0_vt 0.838690 dit_step0_xt 0.999650 - dit_step1_vt 0.859037 - dit_step1_xt 0.998762 - dit_step2_vt 0.841190 - dit_step2_xt 0.996285 - dit_step3_vt 0.832187 - dit_step3_xt 0.990429 - dit_step4_vt 0.826908 - dit_step4_xt 0.977405 - dit_step5_vt 0.714862 - dit_step5_xt 0.944488 - dit_step6_vt 0.801939 - dit_step6_xt 0.892734 - dit_step7_vt 0.769764 - dit_x0 0.819094 - vae_audio 0.477731 - vae_audio (STFT cosine) 0.751380 + dit_step1_vt 0.854798 + dit_step1_xt 0.998726 + dit_step2_vt 0.843823 + dit_step2_xt 0.996265 + dit_step3_vt 0.832135 + dit_step3_xt 0.990412 + dit_step4_vt 0.826630 + dit_step4_xt 0.977378 + dit_step5_vt 0.824313 + dit_step5_xt 0.950549 + dit_step6_vt 0.806361 + dit_step6_xt 0.899178 + dit_step7_vt 0.774146 + dit_x0 0.825965 + vae_audio 0.488652 + vae_audio (STFT cosine) 0.756261 [Turbo] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B - dit_step0_xt 0.999650 0.235954 0.018872 -0.002255 0.973213 -0.002342 0.972003 - dit_step1_xt 0.998762 0.423025 0.034159 -0.005155 0.943015 -0.005313 0.941730 - dit_step2_xt 0.996285 0.680067 0.057040 -0.009180 0.909222 -0.009311 0.908527 - dit_step3_xt 0.990429 0.971207 0.088133 -0.014783 0.872842 -0.014577 0.873624 - dit_step4_xt 0.977405 1.460206 0.130740 -0.022196 0.838494 -0.021660 0.841995 - dit_step5_xt 0.944488 2.195178 0.201637 -0.033145 0.821886 -0.032109 0.824593 - dit_step6_xt 0.892734 3.274052 0.291172 -0.048154 0.843324 -0.046482 0.855546 + dit_step0_xt 0.999650 0.235943 0.018873 -0.002256 0.973219 -0.002342 0.972003 + dit_step1_xt 0.998726 0.436601 0.034659 -0.005174 0.942992 -0.005313 0.941730 + dit_step2_xt 0.996265 0.716827 0.057185 -0.009195 0.909263 -0.009311 0.908527 + dit_step3_xt 0.990412 0.968242 0.088230 -0.014806 0.872959 -0.014577 0.873624 + dit_step4_xt 0.977378 1.455533 0.130847 -0.022234 0.838622 -0.021660 0.841995 + dit_step5_xt 0.950549 2.134846 0.189630 -0.032763 0.816673 -0.032109 0.824593 + dit_step6_xt 0.899178 3.163587 0.280857 -0.047640 0.840933 -0.046482 0.855546 [SFT] steps=50, shift=1.0, CFG=7.0 | acestep-v15-sft-Q5_K_M.gguf [GGML] Running acestep-v15-sft-Q5_K_M.gguf... [GGML] Done, 233 dump files @@ -81,50 +81,50 @@ Using precomputed LM hints null_condition_emb 1.000000 null_enc_hidden 1.000000 dit_step0_vt_cond 0.978964 - dit_step0_vt_uncond 0.974281 - dit_step0_vt 0.937276 + dit_step0_vt_uncond 0.973976 + dit_step0_vt 0.937223 dit_step0_xt 0.999964 - dit_step5_vt_cond 0.870562 - dit_step5_vt 0.717401 - dit_step5_xt 0.998794 - dit_step10_vt_cond 0.945766 - dit_step10_vt 0.849281 - dit_step10_xt 0.994688 - dit_step15_vt_cond 0.912096 - dit_step15_vt 0.817441 - dit_step15_xt 0.984560 - dit_step20_vt_cond 0.864020 - dit_step20_vt 0.763685 - dit_step20_xt 0.966236 - dit_step25_vt_cond 0.806131 - dit_step25_vt 0.705795 - dit_step25_xt 0.937666 - dit_step30_vt_cond 0.744771 - dit_step30_vt 0.652030 - dit_step30_xt 0.900195 - dit_step35_vt_cond 0.690858 - dit_step35_vt 0.598677 - dit_step35_xt 0.862692 - dit_step40_vt_cond 0.663437 - dit_step40_vt 0.564051 - dit_step40_xt 0.830709 - dit_step45_vt_cond 0.701709 - dit_step45_vt 0.615719 - dit_step45_xt 0.811593 - dit_step49_vt_cond 0.753685 - dit_step49_vt 0.680531 - dit_x0 0.804397 - vae_audio 0.457281 - vae_audio (STFT cosine) 0.689387 + dit_step5_vt_cond 0.967160 + dit_step5_vt 0.909198 + dit_step5_xt 0.998804 + dit_step10_vt_cond 0.950415 + dit_step10_vt 0.867165 + dit_step10_xt 0.994875 + dit_step15_vt_cond 0.914609 + dit_step15_vt 0.816760 + dit_step15_xt 0.985212 + dit_step20_vt_cond 0.868346 + dit_step20_vt 0.771014 + dit_step20_xt 0.966347 + dit_step25_vt_cond 0.813828 + dit_step25_vt 0.714557 + dit_step25_xt 0.936240 + dit_step30_vt_cond 0.758857 + dit_step30_vt 0.662399 + dit_step30_xt 0.898782 + dit_step35_vt_cond 0.707135 + dit_step35_vt 0.617898 + dit_step35_xt 0.859637 + dit_step40_vt_cond 0.679574 + dit_step40_vt 0.584797 + dit_step40_xt 0.827363 + dit_step45_vt_cond 0.709869 + dit_step45_vt 0.613484 + dit_step45_xt 0.805902 + dit_step49_vt_cond 0.756478 + dit_step49_vt 0.658766 + dit_x0 0.797882 + vae_audio 0.472032 + vae_audio (STFT cosine) 0.708586 [SFT] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B - dit_step0_xt 0.999964 0.073219 0.006359 -0.001779 0.980208 -0.001741 0.980402 - dit_step5_xt 0.998794 0.375116 0.032183 -0.006513 0.890747 -0.007143 0.887999 - dit_step10_xt 0.994688 0.584900 0.061541 -0.011866 0.811651 -0.012603 0.811299 - dit_step15_xt 0.984560 1.116284 0.096649 -0.017213 0.745483 -0.018114 0.745268 - dit_step20_xt 0.966236 1.548674 0.135086 -0.022828 0.695324 -0.023808 0.699582 - dit_step25_xt 0.937666 1.932962 0.178568 -0.028552 0.666695 -0.029311 0.679278 - dit_step30_xt 0.900195 2.318772 0.227936 -0.034341 0.662972 -0.035027 0.685262 - dit_step35_xt 0.862692 2.754600 0.279195 -0.039718 0.682117 -0.040716 0.717195 - dit_step40_xt 0.830709 3.323758 0.333172 -0.045381 0.725861 -0.046462 0.771853 - dit_step45_xt 0.811593 3.819057 0.383462 -0.051060 0.786041 -0.052475 0.843036 + dit_step0_xt 0.999964 0.073235 0.006362 -0.001778 0.980214 -0.001741 0.980402 + dit_step5_xt 0.998804 0.348623 0.032282 -0.006098 0.890574 -0.007143 0.887999 + dit_step10_xt 0.994875 0.617850 0.060577 -0.011335 0.811641 -0.012603 0.811299 + dit_step15_xt 0.985212 1.165812 0.094804 -0.016284 0.748105 -0.018114 0.745268 + dit_step20_xt 0.966347 1.619635 0.134939 -0.021429 0.702593 -0.023808 0.699582 + dit_step25_xt 0.936240 2.011917 0.181224 -0.026596 0.681069 -0.029311 0.679278 + dit_step30_xt 0.898782 2.443318 0.230607 -0.031965 0.682407 -0.035027 0.685262 + dit_step35_xt 0.859637 2.917810 0.284657 -0.037104 0.710155 -0.040716 0.717195 + dit_step40_xt 0.827363 3.602165 0.340057 -0.042128 0.759737 -0.046462 0.771853 + dit_step45_xt 0.805902 4.251132 0.394434 -0.047162 0.828316 -0.052475 0.843036 diff --git a/tests/Vulkan_Q6_K.log b/tests/Vulkan_Q6_K.log index e9254f1..916944c 100644 --- a/tests/Vulkan_Q6_K.log +++ b/tests/Vulkan_Q6_K.log @@ -33,25 +33,25 @@ Using precomputed LM hints dit_step2_xt 0.998650 dit_step3_vt 0.915489 dit_step3_xt 0.996123 - dit_step4_vt 0.917296 - dit_step4_xt 0.990541 - dit_step5_vt 0.910111 - dit_step5_xt 0.977801 - dit_step6_vt 0.901129 - dit_step6_xt 0.953258 - dit_step7_vt 0.881080 - dit_x0 0.916874 - vae_audio 0.753115 - vae_audio (STFT cosine) 0.882103 + dit_step4_vt 0.916835 + dit_step4_xt 0.990527 + dit_step5_vt 0.909275 + dit_step5_xt 0.977470 + dit_step6_vt 0.899986 + dit_step6_xt 0.952353 + dit_step7_vt 0.880023 + dit_x0 0.915268 + vae_audio 0.753562 + vae_audio (STFT cosine) 0.882452 [Turbo] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999934 0.147239 0.007394 -0.002260 0.973056 -0.002342 0.972003 dit_step1_xt 0.999651 0.410402 0.017745 -0.005286 0.943565 -0.005313 0.941730 dit_step2_xt 0.998650 0.806730 0.033672 -0.009524 0.911097 -0.009311 0.908527 dit_step3_xt 0.996123 1.479887 0.054500 -0.015235 0.876469 -0.014577 0.873624 - dit_step4_xt 0.990541 2.300086 0.081685 -0.022724 0.844246 -0.021660 0.841995 - dit_step5_xt 0.977801 2.650593 0.122445 -0.033594 0.824670 -0.032109 0.824593 - dit_step6_xt 0.953258 2.730330 0.184237 -0.049089 0.850044 -0.046482 0.855546 + dit_step4_xt 0.990527 2.298363 0.081794 -0.022731 0.844225 -0.021660 0.841995 + dit_step5_xt 0.977470 3.296017 0.123177 -0.033626 0.825405 -0.032109 0.824593 + dit_step6_xt 0.952353 4.545029 0.185597 -0.049157 0.851892 -0.046482 0.855546 [SFT] steps=50, shift=1.0, CFG=7.0 | acestep-v15-sft-Q6_K.gguf [GGML] Running acestep-v15-sft-Q6_K.gguf... [GGML] Done, 233 dump files @@ -84,47 +84,47 @@ Using precomputed LM hints dit_step0_vt_uncond 0.962163 dit_step0_vt 0.981229 dit_step0_xt 0.999989 - dit_step5_vt_cond 0.980832 - dit_step5_vt 0.927888 - dit_step5_xt 0.999468 - dit_step10_vt_cond 0.955635 - dit_step10_vt 0.867645 - dit_step10_xt 0.996729 - dit_step15_vt_cond 0.897652 - dit_step15_vt 0.762297 - dit_step15_xt 0.987703 - dit_step20_vt_cond 0.812139 - dit_step20_vt 0.674413 - dit_step20_xt 0.968584 - dit_step25_vt_cond 0.720133 - dit_step25_vt 0.601775 - dit_step25_xt 0.939427 - dit_step30_vt_cond 0.642645 - dit_step30_vt 0.556864 - dit_step30_xt 0.903368 - dit_step35_vt_cond 0.597472 - dit_step35_vt 0.521858 - dit_step35_xt 0.867364 - dit_step40_vt_cond 0.605191 - dit_step40_vt 0.523006 - dit_step40_xt 0.837585 - dit_step45_vt_cond 0.683627 - dit_step45_vt 0.601542 - dit_step45_xt 0.818529 - dit_step49_vt_cond 0.757030 - dit_step49_vt 0.685266 - dit_x0 0.811821 - vae_audio 0.591982 - vae_audio (STFT cosine) 0.747614 + dit_step5_vt_cond 0.978548 + dit_step5_vt 0.903995 + dit_step5_xt 0.999251 + dit_step10_vt_cond 0.949676 + dit_step10_vt 0.866414 + dit_step10_xt 0.996103 + dit_step15_vt_cond 0.890112 + dit_step15_vt 0.755968 + dit_step15_xt 0.986117 + dit_step20_vt_cond 0.800524 + dit_step20_vt 0.668617 + dit_step20_xt 0.965883 + dit_step25_vt_cond 0.715616 + dit_step25_vt 0.707363 + dit_step25_xt 0.936566 + dit_step30_vt_cond 0.651806 + dit_step30_vt 0.573252 + dit_step30_xt 0.901106 + dit_step35_vt_cond 0.613517 + dit_step35_vt 0.548023 + dit_step35_xt 0.866538 + dit_step40_vt_cond 0.617661 + dit_step40_vt 0.531763 + dit_step40_xt 0.837556 + dit_step45_vt_cond 0.690489 + dit_step45_vt 0.608902 + dit_step45_xt 0.819015 + dit_step49_vt_cond 0.760344 + dit_step49_vt 0.689227 + dit_x0 0.812918 + vae_audio 0.596607 + vae_audio (STFT cosine) 0.752876 [SFT] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999989 0.053618 0.003814 -0.002076 0.980489 -0.001741 0.980402 - dit_step5_xt 0.999468 0.487150 0.021337 -0.008499 0.891669 -0.007143 0.887999 - dit_step10_xt 0.996729 1.227563 0.049393 -0.015713 0.820441 -0.012603 0.811299 - dit_step15_xt 0.987703 1.842653 0.092167 -0.023137 0.773637 -0.018114 0.745268 - dit_step20_xt 0.968584 2.400791 0.145872 -0.030092 0.756982 -0.023808 0.699582 - dit_step25_xt 0.939427 3.105427 0.205593 -0.036934 0.769300 -0.029311 0.679278 - dit_step30_xt 0.903368 3.847515 0.270348 -0.043779 0.808295 -0.035027 0.685262 - dit_step35_xt 0.867364 4.580956 0.337341 -0.050240 0.870802 -0.040716 0.717195 - dit_step40_xt 0.837585 5.449077 0.404176 -0.056546 0.952457 -0.046462 0.771853 - dit_step45_xt 0.818529 6.481643 0.466515 -0.062895 1.047753 -0.052475 0.843036 + dit_step5_xt 0.999251 0.748318 0.025536 -0.008766 0.893415 -0.007143 0.887999 + dit_step10_xt 0.996103 1.428011 0.054273 -0.016368 0.822729 -0.012603 0.811299 + dit_step15_xt 0.986117 2.055885 0.098667 -0.024122 0.777367 -0.018114 0.745268 + dit_step20_xt 0.965883 2.750473 0.153407 -0.031399 0.762304 -0.023808 0.699582 + dit_step25_xt 0.936566 3.458536 0.209270 -0.038856 0.768389 -0.029311 0.679278 + dit_step30_xt 0.901106 4.182745 0.271563 -0.045971 0.805686 -0.035027 0.685262 + dit_step35_xt 0.866538 4.941256 0.336049 -0.053191 0.866756 -0.040716 0.717195 + dit_step40_xt 0.837556 5.867188 0.401823 -0.059864 0.948138 -0.046462 0.771853 + dit_step45_xt 0.819015 6.961776 0.463382 -0.066566 1.043107 -0.052475 0.843036 diff --git a/tests/Vulkan_Q8_0.log b/tests/Vulkan_Q8_0.log index 2d981f1..9262047 100644 --- a/tests/Vulkan_Q8_0.log +++ b/tests/Vulkan_Q8_0.log @@ -27,31 +27,31 @@ Using precomputed LM hints hidden_after_layer23 0.993183 dit_step0_vt 0.973885 dit_step0_xt 0.999943 - dit_step1_vt 0.914787 - dit_step1_xt 0.999631 - dit_step2_vt 0.911658 - dit_step2_xt 0.998534 - dit_step3_vt 0.912117 - dit_step3_xt 0.995827 - dit_step4_vt 0.905709 - dit_step4_xt 0.989441 - dit_step5_vt 0.896027 - dit_step5_xt 0.974507 - dit_step6_vt 0.885453 - dit_step6_xt 0.945533 - dit_step7_vt 0.869020 - dit_x0 0.904391 - vae_audio 0.742908 - vae_audio (STFT cosine) 0.897333 + dit_step1_vt 0.915468 + dit_step1_xt 0.999633 + dit_step2_vt 0.912211 + dit_step2_xt 0.998544 + dit_step3_vt 0.912707 + dit_step3_xt 0.995860 + dit_step4_vt 0.906019 + dit_step4_xt 0.989505 + dit_step5_vt 0.896537 + dit_step5_xt 0.974659 + dit_step6_vt 0.886047 + dit_step6_xt 0.945866 + dit_step7_vt 0.869793 + dit_x0 0.905017 + vae_audio 0.746037 + vae_audio (STFT cosine) 0.898352 [Turbo] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999943 0.140034 0.006943 -0.002318 0.973036 -0.002342 0.972003 - dit_step1_xt 0.999631 0.423617 0.018111 -0.005257 0.943022 -0.005313 0.941730 - dit_step2_xt 0.998534 0.842476 0.034652 -0.009211 0.910288 -0.009311 0.908527 - dit_step3_xt 0.995827 1.522163 0.055911 -0.014627 0.875179 -0.014577 0.873624 - dit_step4_xt 0.989441 2.346339 0.085700 -0.021794 0.842353 -0.021660 0.841995 - dit_step5_xt 0.974507 3.387154 0.131251 -0.032210 0.822385 -0.032109 0.824593 - dit_step6_xt 0.945533 4.810903 0.200447 -0.047267 0.846776 -0.046482 0.855546 + dit_step1_xt 0.999633 0.423125 0.018056 -0.005257 0.943026 -0.005313 0.941730 + dit_step2_xt 0.998544 0.841908 0.034537 -0.009209 0.910286 -0.009311 0.908527 + dit_step3_xt 0.995860 1.521911 0.055719 -0.014626 0.875169 -0.014577 0.873624 + dit_step4_xt 0.989505 2.346452 0.085477 -0.021803 0.842334 -0.021660 0.841995 + dit_step5_xt 0.974659 3.387389 0.130921 -0.032225 0.822365 -0.032109 0.824593 + dit_step6_xt 0.945866 4.812943 0.199910 -0.047290 0.846751 -0.046482 0.855546 [SFT] steps=50, shift=1.0, CFG=7.0 | acestep-v15-sft-Q8_0.gguf [GGML] Running acestep-v15-sft-Q8_0.gguf... [GGML] Done, 233 dump files @@ -84,47 +84,47 @@ Using precomputed LM hints dit_step0_vt_uncond 0.964971 dit_step0_vt 0.982622 dit_step0_xt 0.999990 - dit_step5_vt_cond 0.975690 - dit_step5_vt 0.894165 - dit_step5_xt 0.999156 - dit_step10_vt_cond 0.947326 - dit_step10_vt 0.837964 - dit_step10_xt 0.996080 - dit_step15_vt_cond 0.888846 - dit_step15_vt 0.758028 - dit_step15_xt 0.986787 - dit_step20_vt_cond 0.806829 - dit_step20_vt 0.772109 - dit_step20_xt 0.968575 - dit_step25_vt_cond 0.738761 - dit_step25_vt 0.607592 - dit_step25_xt 0.941107 - dit_step30_vt_cond 0.679957 - dit_step30_vt 0.608037 - dit_step30_xt 0.907446 - dit_step35_vt_cond 0.642501 - dit_step35_vt 0.558733 - dit_step35_xt 0.873485 - dit_step40_vt_cond 0.639589 - dit_step40_vt 0.546906 - dit_step40_xt 0.845121 - dit_step45_vt_cond 0.704781 - dit_step45_vt 0.616522 - dit_step45_xt 0.826863 - dit_step49_vt_cond 0.769364 - dit_step49_vt 0.691657 - dit_x0 0.820339 - vae_audio 0.580466 - vae_audio (STFT cosine) 0.722894 + dit_step5_vt_cond 0.978187 + dit_step5_vt 0.910806 + dit_step5_xt 0.999338 + dit_step10_vt_cond 0.948119 + dit_step10_vt 0.856732 + dit_step10_xt 0.996258 + dit_step15_vt_cond 0.885149 + dit_step15_vt 0.741011 + dit_step15_xt 0.986353 + dit_step20_vt_cond 0.792343 + dit_step20_vt 0.735701 + dit_step20_xt 0.966995 + dit_step25_vt_cond 0.713669 + dit_step25_vt 0.604646 + dit_step25_xt 0.937523 + dit_step30_vt_cond 0.654759 + dit_step30_vt 0.575313 + dit_step30_xt 0.901384 + dit_step35_vt_cond 0.616330 + dit_step35_vt 0.533322 + dit_step35_xt 0.865098 + dit_step40_vt_cond 0.615497 + dit_step40_vt 0.525598 + dit_step40_xt 0.834978 + dit_step45_vt_cond 0.687607 + dit_step45_vt 0.600947 + dit_step45_xt 0.816193 + dit_step49_vt_cond 0.757023 + dit_step49_vt 0.678778 + dit_x0 0.809822 + vae_audio 0.552742 + vae_audio (STFT cosine) 0.704247 [SFT] Error growth GGML vs Python stage cos max_err mean_err mean_A std_A mean_B std_B dit_step0_xt 0.999990 0.053120 0.003694 -0.002028 0.980340 -0.001741 0.980402 - dit_step5_xt 0.999156 0.748809 0.027232 -0.008196 0.893925 -0.007143 0.887999 - dit_step10_xt 0.996080 1.270393 0.054585 -0.015100 0.823065 -0.012603 0.811299 - dit_step15_xt 0.986787 1.873581 0.096876 -0.021630 0.778746 -0.018114 0.745268 - dit_step20_xt 0.968575 2.677009 0.145646 -0.028226 0.757788 -0.023808 0.699582 - dit_step25_xt 0.941107 3.356963 0.201194 -0.034512 0.767431 -0.029311 0.679278 - dit_step30_xt 0.907446 4.082834 0.260819 -0.040761 0.802227 -0.035027 0.685262 - dit_step35_xt 0.873485 4.781623 0.322767 -0.046876 0.860549 -0.040716 0.717195 - dit_step40_xt 0.845121 5.661494 0.385688 -0.052923 0.939761 -0.046462 0.771853 - dit_step45_xt 0.826863 6.725895 0.444910 -0.058926 1.033490 -0.052475 0.843036 + dit_step5_xt 0.999338 0.528079 0.024091 -0.008167 0.891761 -0.007143 0.887999 + dit_step10_xt 0.996258 1.260570 0.054251 -0.014905 0.821682 -0.012603 0.811299 + dit_step15_xt 0.986353 1.896362 0.099359 -0.021353 0.777987 -0.018114 0.745268 + dit_step20_xt 0.966995 2.558488 0.150921 -0.027607 0.759790 -0.023808 0.699582 + dit_step25_xt 0.937523 3.268598 0.209264 -0.033645 0.770984 -0.029311 0.679278 + dit_step30_xt 0.901384 3.973653 0.271000 -0.039796 0.805477 -0.035027 0.685262 + dit_step35_xt 0.865098 4.656569 0.335194 -0.045754 0.864460 -0.040716 0.717195 + dit_step40_xt 0.834978 5.519352 0.400309 -0.051630 0.944399 -0.046462 0.771853 + dit_step45_xt 0.816193 6.556623 0.460383 -0.057408 1.036260 -0.052475 0.843036