From a540a5fd10b2ba661161aa5c763a8c6822bca4a1 Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Sun, 15 Mar 2026 20:45:00 +0100 Subject: [PATCH] fix simul-kv audio trim bug, add 1.7B v2 alignment heads --- .../alignment_heads_qwen3_asr_1.7B_v2.json | 3292 +++++++++++++++++ whisperlivekit/qwen3_simul_kv.py | 7 +- 2 files changed, 3297 insertions(+), 2 deletions(-) create mode 100644 scripts/alignment_heads_qwen3_asr_1.7B_v2.json diff --git a/scripts/alignment_heads_qwen3_asr_1.7B_v2.json b/scripts/alignment_heads_qwen3_asr_1.7B_v2.json new file mode 100644 index 0000000..e1915d9 --- /dev/null +++ b/scripts/alignment_heads_qwen3_asr_1.7B_v2.json @@ -0,0 +1,3292 @@ +{ + "model": "Qwen/Qwen3-ASR-1.7B", + "language": "English", + "num_layers": 28, + "num_heads": 16, + "num_kv_heads": 8, + "num_samples": 100, + "total_alignable_tokens": 2020, + "ts_threshold": 0.1, + "ts_matrix": [ + [ + 0.06930693069306931, + 0.08762376237623762, + 0.09207920792079208, + 0.10198019801980197, + 0.03811881188118812, + 0.06584158415841584, + 0.020792079207920793, + 0.055445544554455446, + 0.020297029702970298, + 0.061386138613861385, + 0.13514851485148516, + 0.13415841584158417, + 0.031188118811881188, + 0.024752475247524754, + 0.0504950495049505, + 0.03861386138613861 + ], + [ + 0.1400990099009901, + 0.12623762376237624, + 0.07277227722772277, + 0.12227722772277227, + 0.04603960396039604, + 0.024257425742574258, + 0.04554455445544554, + 0.04801980198019802, + 0.4376237623762376, + 0.03712871287128713, + 0.04504950495049505, + 0.02920792079207921, + 0.015841584158415842, + 0.04801980198019802, + 0.15, + 0.5396039603960396 + ], + [ + 0.08514851485148515, + 0.05297029702970297, + 0.30594059405940593, + 0.7336633663366336, + 0.04356435643564356, + 0.03415841584158416, + 0.1707920792079208, + 0.03861386138613861, + 0.37475247524752475, + 0.05495049504950495, + 0.7242574257425742, + 0.748019801980198, + 0.07227722772277227, + 0.06980198019801981, + 0.33564356435643566, + 0.04950495049504951 + ], + [ + 0.053465346534653464, + 0.10396039603960396, + 0.15, + 0.1613861386138614, + 0.26683168316831685, + 0.0797029702970297, + 0.06683168316831684, + 0.03910891089108911, + 0.10643564356435643, + 0.07871287128712871, + 0.7623762376237624, + 0.6787128712871288, + 0.1, + 0.3405940594059406, + 0.20643564356435642, + 0.1797029702970297 + ], + [ + 0.03514851485148515, + 0.03712871287128713, + 0.10841584158415841, + 0.08415841584158416, + 0.10445544554455445, + 0.05297029702970297, + 0.030198019801980197, + 0.08613861386138613, + 0.11683168316831684, + 0.07475247524752475, + 0.41237623762376235, + 0.21386138613861386, + 0.4915841584158416, + 0.7183168316831683, + 0.07821782178217822, + 0.2876237623762376 + ], + [ + 0.11584158415841585, + 0.11386138613861387, + 0.05297029702970297, + 0.04504950495049505, + 0.12376237623762376, + 0.2698019801980198, + 0.11584158415841585, + 0.12871287128712872, + 0.1311881188118812, + 0.12079207920792079, + 0.48366336633663365, + 0.11534653465346535, + 0.04356435643564356, + 0.03415841584158416, + 0.1297029702970297, + 0.34405940594059403 + ], + [ + 0.2693069306930693, + 0.22772277227722773, + 0.05148514851485148, + 0.11386138613861387, + 0.19752475247524753, + 0.14257425742574256, + 0.7980198019801981, + 0.7945544554455446, + 0.019306930693069307, + 0.2524752475247525, + 0.7801980198019802, + 0.7579207920792079, + 0.7188118811881188, + 0.755940594059406, + 0.18465346534653465, + 0.14504950495049504 + ], + [ + 0.08762376237623762, + 0.03217821782178218, + 0.06435643564356436, + 0.07376237623762376, + 0.33861386138613864, + 0.17227722772277226, + 0.18762376237623762, + 0.05297029702970297, + 0.06584158415841584, + 0.030693069306930693, + 0.11485148514851486, + 0.13514851485148516, + 0.14356435643564355, + 0.23613861386138613, + 0.14504950495049504, + 0.09356435643564356 + ], + [ + 0.11287128712871287, + 0.6831683168316832, + 0.11485148514851486, + 0.053465346534653464, + 0.6737623762376238, + 0.3811881188118812, + 0.2693069306930693, + 0.31633663366336634, + 0.060396039603960394, + 0.09554455445544555, + 0.19603960396039605, + 0.16435643564356436, + 0.09702970297029703, + 0.20396039603960395, + 0.3193069306930693, + 0.09900990099009901 + ], + [ + 0.2, + 0.04455445544554455, + 0.17425742574257425, + 0.0702970297029703, + 0.19752475247524753, + 0.20445544554455444, + 0.026732673267326732, + 0.18267326732673267, + 0.25594059405940595, + 0.250990099009901, + 0.17722772277227722, + 0.08613861386138613, + 0.5618811881188119, + 0.44504950495049506, + 0.0594059405940594, + 0.008415841584158416 + ], + [ + 0.031188118811881188, + 0.02128712871287129, + 0.2193069306930693, + 0.2905940594059406, + 0.1915841584158416, + 0.3608910891089109, + 0.019306930693069307, + 0.032673267326732675, + 0.1311881188118812, + 0.15495049504950495, + 0.08168316831683169, + 0.1702970297029703, + 0.10297029702970296, + 0.1405940594059406, + 0.04108910891089109, + 0.08514851485148515 + ], + [ + 0.20495049504950494, + 0.25792079207920793, + 0.8356435643564356, + 0.7930693069306931, + 0.1301980198019802, + 0.19603960396039605, + 0.1910891089108911, + 0.29158415841584157, + 0.21188118811881188, + 0.09851485148514852, + 0.33960396039603963, + 0.6851485148514852, + 0.801980198019802, + 0.8272277227722772, + 0.04257425742574258, + 0.09653465346534654 + ], + [ + 0.42277227722772276, + 0.43316831683168316, + 0.17524752475247524, + 0.27574257425742577, + 0.07821782178217822, + 0.1405940594059406, + 0.6059405940594059, + 0.08316831683168317, + 0.38811881188118813, + 0.12079207920792079, + 0.2613861386138614, + 0.0297029702970297, + 0.1787128712871287, + 0.13217821782178218, + 0.24257425742574257, + 0.20594059405940593 + ], + [ + 0.7118811881188118, + 0.22722772277227724, + 0.2306930693069307, + 0.17376237623762375, + 0.45742574257425744, + 0.13910891089108912, + 0.5450495049504951, + 0.5905940594059406, + 0.034653465346534656, + 0.05841584158415842, + 0.1193069306930693, + 0.7569306930693069, + 0.020297029702970298, + 0.02821782178217822, + 0.6861386138613862, + 0.5564356435643565 + ], + [ + 0.31584158415841584, + 0.10594059405940594, + 0.19851485148514852, + 0.09108910891089109, + 0.031188118811881188, + 0.25, + 0.22326732673267327, + 0.16534653465346535, + 0.05693069306930693, + 0.0797029702970297, + 0.24207920792079207, + 0.27623762376237626, + 0.4910891089108911, + 0.25742574257425743, + 0.804950495049505, + 0.8163366336633663 + ], + [ + 0.14257425742574256, + 0.2316831683168317, + 0.22821782178217823, + 0.13564356435643565, + 0.19752475247524753, + 0.2202970297029703, + 0.2400990099009901, + 0.1311881188118812, + 0.024752475247524754, + 0.16980198019801981, + 0.39752475247524754, + 0.12623762376237624, + 0.0400990099009901, + 0.031683168316831684, + 0.17574257425742573, + 0.13663366336633664 + ], + [ + 0.0400990099009901, + 0.04603960396039604, + 0.10297029702970296, + 0.1792079207920792, + 0.12821782178217822, + 0.11732673267326732, + 0.21732673267326733, + 0.5603960396039604, + 0.2717821782178218, + 0.3212871287128713, + 0.7108910891089109, + 0.6034653465346534, + 0.4024752475247525, + 0.5227722772277228, + 0.8138613861386138, + 0.7400990099009901 + ], + [ + 0.346039603960396, + 0.35, + 0.2717821782178218, + 0.23465346534653464, + 0.07623762376237624, + 0.03762376237623762, + 0.03663366336633663, + 0.10594059405940594, + 0.4212871287128713, + 0.3123762376237624, + 0.30495049504950494, + 0.2376237623762376, + 0.30495049504950494, + 0.45, + 0.13366336633663367, + 0.09603960396039604 + ], + [ + 0.040594059405940595, + 0.04504950495049505, + 0.45742574257425744, + 0.695049504950495, + 0.31287128712871287, + 0.7267326732673267, + 0.22623762376237624, + 0.1806930693069307, + 0.10792079207920792, + 0.08168316831683169, + 0.4321782178217822, + 0.2376237623762376, + 0.04207920792079208, + 0.2584158415841584, + 0.0896039603960396, + 0.2396039603960396 + ], + [ + 0.16485148514851486, + 0.22772277227722773, + 0.39752475247524754, + 0.6272277227722772, + 0.49306930693069306, + 0.7024752475247524, + 0.20396039603960395, + 0.7663366336633664, + 0.4871287128712871, + 0.5792079207920792, + 0.062376237623762376, + 0.08118811881188119, + 0.43613861386138614, + 0.4524752475247525, + 0.020297029702970298, + 0.03712871287128713 + ], + [ + 0.07574257425742574, + 0.10247524752475247, + 0.17524752475247524, + 0.8257425742574257, + 0.43316831683168316, + 0.1504950495049505, + 0.4495049504950495, + 0.5752475247524752, + 0.3806930693069307, + 0.0504950495049505, + 0.553960396039604, + 0.650990099009901, + 0.3801980198019802, + 0.1915841584158416, + 0.699009900990099, + 0.6415841584158416 + ], + [ + 0.6039603960396039, + 0.5702970297029702, + 0.11534653465346535, + 0.06435643564356436, + 0.3014851485148515, + 0.10445544554455445, + 0.24356435643564356, + 0.1618811881188119, + 0.7831683168316832, + 0.8014851485148515, + 0.2851485148514851, + 0.4153465346534653, + 0.593069306930693, + 0.47128712871287126, + 0.39455445544554457, + 0.4420792079207921 + ], + [ + 0.21633663366336633, + 0.3188118811881188, + 0.10148514851485149, + 0.04356435643564356, + 0.35148514851485146, + 0.2727722772277228, + 0.3103960396039604, + 0.7054455445544554, + 0.6391089108910891, + 0.6767326732673268, + 0.27673267326732676, + 0.2965346534653465, + 0.6638613861386139, + 0.5861386138613861, + 0.25693069306930694, + 0.09504950495049505 + ], + [ + 0.5618811881188119, + 0.5797029702970297, + 0.053465346534653464, + 0.06831683168316832, + 0.6648514851485149, + 0.26683168316831685, + 0.3183168316831683, + 0.6861386138613862, + 0.0504950495049505, + 0.1292079207920792, + 0.4089108910891089, + 0.3410891089108911, + 0.2376237623762376, + 0.1297029702970297, + 0.6871287128712872, + 0.6801980198019802 + ], + [ + 0.0400990099009901, + 0.028712871287128714, + 0.05841584158415842, + 0.09504950495049505, + 0.349009900990099, + 0.6831683168316832, + 0.6118811881188119, + 0.6712871287128713, + 0.06534653465346535, + 0.05495049504950495, + 0.6074257425742574, + 0.6435643564356436, + 0.651980198019802, + 0.6544554455445545, + 0.6821782178217822, + 0.6737623762376238 + ], + [ + 0.10693069306930693, + 0.08217821782178218, + 0.03217821782178218, + 0.05742574257425743, + 0.6292079207920792, + 0.697029702970297, + 0.6485148514851485, + 0.656930693069307, + 0.3227722772277228, + 0.5524752475247525, + 0.6331683168316832, + 0.6633663366336634, + 0.6485148514851485, + 0.6460396039603961, + 0.6341584158415842, + 0.32772277227722774 + ], + [ + 0.4623762376237624, + 0.3207920792079208, + 0.6514851485148515, + 0.6306930693069307, + 0.6292079207920792, + 0.5876237623762376, + 0.16534653465346535, + 0.5935643564356435, + 0.6673267326732674, + 0.25594059405940595, + 0.027722772277227723, + 0.14603960396039603, + 0.053465346534653464, + 0.05099009900990099, + 0.6277227722772277, + 0.5801980198019802 + ], + [ + 0.09851485148514852, + 0.1004950495049505, + 0.09207920792079208, + 0.09702970297029703, + 0.08762376237623762, + 0.06633663366336634, + 0.16287128712871288, + 0.10297029702970296, + 0.033663366336633666, + 0.07425742574257425, + 0.10742574257425742, + 0.10792079207920792, + 0.048514851485148516, + 0.07524752475247524, + 0.10742574257425742, + 0.09158415841584158 + ] + ], + "alignment_heads": [ + { + "layer": 11, + "head": 2, + "ts": 0.8356 + }, + { + "layer": 11, + "head": 13, + "ts": 0.8272 + }, + { + "layer": 20, + "head": 3, + "ts": 0.8257 + }, + { + "layer": 14, + "head": 15, + "ts": 0.8163 + }, + { + "layer": 16, + "head": 14, + "ts": 0.8139 + }, + { + "layer": 14, + "head": 14, + "ts": 0.805 + }, + { + "layer": 11, + "head": 12, + "ts": 0.802 + }, + { + "layer": 21, + "head": 9, + "ts": 0.8015 + }, + { + "layer": 6, + "head": 6, + "ts": 0.798 + }, + { + "layer": 6, + "head": 7, + "ts": 0.7946 + }, + { + "layer": 11, + "head": 3, + "ts": 0.7931 + }, + { + "layer": 21, + "head": 8, + "ts": 0.7832 + }, + { + "layer": 6, + "head": 10, + "ts": 0.7802 + }, + { + "layer": 19, + "head": 7, + "ts": 0.7663 + }, + { + "layer": 3, + "head": 10, + "ts": 0.7624 + }, + { + "layer": 6, + "head": 11, + "ts": 0.7579 + }, + { + "layer": 13, + "head": 11, + "ts": 0.7569 + }, + { + "layer": 6, + "head": 13, + "ts": 0.7559 + }, + { + "layer": 2, + "head": 11, + "ts": 0.748 + }, + { + "layer": 16, + "head": 15, + "ts": 0.7401 + }, + { + "layer": 2, + "head": 3, + "ts": 0.7337 + }, + { + "layer": 18, + "head": 5, + "ts": 0.7267 + }, + { + "layer": 2, + "head": 10, + "ts": 0.7243 + }, + { + "layer": 6, + "head": 12, + "ts": 0.7188 + }, + { + "layer": 4, + "head": 13, + "ts": 0.7183 + }, + { + "layer": 13, + "head": 0, + "ts": 0.7119 + }, + { + "layer": 16, + "head": 10, + "ts": 0.7109 + }, + { + "layer": 22, + "head": 7, + "ts": 0.7054 + }, + { + "layer": 19, + "head": 5, + "ts": 0.7025 + }, + { + "layer": 20, + "head": 14, + "ts": 0.699 + }, + { + "layer": 25, + "head": 5, + "ts": 0.697 + }, + { + "layer": 18, + "head": 3, + "ts": 0.695 + }, + { + "layer": 23, + "head": 14, + "ts": 0.6871 + }, + { + "layer": 13, + "head": 14, + "ts": 0.6861 + }, + { + "layer": 23, + "head": 7, + "ts": 0.6861 + }, + { + "layer": 11, + "head": 11, + "ts": 0.6851 + }, + { + "layer": 8, + "head": 1, + "ts": 0.6832 + }, + { + "layer": 24, + "head": 5, + "ts": 0.6832 + }, + { + "layer": 24, + "head": 14, + "ts": 0.6822 + }, + { + "layer": 23, + "head": 15, + "ts": 0.6802 + }, + { + "layer": 3, + "head": 11, + "ts": 0.6787 + }, + { + "layer": 22, + "head": 9, + "ts": 0.6767 + }, + { + "layer": 8, + "head": 4, + "ts": 0.6738 + }, + { + "layer": 24, + "head": 15, + "ts": 0.6738 + }, + { + "layer": 24, + "head": 7, + "ts": 0.6713 + }, + { + "layer": 26, + "head": 8, + "ts": 0.6673 + }, + { + "layer": 23, + "head": 4, + "ts": 0.6649 + }, + { + "layer": 22, + "head": 12, + "ts": 0.6639 + }, + { + "layer": 25, + "head": 11, + "ts": 0.6634 + }, + { + "layer": 25, + "head": 7, + "ts": 0.6569 + }, + { + "layer": 24, + "head": 13, + "ts": 0.6545 + }, + { + "layer": 24, + "head": 12, + "ts": 0.652 + }, + { + "layer": 26, + "head": 2, + "ts": 0.6515 + }, + { + "layer": 20, + "head": 11, + "ts": 0.651 + }, + { + "layer": 25, + "head": 6, + "ts": 0.6485 + }, + { + "layer": 25, + "head": 12, + "ts": 0.6485 + }, + { + "layer": 25, + "head": 13, + "ts": 0.646 + }, + { + "layer": 24, + "head": 11, + "ts": 0.6436 + }, + { + "layer": 20, + "head": 15, + "ts": 0.6416 + }, + { + "layer": 22, + "head": 8, + "ts": 0.6391 + }, + { + "layer": 25, + "head": 14, + "ts": 0.6342 + }, + { + "layer": 25, + "head": 10, + "ts": 0.6332 + }, + { + "layer": 26, + "head": 3, + "ts": 0.6307 + }, + { + "layer": 25, + "head": 4, + "ts": 0.6292 + }, + { + "layer": 26, + "head": 4, + "ts": 0.6292 + }, + { + "layer": 26, + "head": 14, + "ts": 0.6277 + }, + { + "layer": 19, + "head": 3, + "ts": 0.6272 + }, + { + "layer": 24, + "head": 6, + "ts": 0.6119 + }, + { + "layer": 24, + "head": 10, + "ts": 0.6074 + }, + { + "layer": 12, + "head": 6, + "ts": 0.6059 + }, + { + "layer": 21, + "head": 0, + "ts": 0.604 + }, + { + "layer": 16, + "head": 11, + "ts": 0.6035 + }, + { + "layer": 26, + "head": 7, + "ts": 0.5936 + }, + { + "layer": 21, + "head": 12, + "ts": 0.5931 + }, + { + "layer": 13, + "head": 7, + "ts": 0.5906 + }, + { + "layer": 26, + "head": 5, + "ts": 0.5876 + }, + { + "layer": 22, + "head": 13, + "ts": 0.5861 + }, + { + "layer": 26, + "head": 15, + "ts": 0.5802 + }, + { + "layer": 23, + "head": 1, + "ts": 0.5797 + }, + { + "layer": 19, + "head": 9, + "ts": 0.5792 + }, + { + "layer": 20, + "head": 7, + "ts": 0.5752 + }, + { + "layer": 21, + "head": 1, + "ts": 0.5703 + }, + { + "layer": 9, + "head": 12, + "ts": 0.5619 + }, + { + "layer": 23, + "head": 0, + "ts": 0.5619 + }, + { + "layer": 16, + "head": 7, + "ts": 0.5604 + }, + { + "layer": 13, + "head": 15, + "ts": 0.5564 + }, + { + "layer": 20, + "head": 10, + "ts": 0.554 + }, + { + "layer": 25, + "head": 9, + "ts": 0.5525 + }, + { + "layer": 13, + "head": 6, + "ts": 0.545 + }, + { + "layer": 1, + "head": 15, + "ts": 0.5396 + }, + { + "layer": 16, + "head": 13, + "ts": 0.5228 + }, + { + "layer": 19, + "head": 4, + "ts": 0.4931 + }, + { + "layer": 4, + "head": 12, + "ts": 0.4916 + }, + { + "layer": 14, + "head": 12, + "ts": 0.4911 + }, + { + "layer": 19, + "head": 8, + "ts": 0.4871 + }, + { + "layer": 5, + "head": 10, + "ts": 0.4837 + }, + { + "layer": 21, + "head": 13, + "ts": 0.4713 + }, + { + "layer": 26, + "head": 0, + "ts": 0.4624 + }, + { + "layer": 13, + "head": 4, + "ts": 0.4574 + }, + { + "layer": 18, + "head": 2, + "ts": 0.4574 + }, + { + "layer": 19, + "head": 13, + "ts": 0.4525 + }, + { + "layer": 17, + "head": 13, + "ts": 0.45 + }, + { + "layer": 20, + "head": 6, + "ts": 0.4495 + }, + { + "layer": 9, + "head": 13, + "ts": 0.445 + }, + { + "layer": 21, + "head": 15, + "ts": 0.4421 + }, + { + "layer": 1, + "head": 8, + "ts": 0.4376 + }, + { + "layer": 19, + "head": 12, + "ts": 0.4361 + }, + { + "layer": 12, + "head": 1, + "ts": 0.4332 + }, + { + "layer": 20, + "head": 4, + "ts": 0.4332 + }, + { + "layer": 18, + "head": 10, + "ts": 0.4322 + }, + { + "layer": 12, + "head": 0, + "ts": 0.4228 + }, + { + "layer": 17, + "head": 8, + "ts": 0.4213 + }, + { + "layer": 21, + "head": 11, + "ts": 0.4153 + }, + { + "layer": 4, + "head": 10, + "ts": 0.4124 + }, + { + "layer": 23, + "head": 10, + "ts": 0.4089 + }, + { + "layer": 16, + "head": 12, + "ts": 0.4025 + }, + { + "layer": 15, + "head": 10, + "ts": 0.3975 + }, + { + "layer": 19, + "head": 2, + "ts": 0.3975 + }, + { + "layer": 21, + "head": 14, + "ts": 0.3946 + }, + { + "layer": 12, + "head": 8, + "ts": 0.3881 + }, + { + "layer": 8, + "head": 5, + "ts": 0.3812 + }, + { + "layer": 20, + "head": 8, + "ts": 0.3807 + }, + { + "layer": 20, + "head": 12, + "ts": 0.3802 + }, + { + "layer": 2, + "head": 8, + "ts": 0.3748 + }, + { + "layer": 10, + "head": 5, + "ts": 0.3609 + }, + { + "layer": 22, + "head": 4, + "ts": 0.3515 + }, + { + "layer": 17, + "head": 1, + "ts": 0.35 + }, + { + "layer": 24, + "head": 4, + "ts": 0.349 + }, + { + "layer": 17, + "head": 0, + "ts": 0.346 + }, + { + "layer": 5, + "head": 15, + "ts": 0.3441 + }, + { + "layer": 23, + "head": 11, + "ts": 0.3411 + }, + { + "layer": 3, + "head": 13, + "ts": 0.3406 + }, + { + "layer": 11, + "head": 10, + "ts": 0.3396 + }, + { + "layer": 7, + "head": 4, + "ts": 0.3386 + }, + { + "layer": 2, + "head": 14, + "ts": 0.3356 + }, + { + "layer": 25, + "head": 15, + "ts": 0.3277 + }, + { + "layer": 25, + "head": 8, + "ts": 0.3228 + }, + { + "layer": 16, + "head": 9, + "ts": 0.3213 + }, + { + "layer": 26, + "head": 1, + "ts": 0.3208 + }, + { + "layer": 8, + "head": 14, + "ts": 0.3193 + }, + { + "layer": 22, + "head": 1, + "ts": 0.3188 + }, + { + "layer": 23, + "head": 6, + "ts": 0.3183 + }, + { + "layer": 8, + "head": 7, + "ts": 0.3163 + }, + { + "layer": 14, + "head": 0, + "ts": 0.3158 + }, + { + "layer": 18, + "head": 4, + "ts": 0.3129 + }, + { + "layer": 17, + "head": 9, + "ts": 0.3124 + }, + { + "layer": 22, + "head": 6, + "ts": 0.3104 + }, + { + "layer": 2, + "head": 2, + "ts": 0.3059 + }, + { + "layer": 17, + "head": 10, + "ts": 0.305 + }, + { + "layer": 17, + "head": 12, + "ts": 0.305 + }, + { + "layer": 21, + "head": 4, + "ts": 0.3015 + }, + { + "layer": 22, + "head": 11, + "ts": 0.2965 + }, + { + "layer": 11, + "head": 7, + "ts": 0.2916 + }, + { + "layer": 10, + "head": 3, + "ts": 0.2906 + }, + { + "layer": 4, + "head": 15, + "ts": 0.2876 + }, + { + "layer": 21, + "head": 10, + "ts": 0.2851 + }, + { + "layer": 22, + "head": 10, + "ts": 0.2767 + }, + { + "layer": 14, + "head": 11, + "ts": 0.2762 + }, + { + "layer": 12, + "head": 3, + "ts": 0.2757 + }, + { + "layer": 22, + "head": 5, + "ts": 0.2728 + }, + { + "layer": 16, + "head": 8, + "ts": 0.2718 + }, + { + "layer": 17, + "head": 2, + "ts": 0.2718 + }, + { + "layer": 5, + "head": 5, + "ts": 0.2698 + }, + { + "layer": 6, + "head": 0, + "ts": 0.2693 + }, + { + "layer": 8, + "head": 6, + "ts": 0.2693 + }, + { + "layer": 3, + "head": 4, + "ts": 0.2668 + }, + { + "layer": 23, + "head": 5, + "ts": 0.2668 + }, + { + "layer": 12, + "head": 10, + "ts": 0.2614 + }, + { + "layer": 18, + "head": 13, + "ts": 0.2584 + }, + { + "layer": 11, + "head": 1, + "ts": 0.2579 + }, + { + "layer": 14, + "head": 13, + "ts": 0.2574 + }, + { + "layer": 22, + "head": 14, + "ts": 0.2569 + }, + { + "layer": 9, + "head": 8, + "ts": 0.2559 + }, + { + "layer": 26, + "head": 9, + "ts": 0.2559 + }, + { + "layer": 6, + "head": 9, + "ts": 0.2525 + }, + { + "layer": 9, + "head": 9, + "ts": 0.251 + }, + { + "layer": 14, + "head": 5, + "ts": 0.25 + }, + { + "layer": 21, + "head": 6, + "ts": 0.2436 + }, + { + "layer": 12, + "head": 14, + "ts": 0.2426 + }, + { + "layer": 14, + "head": 10, + "ts": 0.2421 + }, + { + "layer": 15, + "head": 6, + "ts": 0.2401 + }, + { + "layer": 18, + "head": 15, + "ts": 0.2396 + }, + { + "layer": 17, + "head": 11, + "ts": 0.2376 + }, + { + "layer": 18, + "head": 11, + "ts": 0.2376 + }, + { + "layer": 23, + "head": 12, + "ts": 0.2376 + }, + { + "layer": 7, + "head": 13, + "ts": 0.2361 + }, + { + "layer": 17, + "head": 3, + "ts": 0.2347 + }, + { + "layer": 15, + "head": 1, + "ts": 0.2317 + }, + { + "layer": 13, + "head": 2, + "ts": 0.2307 + }, + { + "layer": 15, + "head": 2, + "ts": 0.2282 + }, + { + "layer": 6, + "head": 1, + "ts": 0.2277 + }, + { + "layer": 19, + "head": 1, + "ts": 0.2277 + }, + { + "layer": 13, + "head": 1, + "ts": 0.2272 + }, + { + "layer": 18, + "head": 6, + "ts": 0.2262 + }, + { + "layer": 14, + "head": 6, + "ts": 0.2233 + }, + { + "layer": 15, + "head": 5, + "ts": 0.2203 + }, + { + "layer": 10, + "head": 2, + "ts": 0.2193 + }, + { + "layer": 16, + "head": 6, + "ts": 0.2173 + }, + { + "layer": 22, + "head": 0, + "ts": 0.2163 + }, + { + "layer": 4, + "head": 11, + "ts": 0.2139 + }, + { + "layer": 11, + "head": 8, + "ts": 0.2119 + }, + { + "layer": 3, + "head": 14, + "ts": 0.2064 + }, + { + "layer": 12, + "head": 15, + "ts": 0.2059 + }, + { + "layer": 11, + "head": 0, + "ts": 0.205 + }, + { + "layer": 9, + "head": 5, + "ts": 0.2045 + }, + { + "layer": 8, + "head": 13, + "ts": 0.204 + }, + { + "layer": 19, + "head": 6, + "ts": 0.204 + }, + { + "layer": 9, + "head": 0, + "ts": 0.2 + }, + { + "layer": 14, + "head": 2, + "ts": 0.1985 + }, + { + "layer": 6, + "head": 4, + "ts": 0.1975 + }, + { + "layer": 9, + "head": 4, + "ts": 0.1975 + }, + { + "layer": 15, + "head": 4, + "ts": 0.1975 + }, + { + "layer": 8, + "head": 10, + "ts": 0.196 + }, + { + "layer": 11, + "head": 5, + "ts": 0.196 + }, + { + "layer": 10, + "head": 4, + "ts": 0.1916 + }, + { + "layer": 20, + "head": 13, + "ts": 0.1916 + }, + { + "layer": 11, + "head": 6, + "ts": 0.1911 + }, + { + "layer": 7, + "head": 6, + "ts": 0.1876 + }, + { + "layer": 6, + "head": 14, + "ts": 0.1847 + }, + { + "layer": 9, + "head": 7, + "ts": 0.1827 + }, + { + "layer": 18, + "head": 7, + "ts": 0.1807 + }, + { + "layer": 3, + "head": 15, + "ts": 0.1797 + }, + { + "layer": 16, + "head": 3, + "ts": 0.1792 + }, + { + "layer": 12, + "head": 12, + "ts": 0.1787 + }, + { + "layer": 9, + "head": 10, + "ts": 0.1772 + }, + { + "layer": 15, + "head": 14, + "ts": 0.1757 + }, + { + "layer": 12, + "head": 2, + "ts": 0.1752 + }, + { + "layer": 20, + "head": 2, + "ts": 0.1752 + }, + { + "layer": 9, + "head": 2, + "ts": 0.1743 + }, + { + "layer": 13, + "head": 3, + "ts": 0.1738 + }, + { + "layer": 7, + "head": 5, + "ts": 0.1723 + }, + { + "layer": 2, + "head": 6, + "ts": 0.1708 + }, + { + "layer": 10, + "head": 11, + "ts": 0.1703 + }, + { + "layer": 15, + "head": 9, + "ts": 0.1698 + }, + { + "layer": 14, + "head": 7, + "ts": 0.1653 + }, + { + "layer": 26, + "head": 6, + "ts": 0.1653 + }, + { + "layer": 19, + "head": 0, + "ts": 0.1649 + }, + { + "layer": 8, + "head": 11, + "ts": 0.1644 + }, + { + "layer": 27, + "head": 6, + "ts": 0.1629 + }, + { + "layer": 21, + "head": 7, + "ts": 0.1619 + }, + { + "layer": 3, + "head": 3, + "ts": 0.1614 + }, + { + "layer": 10, + "head": 9, + "ts": 0.155 + }, + { + "layer": 20, + "head": 5, + "ts": 0.1505 + }, + { + "layer": 1, + "head": 14, + "ts": 0.15 + }, + { + "layer": 3, + "head": 2, + "ts": 0.15 + }, + { + "layer": 26, + "head": 11, + "ts": 0.146 + }, + { + "layer": 6, + "head": 15, + "ts": 0.145 + }, + { + "layer": 7, + "head": 14, + "ts": 0.145 + }, + { + "layer": 7, + "head": 12, + "ts": 0.1436 + }, + { + "layer": 6, + "head": 5, + "ts": 0.1426 + }, + { + "layer": 15, + "head": 0, + "ts": 0.1426 + }, + { + "layer": 10, + "head": 13, + "ts": 0.1406 + }, + { + "layer": 12, + "head": 5, + "ts": 0.1406 + }, + { + "layer": 1, + "head": 0, + "ts": 0.1401 + }, + { + "layer": 13, + "head": 5, + "ts": 0.1391 + }, + { + "layer": 15, + "head": 15, + "ts": 0.1366 + }, + { + "layer": 15, + "head": 3, + "ts": 0.1356 + }, + { + "layer": 0, + "head": 10, + "ts": 0.1351 + }, + { + "layer": 7, + "head": 11, + "ts": 0.1351 + }, + { + "layer": 0, + "head": 11, + "ts": 0.1342 + }, + { + "layer": 17, + "head": 14, + "ts": 0.1337 + }, + { + "layer": 12, + "head": 13, + "ts": 0.1322 + }, + { + "layer": 5, + "head": 8, + "ts": 0.1312 + }, + { + "layer": 10, + "head": 8, + "ts": 0.1312 + }, + { + "layer": 15, + "head": 7, + "ts": 0.1312 + }, + { + "layer": 11, + "head": 4, + "ts": 0.1302 + }, + { + "layer": 5, + "head": 14, + "ts": 0.1297 + }, + { + "layer": 23, + "head": 13, + "ts": 0.1297 + }, + { + "layer": 23, + "head": 9, + "ts": 0.1292 + }, + { + "layer": 5, + "head": 7, + "ts": 0.1287 + }, + { + "layer": 16, + "head": 4, + "ts": 0.1282 + }, + { + "layer": 1, + "head": 1, + "ts": 0.1262 + }, + { + "layer": 15, + "head": 11, + "ts": 0.1262 + }, + { + "layer": 5, + "head": 4, + "ts": 0.1238 + }, + { + "layer": 1, + "head": 3, + "ts": 0.1223 + }, + { + "layer": 5, + "head": 9, + "ts": 0.1208 + }, + { + "layer": 12, + "head": 9, + "ts": 0.1208 + }, + { + "layer": 13, + "head": 10, + "ts": 0.1193 + }, + { + "layer": 16, + "head": 5, + "ts": 0.1173 + }, + { + "layer": 4, + "head": 8, + "ts": 0.1168 + }, + { + "layer": 5, + "head": 0, + "ts": 0.1158 + }, + { + "layer": 5, + "head": 6, + "ts": 0.1158 + }, + { + "layer": 5, + "head": 11, + "ts": 0.1153 + }, + { + "layer": 21, + "head": 2, + "ts": 0.1153 + }, + { + "layer": 7, + "head": 10, + "ts": 0.1149 + }, + { + "layer": 8, + "head": 2, + "ts": 0.1149 + }, + { + "layer": 5, + "head": 1, + "ts": 0.1139 + }, + { + "layer": 6, + "head": 3, + "ts": 0.1139 + }, + { + "layer": 8, + "head": 0, + "ts": 0.1129 + }, + { + "layer": 4, + "head": 2, + "ts": 0.1084 + }, + { + "layer": 18, + "head": 8, + "ts": 0.1079 + }, + { + "layer": 27, + "head": 11, + "ts": 0.1079 + }, + { + "layer": 27, + "head": 10, + "ts": 0.1074 + }, + { + "layer": 27, + "head": 14, + "ts": 0.1074 + }, + { + "layer": 25, + "head": 0, + "ts": 0.1069 + }, + { + "layer": 3, + "head": 8, + "ts": 0.1064 + }, + { + "layer": 14, + "head": 1, + "ts": 0.1059 + }, + { + "layer": 17, + "head": 7, + "ts": 0.1059 + }, + { + "layer": 4, + "head": 4, + "ts": 0.1045 + }, + { + "layer": 21, + "head": 5, + "ts": 0.1045 + }, + { + "layer": 3, + "head": 1, + "ts": 0.104 + }, + { + "layer": 10, + "head": 12, + "ts": 0.103 + }, + { + "layer": 16, + "head": 2, + "ts": 0.103 + }, + { + "layer": 27, + "head": 7, + "ts": 0.103 + }, + { + "layer": 20, + "head": 1, + "ts": 0.1025 + }, + { + "layer": 0, + "head": 3, + "ts": 0.102 + }, + { + "layer": 22, + "head": 2, + "ts": 0.1015 + }, + { + "layer": 27, + "head": 1, + "ts": 0.1005 + } + ], + "alignment_heads_compact": [ + [ + 11, + 2 + ], + [ + 11, + 13 + ], + [ + 20, + 3 + ], + [ + 14, + 15 + ], + [ + 16, + 14 + ], + [ + 14, + 14 + ], + [ + 11, + 12 + ], + [ + 21, + 9 + ], + [ + 6, + 6 + ], + [ + 6, + 7 + ], + [ + 11, + 3 + ], + [ + 21, + 8 + ], + [ + 6, + 10 + ], + [ + 19, + 7 + ], + [ + 3, + 10 + ], + [ + 6, + 11 + ], + [ + 13, + 11 + ], + [ + 6, + 13 + ], + [ + 2, + 11 + ], + [ + 16, + 15 + ], + [ + 2, + 3 + ], + [ + 18, + 5 + ], + [ + 2, + 10 + ], + [ + 6, + 12 + ], + [ + 4, + 13 + ], + [ + 13, + 0 + ], + [ + 16, + 10 + ], + [ + 22, + 7 + ], + [ + 19, + 5 + ], + [ + 20, + 14 + ], + [ + 25, + 5 + ], + [ + 18, + 3 + ], + [ + 23, + 14 + ], + [ + 13, + 14 + ], + [ + 23, + 7 + ], + [ + 11, + 11 + ], + [ + 8, + 1 + ], + [ + 24, + 5 + ], + [ + 24, + 14 + ], + [ + 23, + 15 + ], + [ + 3, + 11 + ], + [ + 22, + 9 + ], + [ + 8, + 4 + ], + [ + 24, + 15 + ], + [ + 24, + 7 + ], + [ + 26, + 8 + ], + [ + 23, + 4 + ], + [ + 22, + 12 + ], + [ + 25, + 11 + ], + [ + 25, + 7 + ], + [ + 24, + 13 + ], + [ + 24, + 12 + ], + [ + 26, + 2 + ], + [ + 20, + 11 + ], + [ + 25, + 6 + ], + [ + 25, + 12 + ], + [ + 25, + 13 + ], + [ + 24, + 11 + ], + [ + 20, + 15 + ], + [ + 22, + 8 + ], + [ + 25, + 14 + ], + [ + 25, + 10 + ], + [ + 26, + 3 + ], + [ + 25, + 4 + ], + [ + 26, + 4 + ], + [ + 26, + 14 + ], + [ + 19, + 3 + ], + [ + 24, + 6 + ], + [ + 24, + 10 + ], + [ + 12, + 6 + ], + [ + 21, + 0 + ], + [ + 16, + 11 + ], + [ + 26, + 7 + ], + [ + 21, + 12 + ], + [ + 13, + 7 + ], + [ + 26, + 5 + ], + [ + 22, + 13 + ], + [ + 26, + 15 + ], + [ + 23, + 1 + ], + [ + 19, + 9 + ], + [ + 20, + 7 + ], + [ + 21, + 1 + ], + [ + 9, + 12 + ], + [ + 23, + 0 + ], + [ + 16, + 7 + ], + [ + 13, + 15 + ], + [ + 20, + 10 + ], + [ + 25, + 9 + ], + [ + 13, + 6 + ], + [ + 1, + 15 + ], + [ + 16, + 13 + ], + [ + 19, + 4 + ], + [ + 4, + 12 + ], + [ + 14, + 12 + ], + [ + 19, + 8 + ], + [ + 5, + 10 + ], + [ + 21, + 13 + ], + [ + 26, + 0 + ], + [ + 13, + 4 + ], + [ + 18, + 2 + ], + [ + 19, + 13 + ], + [ + 17, + 13 + ], + [ + 20, + 6 + ], + [ + 9, + 13 + ], + [ + 21, + 15 + ], + [ + 1, + 8 + ], + [ + 19, + 12 + ], + [ + 12, + 1 + ], + [ + 20, + 4 + ], + [ + 18, + 10 + ], + [ + 12, + 0 + ], + [ + 17, + 8 + ], + [ + 21, + 11 + ], + [ + 4, + 10 + ], + [ + 23, + 10 + ], + [ + 16, + 12 + ], + [ + 15, + 10 + ], + [ + 19, + 2 + ], + [ + 21, + 14 + ], + [ + 12, + 8 + ], + [ + 8, + 5 + ], + [ + 20, + 8 + ], + [ + 20, + 12 + ], + [ + 2, + 8 + ], + [ + 10, + 5 + ], + [ + 22, + 4 + ], + [ + 17, + 1 + ], + [ + 24, + 4 + ], + [ + 17, + 0 + ], + [ + 5, + 15 + ], + [ + 23, + 11 + ], + [ + 3, + 13 + ], + [ + 11, + 10 + ], + [ + 7, + 4 + ], + [ + 2, + 14 + ], + [ + 25, + 15 + ], + [ + 25, + 8 + ], + [ + 16, + 9 + ], + [ + 26, + 1 + ], + [ + 8, + 14 + ], + [ + 22, + 1 + ], + [ + 23, + 6 + ], + [ + 8, + 7 + ], + [ + 14, + 0 + ], + [ + 18, + 4 + ], + [ + 17, + 9 + ], + [ + 22, + 6 + ], + [ + 2, + 2 + ], + [ + 17, + 10 + ], + [ + 17, + 12 + ], + [ + 21, + 4 + ], + [ + 22, + 11 + ], + [ + 11, + 7 + ], + [ + 10, + 3 + ], + [ + 4, + 15 + ], + [ + 21, + 10 + ], + [ + 22, + 10 + ], + [ + 14, + 11 + ], + [ + 12, + 3 + ], + [ + 22, + 5 + ], + [ + 16, + 8 + ], + [ + 17, + 2 + ], + [ + 5, + 5 + ], + [ + 6, + 0 + ], + [ + 8, + 6 + ], + [ + 3, + 4 + ], + [ + 23, + 5 + ], + [ + 12, + 10 + ], + [ + 18, + 13 + ], + [ + 11, + 1 + ], + [ + 14, + 13 + ], + [ + 22, + 14 + ], + [ + 9, + 8 + ], + [ + 26, + 9 + ], + [ + 6, + 9 + ], + [ + 9, + 9 + ], + [ + 14, + 5 + ], + [ + 21, + 6 + ], + [ + 12, + 14 + ], + [ + 14, + 10 + ], + [ + 15, + 6 + ], + [ + 18, + 15 + ], + [ + 17, + 11 + ], + [ + 18, + 11 + ], + [ + 23, + 12 + ], + [ + 7, + 13 + ], + [ + 17, + 3 + ], + [ + 15, + 1 + ], + [ + 13, + 2 + ], + [ + 15, + 2 + ], + [ + 6, + 1 + ], + [ + 19, + 1 + ], + [ + 13, + 1 + ], + [ + 18, + 6 + ], + [ + 14, + 6 + ], + [ + 15, + 5 + ], + [ + 10, + 2 + ], + [ + 16, + 6 + ], + [ + 22, + 0 + ], + [ + 4, + 11 + ], + [ + 11, + 8 + ], + [ + 3, + 14 + ], + [ + 12, + 15 + ], + [ + 11, + 0 + ], + [ + 9, + 5 + ], + [ + 8, + 13 + ], + [ + 19, + 6 + ], + [ + 9, + 0 + ], + [ + 14, + 2 + ], + [ + 6, + 4 + ], + [ + 9, + 4 + ], + [ + 15, + 4 + ], + [ + 8, + 10 + ], + [ + 11, + 5 + ], + [ + 10, + 4 + ], + [ + 20, + 13 + ], + [ + 11, + 6 + ], + [ + 7, + 6 + ], + [ + 6, + 14 + ], + [ + 9, + 7 + ], + [ + 18, + 7 + ], + [ + 3, + 15 + ], + [ + 16, + 3 + ], + [ + 12, + 12 + ], + [ + 9, + 10 + ], + [ + 15, + 14 + ], + [ + 12, + 2 + ], + [ + 20, + 2 + ], + [ + 9, + 2 + ], + [ + 13, + 3 + ], + [ + 7, + 5 + ], + [ + 2, + 6 + ], + [ + 10, + 11 + ], + [ + 15, + 9 + ], + [ + 14, + 7 + ], + [ + 26, + 6 + ], + [ + 19, + 0 + ], + [ + 8, + 11 + ], + [ + 27, + 6 + ], + [ + 21, + 7 + ], + [ + 3, + 3 + ], + [ + 10, + 9 + ], + [ + 20, + 5 + ], + [ + 1, + 14 + ], + [ + 3, + 2 + ], + [ + 26, + 11 + ], + [ + 6, + 15 + ], + [ + 7, + 14 + ], + [ + 7, + 12 + ], + [ + 6, + 5 + ], + [ + 15, + 0 + ], + [ + 10, + 13 + ], + [ + 12, + 5 + ], + [ + 1, + 0 + ], + [ + 13, + 5 + ], + [ + 15, + 15 + ], + [ + 15, + 3 + ], + [ + 0, + 10 + ], + [ + 7, + 11 + ], + [ + 0, + 11 + ], + [ + 17, + 14 + ], + [ + 12, + 13 + ], + [ + 5, + 8 + ], + [ + 10, + 8 + ], + [ + 15, + 7 + ], + [ + 11, + 4 + ], + [ + 5, + 14 + ], + [ + 23, + 13 + ], + [ + 23, + 9 + ], + [ + 5, + 7 + ], + [ + 16, + 4 + ], + [ + 1, + 1 + ], + [ + 15, + 11 + ], + [ + 5, + 4 + ], + [ + 1, + 3 + ], + [ + 5, + 9 + ], + [ + 12, + 9 + ], + [ + 13, + 10 + ], + [ + 16, + 5 + ], + [ + 4, + 8 + ], + [ + 5, + 0 + ], + [ + 5, + 6 + ], + [ + 5, + 11 + ], + [ + 21, + 2 + ], + [ + 7, + 10 + ], + [ + 8, + 2 + ], + [ + 5, + 1 + ], + [ + 6, + 3 + ], + [ + 8, + 0 + ], + [ + 4, + 2 + ], + [ + 18, + 8 + ], + [ + 27, + 11 + ], + [ + 27, + 10 + ], + [ + 27, + 14 + ], + [ + 25, + 0 + ], + [ + 3, + 8 + ], + [ + 14, + 1 + ], + [ + 17, + 7 + ], + [ + 4, + 4 + ], + [ + 21, + 5 + ], + [ + 3, + 1 + ], + [ + 10, + 12 + ], + [ + 16, + 2 + ], + [ + 27, + 7 + ], + [ + 20, + 1 + ], + [ + 0, + 3 + ], + [ + 22, + 2 + ], + [ + 27, + 1 + ] + ] +} \ No newline at end of file diff --git a/whisperlivekit/qwen3_simul_kv.py b/whisperlivekit/qwen3_simul_kv.py index 276178a..84ea6a3 100644 --- a/whisperlivekit/qwen3_simul_kv.py +++ b/whisperlivekit/qwen3_simul_kv.py @@ -41,8 +41,8 @@ class Qwen3SimulKVConfig: border_fraction: float = 0.20 rewind_fraction: float = 0.12 audio_min_len: float = 0.5 - audio_max_len: float = 20.0 - max_context_tokens: int = 30 + audio_max_len: float = 30.0 + max_context_tokens: int = 20 init_prompt: Optional[str] = None max_alignment_heads: int = 10 @@ -101,6 +101,9 @@ class Qwen3SimulKVState: self.prompt_token_count = 0 self.audio_token_count = 0 self.generated_token_ids = [] + # Reset alignment tracking — old frame references are invalid + # after audio is trimmed from the front + self.last_attend_frame = -15 class Qwen3SimulKVASR: