From 228e2d103c3e549d64b4447c54f69d4224b7960d Mon Sep 17 00:00:00 2001 From: JavaZero <2487163254@qq.com> Date: Wed, 19 Nov 2025 11:08:05 +0800 Subject: [PATCH 1/2] fix: improve chart precision and scaling --- src/App.jsx | 450 +++++++++++------- src/components/ChartContainer.jsx | 261 +++++----- src/components/RegexControls.jsx | 336 ++++--------- .../__tests__/ChartContainer.test.jsx | 33 +- .../__tests__/valueExtractor.test.js | 2 +- src/utils/ValueExtractor.js | 176 +++++++ src/workers/logParser.worker.js | 99 ++++ 7 files changed, 799 insertions(+), 558 deletions(-) create mode 100644 src/utils/ValueExtractor.js create mode 100644 src/workers/logParser.worker.js diff --git a/src/App.jsx b/src/App.jsx index 9e85626..f2edab9 100644 --- a/src/App.jsx +++ b/src/App.jsx @@ -32,8 +32,8 @@ export const DEFAULT_GLOBAL_PARSING_CONFIG = { }; function App() { - const { t } = useTranslation(); - const [uploadedFiles, setUploadedFiles] = useState(() => { + const { t } = useTranslation(); + const [uploadedFiles, setUploadedFiles] = useState(() => { const stored = localStorage.getItem('uploadedFiles'); return stored ? JSON.parse(stored) : []; }); @@ -43,7 +43,7 @@ function App() { const stored = localStorage.getItem('globalParsingConfig'); return stored ? JSON.parse(stored) : JSON.parse(JSON.stringify(DEFAULT_GLOBAL_PARSING_CONFIG)); }); - + const [compareMode, setCompareMode] = useState('normal'); const [multiFileMode, setMultiFileMode] = useState('baseline'); const [baselineFile, setBaselineFile] = useState(''); @@ -58,6 +58,46 @@ function App() { const [sidebarVisible, setSidebarVisible] = useState(true); const savingDisabledRef = useRef(false); const enabledFiles = uploadedFiles.filter(file => file.enabled); + const workerRef = useRef(null); + + // Initialize Web Worker + useEffect(() => { + workerRef.current = new Worker(new URL('./workers/logParser.worker.js', import.meta.url), { type: 'module' }); + + workerRef.current.onmessage = (e) => { + const { type, payload } = e.data; + if (type === 'PARSE_COMPLETE') { + setUploadedFiles(prev => prev.map(file => { + if (file.id === payload.fileId) { + return { + ...file, + metricsData: payload.metricsData, + isParsing: false + }; + } + return file; + })); + } else if (type === 'PARSE_ERROR') { + console.error('Worker parsing error:', payload.error); + setUploadedFiles(prev => prev.map(file => { + if (file.id === payload.fileId) { + return { + ...file, + isParsing: false, + error: payload.error + }; + } + return file; + })); + } + }; + + return () => { + if (workerRef.current) { + workerRef.current.terminate(); + } + }; + }, []); useEffect(() => { if (enabledFiles.length > 0) { @@ -112,6 +152,8 @@ function App() { const filesWithDefaults = files.map(file => ({ ...file, enabled: true, + metricsData: {}, // Initialize empty + isParsing: true, // Mark as parsing config: { // Use global parsing config as default values metrics: globalParsingConfig.metrics.map(m => ({ ...m })), @@ -124,7 +166,22 @@ function App() { stepKeyword: globalParsingConfig.stepKeyword } })); + setUploadedFiles(prev => mergeFilesWithReplacement(prev, filesWithDefaults)); + + // Trigger worker for new files + filesWithDefaults.forEach(file => { + if (workerRef.current) { + workerRef.current.postMessage({ + type: 'PARSE_FILE', + payload: { + fileId: file.id, + content: file.content, + config: file.config + } + }); + } + }); }, [globalParsingConfig]); // Global file processing function @@ -143,7 +200,7 @@ function App() { // Read file contents Promise.all( - processedFiles.map(fileObj => + processedFiles.map(fileObj => new Promise((resolve) => { const reader = new FileReader(); reader.onload = (e) => { @@ -163,7 +220,7 @@ function App() { }, []); const handleFileToggle = useCallback((index, enabled) => { - setUploadedFiles(prev => prev.map((file, i) => + setUploadedFiles(prev => prev.map((file, i) => i === index ? { ...file, enabled } : file )); }, []); @@ -174,9 +231,23 @@ function App() { }, []); const handleConfigSave = useCallback((fileId, config) => { - setUploadedFiles(prev => prev.map(file => - file.id === fileId ? { ...file, config } : file - )); + setUploadedFiles(prev => prev.map(file => { + if (file.id === fileId) { + // Trigger re-parsing + if (workerRef.current) { + workerRef.current.postMessage({ + type: 'PARSE_FILE', + payload: { + fileId: file.id, + content: file.content, + config: config + } + }); + } + return { ...file, config, isParsing: true }; + } + return file; + })); }, []); const handleConfigClose = useCallback(() => { @@ -189,23 +260,44 @@ function App() { setGlobalParsingConfig(newConfig); // Sync parsing config to files that still use the global metrics - setUploadedFiles(prev => prev.map(file => { - const fileConfig = file.config || {}; - const usesGlobalMetrics = !fileConfig.metrics || - JSON.stringify(fileConfig.metrics) === JSON.stringify(globalParsingConfig.metrics); - - return { - ...file, - config: { - ...fileConfig, - ...(usesGlobalMetrics && { - metrics: newConfig.metrics.map(m => ({ ...m })) - }), - useStepKeyword: newConfig.useStepKeyword, - stepKeyword: newConfig.stepKeyword + setUploadedFiles(prev => { + const newFiles = prev.map(file => { + const fileConfig = file.config || {}; + const usesGlobalMetrics = !fileConfig.metrics || + JSON.stringify(fileConfig.metrics) === JSON.stringify(globalParsingConfig.metrics); + + if (usesGlobalMetrics || newConfig.useStepKeyword !== globalParsingConfig.useStepKeyword || newConfig.stepKeyword !== globalParsingConfig.stepKeyword) { + const newFileConfig = { + ...fileConfig, + ...(usesGlobalMetrics && { + metrics: newConfig.metrics.map(m => ({ ...m })) + }), + useStepKeyword: newConfig.useStepKeyword, + stepKeyword: newConfig.stepKeyword + }; + + // Trigger re-parsing if config changed + if (workerRef.current) { + workerRef.current.postMessage({ + type: 'PARSE_FILE', + payload: { + fileId: file.id, + content: file.content, + config: newFileConfig + } + }); + } + + return { + ...file, + config: newFileConfig, + isParsing: true + }; } - }; - })); + return file; + }); + return newFiles; + }); }, [globalParsingConfig]); // Reset configuration @@ -224,7 +316,7 @@ function App() { const handleGlobalDragEnter = useCallback((e) => { e.preventDefault(); setDragCounter(prev => prev + 1); - + // Check if files are included if (e.dataTransfer.types.includes('Files')) { setGlobalDragOver(true); @@ -252,7 +344,7 @@ function App() { e.preventDefault(); setGlobalDragOver(false); setDragCounter(0); - + if (e.dataTransfer.files.length > 0) { processGlobalFiles(e.dataTransfer.files); } @@ -290,18 +382,18 @@ function App() { >
-
@@ -351,160 +443,160 @@ function App() { > {/* Header info */}
-
-
- - - +
+
+ + + +
+

+ Log Analyzer +

+
+
+ +
+
-

- Log Analyzer -

-
-
- +

+ {t('intro')} +

+ + {/* Status and link buttons */} +
+ + + {t('status.online')} + + + + + + GitHub + +
-
-

- {t('intro')} -

- - {/* Status and link buttons */} -
- - - {t('status.online')} - - - - - - GitHub - - -
-
- - - - - + - {enabledFiles.length >= 2 && ( - - )} - -
-

- {t('display.options')} -

-
-
-

{t('display.chart')}

-

{t('display.chartDesc')}

-
-
-

{t('display.baseline')}

-
-
- - setRelativeBaseline(parseFloat(e.target.value) || 0)} - className="input-field" - placeholder="0.002" - aria-describedby="relative-baseline-description" - /> - - {t('display.relativeBaselineDesc')} - -
+ + + {enabledFiles.length >= 2 && ( + + )} + +
+

+ {t('display.options')} +

+
+
+

{t('display.chart')}

+

{t('display.chartDesc')}

+
-
- - setAbsoluteBaseline(parseFloat(e.target.value) || 0)} - className="input-field" - placeholder="0.005" - aria-describedby="absolute-baseline-description" - /> - - {t('display.absoluteBaselineDesc')} - +
+

{t('display.baseline')}

+
+
+ + setRelativeBaseline(parseFloat(e.target.value) || 0)} + className="input-field" + placeholder="0.002" + aria-describedby="relative-baseline-description" + /> + + {t('display.relativeBaselineDesc')} + +
+ +
+ + setAbsoluteBaseline(parseFloat(e.target.value) || 0)} + className="input-field" + placeholder="0.005" + aria-describedby="absolute-baseline-description" + /> + + {t('display.absoluteBaselineDesc')} + +
-
)} @@ -529,7 +621,7 @@ function App() {
- + { const { datasetIndex, index } = element; const dataset = chartRef.current.data.datasets[datasetIndex]; const point = dataset.data[index]; const pixelX = chartRef.current.scales.x.getPixelForValue(point.x); const distance = Math.abs(mouseX - pixelX); - + if (distance < minDistance) { minDistance = distance; closestElement = element; } }); } - + const { datasetIndex, index } = closestElement; const dataset = chartRef.current.data.datasets[datasetIndex]; const point = dataset.data[index]; @@ -180,15 +180,15 @@ export default function ChartContainer({ const elementKey = `${datasetIndex}-${idx}`; if (!seen.has(elementKey)) { // Validate element - if (datasetIndex >= 0 && datasetIndex < chart.data.datasets.length && - idx >= 0 && idx < dataset.data.length) { + if (datasetIndex >= 0 && datasetIndex < chart.data.datasets.length && + idx >= 0 && idx < dataset.data.length) { activeElements.push({ datasetIndex, index: idx }); seen.add(elementKey); } } } }); - + // Only set when activeElements are valid if (activeElements.length > 0) { try { @@ -214,105 +214,44 @@ export default function ChartContainer({ syncLockRef.current = false; }, []); - const parsedData = useMemo(() => { - const enabled = files.filter(f => f.enabled !== false); - return enabled.map(file => { - if (!file.content) return { ...file, metricsData: {} }; - const lines = file.content.split('\n'); - const metricsData = {}; - - const stepCfg = { - enabled: file.config?.useStepKeyword, - keyword: file.config?.stepKeyword || 'step:' - }; - - const extractStep = (line) => { - if (!stepCfg.enabled) return null; - const idx = line.toLowerCase().indexOf(stepCfg.keyword.toLowerCase()); - if (idx !== -1) { - const after = line.substring(idx + stepCfg.keyword.length); - const match = after.match(/[+-]?\d+/); - if (match) { - const s = parseInt(match[0], 10); - if (!isNaN(s)) return s; - } - } - return null; - }; + const parsedData = useMemo(() => { + const enabled = files.filter(f => f.enabled !== false); + return enabled.map(file => { + // Use pre-parsed data from worker + let metricsData = file.metricsData || {}; - const extractByKeyword = (linesArr, keyword) => { - const results = []; - const numberRegex = /[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?/; - linesArr.forEach(line => { - const idx = line.toLowerCase().indexOf(keyword.toLowerCase()); - if (idx !== -1) { - const after = line.substring(idx + keyword.length); - const match = after.match(numberRegex); - if (match) { - const v = parseFloat(match[0]); - if (!isNaN(v)) { - const step = extractStep(line); - results.push({ x: step !== null ? step : results.length, y: v }); - } - } - } - }); - return results; - }; - - metrics.forEach((metric, idx) => { - const fileMetric = file.config?.metrics?.[idx] || metric; - let points = []; - if (fileMetric.mode === 'keyword') { - points = extractByKeyword(lines, fileMetric.keyword); - } else if (fileMetric.regex) { - const reg = new RegExp(fileMetric.regex); - lines.forEach(line => { - reg.lastIndex = 0; - const m = reg.exec(line); - if (m && m[1]) { - const v = parseFloat(m[1]); - if (!isNaN(v)) { - const step = extractStep(line); - points.push({ x: step !== null ? step : points.length, y: v }); - } - } - }); - } - - let key = ''; - if (metric.name && metric.name.trim()) { - key = metric.name.trim(); - } else if (metric.keyword) { - key = metric.keyword.replace(/[::]/g, '').trim(); - } else if (metric.regex) { - const sanitized = metric.regex.replace(/[^a-zA-Z0-9_]/g, '').trim(); - key = sanitized || `metric${idx + 1}`; - } else { - key = `metric${idx + 1}`; - } + // Clone to avoid mutation during range application + metricsData = { ...metricsData }; - metricsData[key] = points; - }); + const stepCfg = { + enabled: file.config?.useStepKeyword, + keyword: file.config?.stepKeyword || 'step:' + }; const range = file.config?.dataRange; if (range && (range.start > 0 || range.end !== undefined)) { const applyRange = data => { - if (data.length === 0) return data; + if (!data || data.length === 0) return data; const start = Math.max(0, parseInt(range.start) || 0); const end = range.end !== undefined ? parseInt(range.end) : data.length; const endIndex = Math.min(data.length, end); return data.slice(start, endIndex); }; + + // If not using step keyword, reindex x to 0, 1, 2... after slicing + // This matches original behavior where x-axis resets if we just treat lines as steps const reindex = data => stepCfg.enabled ? data : data.map((p, idx) => ({ x: idx, y: p.y })); + Object.keys(metricsData).forEach(k => { - metricsData[k] = reindex(applyRange(metricsData[k])); + if (metricsData[k]) { + metricsData[k] = reindex(applyRange(metricsData[k])); + } }); } return { ...file, metricsData }; }); - }, [files, metrics]); + }, [files]); useEffect(() => { const maxStep = parsedData.reduce((m, f) => { @@ -343,7 +282,7 @@ export default function ChartContainer({ } return acc; }, []); - + return { datasets: uniqueItems.map((item, index) => { const color = colors[index % colors.length]; @@ -399,36 +338,46 @@ export default function ChartContainer({ return result; }; - const calculateYRange = useCallback((dataArray) => { - let min = Infinity; - let max = -Infinity; + const getMaxDecimals = useCallback((dataArray) => { + let maxDecimals = 0; dataArray.forEach(item => { item.data.forEach(point => { - const inRange = - (xRange.min === undefined || point.x >= xRange.min) && - (xRange.max === undefined || point.x <= xRange.max); - if (inRange) { - if (point.y < min) min = point.y; - if (point.y > max) max = point.y; + const valStr = point.y.toString(); + if (valStr.includes('.')) { + const decimals = valStr.split('.')[1].length; + if (decimals > maxDecimals) maxDecimals = decimals; } }); }); - if (min === Infinity || max === -Infinity) { - return { min: 0, max: 1, step: 1 }; - } - if (min === max) { - return { min: min - 1, max: max + 1, step: 1 }; - } - const pad = (max - min) * 0.05; - const paddedMin = min - pad; - const paddedMax = max + pad; - const range = paddedMax - paddedMin; - let step = Math.pow(10, Math.floor(Math.log10(range))); - if (range / step < 3) { - step /= 10; - } - return { min: paddedMin, max: paddedMax, step }; - }, [xRange]); + return Math.min(maxDecimals, 10); // Cap at 10 to avoid extreme cases + }, []); + + const calculateNiceScale = useCallback((min, max) => { + if (min === Infinity || max === -Infinity) return { min: 0, max: 1, step: 0.1 }; + if (min === max) return { min: min - 0.5, max: max + 0.5, step: 0.1 }; + + // Calculate raw range + let range = max - min; + + // Calculate "nice" interval + const roughStep = range / 5; // Aim for approx 5-6 ticks + const magnitude = Math.pow(10, Math.floor(Math.log10(roughStep))); + const normalizedStep = roughStep / magnitude; + + let niceStep; + if (normalizedStep < 1.5) niceStep = 1; + else if (normalizedStep < 3) niceStep = 2; + else if (normalizedStep < 7) niceStep = 5; + else niceStep = 10; + + const step = niceStep * magnitude; + + // Calculate nice min and max + const niceMin = Math.floor(min / step) * step; + const niceMax = Math.ceil(max / step) * step; + + return { min: niceMin, max: niceMax, step }; + }, []); const chartOptions = useMemo(() => ({ responsive: true, @@ -510,8 +459,14 @@ export default function ChartContainer({ return `Step ${context[0].parsed.x}`; }, label: function (context) { - const value = Number(context.parsed.y.toPrecision(4)); + // Dynamic precision handled in render loop via options update, + // but here we need to access the chart options or dataset context + // We'll use a default safe fallback or try to read from chart config if possible. + // Actually, we can bind the precision in the render loop. + // For now, let's use the raw value which is most accurate. + const value = context.parsed.y; const label = context.dataset?.label || 'Dataset'; + // We will format this in the parent component's options generation return ` ${label}: ${value}`; }, labelColor: function (context) { @@ -544,11 +499,7 @@ export default function ChartContainer({ display: true, title: { display: true, text: 'Value' }, bounds: 'data', - ticks: { - callback: function (value) { - return Number(value.toPrecision(2)); - } - } + // Ticks callback will be overridden in the render loop } }, elements: { point: { radius: 0 } } @@ -686,10 +637,41 @@ export default function ChartContainer({ const dataArray = metricDataArrays[key] || []; const showComparison = dataArray.length >= 2; - const yRange = calculateYRange(dataArray); - const yDecimals = Math.max(0, -Math.floor(Math.log10(yRange.step))); + const yDecimals = getMaxDecimals(dataArray); + + // Calculate min/max for scaling + let min = Infinity; + let max = -Infinity; + dataArray.forEach(item => { + item.data.forEach(point => { + const inRange = + (xRange.min === undefined || point.x >= xRange.min) && + (xRange.max === undefined || point.x <= xRange.max); + if (inRange) { + if (point.y < min) min = point.y; + if (point.y > max) max = point.y; + } + }); + }); + + const yRange = calculateNiceScale(min, max); + const options = { ...chartOptions, + plugins: { + ...chartOptions.plugins, + tooltip: { + ...chartOptions.plugins.tooltip, + callbacks: { + ...chartOptions.plugins.tooltip.callbacks, + label: function (context) { + const value = Number(context.parsed.y).toFixed(yDecimals); + const label = context.dataset?.label || 'Dataset'; + return ` ${label}: ${value}`; + } + } + } + }, scales: { ...chartOptions.scales, y: { @@ -709,10 +691,41 @@ export default function ChartContainer({ if (showComparison) { const compResult = buildComparisonChartData(dataArray); stats = compResult.stats.length > 0 ? compResult.stats : null; - const compRange = calculateYRange(compResult.datasets); - const compDecimals = Math.max(0, -Math.floor(Math.log10(compRange.step))); + + // Calculate comparison range + let cMin = Infinity; + let cMax = -Infinity; + compResult.datasets.forEach(ds => { + ds.data.forEach(point => { + const inRange = + (xRange.min === undefined || point.x >= xRange.min) && + (xRange.max === undefined || point.x <= xRange.max); + if (inRange) { + if (point.y < cMin) cMin = point.y; + if (point.y > cMax) cMax = point.y; + } + }); + }); + + const compRange = calculateNiceScale(cMin, cMax); + const compDecimals = Math.max(4, getMaxDecimals(compResult.datasets)); // Ensure at least 4 for diffs + const compOptions = { ...chartOptions, + plugins: { + ...chartOptions.plugins, + tooltip: { + ...chartOptions.plugins.tooltip, + callbacks: { + ...chartOptions.plugins.tooltip.callbacks, + label: function (context) { + const value = Number(context.parsed.y).toFixed(compDecimals); + const label = context.dataset?.label || 'Dataset'; + return ` ${label}: ${value}`; + } + } + } + }, scales: { ...chartOptions.scales, y: { diff --git a/src/components/RegexControls.jsx b/src/components/RegexControls.jsx index f2b5fdb..3515352 100644 --- a/src/components/RegexControls.jsx +++ b/src/components/RegexControls.jsx @@ -2,6 +2,7 @@ import React, { useState, useEffect, useCallback } from 'react'; import { Settings, Zap, Eye, ChevronDown, ChevronUp, Target, Code, ZoomIn } from 'lucide-react'; import { METRIC_PRESETS } from '../metricPresets.js'; import { useTranslation, Trans } from 'react-i18next'; +import { ValueExtractor } from '../utils/ValueExtractor'; // Match mode enum const MATCH_MODES = { @@ -36,166 +37,7 @@ function getMetricTitle(metric, index) { return `Metric ${index + 1}`; } -// Value extractor class -export class ValueExtractor { - // Keyword match - static extractByKeyword(content, keyword) { - const results = []; - const lines = content.split('\n'); - - // Number regex supporting scientific notation - const numberRegex = /[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?/; - - lines.forEach((line, lineIndex) => { - // Find keyword (case-insensitive) - const keywordIndex = line.toLowerCase().indexOf(keyword.toLowerCase()); - if (keywordIndex !== -1) { - // Find first number after the keyword - const afterKeyword = line.substring(keywordIndex + keyword.length); - const numberMatch = afterKeyword.match(numberRegex); - - if (numberMatch) { - const value = parseFloat(numberMatch[0]); - if (!isNaN(value)) { - results.push({ - value, - line: lineIndex + 1, - text: line.trim(), - format: 'Keyword Match' - }); - } - } - } - }); - - return results; - } - - // Column position match - static extractByColumn(content, columnIndex, separator = ' ') { - const results = []; - const lines = content.split('\n'); - - lines.forEach((line, lineIndex) => { - if (line.trim()) { - const columns = separator === ' ' - ? line.trim().split(/\s+/) - : line.split(separator); - - if (columns.length > columnIndex) { - const value = parseFloat(columns[columnIndex]); - if (!isNaN(value)) { - results.push({ - value, - line: lineIndex + 1, - text: line.trim() - }); - } - } - } - }); - - return results; - } - // Smart parsing - static extractBySmart(content, type = 'loss') { - const results = []; - const lines = content.split('\n'); - - // Smart keyword list - const keywords = type === 'loss' - ? ['loss', 'training_loss', 'train_loss', 'val_loss', 'validation_loss'] - : ['grad_norm', 'gradient_norm', 'gnorm', 'grad norm', 'gradient norm', 'global_norm']; - - lines.forEach((line, lineIndex) => { - // Try JSON parsing - try { - const jsonMatch = line.match(/\{.*\}/); - if (jsonMatch) { - const obj = JSON.parse(jsonMatch[0]); - for (const keyword of keywords) { - if (obj[keyword] !== undefined) { - const value = parseFloat(obj[keyword]); - if (!isNaN(value)) { - results.push({ - value, - line: lineIndex + 1, - text: line.trim(), - format: 'JSON' - }); - return; - } - } - } - } - } catch { - // Not JSON, continue other formats - } - - // Try key-value and special formats - for (const keyword of keywords) { - const patterns = [ - // Standard key-value format - new RegExp(`${keyword}\\s*[:=]\\s*([\\d.eE+-]+)`, 'i'), - new RegExp(`"${keyword}"\\s*:\\s*([\\d.eE+-]+)`, 'i'), - new RegExp(`${keyword}\\s+([\\d.eE+-]+)`, 'i'), - // MindFormers format: global_norm: [1.6887678] - new RegExp(`${keyword}\\s*:\\s*\\[([\\d.eE+-]+)\\]`, 'i'), - // Other possible array formats - new RegExp(`${keyword}\\s*:\\s*\\[\\s*([\\d.eE+-]+)\\s*\\]`, 'i') - ]; - - for (const pattern of patterns) { - const match = line.match(pattern); - if (match) { - const value = parseFloat(match[1]); - if (!isNaN(value)) { - results.push({ - value, - line: lineIndex + 1, - text: line.trim(), - format: keyword.includes('global_norm') ? 'MindFormers' : 'Key-Value' - }); - return; - } - } - } - } - }); - - return results; - } - - // Regex match (original functionality) - static extractByRegex(content, regex) { - const results = []; - const lines = content.split('\n'); - - try { - const regexObj = new RegExp(regex, 'gi'); - lines.forEach((line, lineIndex) => { - const matches = [...line.matchAll(regexObj)]; - matches.forEach(match => { - if (match[1]) { - const value = parseFloat(match[1]); - if (!isNaN(value)) { - results.push({ - value, - line: lineIndex + 1, - text: line.trim() - }); - } - } - }); - }); - } catch { - // Invalid regex - } - - return results; - } -} export function RegexControls({ globalParsingConfig, @@ -223,7 +65,7 @@ export function RegexControls({ }); }, [globalParsingConfig, onGlobalParsingConfigChange]); -// Generic function to extract numbers + // Generic function to extract numbers const extractValues = useCallback((content, mode, config) => { switch (mode) { case MATCH_MODES.KEYWORD: @@ -235,7 +77,7 @@ export function RegexControls({ } }, []); -// Preview match results + // Preview match results const previewMatches = useCallback(() => { const results = {}; @@ -262,7 +104,7 @@ export function RegexControls({ setPreviewResults(results); }, [uploadedFiles, globalParsingConfig, extractValues]); -// Smartly recommend best config + // Smartly recommend best config const smartRecommend = useCallback(() => { if (uploadedFiles.length === 0) return; @@ -295,14 +137,14 @@ export function RegexControls({ onGlobalParsingConfigChange({ metrics: newMetrics }); }, [uploadedFiles, globalParsingConfig, onGlobalParsingConfigChange]); -// Update preview when config changes + // Update preview when config changes useEffect(() => { if (showPreview) { previewMatches(); } }, [showPreview, previewMatches]); -// Handle config change + // Handle config change const handleMetricChange = (index, field, value) => { const newMetrics = [...globalParsingConfig.metrics]; newMetrics[index] = { ...newMetrics[index], [field]: value }; @@ -340,7 +182,7 @@ export function RegexControls({ onXRangeChange(newRange); }; -// Function to render config panel + // Function to render config panel const renderConfigPanel = (type, config, onConfigChange, index) => { const ModeIcon = MODE_CONFIG[config.mode].icon; @@ -463,93 +305,93 @@ export function RegexControls({
- +
- {globalParsingConfig.metrics.map((cfg, idx) => ( -
- -

- - {t('regex.metricConfig', { title: getMetricTitle(cfg, idx) })} -

- {renderConfigPanel(`metric-${idx}`, cfg, (field, value) => handleMetricChange(idx, field, value), idx)} -
- ))} - + {globalParsingConfig.metrics.map((cfg, idx) => ( +
+ +

+ + {t('regex.metricConfig', { title: getMetricTitle(cfg, idx) })} +

+ {renderConfigPanel(`metric-${idx}`, cfg, (field, value) => handleMetricChange(idx, field, value), idx)} +
+ ))} + -
-
- - {globalParsingConfig.useStepKeyword && ( - handleStepKeywordChange(e.target.value)} - placeholder={t('placeholder.step')} - /> - )} -
+
+
+ + {globalParsingConfig.useStepKeyword && ( + handleStepKeywordChange(e.target.value)} + placeholder={t('placeholder.step')} + /> + )}
+
-
-
-
- handleXRangeChange('min', e.target.value)} - className="input-field" - /> - - - handleXRangeChange('max', e.target.value)} - className="input-field" - /> - -
-

- - Hold Shift and drag on the chart to select range, or input values directly. - -

+
+
+
+ handleXRangeChange('min', e.target.value)} + className="input-field" + /> + - + handleXRangeChange('max', e.target.value)} + className="input-field" + /> + +
+

+ + Hold Shift and drag on the chart to select range, or input values directly. + +

{/* Preview results */} diff --git a/src/components/__tests__/ChartContainer.test.jsx b/src/components/__tests__/ChartContainer.test.jsx index 04b1785..c993c81 100644 --- a/src/components/__tests__/ChartContainer.test.jsx +++ b/src/components/__tests__/ChartContainer.test.jsx @@ -86,8 +86,18 @@ describe('ChartContainer', () => { const onXRangeChange = vi.fn(); const onMaxStepChange = vi.fn(); const files = [ - { name: 'a.log', enabled: true, content: 'loss: 1\nloss: 2' }, - { name: 'b.log', enabled: true, content: 'loss: 1.5\nloss: 2.5' }, + { + name: 'a.log', + enabled: true, + content: 'loss: 1\nloss: 2', + metricsData: { 'loss': [{ x: 0, y: 1 }, { x: 1, y: 2 }] } + }, + { + name: 'b.log', + enabled: true, + content: 'loss: 1.5\nloss: 2.5', + metricsData: { 'loss': [{ x: 0, y: 1.5 }, { x: 1, y: 2.5 }] } + }, ]; render( { name: 'a.log', enabled: true, content: 'loss: 1\nloss: 2\nloss: 3\nacc: 4\nacc: 5', - config: { dataRange: { start: 1, end: 3 } } + config: { dataRange: { start: 1, end: 3 } }, + metricsData: { + 'loss': [{ x: 0, y: 1 }, { x: 1, y: 2 }, { x: 2, y: 3 }], + 'metric2': [{ x: 3, y: 4 }, { x: 4, y: 5 }] + } }, { name: 'b.log', enabled: true, content: 'loss: 2\nloss: 4\nacc: 6\nacc: 8', - config: { dataRange: { start: 1, end: 3 } } + config: { dataRange: { start: 1, end: 3 } }, + metricsData: { + 'loss': [{ x: 0, y: 2 }, { x: 1, y: 4 }], + 'metric2': [{ x: 2, y: 6 }, { x: 3, y: 8 }] + } } ]; const metrics = [ @@ -165,7 +183,7 @@ describe('ChartContainer', () => { // invoke legend and tooltip callbacks const opts = currentProps[0].options; - opts.plugins.legend.labels.generateLabels({ data: { datasets: [{}, { borderDash: [5,5] }] } }); + opts.plugins.legend.labels.generateLabels({ data: { datasets: [{}, { borderDash: [5, 5] }] } }); const tt = opts.plugins.tooltip.callbacks; tt.title([{ parsed: { x: 1 } }]); tt.label({ parsed: { y: 1.2345 } }); @@ -180,12 +198,13 @@ describe('ChartContainer', () => { const onXRangeChange = vi.fn(); const onMaxStepChange = vi.fn(); const files = [ - { name: 'a.log', enabled: true, content: 'loss: 1\nloss: 2' }, + { name: 'a.log', enabled: true, content: 'loss: 1\nloss: 2', metricsData: { 'loss': [{ x: 0, y: 1 }, { x: 1, y: 2 }] } }, { name: 'b.log', enabled: true, content: 'train_loss: 3\ntrain_loss: 4', - config: { metrics: [{ mode: 'keyword', keyword: 'train_loss:' }] } + config: { metrics: [{ mode: 'keyword', keyword: 'train_loss:' }] }, + metricsData: { 'loss': [{ x: 0, y: 3 }, { x: 1, y: 4 }] } // Note: key is 'loss' because metric name is 'loss' } ]; const metrics = [{ name: 'loss', mode: 'keyword', keyword: 'loss:' }]; diff --git a/src/components/__tests__/valueExtractor.test.js b/src/components/__tests__/valueExtractor.test.js index db286dd..cc6f5d7 100644 --- a/src/components/__tests__/valueExtractor.test.js +++ b/src/components/__tests__/valueExtractor.test.js @@ -1,5 +1,5 @@ import { describe, it, expect } from 'vitest'; -import { ValueExtractor } from '../RegexControls.jsx'; +import { ValueExtractor } from '../../utils/ValueExtractor'; const sampleContent = `loss: 0.123\nstep2 loss 0.234\n{"loss": 0.345, "global_norm": 1.23}`; diff --git a/src/utils/ValueExtractor.js b/src/utils/ValueExtractor.js new file mode 100644 index 0000000..b428870 --- /dev/null +++ b/src/utils/ValueExtractor.js @@ -0,0 +1,176 @@ + +// Match mode enum +export const MATCH_MODES = { + KEYWORD: 'keyword', + REGEX: 'regex' +}; + +// Value extractor class +export class ValueExtractor { + // Keyword match + static extractByKeyword(content, keyword) { + const results = []; + // Handle empty content + if (!content) return results; + + const lines = content.split('\n'); + + // Number regex supporting scientific notation + const numberRegex = /[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?/; + + lines.forEach((line, lineIndex) => { + // Find keyword (case-insensitive) + const keywordIndex = line.toLowerCase().indexOf(keyword.toLowerCase()); + if (keywordIndex !== -1) { + // Find first number after the keyword + const afterKeyword = line.substring(keywordIndex + keyword.length); + const numberMatch = afterKeyword.match(numberRegex); + + if (numberMatch) { + const value = parseFloat(numberMatch[0]); + if (!isNaN(value)) { + results.push({ + value, + line: lineIndex + 1, + text: line.trim(), + format: 'Keyword Match' + }); + } + } + } + }); + + return results; + } + + // Column position match + static extractByColumn(content, columnIndex, separator = ' ') { + const results = []; + if (!content) return results; + + const lines = content.split('\n'); + + lines.forEach((line, lineIndex) => { + if (line.trim()) { + const columns = separator === ' ' + ? line.trim().split(/\s+/) + : line.split(separator); + + if (columns.length > columnIndex) { + const value = parseFloat(columns[columnIndex]); + if (!isNaN(value)) { + results.push({ + value, + line: lineIndex + 1, + text: line.trim() + }); + } + } + } + }); + + return results; + } + + // Smart parsing + static extractBySmart(content, type = 'loss') { + const results = []; + if (!content) return results; + + const lines = content.split('\n'); + + // Smart keyword list + const keywords = type === 'loss' + ? ['loss', 'training_loss', 'train_loss', 'val_loss', 'validation_loss'] + : ['grad_norm', 'gradient_norm', 'gnorm', 'grad norm', 'gradient norm', 'global_norm']; + + lines.forEach((line, lineIndex) => { + // Try JSON parsing + try { + const jsonMatch = line.match(/\{.*\}/); + if (jsonMatch) { + const obj = JSON.parse(jsonMatch[0]); + for (const keyword of keywords) { + if (obj[keyword] !== undefined) { + const value = parseFloat(obj[keyword]); + if (!isNaN(value)) { + results.push({ + value, + line: lineIndex + 1, + text: line.trim(), + format: 'JSON' + }); + return; + } + } + } + } + } catch { + // Not JSON, continue other formats + } + + // Try key-value and special formats + for (const keyword of keywords) { + const patterns = [ + // Standard key-value format + new RegExp(`${keyword}\\s*[:=]\\s*([\\d.eE+-]+)`, 'i'), + new RegExp(`"${keyword}"\\s*:\\s*([\\d.eE+-]+)`, 'i'), + new RegExp(`${keyword}\\s+([\\d.eE+-]+)`, 'i'), + // MindFormers format: global_norm: [1.6887678] + new RegExp(`${keyword}\\s*:\\s*\\[([\\d.eE+-]+)\\]`, 'i'), + // Other possible array formats + new RegExp(`${keyword}\\s*:\\s*\\[\\s*([\\d.eE+-]+)\\s*\\]`, 'i') + ]; + + for (const pattern of patterns) { + const match = line.match(pattern); + if (match) { + const value = parseFloat(match[1]); + if (!isNaN(value)) { + results.push({ + value, + line: lineIndex + 1, + text: line.trim(), + format: keyword.includes('global_norm') ? 'MindFormers' : 'Key-Value' + }); + return; + } + } + } + } + }); + + return results; + } + + // Regex match (original functionality) + static extractByRegex(content, regex) { + const results = []; + if (!content) return results; + + const lines = content.split('\n'); + + try { + const regexObj = new RegExp(regex, 'gi'); + lines.forEach((line, lineIndex) => { + const matches = [...line.matchAll(regexObj)]; + matches.forEach(match => { + if (match[1]) { + const value = parseFloat(match[1]); + if (!isNaN(value)) { + results.push({ + value, + line: lineIndex + 1, + text: line.trim() + }); + } + } + }); + }); + } catch { + // Invalid regex + } + + return results; + } +} diff --git a/src/workers/logParser.worker.js b/src/workers/logParser.worker.js new file mode 100644 index 0000000..f91bf02 --- /dev/null +++ b/src/workers/logParser.worker.js @@ -0,0 +1,99 @@ +import { ValueExtractor } from '../utils/ValueExtractor'; + +// Helper to extract step number from a line +const extractStep = (line, stepCfg) => { + if (!stepCfg.enabled) return null; + const idx = line.toLowerCase().indexOf(stepCfg.keyword.toLowerCase()); + if (idx !== -1) { + const after = line.substring(idx + stepCfg.keyword.length); + const match = after.match(/[+-]?\d+/); + if (match) { + const s = parseInt(match[0], 10); + if (!isNaN(s)) return s; + } + } + return null; +}; + +self.onmessage = (e) => { + const { type, payload } = e.data; + + if (type === 'PARSE_FILE') { + const { fileId, content, config } = payload; + const metricsData = {}; + + try { + const lines = content.split('\n'); + + const stepCfg = { + enabled: config.useStepKeyword, + keyword: config.stepKeyword || 'step:' + }; + + config.metrics.forEach((metric, idx) => { + let points = []; + let rawMatches = []; + + // Use ValueExtractor for the heavy lifting + if (metric.mode === 'keyword') { + rawMatches = ValueExtractor.extractByKeyword(content, metric.keyword); + } else if (metric.regex) { + rawMatches = ValueExtractor.extractByRegex(content, metric.regex); + } + + // Map matches to {x, y} points, extracting steps if needed + // Note: ValueExtractor returns { value, line, text } + // We need to re-process to get steps efficiently, or modify ValueExtractor to return steps? + // ValueExtractor processes line by line. + // If we use ValueExtractor, we iterate lines there. + // But we also need the step from the SAME line. + // ValueExtractor returns the line text and line number. + + // Optimization: If we need steps, we might need to look at the line again. + // ValueExtractor returns 'text' which is the line content. + + points = rawMatches.map((match, i) => { + const step = extractStep(match.text, stepCfg); + return { + x: step !== null ? step : i, // Fallback to index if no step found, but this index is match index, not line index. + // Wait, original logic used `results.length` as fallback x. + // If we map, `i` is the index in the matches array. + y: match.value + }; + }); + + // Determine key name + let key = ''; + if (metric.name && metric.name.trim()) { + key = metric.name.trim(); + } else if (metric.keyword) { + key = metric.keyword.replace(/[::]/g, '').trim(); + } else if (metric.regex) { + const sanitized = metric.regex.replace(/[^a-zA-Z0-9_]/g, '').trim(); + key = sanitized || `metric${idx + 1}`; + } else { + key = `metric${idx + 1}`; + } + + metricsData[key] = points; + }); + + self.postMessage({ + type: 'PARSE_COMPLETE', + payload: { + fileId, + metricsData + } + }); + + } catch (error) { + self.postMessage({ + type: 'PARSE_ERROR', + payload: { + fileId, + error: error.message + } + }); + } + } +}; From baaad766336e85c85a1bbf1f73cc7ac4b93487bc Mon Sep 17 00:00:00 2001 From: JavaZero <2487163254@qq.com> Date: Wed, 19 Nov 2025 14:10:48 +0800 Subject: [PATCH 2/2] fix: mock Worker for tests --- vitest.setup.js | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/vitest.setup.js b/vitest.setup.js index 860f9d3..adac0a0 100644 --- a/vitest.setup.js +++ b/vitest.setup.js @@ -16,3 +16,17 @@ if (!window.matchMedia) { })), }); } + +// Mock Worker +class Worker { + constructor(stringUrl) { + this.url = stringUrl; + this.onmessage = () => { }; + } + postMessage(msg) { + this.onmessage({ data: msg }); + } + terminate() { } +} + +global.Worker = Worker;