diff --git a/.changeset/remove-git-analytics.md b/.changeset/remove-git-analytics.md new file mode 100644 index 0000000..ac3ae3f --- /dev/null +++ b/.changeset/remove-git-analytics.md @@ -0,0 +1,36 @@ +--- +"@lytics/dev-agent-cli": minor +"@lytics/dev-agent-core": patch +"@lytics/dev-agent": patch +--- + +Remove git analytics commands to refocus on semantic value + +**BREAKING CHANGES:** + +- Remove `dev owners` command - use `git log` or GitHub contributors instead +- Remove `dev activity` command - use `git log --since` for activity analysis + +**What's Changed:** + +- Removed 891 lines from `dev owners` command +- Removed 175 lines from `dev activity` command +- Cleaned up dead code in `change-frequency.ts` (calculateFileAuthorContributions) +- Simplified metrics collection to focus on code structure introspection + +**What's Kept:** + +- `code_metadata` table for debugging/introspection of indexed code +- `calculateChangeFrequency` for `dev_map` MCP tool (shows commit activity in codebase structure) + +**Why:** + +Dev-agent's unique value is semantic search (embeddings + AST), not git analytics which GitHub/git already provide. This change reduces complexity by ~1,200 lines and refocuses on MCP tools for AI context. 
+ +**Migration:** + +For contributor/ownership analytics, use: +- `git log --format="%ae" | sort | uniq -c | sort -rn` for ownership +- `git log --since="1 month" --name-only --pretty=format: | grep . | sort | uniq -c | sort -rn` for activity +- GitHub's Contributors page for visualization + diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 7e0d400..fa52381 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -2,7 +2,6 @@ import chalk from 'chalk'; import { Command } from 'commander'; -import { activityCommand } from './commands/activity.js'; import { cleanCommand } from './commands/clean.js'; import { compactCommand } from './commands/compact.js'; import { dashboardCommand } from './commands/dashboard.js'; @@ -13,7 +12,6 @@ import { indexCommand } from './commands/index.js'; import { initCommand } from './commands/init.js'; import { mapCommand } from './commands/map.js'; import { mcpCommand } from './commands/mcp.js'; -import { ownersCommand } from './commands/owners.js'; import { planCommand } from './commands/plan.js'; import { searchCommand } from './commands/search.js'; import { statsCommand } from './commands/stats.js'; @@ -42,8 +40,6 @@ program.addCommand(gitCommand); program.addCommand(mapCommand); program.addCommand(updateCommand); program.addCommand(statsCommand); -program.addCommand(ownersCommand); -program.addCommand(activityCommand); program.addCommand(dashboardCommand); program.addCommand(compactCommand); program.addCommand(cleanCommand); diff --git a/packages/cli/src/commands/activity.ts b/packages/cli/src/commands/activity.ts deleted file mode 100644 index 58c9c63..0000000 --- a/packages/cli/src/commands/activity.ts +++ /dev/null @@ -1,174 +0,0 @@ -/** - * Activity command - Show most active files by commit frequency - */ - -import * as path from 'node:path'; -import { - type FileMetrics, - getFileIcon, - getMostActive, - getStoragePath, - MetricsStore, -} from '@lytics/dev-agent-core'; -import chalk from 'chalk'; -import { Command } from 
'commander'; -import { loadConfig } from '../utils/config.js'; -import { logger } from '../utils/logger.js'; - -/** - * Format relative time (e.g., "2 days ago", "today") - */ -function formatRelativeTime(date: Date): string { - const now = new Date(); - const diffMs = now.getTime() - date.getTime(); - const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24)); - - if (diffDays === 0) return 'today'; - if (diffDays === 1) return 'yesterday'; - if (diffDays < 7) return `${diffDays}d ago`; - if (diffDays < 30) return `${Math.floor(diffDays / 7)}w ago`; - if (diffDays < 365) return `${Math.floor(diffDays / 30)}mo ago`; - return `${Math.floor(diffDays / 365)}y ago`; -} - -/** - * Format files with tree branches and icons - */ -function formatFileMetricsTable(files: FileMetrics[]): string { - if (files.length === 0) return ''; - - let output = ''; - - for (let i = 0; i < files.length; i++) { - const file = files[i]; - const isLast = i === files.length - 1; - const branch = isLast ? '└─' : '├─'; - const connector = isLast ? ' ' : '│'; // Vertical line for non-last items - const icon = getFileIcon(file.filePath); - - // Author info - const authorIcon = file.authorCount === 1 ? '👤' : file.authorCount === 2 ? '👥' : '👥👥'; - - // Relative time - const lastChange = file.lastModified ? 
formatRelativeTime(file.lastModified) : 'unknown'; - - // File path line with icon and branch - output += `${chalk.dim(branch)} ${icon} ${file.filePath}\n`; - - // Metrics line with vertical connector - output += chalk.dim( - `${connector} ${file.commitCount} commits • ${file.authorCount} ${authorIcon} • Last: ${lastChange}\n` - ); - - // Add vertical line separator between items (except after last) - if (!isLast) { - output += chalk.dim(`${connector}\n`); - } - } - - return output; -} - -/** - * Generate summary insights - */ -function generateActivitySummary(files: FileMetrics[]): string[] { - const insights: string[] = []; - const highChurn = files.filter((f) => f.commitCount >= 10).length; - const singleAuthor = files.filter((f) => f.authorCount === 1).length; - - if (highChurn > 0) { - insights.push(`${highChurn} file${highChurn > 1 ? 's' : ''} changed 10+ times this month`); - } - if (singleAuthor > 0 && singleAuthor === files.length) { - insights.push(`All files have single author`); - } else if (singleAuthor > files.length / 2) { - insights.push(`${singleAuthor}/${files.length} files have single author`); - } - - return insights; -} - -/** - * Activity command - Show most active files - */ -export const activityCommand = new Command('activity') - .description('Show most active files by commit frequency') - .option('-n, --limit ', 'Number of files to show', '10') - .option('--json', 'Output as JSON', false) - .action(async (options) => { - try { - const config = await loadConfig(); - if (!config) { - logger.error('No config found. 
Run "dev init" first.'); - process.exit(1); - } - - const repositoryPath = config.repository?.path || config.repositoryPath || process.cwd(); - const storagePath = await getStoragePath(path.resolve(repositoryPath)); - const metricsDbPath = path.join(storagePath, 'metrics.db'); - - const store = new MetricsStore(metricsDbPath); - const latestSnapshot = store.getLatestSnapshot(path.resolve(repositoryPath)); - - if (!latestSnapshot) { - logger.warn('No metrics found. Index your repository first with "dev index".'); - store.close(); - process.exit(0); - } - - const limit = Number.parseInt(options.limit, 10); - const files = getMostActive(store, latestSnapshot.id, limit); - - // Get total count for context - const allFiles = store.getCodeMetadata({ snapshotId: latestSnapshot.id, limit: 10000 }); - const totalWithActivity = allFiles.filter((f) => (f.commitCount || 0) >= 5).length; - - store.close(); - - if (files.length === 0) { - logger.warn('No activity metrics found.'); - console.log(''); - console.log(chalk.yellow('📌 This feature requires re-indexing your repository:')); - console.log(''); - console.log(chalk.white(' dev index .')); - console.log(''); - console.log( - chalk.dim(' This is a one-time operation. 
Future updates will maintain activity data.') - ); - console.log(''); - process.exit(0); - } - - // JSON output for programmatic use - if (options.json) { - console.log(JSON.stringify({ files, totalWithActivity }, null, 2)); - return; - } - - // Human-readable table output - console.log(''); - console.log( - chalk.bold.cyan(`📊 Most Active Files (${totalWithActivity} total with 5+ commits)`) - ); - console.log(''); - console.log(formatFileMetricsTable(files)); - console.log(''); - - // Add summary - const summary = generateActivitySummary(files); - if (summary.length > 0) { - console.log(chalk.dim('Summary:')); - for (const insight of summary) { - console.log(chalk.dim(` • ${insight}`)); - } - } - - console.log(''); - } catch (error) { - logger.error( - `Failed to get activity metrics: ${error instanceof Error ? error.message : String(error)}` - ); - process.exit(1); - } - }); diff --git a/packages/cli/src/commands/index.ts b/packages/cli/src/commands/index.ts index 3789359..961ae2b 100644 --- a/packages/cli/src/commands/index.ts +++ b/packages/cli/src/commands/index.ts @@ -205,8 +205,8 @@ export const indexCommand = new Command('index') 'documents', embeddingStartTime ); - } else { - // Scanning phase + } else if (progress.phase === 'scanning') { + // Scanning phase - show file progress progressRenderer.updateSectionWithRate( progress.filesProcessed, progress.totalFiles, diff --git a/packages/cli/src/commands/owners.ts b/packages/cli/src/commands/owners.ts deleted file mode 100644 index de647f1..0000000 --- a/packages/cli/src/commands/owners.ts +++ /dev/null @@ -1,630 +0,0 @@ -/** - * Owners command - Show code ownership and developer contributions - */ - -import { execSync } from 'node:child_process'; -import * as path from 'node:path'; -import { getStoragePath, MetricsStore } from '@lytics/dev-agent-core'; -import chalk from 'chalk'; -import { Command } from 'commander'; -import { logger } from '../utils/logger.js'; - -/** - * Developer ownership stats - */ 
-interface DeveloperStats { - email: string; - displayName: string; // GitHub handle or shortened email - files: number; - commits: number; - linesOfCode: number; - lastActive: Date | null; - topFiles: Array<{ path: string; commits: number; loc: number }>; -} - -/** - * Extract GitHub handle from email or git config - */ -function getDisplayName(email: string, repositoryPath: string): string { - const { execSync } = require('node:child_process'); - - // Try GitHub-style emails: username@users.noreply.github.com - const githubMatch = email.match(/^([^@]+)@users\.noreply\.github\.com$/); - if (githubMatch) { - return `@${githubMatch[1]}`; - } - - // Try to get GitHub username from git config - try { - const username = execSync('git config --get github.user', { - cwd: repositoryPath, - encoding: 'utf-8', - stdio: ['pipe', 'pipe', 'ignore'], - }).trim(); - - if (username) { - return `@${username}`; - } - } catch (_error) { - // Git config not set, continue - } - - // Fallback: shorten email (username part only) - const atIndex = email.indexOf('@'); - if (atIndex > 0) { - return email.substring(0, atIndex); - } - - return email; -} - -/** - * Get current user as GitHub handle - */ -function getCurrentUser(repositoryPath: string): string { - const { execSync } = require('node:child_process'); - try { - const email = execSync('git config user.email', { - cwd: repositoryPath, - encoding: 'utf-8', - stdio: ['pipe', 'pipe', 'ignore'], - }).trim(); - return getDisplayName(email, repositoryPath); - } catch { - return 'unknown'; - } -} - -/** - * Get list of changed files (uncommitted changes) - */ -function getChangedFiles(repositoryPath: string): string[] { - const { execSync } = require('node:child_process'); - try { - const output = execSync('git diff --name-only HEAD', { - cwd: repositoryPath, - encoding: 'utf-8', - stdio: ['pipe', 'pipe', 'ignore'], - }); - return output.trim().split('\n').filter(Boolean); - } catch { - return []; - } -} - -/** - * Check if current 
directory is at repo root - */ -function isAtRepoRoot(repositoryPath: string): boolean { - return process.cwd() === repositoryPath; -} - -/** - * Get current directory relative to repo root - */ -function getCurrentDirectory(repositoryPath: string): string { - const cwd = process.cwd(); - if (cwd === repositoryPath) return ''; - return `${cwd.replace(repositoryPath, '').replace(/^\//, '')}/`; -} - -/** - * Get git repository root (or process.cwd() if not in git repo) - */ -function getGitRoot(): string { - try { - const output = execSync('git rev-parse --show-toplevel', { - encoding: 'utf-8', - stdio: ['pipe', 'pipe', 'ignore'], - }); - return output.trim(); - } catch { - return process.cwd(); - } -} - -/** - * Get ownership for specific files using git log (for uncommitted changes) - */ -function getFileOwnership( - repositoryPath: string, - filePaths: string[] -): Map< - string, - { - owner: string; - commits: number; - lastActive: Date | null; - recentContributor?: { name: string; lastActive: Date | null }; - } -> { - const fileOwners = new Map< - string, - { - owner: string; - commits: number; - lastActive: Date | null; - recentContributor?: { name: string; lastActive: Date | null }; - } - >(); - - for (const filePath of filePaths) { - try { - const absolutePath = path.join(repositoryPath, filePath); - const output = execSync( - `git log --follow --format='%ae|%aI' --numstat -- "${absolutePath}" | head -100`, - { - cwd: repositoryPath, - encoding: 'utf-8', - stdio: ['pipe', 'pipe', 'ignore'], - } - ); - - const lines = output.trim().split('\n'); - const authors = new Map(); - - let currentEmail = ''; - let currentDate: Date | null = null; - - for (const line of lines) { - if (line.includes('|')) { - // Author line: email|date - const [email, dateStr] = line.split('|'); - currentEmail = email.trim(); - currentDate = new Date(dateStr); - - const existing = authors.get(currentEmail); - if (!existing) { - authors.set(currentEmail, { commits: 1, lastActive: 
currentDate }); - } else { - existing.commits++; - if (!existing.lastActive || currentDate > existing.lastActive) { - existing.lastActive = currentDate; - } - } - } - } - - if (authors.size > 0) { - // Get primary author (most commits) - const sortedByCommits = Array.from(authors.entries()).sort( - (a, b) => b[1].commits - a[1].commits - ); - const [primaryEmail, primaryData] = sortedByCommits[0]; - const primaryHandle = getDisplayName(primaryEmail, repositoryPath); - - // Find most recent contributor - const sortedByRecency = Array.from(authors.entries()).sort((a, b) => { - const dateA = a[1].lastActive?.getTime() || 0; - const dateB = b[1].lastActive?.getTime() || 0; - return dateB - dateA; - }); - const [recentEmail, recentData] = sortedByRecency[0]; - const recentHandle = getDisplayName(recentEmail, repositoryPath); - - // Check if recent contributor is different from primary owner - const recentContributor = - recentHandle !== primaryHandle - ? { name: recentHandle, lastActive: recentData.lastActive } - : undefined; - - fileOwners.set(filePath, { - owner: primaryHandle, - commits: primaryData.commits, - lastActive: primaryData.lastActive, - recentContributor, - }); - } - } catch {} - } - - return fileOwners; -} - -/** - * Calculate developer ownership from indexed data (instant, no git calls!) 
- */ -async function calculateDeveloperOwnership( - store: MetricsStore, - snapshotId: string, - repositoryPath: string -): Promise { - // Get all files with metrics - const allFiles = store.getCodeMetadata({ snapshotId, limit: 10000 }); - - // Build file path lookup map - const fileMetadataMap = new Map(allFiles.map((f) => [f.filePath, f])); - - // Calculate file author contributions on-demand (fast batched git call) - const { calculateFileAuthorContributions } = await import('@lytics/dev-agent-core'); - const fileAuthors = await calculateFileAuthorContributions({ repositoryPath }); - - // Build developer stats grouped by GitHub handle (normalized identity) - const devMap = new Map< - string, - { - emails: Set; // Track all emails for this developer - files: Set; - commits: number; - linesOfCode: number; - lastActive: Date | null; - fileCommits: Map; - } - >(); - - // For each file, assign to primary author (most commits) - for (const [filePath, authors] of fileAuthors) { - if (authors.length === 0) continue; - - // Primary author is first in list (already sorted by commit count) - const primaryAuthor = authors[0]; - if (!primaryAuthor) continue; - - const fileMetadata = fileMetadataMap.get(filePath); - if (!fileMetadata) continue; - - // Normalize to GitHub handle (groups multiple emails for same developer) - const displayName = getDisplayName(primaryAuthor.authorEmail, repositoryPath); - - // Update developer stats (grouped by display name, not email) - let devData = devMap.get(displayName); - if (!devData) { - devData = { - emails: new Set(), - files: new Set(), - commits: 0, - linesOfCode: 0, - lastActive: null, - fileCommits: new Map(), - }; - devMap.set(displayName, devData); - } - - // Track this email for this developer - devData.emails.add(primaryAuthor.authorEmail); - - devData.files.add(filePath); - devData.commits += primaryAuthor.commitCount; - devData.linesOfCode += fileMetadata.linesOfCode; - devData.fileCommits.set(filePath, { - commits: 
primaryAuthor.commitCount, - loc: fileMetadata.linesOfCode, - }); - - // Track most recent activity - const lastCommit = primaryAuthor.lastCommit || fileMetadata.lastModified; - if (lastCommit) { - if (!devData.lastActive || lastCommit > devData.lastActive) { - devData.lastActive = lastCommit; - } - } - } - - // Convert to array and sort by file count - const developers: DeveloperStats[] = []; - for (const [displayName, data] of devMap) { - // Get top 5 files by commit count - const sortedFiles = Array.from(data.fileCommits.entries()) - .sort((a, b) => b[1].commits - a[1].commits) - .slice(0, 5); - - // Use first email for identity (already normalized by displayName) - const primaryEmail = Array.from(data.emails)[0] || displayName; - - developers.push({ - email: primaryEmail, - displayName, - files: data.files.size, - commits: data.commits, - linesOfCode: data.linesOfCode, - lastActive: data.lastActive, - topFiles: sortedFiles.map(([path, stats]) => ({ - path, - commits: stats.commits, - loc: stats.loc, - })), - }); - } - - // Sort by number of files owned (descending) - developers.sort((a, b) => b.files - a.files); - - return developers; -} - -/** - * Format changed files mode with tree branches - */ -function formatChangedFilesMode( - changedFiles: string[], - fileOwners: Map< - string, - { - owner: string; - commits: number; - lastActive: Date | null; - recentContributor?: { name: string; lastActive: Date | null }; - } - >, - currentUser: string, - _repositoryPath: string -): string { - let output = ''; - output += chalk.bold('📝 Modified files') + chalk.gray(` (${changedFiles.length}):\n`); - - const reviewers = new Set(); - - for (let i = 0; i < changedFiles.length; i++) { - const file = changedFiles[i]; - const isLast = i === changedFiles.length - 1; - const prefix = isLast ? '└─' : '├─'; - const ownerInfo = fileOwners.get(file); - - // Shorten file path for display - const displayPath = file.length > 60 ? 
`...${file.slice(-57)}` : file; - - if (!ownerInfo) { - // New file - no history - output += ` ${chalk.gray(prefix)} 🆕 ${chalk.white(displayPath)}\n`; - output += chalk.dim(` ${isLast ? ' ' : '│'} New file\n`); - } else { - const isYours = ownerInfo.owner === currentUser; - const lastTouched = ownerInfo.lastActive - ? formatRelativeTime(ownerInfo.lastActive) - : 'unknown'; - - if (isYours) { - // Your file - no icon, minimal noise - output += ` ${chalk.gray(prefix)} ${chalk.white(displayPath)}\n`; - output += chalk.dim( - ` ${isLast ? ' ' : '│'} ${chalk.cyan(ownerInfo.owner)} • ${ownerInfo.commits} commits • Last: ${lastTouched}\n` - ); - - // Check if someone else touched it recently - if (ownerInfo.recentContributor) { - const recentTime = ownerInfo.recentContributor.lastActive - ? formatRelativeTime(ownerInfo.recentContributor.lastActive) - : 'recently'; - output += chalk.dim( - ` ${isLast ? ' ' : '│'} ${chalk.yellow(`⚠️ Recent activity by ${chalk.cyan(ownerInfo.recentContributor.name)} (${recentTime})`)}\n` - ); - reviewers.add(ownerInfo.recentContributor.name); - } - } else { - // Someone else's file - flag for review - output += ` ${chalk.gray(prefix)} ⚠️ ${chalk.white(displayPath)}\n`; - output += chalk.dim( - ` ${isLast ? 
' ' : '│'} ${chalk.cyan(ownerInfo.owner)} • ${ownerInfo.commits} commits • Last: ${lastTouched}\n` - ); - reviewers.add(ownerInfo.owner); - } - } - - if (!isLast) output += chalk.dim(` │\n`); - } - - if (reviewers.size > 0) { - output += '\n'; - output += chalk.yellow(`💡 Suggested reviewers: ${Array.from(reviewers).join(', ')}\n`); - } - - return output; -} - -/** - * Format root directory mode with tree branches - */ -function formatRootDirectoryMode(developers: DeveloperStats[], repositoryPath: string): string { - // Group files by top-level directory - const dirMap = new Map; owner: string; lastActive: Date | null }>(); - - for (const dev of developers) { - for (const fileData of dev.topFiles) { - const relativePath = fileData.path.replace(`${repositoryPath}/`, ''); - const parts = relativePath.split('/'); - - // For monorepos (packages/*, apps/*), show 2 levels. Otherwise, 1 level. - let topDir = parts[0] || ''; - if (topDir === 'packages' || topDir === 'apps' || topDir === 'libs') { - topDir = parts.slice(0, 2).join('/'); - } - - if (!topDir) continue; - - let dirData = dirMap.get(topDir); - if (!dirData) { - dirData = { files: new Set(), owner: dev.displayName, lastActive: dev.lastActive }; - dirMap.set(topDir, dirData); - } - dirData.files.add(fileData.path); - - // Use most recently active owner - if (dev.lastActive && (!dirData.lastActive || dev.lastActive > dirData.lastActive)) { - dirData.owner = dev.displayName; - dirData.lastActive = dev.lastActive; - } - } - } - - const repoName = repositoryPath.split('/').pop() || 'repository'; - let output = chalk.bold(`📦 ${repoName}\n\n`); - output += chalk.bold('Top areas:\n'); - - const dirs = Array.from(dirMap.entries()).sort((a, b) => b[1].files.size - a[1].files.size); - - for (let i = 0; i < Math.min(dirs.length, 10); i++) { - const [dirName, data] = dirs[i]; - const isLast = i === Math.min(dirs.length, 10) - 1; - const prefix = isLast ? '└─' : '├─'; - const relTime = data.lastActive ? 
formatRelativeTime(data.lastActive) : 'unknown'; - - output += chalk.dim(` ${prefix} `) + chalk.cyan(`📁 ${dirName}/`); - output += chalk.gray(` ${data.owner} • ${data.files.size} files • Active ${relTime}\n`); - } - - output += '\n'; - output += chalk.dim( - `💡 Tip: Use ${chalk.cyan(`'dev owners ${dirs[0]?.[0]}/'`)} to see details\n` - ); - - return output; -} - -/** - * Format subdirectory mode with tree branches - */ -function formatSubdirectoryMode( - developers: DeveloperStats[], - currentDir: string, - repositoryPath: string -): string { - // Filter developers to only those with files in current directory - const relevantDevs = developers.filter((dev) => - dev.topFiles.some((f) => { - const relativePath = f.path.replace(`${repositoryPath}/`, ''); - return relativePath.startsWith(currentDir); - }) - ); - - if (relevantDevs.length === 0) { - return chalk.yellow('No ownership data found for this directory\n'); - } - - const primary = relevantDevs[0]; - let output = chalk.bold(`📁 ${currentDir}\n\n`); - - output += chalk.bold(`👤 ${primary.displayName}`) + chalk.gray(' (Primary expert)\n'); - output += chalk.dim(` ├─ ${primary.files} files owned\n`); - output += chalk.dim(` ├─ ${primary.commits} commits total\n`); - const lastActiveStr = primary.lastActive ? formatRelativeTime(primary.lastActive) : 'unknown'; - output += chalk.dim(` └─ Last active: ${lastActiveStr}\n`); - - // Show top files in this directory - const filesInDir = primary.topFiles - .filter((f) => { - const relativePath = f.path.replace(`${repositoryPath}/`, ''); - return relativePath.startsWith(currentDir); - }) - .slice(0, 5); - - if (filesInDir.length > 0) { - output += '\n'; - output += chalk.bold('Recent files:\n'); - - for (let i = 0; i < filesInDir.length; i++) { - const file = filesInDir[i]; - const isLast = i === filesInDir.length - 1; - const prefix = isLast ? '└─' : '├─'; - const fileName = file.path.split('/').pop() || file.path; - const locStr = file.loc >= 1000 ? 
`${(file.loc / 1000).toFixed(1)}k` : String(file.loc); - - output += chalk.dim(` ${prefix} ${fileName} • ${file.commits} commits • ${locStr} LOC\n`); - } - } - - output += '\n'; - if (relevantDevs.length === 1) { - output += chalk.dim(`💡 Tip: You're the main contributor here\n`); - } else { - output += chalk.dim(`💡 Tip: ${relevantDevs.length} contributors work in this area\n`); - } - - return output; -} - -/** - * Format relative time (e.g., "2 days ago", "today") - */ -function formatRelativeTime(date: Date): string { - const now = new Date(); - const diffMs = now.getTime() - date.getTime(); - const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24)); - - if (diffDays === 0) return 'today'; - if (diffDays === 1) return 'yesterday'; - if (diffDays < 7) return `${diffDays}d ago`; - if (diffDays < 30) return `${Math.floor(diffDays / 7)}w ago`; - if (diffDays < 365) return `${Math.floor(diffDays / 30)}mo ago`; - return `${Math.floor(diffDays / 365)}y ago`; -} - -/** - * Owners command - Show developer contributions - */ -export const ownersCommand = new Command('owners') - .description('Show code ownership and developer contributions (context-aware)') - .option('-n, --limit ', 'Number of developers to display (default: 10)', '10') - .option('--json', 'Output as JSON', false) - .action(async (options) => { - try { - // Always use git root for metrics lookup (config paths may be relative) - const repositoryPath = getGitRoot(); - const storagePath = await getStoragePath(repositoryPath); - const metricsDbPath = path.join(storagePath, 'metrics.db'); - - const store = new MetricsStore(metricsDbPath); - const latestSnapshot = store.getLatestSnapshot(repositoryPath); - - if (!latestSnapshot) { - logger.warn('No metrics found. 
Index your repository first with "dev index".'); - store.close(); - process.exit(0); - } - - // Calculate developer ownership on-demand (uses fast batched git call) - const developers = await calculateDeveloperOwnership( - store, - latestSnapshot.id, - repositoryPath - ); - store.close(); - - if (developers.length === 0) { - logger.warn('No developer ownership data found.'); - process.exit(0); - } - - // JSON output for programmatic use - if (options.json) { - const limit = Number.parseInt(options.limit, 10); - const topDevelopers = developers.slice(0, limit); - console.log( - JSON.stringify({ developers: topDevelopers, total: developers.length }, null, 2) - ); - return; - } - - // Context-aware modes - console.log(''); - - // Mode 1: Changed files (if there are uncommitted changes) - const changedFiles = getChangedFiles(repositoryPath); - if (changedFiles.length > 0) { - const currentUser = getCurrentUser(repositoryPath); - - // Get real-time ownership for changed files using git log - const fileOwners = getFileOwnership(repositoryPath, changedFiles); - - console.log(formatChangedFilesMode(changedFiles, fileOwners, currentUser, repositoryPath)); - console.log(''); - return; - } - - // Mode 2: Root directory (show high-level areas) - if (isAtRepoRoot(repositoryPath)) { - console.log(formatRootDirectoryMode(developers, repositoryPath)); - console.log(''); - return; - } - - // Mode 3: Subdirectory (show expertise for current area) - const currentDir = getCurrentDirectory(repositoryPath); - console.log(formatSubdirectoryMode(developers, currentDir, repositoryPath)); - console.log(''); - } catch (error) { - logger.error( - `Failed to get ownership metrics: ${error instanceof Error ? 
error.message : String(error)}` - ); - process.exit(1); - } - }); diff --git a/packages/cli/src/utils/progress.ts b/packages/cli/src/utils/progress.ts index 8947360..c2bff11 100644 --- a/packages/cli/src/utils/progress.ts +++ b/packages/cli/src/utils/progress.ts @@ -61,7 +61,7 @@ export class ProgressRenderer { */ updateSectionWithRate(processed: number, total: number, unit: string, startTime: number): void { if (total === 0) { - this.updateSection('Discovering...'); + this.updateSection('Discovering repository... this process may take 3-5 minutes'); return; } @@ -195,8 +195,6 @@ export function formatFinalSummary(stats: { // Next steps lines.push(chalk.dim('💡 Next steps:')); lines.push(` ${chalk.cyan('dev map')} ${chalk.dim('Explore codebase structure')}`); - lines.push(` ${chalk.cyan('dev owners')} ${chalk.dim('See contributor stats')}`); - lines.push(` ${chalk.cyan('dev activity')} ${chalk.dim('Find active files')}`); lines.push(''); return lines.join('\n'); diff --git a/packages/core/src/indexer/index.ts b/packages/core/src/indexer/index.ts index e23270b..d3a2ba2 100644 --- a/packages/core/src/indexer/index.ts +++ b/packages/core/src/indexer/index.ts @@ -108,6 +108,19 @@ export class RepositoryIndexer { exclude: [...this.config.excludePatterns, ...(options.excludePatterns || [])], languages: options.languages, logger: options.logger, + onProgress: (scanProgress) => { + // Forward scanner progress to indexer progress callback + onProgress?.({ + phase: 'scanning', + filesProcessed: scanProgress.filesScanned, + totalFiles: scanProgress.filesTotal, + documentsIndexed: scanProgress.documentsExtracted, + percentComplete: + scanProgress.filesTotal > 0 + ? 
Math.round((scanProgress.filesScanned / scanProgress.filesTotal) * 100) + : 0, + }); + }, }); filesScanned = scanResult.stats.filesScanned; @@ -283,7 +296,6 @@ export class RepositoryIndexer { } // Build code metadata for metrics storage (git change frequency only) - // Author contributions are calculated on-demand in `dev owners` command let codeMetadata: CodeMetadata[] | undefined; if (this.eventBus) { try { @@ -476,7 +488,7 @@ export class RepositoryIndexer { // Build code metadata for metrics storage (only for updated files) // Build code metadata for metrics storage (git change frequency only) - // Author contributions are calculated on-demand in `dev owners` command + // Per-author contributions are no longer calculated let codeMetadata: CodeMetadata[] | undefined; if (this.eventBus && scannedDocuments.length > 0) { try { diff --git a/packages/core/src/indexer/utils/change-frequency.ts b/packages/core/src/indexer/utils/change-frequency.ts index c9fa3f3..98b01ee 100644 --- a/packages/core/src/indexer/utils/change-frequency.ts +++ b/packages/core/src/indexer/utils/change-frequency.ts @@ -24,20 +24,6 @@ export interface FileChangeFrequency { authorCount: number; } -/** - * Author contribution data for a specific file - */ -export interface FileAuthorContribution { - /** Author email */ - authorEmail: string; - - /** Number of commits by this author */ - commitCount: number; - - /** Last commit timestamp by this author */ - lastCommit: Date | null; -} - /** * Options for calculating change frequency */ @@ -190,101 +176,3 @@ export function aggregateChangeFrequency( lastModified: mostRecent, }; } - -/** - * Calculate per-file author contributions (batched for performance) - * - * Returns a map of file path → array of author contributions. - * Uses a single batched git call for efficiency. 
- * - * @param options - Change frequency options - * @returns Map of file paths to author contributions - */ -export async function calculateFileAuthorContributions( - options: ChangeFrequencyOptions -): Promise> { - const { repositoryPath } = options; - const result = new Map(); - - try { - // Single batched git call: get all commits with author, timestamp, and files changed - // Format: commit-hash|author-email|timestamp|file-path (one per line) - const output = execSync('git log --all --pretty=format:"%H|%ae|%ai" --name-only --no-merges', { - cwd: repositoryPath, - encoding: 'utf-8', - maxBuffer: 50 * 1024 * 1024, // 50MB buffer for large repos - stdio: ['pipe', 'pipe', 'ignore'], - }); - - // Parse git output to build file → author → {commits, lastCommit} map - const fileAuthorData = new Map< - string, - Map - >(); - - const lines = output.split('\n'); - let currentCommit: string | null = null; - let currentAuthor: string | null = null; - let currentTimestamp: Date | null = null; - - for (const line of lines) { - const trimmed = line.trim(); - if (!trimmed) continue; - - // Check if this is a commit line (format: hash|email|timestamp) - if (trimmed.includes('|')) { - const parts = trimmed.split('|'); - if (parts.length >= 3) { - currentCommit = parts[0]; - currentAuthor = parts[1]; - currentTimestamp = new Date(parts.slice(2).join('|')); - continue; - } - } - - // This is a file path - if (currentCommit && currentAuthor && currentTimestamp) { - let authorMap = fileAuthorData.get(trimmed); - if (!authorMap) { - authorMap = new Map(); - fileAuthorData.set(trimmed, authorMap); - } - - let authorData = authorMap.get(currentAuthor); - if (!authorData) { - authorData = { commitCount: 0, lastCommit: null }; - authorMap.set(currentAuthor, authorData); - } - - authorData.commitCount++; - - // Track most recent commit by this author - if (!authorData.lastCommit || currentTimestamp > authorData.lastCommit) { - authorData.lastCommit = currentTimestamp; - } - } - } - - // 
Convert to final format - for (const [filePath, authorMap] of fileAuthorData) { - const contributions: FileAuthorContribution[] = []; - - for (const [authorEmail, data] of authorMap) { - contributions.push({ - authorEmail, - commitCount: data.commitCount, - lastCommit: data.lastCommit, - }); - } - - // Sort by commit count (descending) - contributions.sort((a, b) => b.commitCount - a.commitCount); - - result.set(filePath, contributions); - } - } catch (_error) { - // Git command failed, return empty map - } - - return result; -} diff --git a/packages/core/src/indexer/utils/index.ts b/packages/core/src/indexer/utils/index.ts index 848001f..3bf1e0e 100644 --- a/packages/core/src/indexer/utils/index.ts +++ b/packages/core/src/indexer/utils/index.ts @@ -11,8 +11,6 @@ export { aggregateChangeFrequency, type ChangeFrequencyOptions, calculateChangeFrequency, - calculateFileAuthorContributions, - type FileAuthorContribution, type FileChangeFrequency, } from './change-frequency'; diff --git a/packages/core/src/metrics/collector.ts b/packages/core/src/metrics/collector.ts index cc16779..0dcbfed 100644 --- a/packages/core/src/metrics/collector.ts +++ b/packages/core/src/metrics/collector.ts @@ -4,17 +4,25 @@ * Builds CodeMetadata from scanner results and change frequency data. 
*/ -// Note: We import FileAuthorContribution type only for internal use in deriving change frequency -import type { FileAuthorContribution } from '../indexer/utils/change-frequency.js'; -import { calculateFileAuthorContributions } from '../indexer/utils/change-frequency.js'; +import { calculateChangeFrequency } from '../indexer/utils/change-frequency.js'; import type { Document } from '../scanner/types.js'; import type { CodeMetadata } from './types.js'; /** - * Count lines of code in a snippet + * Count lines of code in a file */ -function countLines(content: string): number { - return content.split('\n').length; +async function countFileLines(repositoryPath: string, filePath: string): Promise { + const fs = await import('node:fs/promises'); + const path = await import('node:path'); + + try { + const fullPath = path.join(repositoryPath, filePath); + const content = await fs.readFile(fullPath, 'utf-8'); + return content.split('\n').length; + } catch { + // File doesn't exist or can't be read - return 0 + return 0; + } } /** @@ -32,45 +40,11 @@ export async function buildCodeMetadata( repositoryPath: string, documents: Document[] ): Promise { - // Use fast batched author contributions call to derive change frequency - // This is much faster than the old calculateChangeFrequency which made individual git calls per file - const authorContributions = await calculateFileAuthorContributions({ repositoryPath }).catch( - () => new Map() - ); - - // Derive change frequency from author contributions (no additional git calls!) 
- const changeFreq = new Map< - string, - { commitCount: number; lastModified: Date; authorCount: number } - >(); - - for (const [filePath, contributions] of authorContributions) { - // Sum commit counts across all authors - const commitCount = contributions.reduce( - (sum: number, c: FileAuthorContribution) => sum + c.commitCount, - 0 - ); - - // Get most recent commit across all authors - const lastModified = - contributions.reduce( - (latest: Date | null, c: FileAuthorContribution) => { - if (!c.lastCommit) return latest; - if (!latest) return c.lastCommit; - return c.lastCommit > latest ? c.lastCommit : latest; - }, - null as Date | null - ) || new Date(0); - - // Author count is number of unique contributors - const authorCount = contributions.length; - - changeFreq.set(filePath, { - commitCount, - lastModified, - authorCount, - }); - } + // Calculate change frequency using git log + const changeFreq = await calculateChangeFrequency({ + repositoryPath, + maxCommits: 1000, + }).catch(() => new Map()); // Group documents by file const fileToDocuments = new Map(); @@ -81,38 +55,50 @@ export async function buildCodeMetadata( fileToDocuments.set(filePath, existing); } - // Build metadata for each file + // Build metadata for each file - process in parallel for speed + const CONCURRENCY = 50; // Read 50 files at a time + const fileEntries = Array.from(fileToDocuments.entries()); + const batches: Array<[string, Document[]][]> = []; + + // Create batches + for (let i = 0; i < fileEntries.length; i += CONCURRENCY) { + batches.push(fileEntries.slice(i, i + CONCURRENCY)); + } + const metadata: CodeMetadata[] = []; - for (const [filePath, docs] of fileToDocuments) { - const freq = changeFreq.get(filePath); + // Process each batch in parallel + for (const batch of batches) { + const batchResults = await Promise.all( + batch.map(async ([filePath, docs]) => { + const freq = changeFreq.get(filePath); - // Estimate LOC from first document's snippet (approximate) - // In 
practice, this is an underestimate since snippet is truncated - // But it's good enough for relative comparisons - const linesOfCode = docs[0]?.metadata.snippet - ? countLines(docs[0].metadata.snippet) - : docs[0]?.metadata.endLine - docs[0]?.metadata.startLine || 0; + // Count actual lines of code from the file on disk + const linesOfCode = await countFileLines(repositoryPath, filePath); - // Count unique imports across all documents in this file - const allImports = new Set(); - for (const doc of docs) { - if (doc.metadata.imports) { - for (const imp of doc.metadata.imports) { - allImports.add(imp); + // Count unique imports across all documents in this file + const allImports = new Set(); + for (const doc of docs) { + if (doc.metadata.imports) { + for (const imp of doc.metadata.imports) { + allImports.add(imp); + } + } } - } - } - metadata.push({ - filePath, - commitCount: freq?.commitCount, - lastModified: freq?.lastModified, - authorCount: freq?.authorCount, - linesOfCode, - numFunctions: docs.length, // Each document is a function/component - numImports: allImports.size, - }); + return { + filePath, + commitCount: freq?.commitCount, + lastModified: freq?.lastModified, + authorCount: freq?.authorCount, + linesOfCode, + numFunctions: docs.length, // Each document is a function/component + numImports: allImports.size, + }; + }) + ); + + metadata.push(...batchResults); } return metadata; diff --git a/packages/core/src/scanner/go.ts b/packages/core/src/scanner/go.ts index 44dcf62..9e0da7a 100644 --- a/packages/core/src/scanner/go.ts +++ b/packages/core/src/scanner/go.ts @@ -148,7 +148,12 @@ export class GoScanner implements Scanner { } } - async scan(files: string[], repoRoot: string, logger?: Logger): Promise { + async scan( + files: string[], + repoRoot: string, + logger?: Logger, + onProgress?: (filesProcessed: number, totalFiles: number) => void + ): Promise { const documents: Document[] = []; const total = files.length; const errors: Array<{ @@ -180,9 +185,15 
@@ export class GoScanner implements Scanner { const file = files[i]; const fileStartTime = Date.now(); - // Log progress every 50 files OR every 10 seconds + // Report progress via callback every 50 files OR every 10 seconds const now = Date.now(); const timeSinceLastLog = now - lastLogTime; + + if (onProgress && i > 0 && (i % 50 === 0 || timeSinceLastLog > 10000)) { + onProgress(i, total); + } + + // Log progress every 50 files OR every 10 seconds if (logger && i > 0 && (i % 50 === 0 || timeSinceLastLog > 10000)) { lastLogTime = now; const elapsed = now - startTime; diff --git a/packages/core/src/scanner/markdown.ts b/packages/core/src/scanner/markdown.ts index 0bf3897..4553fa6 100644 --- a/packages/core/src/scanner/markdown.ts +++ b/packages/core/src/scanner/markdown.ts @@ -22,7 +22,12 @@ export class MarkdownScanner implements Scanner { return ext === '.md' || ext === '.mdx'; } - async scan(files: string[], repoRoot: string, _logger?: Logger): Promise { + async scan( + files: string[], + repoRoot: string, + _logger?: Logger, + _onProgress?: (filesProcessed: number, totalFiles: number) => void + ): Promise { const documents: Document[] = []; for (const file of files) { diff --git a/packages/core/src/scanner/registry.ts b/packages/core/src/scanner/registry.ts index 1ac9ca8..ae662fd 100644 --- a/packages/core/src/scanner/registry.ts +++ b/packages/core/src/scanner/registry.ts @@ -132,7 +132,21 @@ export class ScannerRegistry { }); try { - const documents = await scanner.scan(scannerFiles, options.repoRoot, logger); + const documents = await scanner.scan( + scannerFiles, + options.repoRoot, + logger, + (filesProcessed, _totalFiles) => { + // Emit progress updates from scanner + emitProgress({ + phase: 'scanning', + language: scanner.language, + filesTotal: files.length, + filesScanned: totalFilesScanned + filesProcessed, + documentsExtracted: allDocuments.length, + }); + } + ); allDocuments.push(...documents); totalFilesScanned += scannerFiles.length; diff --git 
a/packages/core/src/scanner/types.ts b/packages/core/src/scanner/types.ts index 1cb83e3..f89c0e6 100644 --- a/packages/core/src/scanner/types.ts +++ b/packages/core/src/scanner/types.ts @@ -87,8 +87,14 @@ export interface Scanner { * @param files - List of files to scan (relative paths) * @param repoRoot - Repository root path * @param logger - Optional logger for progress output + * @param onProgress - Optional callback for progress updates */ - scan(files: string[], repoRoot: string, logger?: Logger): Promise; + scan( + files: string[], + repoRoot: string, + logger?: Logger, + onProgress?: (filesProcessed: number, totalFiles: number) => void + ): Promise; /** * Check if this scanner can handle a file diff --git a/packages/core/src/scanner/typescript.ts b/packages/core/src/scanner/typescript.ts index 9929378..4fc8b07 100644 --- a/packages/core/src/scanner/typescript.ts +++ b/packages/core/src/scanner/typescript.ts @@ -70,7 +70,12 @@ export class TypeScriptScanner implements Scanner { }); } - async scan(files: string[], repoRoot: string, logger?: Logger): Promise { + async scan( + files: string[], + repoRoot: string, + logger?: Logger, + onProgress?: (filesProcessed: number, totalFiles: number) => void + ): Promise { // Initialize project with lenient type checking enabled // - Allows cross-file symbol resolution for better callee extraction // - Keeps strict checks disabled to avoid blocking on type errors @@ -214,6 +219,14 @@ export class TypeScriptScanner implements Scanner { const now = Date.now(); const timeSinceLastLog = now - lastLogTime; + // Report progress via callback: every 2 batches OR every 10 seconds OR last batch + if ( + onProgress && + (batchIndex % 2 === 0 || timeSinceLastLog > 10000 || batchIndex === batches.length - 1) + ) { + onProgress(processedCount, total); + } + // Log progress: every 2 batches OR every 10 seconds OR last batch if ( logger &&