Skip to content

Commit 443d59c

Browse files
feat(web): support .gitattributes linguist-language overrides in file viewer (#1048)
1 parent abcb180 commit 443d59c

File tree

3 files changed

+87
-1
lines changed

3 files changed

+87
-1
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Added
11+
- Added support for `.gitattributes` `linguist-language` overrides in the file viewer ([#1048](https://github.com/sourcebot-dev/sourcebot/pull/1048))
12+
1013
### Fixed
1114
- Fixed Ask GitHub landing page chat box placement to be centered on the page instead of at the bottom. [#1046](https://github.com/sourcebot-dev/sourcebot/pull/1046)
1215

packages/web/src/features/git/getFileSourceApi.ts

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { sew } from '@/actions';
22
import { getBrowsePath } from '@/app/[domain]/browse/hooks/utils';
33
import { getAuditService } from '@/ee/features/audit/factory';
44
import { SINGLE_TENANT_ORG_DOMAIN } from '@/lib/constants';
5+
import { parseGitAttributes, resolveLanguageFromGitAttributes } from '@/lib/gitattributes';
56
import { detectLanguageFromFilename } from '@/lib/languageDetection';
67
import { ServiceError, notFound, fileNotFound, invalidGitRef, unexpectedError } from '@/lib/serviceError';
78
import { getCodeHostBrowseFileAtBranchUrl } from '@/lib/utils';
@@ -65,7 +66,17 @@ export const getFileSource = async ({ path: filePath, repo: repoName, ref }: Fil
6566
throw error;
6667
}
6768

68-
const language = detectLanguageFromFilename(filePath);
69+
let gitattributesContent: string | undefined;
70+
try {
71+
gitattributesContent = await git.raw(['show', `${gitRef}:.gitattributes`]);
72+
} catch {
73+
// No .gitattributes in this repo/ref, that's fine
74+
}
75+
76+
const language = gitattributesContent
77+
? (resolveLanguageFromGitAttributes(filePath, parseGitAttributes(gitattributesContent)) ?? detectLanguageFromFilename(filePath))
78+
: detectLanguageFromFilename(filePath);
79+
6980
const externalWebUrl = getCodeHostBrowseFileAtBranchUrl({
7081
webUrl: repo.webUrl,
7182
codeHostType: repo.external_codeHostType,
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import micromatch from 'micromatch';
2+
3+
// GitAttributes holds parsed .gitattributes rules for overriding language detection.
4+
export interface GitAttributes {
5+
// Rules in the order they were pushed by parseGitAttributes (file order).
// resolveLanguageFromGitAttributes walks them front to back and lets the
// last matching rule win.
rules: GitAttributeRule[];
6+
}
7+
8+
interface GitAttributeRule {
9+
// The path pattern taken from the start of a .gitattributes line.
pattern: string;
10+
// Attribute name -> state, always encoded as a string by parseGitAttributes:
//   "attr"        -> 'true'         (set)
//   "-attr"       -> 'false'        (unset)
//   "!attr"       -> 'unspecified'  (reset to default)
//   "attr=value"  -> 'value'
attrs: Record<string, string>;
11+
}
12+
13+
// Single-character C-style escapes accepted inside a double-quoted pattern.
const GITATTRIBUTES_SIMPLE_ESCAPES: Readonly<Record<string, string>> = {
    a: '\x07',
    b: '\b',
    f: '\f',
    n: '\n',
    r: '\r',
    t: '\t',
    v: '\v',
    '"': '"',
    '\\': '\\',
};

// splitGitAttributesLine separates the leading pattern of a (trimmed) line
// from the attribute text that follows it. A pattern that starts with a double
// quote is unquoted C-style so it may contain whitespace. If the quote is
// unterminated or uses an escape not listed in GITATTRIBUTES_SIMPLE_ESCAPES
// (e.g. octal), the line falls back to plain whitespace splitting.
function splitGitAttributesLine(line: string): { pattern: string; rest: string } {
    if (line.startsWith('"')) {
        let unquoted = '';
        for (let i = 1; i < line.length; i++) {
            const ch = line.charAt(i);
            if (ch === '"') {
                return { pattern: unquoted, rest: line.slice(i + 1) };
            }
            if (ch !== '\\') {
                unquoted += ch;
                continue;
            }
            const escaped = GITATTRIBUTES_SIMPLE_ESCAPES[line.charAt(i + 1)];
            if (escaped === undefined) {
                break;
            }
            unquoted += escaped;
            i++;
        }
    }

    const firstBlank = /\s/.exec(line);
    const end = firstBlank ? firstBlank.index : line.length;
    return { pattern: line.slice(0, end), rest: line.slice(end) };
}

// parseGitAttributes parses the content of a .gitattributes file.
// Each non-comment, non-empty line has the form: pattern attr1 attr2=value ...
// Attributes can be:
//   - "linguist-vendored" (set, stored as 'true')
//   - "-linguist-vendored" (unset, stored as 'false')
//   - "!linguist-vendored" (unspecified, stored as 'unspecified')
//   - "linguist-language=Go" (stored as 'Go')
//
// Lines that are skipped entirely:
//   - blank lines and lines starting with '#'
//   - "[attr]name ..." macro definitions, which are not path patterns
//   - lines with a pattern but no attributes
//
// Rules are returned in file order so callers can apply last-match-wins.
export function parseGitAttributes(content: string): GitAttributes {
    const rules: GitAttributeRule[] = [];

    for (const raw of content.split('\n')) {
        // trim() also drops the '\r' left behind by CRLF line endings.
        const line = raw.trim();
        if (line === '' || line.startsWith('#')) {
            continue;
        }

        // Macro definitions would otherwise be stored as a glob whose
        // "[attr]" prefix is interpreted as a character class.
        if (line.startsWith('[attr]')) {
            continue;
        }

        const { pattern, rest } = splitGitAttributesLine(line);
        const attrText = rest.trim();
        if (pattern === '' || attrText === '') {
            continue;
        }

        const attrs: Record<string, string> = {};

        for (const field of attrText.split(/\s+/)) {
            let name: string;
            let value: string;

            if (field.startsWith('!')) {
                // !attr means unspecified (reset to default)
                name = field.slice(1);
                value = 'unspecified';
            } else if (field.startsWith('-')) {
                // -attr means unset (false)
                name = field.slice(1);
                value = 'false';
            } else {
                const eqIdx = field.indexOf('=');
                if (eqIdx !== -1) {
                    // attr=value
                    name = field.slice(0, eqIdx);
                    value = field.slice(eqIdx + 1);
                } else {
                    // attr alone means set (true)
                    name = field;
                    value = 'true';
                }
            }

            // A bare "-", "!" or "=value" has no attribute name; ignore it.
            if (name !== '') {
                attrs[name] = value;
            }
        }

        rules.push({ pattern, attrs });
    }

    return { rules };
}
60+
61+
// Attribute states that parseGitAttributes stores for "attr", "-attr" and
// "!attr". None of them names a language, so they clear an override instead
// of being returned as one.
const LINGUIST_LANGUAGE_NON_VALUES: ReadonlySet<string> = new Set(['true', 'false', 'unspecified']);

// resolveLanguageFromGitAttributes returns the linguist-language override for
// the given file path based on the parsed .gitattributes rules, or undefined
// if no rule applies. Last matching rule wins, consistent with gitattributes
// semantics; a later matching "-linguist-language" / "!linguist-language"
// therefore removes an earlier override.
//
// Patterns are matched the way gitattributes describes rather than with
// micromatch defaults:
//   - a pattern without '/' is matched against the basename at any depth
//   - a leading '/' anchors the pattern to the root and is stripped
//   - dotfiles and dot-directories can be matched by wildcards
//   - patterns starting with '!' are ignored (negative patterns are not
//     allowed in gitattributes, and micromatch would treat them as negation)
//
// filePath is expected to be relative to the directory containing the
// .gitattributes file, without a leading slash.
export function resolveLanguageFromGitAttributes(filePath: string, gitAttributes: GitAttributes): string | undefined {
    let language: string | undefined;

    for (const rule of gitAttributes.rules) {
        const value = rule.attrs['linguist-language'];
        // Check the attribute first so rules that do not mention it never
        // reach the glob matcher.
        if (value === undefined || rule.pattern.startsWith('!')) {
            continue;
        }

        // Anchoring is decided on the pattern as written, before the leading
        // slash is removed.
        const anchored = rule.pattern.includes('/');
        const glob = rule.pattern.startsWith('/') ? rule.pattern.slice(1) : rule.pattern;
        if (glob === '') {
            continue;
        }

        if (!micromatch.isMatch(filePath, glob, { dot: true, basename: !anchored })) {
            continue;
        }

        language = value === '' || LINGUIST_LANGUAGE_NON_VALUES.has(value) ? undefined : value;
    }

    return language;
}

0 commit comments

Comments
 (0)