-
Notifications
You must be signed in to change notification settings - Fork 0
Optimize search index initialization performance #112
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -253,6 +253,21 @@ export class Repository { | |
| } | ||
| } | ||
|
|
||
| async getAllClaims(): Promise<Claim[]> { | ||
| try { | ||
| const results = await this.db.exec({ | ||
| sql: `SELECT * FROM claims`, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| returnValue: 'resultRows', | ||
| rowMode: 'object', | ||
| }); | ||
| const rows = z.array(z.unknown()).parse(results); | ||
| return rows.map((r) => this.parseMetadata(ClaimSchema, r)); | ||
| } catch (err) { | ||
| logger.error('Failed to fetch all claims', err); | ||
| throw new AppError('Failed to fetch all claims', 'DB_ERROR', err); | ||
| } | ||
| } | ||
|
|
||
| async updateClaim(id: string, claim: Partial<Claim>): Promise<Claim> { | ||
| try { | ||
| const results = await this.db.exec({ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| import { create, insert, remove, search, type Orama } from '@orama/orama'; | ||
| import { create, insert, insertMultiple, remove, search, type Orama } from '@orama/orama'; | ||
| import { repository } from '../db/repository.js'; | ||
| import { logger } from './logger.js'; | ||
| import { compressText } from './nlp.js'; | ||
|
|
@@ -13,14 +13,14 @@ interface SearchDocument { | |
| keywords: string; | ||
| } | ||
|
|
||
| type OramaSchema = typeof searchSchema; | ||
| const searchSchema = { | ||
| id: 'string', | ||
| type: 'string', | ||
| title: 'string', | ||
| content: 'string', | ||
| keywords: 'string', | ||
| } as const; | ||
| export type OramaSchema = typeof searchSchema; | ||
|
|
||
| let oramaDb: Orama<OramaSchema> | null = null; | ||
| const oramaIdMap = new Map<string, string>(); // entityId → oramaInternalId | ||
|
|
@@ -68,21 +68,56 @@ export const initSearch = async () => { | |
|
|
||
| try { | ||
| oramaDb = await create({ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 MEDIUM RISK
Try running the following prompt in your coding agent:
|
||
| schema: { | ||
| id: 'string', | ||
| type: 'string', | ||
| title: 'string', | ||
| content: 'string', | ||
| keywords: 'string', | ||
| }, | ||
| schema: searchSchema, | ||
| }); | ||
|
|
||
| const entities = await repository.getAllEntities(); | ||
| const [entities, allClaims] = await Promise.all([ | ||
| repository.getAllEntities(), | ||
| repository.getAllClaims(), | ||
| ]); | ||
|
|
||
| // Group claims by entity_id for efficient lookup | ||
| const claimsByEntity = new Map<string, Claim[]>(); | ||
| for (const claim of allClaims) { | ||
| const list = claimsByEntity.get(claim.entity_id) || []; | ||
| list.push(claim); | ||
| claimsByEntity.set(claim.entity_id, list); | ||
| } | ||
|
|
||
| const docs: SearchDocument[] = []; | ||
| const originalIds: string[] = []; | ||
|
|
||
| for (const entity of entities) { | ||
| await indexEntityById(entity.id!); | ||
| docs.push({ | ||
| id: entity.id!, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| type: 'entity', | ||
| title: entity.name, | ||
| content: compressText(`${entity.name} ${entity.description || ''}`), | ||
| keywords: entity.type, | ||
| }); | ||
| originalIds.push(entity.id!); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| const claims = claimsByEntity.get(entity.id!) || []; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| for (const claim of claims) { | ||
| docs.push({ | ||
| id: claim.id!, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| type: 'claim', | ||
| title: entity.name, | ||
| content: compressText(claim.statement), | ||
| keywords: [entity.id!, claim.source || 'unknown'].join(','), | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| }); | ||
| originalIds.push(claim.id!); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| } | ||
|
Comment on lines
+91
to
+110
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 MEDIUM RISK Suggestion: The mapping logic for transforming entities and claims into search documents is now duplicated between the bulk 'initSearch' and the single-item 'addEntityToIndex'. This creates a maintainability risk where the two indexing paths could diverge. Try running the following prompt in your IDE agent:
|
||
| } | ||
|
|
||
| if (docs.length > 0) { | ||
| const oramaIds = await insertMultiple(oramaDb, docs); | ||
| for (let i = 0; i < originalIds.length; i++) { | ||
| oramaIdMap.set(originalIds[i], oramaIds[i]); | ||
| } | ||
| } | ||
|
|
||
| logger.info('Orama search index initialized'); | ||
| logger.info(`Orama search index initialized with ${entities.length} entities and ${allClaims.length} claims`); | ||
|
|
||
| // Register job handlers | ||
| jobCoordinator.registerHandler('reindex-document', async (payload) => { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔴 HIGH RISK The job handlers are registered inside Try running the following prompt in your coding agent:
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🟡 MEDIUM RISK
The environment check for Node.js is flagged as redundant. You can simplify this using optional chaining, which is safer and satisfies the linter's requirement for non-constant conditionals in a multi-environment codebase.
See Issue in Codacy