Skip to content

Commit 1500caf

Browse files
committed
feat(core): add scanner logging and Go-specific exclusions
Logging:
- Add logger option to ScanOptions and IndexOptions
- Log per-language file counts during discovery
- Log scan progress per language
- Log embedding progress every 10 batches (and on the last batch)
- Pass logger from CLI (--verbose enables debug level)

Go Exclusions (default):
- *.pb.go (protobuf)
- *.gen.go, *_gen.go (code generators)
- *.pb.gw.go (gRPC gateway)
- mock_*.go (mockgen)
- mocks/ (mock directories)
- testdata/ (test fixtures)

Spinner UX:
- Show 'Embedding X/Y documents' during storing phase
- Add totalDocuments to IndexProgress for accurate counts

Tests updated for new type imports.
1 parent 73f168a commit 1500caf

8 files changed

Lines changed: 193 additions & 13 deletions

File tree

packages/cli/src/commands/index.ts

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import { Command } from 'commander';
1717
import ora from 'ora';
1818
import { getDefaultConfig, loadConfig } from '../utils/config.js';
1919
import { formatBytes, getDirectorySize } from '../utils/file.js';
20-
import { logger } from '../utils/logger.js';
20+
import { createIndexLogger, logger } from '../utils/logger.js';
2121

2222
/**
2323
* Check if a command is available
@@ -129,18 +129,27 @@ export const indexCommand = new Command('index')
129129

130130
spinner.text = 'Scanning repository...';
131131

132+
// Create logger for indexing (verbose mode shows debug logs)
133+
const indexLogger = createIndexLogger(options.verbose);
134+
132135
const startTime = Date.now();
133136
let lastUpdate = startTime;
134137

135138
const stats = await indexer.index({
136139
force: options.force,
140+
logger: indexLogger,
137141
onProgress: (progress) => {
138142
const now = Date.now();
139143
// Update spinner every 100ms to avoid flickering
140144
if (now - lastUpdate > 100) {
141-
const percent = progress.percentComplete || 0;
142-
const currentFile = progress.currentFile ? ` ${progress.currentFile}` : '';
143-
spinner.text = `${progress.phase}:${currentFile} (${percent.toFixed(0)}%)`;
145+
if (progress.phase === 'storing' && progress.totalDocuments) {
146+
// Show document count with percentage
147+
const pct = Math.round((progress.documentsIndexed / progress.totalDocuments) * 100);
148+
spinner.text = `Embedding ${progress.documentsIndexed}/${progress.totalDocuments} documents (${pct}%)`;
149+
} else {
150+
const percent = progress.percentComplete || 0;
151+
spinner.text = `${progress.phase} (${percent.toFixed(0)}%)`;
152+
}
144153
lastUpdate = now;
145154
}
146155
},

packages/cli/src/utils/logger.ts

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* CLI Logger using @lytics/kero
33
*/
44

5-
import { createLogger } from '@lytics/kero';
5+
import { createLogger, type Logger, type LogLevel } from '@lytics/kero';
66

77
// Create a logger with pretty output and icons
88
const keroLogger = createLogger({
@@ -36,3 +36,14 @@ export const logger = {
3636
keroLogger.debug(message);
3737
},
3838
};
39+
40+
/**
41+
* Create a logger for indexing operations with configurable verbosity
42+
*/
43+
export function createIndexLogger(verbose: boolean): Logger {
44+
const level: LogLevel = verbose ? 'debug' : 'info';
45+
return createLogger({
46+
level,
47+
format: 'pretty',
48+
});
49+
}

packages/core/src/indexer/index.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,12 +87,16 @@ export class RepositoryIndexer {
8787
include: options.languages?.map((lang) => `**/*.${getExtensionForLanguage(lang)}`),
8888
exclude: [...this.config.excludePatterns, ...(options.excludePatterns || [])],
8989
languages: options.languages,
90+
logger: options.logger,
9091
});
9192

9293
filesScanned = scanResult.documents.length;
9394
documentsExtracted = scanResult.documents.length;
9495

9596
// Phase 2: Prepare documents for embedding
97+
const logger = options.logger?.child({ component: 'indexer' });
98+
logger?.info({ documents: documentsExtracted }, 'Preparing documents for embedding');
99+
96100
onProgress?.({
97101
phase: 'embedding',
98102
filesProcessed: filesScanned,
@@ -104,27 +108,49 @@ export class RepositoryIndexer {
104108
const embeddingDocuments = prepareDocumentsForEmbedding(scanResult.documents);
105109

106110
// Phase 3: Batch embed and store
111+
logger?.info(
112+
{
113+
documents: embeddingDocuments.length,
114+
batchSize: options.batchSize || this.config.batchSize,
115+
},
116+
'Starting embedding and storage'
117+
);
118+
107119
onProgress?.({
108120
phase: 'storing',
109121
filesProcessed: filesScanned,
110122
totalFiles: filesScanned,
111123
documentsIndexed: 0,
124+
totalDocuments: embeddingDocuments.length,
112125
percentComplete: 66,
113126
});
114127

115128
const batchSize = options.batchSize || this.config.batchSize;
129+
const totalBatches = Math.ceil(embeddingDocuments.length / batchSize);
130+
let batchNum = 0;
131+
116132
for (let i = 0; i < embeddingDocuments.length; i += batchSize) {
117133
const batch = embeddingDocuments.slice(i, i + batchSize);
134+
batchNum++;
118135

119136
try {
120137
await this.vectorStorage.addDocuments(batch);
121138
documentsIndexed += batch.length;
122139

140+
// Log progress every 10 batches or on last batch
141+
if (batchNum % 10 === 0 || batchNum === totalBatches) {
142+
logger?.info(
143+
{ batch: batchNum, totalBatches, documentsIndexed, total: embeddingDocuments.length },
144+
`Embedded ${documentsIndexed}/${embeddingDocuments.length} documents`
145+
);
146+
}
147+
123148
onProgress?.({
124149
phase: 'storing',
125150
filesProcessed: filesScanned,
126151
totalFiles: filesScanned,
127152
documentsIndexed,
153+
totalDocuments: embeddingDocuments.length,
128154
percentComplete: 66 + (documentsIndexed / embeddingDocuments.length) * 33,
129155
});
130156
} catch (error) {
@@ -134,9 +160,15 @@ export class RepositoryIndexer {
134160
error: error instanceof Error ? error : undefined,
135161
timestamp: new Date(),
136162
});
163+
logger?.error(
164+
{ batch: batchNum, error: error instanceof Error ? error.message : String(error) },
165+
'Batch embedding failed'
166+
);
137167
}
138168
}
139169

170+
logger?.info({ documentsIndexed, errors: errors.length }, 'Embedding complete');
171+
140172
// Update state
141173
await this.updateState(scanResult.documents);
142174

@@ -253,6 +285,7 @@ export class RepositoryIndexer {
253285
repoRoot: this.config.repositoryPath,
254286
include: filesToReindex,
255287
exclude: this.config.excludePatterns,
288+
logger: options.logger,
256289
});
257290

258291
documentsExtracted = scanResult.documents.length;

packages/core/src/indexer/types.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
* Repository Indexer types
33
*/
44

5+
import type { Logger } from '@lytics/kero';
6+
57
/**
68
* Options for indexing a repository
79
*/
@@ -20,6 +22,9 @@ export interface IndexOptions {
2022

2123
/** Progress callback for tracking indexing */
2224
onProgress?: (progress: IndexProgress) => void;
25+
26+
/** Logger for progress and debug output */
27+
logger?: Logger;
2328
}
2429

2530
/**
@@ -46,6 +51,9 @@ export interface IndexProgress {
4651
/** Documents indexed so far */
4752
documentsIndexed: number;
4853

54+
/** Total documents to index (available during storing phase) */
55+
totalDocuments?: number;
56+
4957
/** Current file being processed */
5058
currentFile?: string;
5159

packages/core/src/scanner/__tests__/scanner.test.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -511,8 +511,8 @@ describe('Scanner', () => {
511511
const scannerInterface = result.documents.find((d) => d.metadata.name === 'Scanner');
512512
expect(scannerInterface).toBeDefined();
513513
expect(scannerInterface?.metadata.imports).toBeDefined();
514-
// types.ts has no imports, so should be empty array
515-
expect(scannerInterface?.metadata.imports).toEqual([]);
514+
// types.ts imports Logger from @lytics/kero
515+
expect(scannerInterface?.metadata.imports).toContain('@lytics/kero');
516516
});
517517

518518
it('should extract imports for methods', async () => {
@@ -581,8 +581,8 @@ describe('Scanner', () => {
581581
expect(docs.length >= 0).toBe(true);
582582
});
583583

584-
it('should return empty array for files with no imports', async () => {
585-
// types.ts should have no imports
584+
it('should capture type-only imports', async () => {
585+
// types.ts imports Logger type from @lytics/kero
586586
const result = await scanRepository({
587587
repoRoot,
588588
include: ['packages/core/src/scanner/types.ts'],
@@ -591,7 +591,8 @@ describe('Scanner', () => {
591591
const docType = result.documents.find((d) => d.metadata.name === 'DocumentType');
592592
expect(docType).toBeDefined();
593593
expect(docType?.metadata.imports).toBeDefined();
594-
expect(docType?.metadata.imports).toEqual([]);
594+
// Type imports should be captured
595+
expect(docType?.metadata.imports).toContain('@lytics/kero');
595596
});
596597
});
597598

packages/core/src/scanner/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ export type {
1313
Scanner,
1414
ScannerCapabilities,
1515
ScanOptions,
16+
ScanProgress,
1617
ScanResult,
1718
ScanStats,
1819
} from './types';

packages/core/src/scanner/registry.ts

Lines changed: 94 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { globby } from 'globby';
2-
import type { Document, Scanner, ScanOptions, ScanResult } from './types';
2+
import type { Document, Scanner, ScanOptions, ScanProgress, ScanResult } from './types';
33

44
/**
55
* Scanner registry manages multiple language scanners
@@ -60,6 +60,24 @@ export class ScannerRegistry {
6060
async scanRepository(options: ScanOptions): Promise<ScanResult> {
6161
const startTime = Date.now();
6262
const errors: Array<{ file: string; error: string }> = [];
63+
const logger = options.logger?.child({ component: 'scanner' });
64+
const onProgress = options.onProgress;
65+
66+
// Helper to emit progress
67+
const emitProgress = (progress: Partial<ScanProgress>) => {
68+
onProgress?.({
69+
phase: 'discovery',
70+
filesTotal: 0,
71+
filesScanned: 0,
72+
documentsExtracted: 0,
73+
errors: errors.length,
74+
...progress,
75+
});
76+
};
77+
78+
// Phase 1: Discovery
79+
logger?.info({ repoRoot: options.repoRoot }, 'Starting repository scan');
80+
emitProgress({ phase: 'discovery' });
6381

6482
// Build glob patterns
6583
const patterns = this.buildGlobPatterns(options);
@@ -71,6 +89,8 @@ export class ScannerRegistry {
7189
absolute: false,
7290
});
7391

92+
logger?.info({ totalFiles: files.length }, 'File discovery complete');
93+
7494
// Group files by scanner
7595
const filesByScanner = new Map<Scanner, string[]>();
7696

@@ -83,23 +103,85 @@ export class ScannerRegistry {
83103
}
84104
}
85105

86-
// Scan files with appropriate scanners
106+
// Log per-language breakdown
107+
const languageBreakdown: Record<string, number> = {};
108+
for (const [scanner, scannerFiles] of filesByScanner) {
109+
languageBreakdown[scanner.language] = scannerFiles.length;
110+
logger?.info(
111+
{ language: scanner.language, files: scannerFiles.length },
112+
`Found ${scannerFiles.length} ${scanner.language} files`
113+
);
114+
}
115+
116+
// Phase 2: Scanning
87117
const allDocuments: Document[] = [];
118+
let totalFilesScanned = 0;
88119

89120
for (const [scanner, scannerFiles] of filesByScanner.entries()) {
121+
logger?.debug(
122+
{ language: scanner.language, fileCount: scannerFiles.length },
123+
`Scanning ${scanner.language}...`
124+
);
125+
126+
emitProgress({
127+
phase: 'scanning',
128+
language: scanner.language,
129+
filesTotal: files.length,
130+
filesScanned: totalFilesScanned,
131+
documentsExtracted: allDocuments.length,
132+
});
133+
90134
try {
91135
const documents = await scanner.scan(scannerFiles, options.repoRoot);
92136
allDocuments.push(...documents);
137+
totalFilesScanned += scannerFiles.length;
138+
139+
logger?.info(
140+
{ language: scanner.language, files: scannerFiles.length, documents: documents.length },
141+
`${scanner.language} scan complete`
142+
);
143+
144+
emitProgress({
145+
phase: 'scanning',
146+
language: scanner.language,
147+
filesTotal: files.length,
148+
filesScanned: totalFilesScanned,
149+
documentsExtracted: allDocuments.length,
150+
});
93151
} catch (error) {
152+
const errorMessage = error instanceof Error ? error.message : String(error);
94153
errors.push({
95154
file: `[${scanner.language}]`,
96-
error: error instanceof Error ? error.message : String(error),
155+
error: errorMessage,
97156
});
157+
logger?.error(
158+
{ language: scanner.language, error: errorMessage },
159+
`${scanner.language} scan failed`
160+
);
98161
}
99162
}
100163

164+
// Phase 3: Complete
101165
const duration = Date.now() - startTime;
102166

167+
logger?.info(
168+
{
169+
totalFiles: files.length,
170+
totalDocuments: allDocuments.length,
171+
duration: `${duration}ms`,
172+
byLanguage: languageBreakdown,
173+
errors: errors.length,
174+
},
175+
'Repository scan complete'
176+
);
177+
178+
emitProgress({
179+
phase: 'complete',
180+
filesTotal: files.length,
181+
filesScanned: totalFilesScanned,
182+
documentsExtracted: allDocuments.length,
183+
});
184+
103185
return {
104186
documents: allDocuments,
105187
stats: {
@@ -152,6 +234,15 @@ export class ScannerRegistry {
152234
'**/.turbo/**',
153235
'**/.nuxt/**',
154236

237+
// Go generated files
238+
'**/*.pb.go', // Protobuf
239+
'**/*.gen.go', // Code generators
240+
'**/*_gen.go', // Alternative generator pattern
241+
'**/*.pb.gw.go', // gRPC gateway
242+
'**/mock_*.go', // Mockgen files
243+
'**/mocks/**', // Mock directories
244+
'**/testdata/**', // Test fixtures
245+
155246
// Version control
156247
'**/.git/**',
157248
'**/.svn/**',

0 commit comments

Comments
 (0)