From cd70561e002e5f82d7d71721ddcbbb020664c000 Mon Sep 17 00:00:00 2001 From: aruntyagiTutu Date: Mon, 2 Mar 2026 11:58:03 +0530 Subject: [PATCH 1/9] NEW @W-21102126@ - PMD java wrapper for ast-dump (#423) --- PMD_AST_DUMP_IMPLEMENTATION_PLAN.md | 817 ++++++++++++++++++ .../code-analyzer-pmd-engine/package.json | 2 +- .../sfca/pmdwrapper/PmdAstDumpInputData.java | 22 + .../sfca/pmdwrapper/PmdAstDumpResults.java | 26 + .../sfca/pmdwrapper/PmdAstDumper.java | 120 +++ .../sfca/pmdwrapper/PmdWrapper.java | 66 +- .../sfca/pmdwrapper/PmdAstDumpTest.java | 246 ++++++ .../sfca/pmdwrapper/PmdWrapperTest.java | 3 +- 8 files changed, 1298 insertions(+), 4 deletions(-) create mode 100644 PMD_AST_DUMP_IMPLEMENTATION_PLAN.md create mode 100644 packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumpInputData.java create mode 100644 packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumpResults.java create mode 100644 packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumper.java create mode 100644 packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/test/java/com/salesforce/sfca/pmdwrapper/PmdAstDumpTest.java diff --git a/PMD_AST_DUMP_IMPLEMENTATION_PLAN.md b/PMD_AST_DUMP_IMPLEMENTATION_PLAN.md new file mode 100644 index 00000000..2e2bada5 --- /dev/null +++ b/PMD_AST_DUMP_IMPLEMENTATION_PLAN.md @@ -0,0 +1,817 @@ +# PMD AST Dump Implementation Plan + +## 1. Overview + +This document outlines the implementation plan for integrating PMD's AST dump functionality directly into the code-analyzer-core PMD engine, eliminating the need for users to install PMD CLI separately. 
+ +### Version 1 Scope (Simplified): +This implementation focuses on a **simple, straightforward approach** for v1: + +**Key Design Decisions**: +- ✅ **Single File Processing**: One file per API call (not batch) +- ✅ **XML Format Only**: Text format not supported in v1 +- ✅ **Single Output**: One result object (not array) +- ✅ **Embedded Errors**: Errors returned in result object (not thrown) + +**Rationale**: +- Simpler implementation and testing +- Easier error handling +- Predictable memory usage +- Faster time to market +- Can extend to batch processing in v2 if needed + +## 2. Current Architecture Analysis + +### Existing Components: +- **TypeScript Layer**: `pmd-wrapper.ts` - Handles Java command execution +- **Java Wrapper**: `PmdWrapper.java` - Main entry point with commands: + - `describe` - Lists available PMD rules + - `run` - Executes PMD analysis +- **Supporting Classes**: + - `PmdRunner.java` - Executes PMD analysis using `PmdAnalysis` API + - `PmdRuleDescriber.java` - Describes PMD rules + - Various data classes for input/output + +### Current Flow: +``` +TypeScript → JavaCommandExecutor → PmdWrapper.java → [PmdRunner/PmdRuleDescriber] → Results +``` + +## 3. Proposed Implementation + +### 3.1 New Java Classes to Create + +#### A. `PmdAstDumpInputData.java` +**Purpose**: Input data structure for AST dump command + +**Fields**: +```java +- String language // Language ID (apex, java, xml, etc.) +- String fileToDump // Single file to generate AST for +- String encoding // Character encoding (default: "UTF-8") +``` + +**Note**: +- Only **one file** is supported per request (not multiple files) +- Only **XML format** is supported (text format not supported in v1) + +**Location**: `/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/` + +--- + +#### B. 
`PmdAstDumpResults.java` +**Purpose**: Results structure for AST dump output + +**Fields**: +```java +- String file // Full path to the file +- String ast // AST representation in XML format +- ProcessingError error // Error if processing failed (null if successful) +``` + +**Note**: +- Since only one file is processed, there's only **one output** (not a list) +- The `error` field is populated only if AST generation fails +- If successful, `ast` field contains the XML representation, `error` is null +- If failed, `error` field contains the error details, `ast` is null + +**Location**: Same as above + +--- + +#### C. `PmdAstDumper.java` +**Purpose**: Core class that performs AST dumping using PMD APIs + +**Key Methods**: +```java +public PmdAstDumpResults dump(PmdAstDumpInputData inputData) +``` + +**Internal Implementation Details**: +1. **Language Resolution**: + - Use `LanguageRegistry.PMD.getLanguageById(languageId)` + - Get the language processor + - Validate language is supported + +2. **Single File Processing**: + - Read file content using specified encoding + - Create `TextDocument` from file content + - Parse file to get `RootNode` (AST root) + - Render AST using XML renderer + +3. **AST Rendering**: + - **XML Format Only**: Use `net.sourceforge.pmd.util.treeexport.XmlTreeRenderer` + - Text format is not supported in v1 + +4. 
**Error Handling**: + - Catch parsing errors + - Store error in results object + - Return results with either ast or error populated + +**Key PMD APIs Used**: +```java +- net.sourceforge.pmd.lang.LanguageRegistry +- net.sourceforge.pmd.lang.LanguageProcessor +- net.sourceforge.pmd.lang.document.TextDocument +- net.sourceforge.pmd.lang.ast.RootNode +- net.sourceforge.pmd.util.treeexport.XmlTreeRenderer +- net.sourceforge.pmd.util.treeexport.TreeRenderer +``` + +**Pseudo-code**: +```java +public PmdAstDumpResults dump(PmdAstDumpInputData inputData) { + validate(inputData); + + PmdAstDumpResults results = new PmdAstDumpResults(); + results.file = inputData.fileToDump; + + try { + // Get language and processor + Language language = LanguageRegistry.PMD.getLanguageById(inputData.language); + if (language == null) { + throw new RuntimeException("Language not supported: " + inputData.language); + } + + LanguageProcessor processor = language.createProcessor( + LanguageProcessor.processorConfiguration() + ); + + // Read file + String content = readFile(inputData.fileToDump, inputData.encoding); + + // Create document + TextDocument doc = TextDocument.readOnlyString(content, + Paths.get(inputData.fileToDump).getFileName().toString()); + + // Parse to AST + RootNode ast = processor.parse(doc); + + // Render AST as XML + StringWriter writer = new StringWriter(); + XmlTreeRenderer renderer = new XmlTreeRenderer(); + renderer.renderSubtree(ast, writer); + results.ast = writer.toString(); + + } catch (Exception e) { + // Store processing error + ProcessingError error = new ProcessingError(); + error.file = inputData.fileToDump; + error.message = e.getMessage(); + error.detail = e.toString(); + results.error = error; + } + + return results; +} +``` + +**Location**: `/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/` + +--- + +### 3.2 Changes to Existing Java Classes + +#### A. 
`PmdWrapper.java` Modifications + +**Add new command**: `ast-dump` + +**Changes**: +```java +// In main() method, add new else-if branch: +} else if(args[0].equalsIgnoreCase("ast-dump")) { + invokeAstDumpCommand(Arrays.copyOfRange(args, 1, args.length)); +} + +// Add new method: +private static void invokeAstDumpCommand(String[] args) { + if (args.length != 2) { + throw new RuntimeException("Invalid arguments for ast-dump"); + } + + String argsInputFile = args[0]; + String resultsOutputFile = args[1]; + + Gson gson = new Gson(); + + // Read input + PmdAstDumpInputData inputData; + try (FileReader reader = new FileReader(argsInputFile)) { + inputData = gson.fromJson(reader, PmdAstDumpInputData.class); + } catch (Exception e) { + throw new RuntimeException("Could not read input", e); + } + + // Execute AST dump + PmdAstDumper astDumper = new PmdAstDumper(); + PmdAstDumpResults results = astDumper.dump(inputData); + + // Write results + try (FileWriter fileWriter = new FileWriter(resultsOutputFile)) { + gson.toJson(results, fileWriter); + } catch (IOException e) { + throw new RuntimeException(e); + } +} +``` + +**Update Javadoc** to document the new command: +``` +AST-DUMP: + - Generates Abstract Syntax Tree representation of source files + - Invocation: java -cp {classPath} com.salesforce.sfca.pmdwrapper.PmdWrapper ast-dump {inputFile} {outputFile} + - {inputFile}: JSON file with PmdAstDumpInputData structure + - {outputFile}: JSON file for PmdAstDumpResults +``` + +--- + +### 3.3 TypeScript Integration Layer + +#### A. New TypeScript Types (in `pmd-wrapper.ts`) + +```typescript +export type PmdAstDumpInputData = { + language: string, // Language ID (apex, xml, etc.) 
+ fileToDump: string, // Single file to dump AST for + encoding?: string // File encoding (default: 'UTF-8') +} + +export type PmdAstDumpResults = { + file: string, // File path + ast: string | null, // AST representation in XML (null if error) + error: PmdProcessingError | null // Error details (null if successful) +} +``` + +**Note**: Only XML format is supported, so no format parameter is needed. + +#### B. New Method in `PmdWrapperInvoker` Class + +```typescript +async invokeAstDumpCommand( + language: string, + fileToDump: string, + workingFolder: string, + encoding: string = 'UTF-8', + emitProgress: (percComplete: number) => void +): Promise<PmdAstDumpResults> { + + emitProgress(5); + + // Prepare input data + const inputData: PmdAstDumpInputData = { + language: language, + fileToDump: fileToDump, + encoding: encoding + }; + + const inputFile = path.join(workingFolder, 'astDumpInput.json'); + await fs.promises.writeFile(inputFile, JSON.stringify(inputData), 'utf-8'); + emitProgress(10); + + const resultsOutputFile = path.join(workingFolder, 'astDumpResults.json'); + const javaCmdArgs = [PMD_WRAPPER_JAVA_CLASS, 'ast-dump', inputFile, resultsOutputFile]; + const javaClassPaths = [ + path.join(PMD_WRAPPER_LIB_FOLDER, '*'), + ...this.userProvidedJavaClasspathEntries.map(toJavaClasspathEntry) + ]; + + this.emitLogEvent(LogLevel.Fine, `Calling AST dump for file: ${fileToDump}`); + + await this.javaCommandExecutor.exec(javaCmdArgs, javaClassPaths, (stdOutMsg: string) => { + if (stdOutMsg.startsWith(STDOUT_ERROR_MARKER)) { + const errorMessage = stdOutMsg.slice(STDOUT_ERROR_MARKER.length).replaceAll('{NEWLINE}','\n'); + throw new Error(errorMessage); + } else if (stdOutMsg.startsWith(STDOUT_WARNING_MARKER)) { + const warningMessage = stdOutMsg.slice(STDOUT_WARNING_MARKER.length).replaceAll('{NEWLINE}','\n'); + this.emitLogEvent(LogLevel.Warn, `[JAVA StdOut]: ${warningMessage}`); + } else { + this.emitLogEvent(LogLevel.Fine, `[JAVA StdOut]: ${stdOutMsg}`); + } + }); + + 
emitProgress(95); + + // Read and parse results + const resultsFileContents = await fs.promises.readFile(resultsOutputFile, 'utf-8'); + const results: PmdAstDumpResults = JSON.parse(resultsFileContents); + emitProgress(100); + + return results; +} +``` + +**Note**: Simplified signature - only one file, no format parameter (XML only). + +--- + +#### C. New Method in `pmd-engine.ts` (if needed) + +Add high-level API in the PMD engine to expose AST dump functionality to users: + +```typescript +async generateAst( + language: string, + file: string, + options?: { + encoding?: string + } +): Promise { + // Implementation using PmdWrapperInvoker + // Returns XML AST representation for a single file +} +``` + +**Note**: For multiple files, users should call this method multiple times. + +--- + +## 4. Data Flow Diagram + +``` +User Code (TypeScript) + ↓ +pmd-engine.ts (generateAst method) [optional high-level API] + ↓ +pmd-wrapper.ts (PmdWrapperInvoker.invokeAstDumpCommand) + ↓ +[Creates JSON input file] → astDumpInput.json + { + "language": "apex", + "fileToDump": "/path/to/MyClass.cls", + "encoding": "UTF-8" + } + ↓ +JavaCommandExecutor.exec() + ↓ +PmdWrapper.java (main → invokeAstDumpCommand) + ↓ +PmdAstDumper.java (dump method) + ↓ + ├─→ LanguageRegistry.PMD.getLanguageById() + ├─→ Language.createProcessor() + ├─→ Read file content (with encoding) + ├─→ TextDocument.readOnlyString() + ├─→ LanguageProcessor.parse() → RootNode (AST) + └─→ XmlTreeRenderer.renderSubtree() → XML string + ↓ +[Writes JSON output] → astDumpResults.json + { + "file": "/path/to/MyClass.cls", + "ast": "...", + "error": null + } + ↓ +TypeScript reads and parses results + ↓ +User receives PmdAstDumpResults + ↓ +User accesses result.ast (if successful) or result.error (if failed) +``` + +**Key Points**: +- Single file input → Single file output +- XML format only +- Error captured in results (not thrown) + +--- + +## 5. 
Dependencies Required + +### Java Dependencies (Already Available) +All required PMD APIs are already available in your current dependencies: +- ✅ `pmd-core` (contains all AST and rendering APIs) +- ✅ `pmd-apex`, `pmd-java`, `pmd-xml`, etc. (language modules) +- ✅ `gson` (for JSON serialization) + +**No additional dependencies needed!** + +### PMD API Classes Used +From `pmd-core-7.21.0`: +- `net.sourceforge.pmd.lang.LanguageRegistry` - Get language by ID +- `net.sourceforge.pmd.lang.Language` - Language definition +- `net.sourceforge.pmd.lang.LanguageProcessor` - Parse files for specific language +- `net.sourceforge.pmd.lang.document.TextDocument` - Document representation +- `net.sourceforge.pmd.lang.ast.RootNode` - Root of the AST +- `net.sourceforge.pmd.util.treeexport.XmlTreeRenderer` - Render AST as XML +- `net.sourceforge.pmd.util.treeexport.TreeRenderer` - Base renderer interface +- `java.io.StringWriter` - Capture XML output +- `java.nio.file.Files` - File reading +- `java.nio.file.Paths` - Path handling + +--- + +## 6. Supported Languages + +Based on your current PMD language modules, AST dump will support: +- ✅ **Apex** (pmd-apex-7.21.0.jar) +- ✅ **Visualforce** (pmd-visualforce-7.21.0.jar) +- ✅ **HTML** (pmd-html-7.21.0.jar) +- ✅ **JavaScript** (pmd-javascript-7.21.0.jar) +- ✅ **XML** (pmd-xml-7.21.0.jar) + +Language IDs to use: +- `apex` - Apex classes and triggers +- `visualforce` - Visualforce pages +- `html` - HTML files +- `javascript` - JavaScript files +- `xml` - XML files +- `xsl` - XSL stylesheets + +--- + +## 7. 
Usage Examples + +### Example 1: Dump Apex Class AST (XML format) + +**Input JSON** (`astDumpInput.json`): +```json +{ + "language": "apex", + "fileToDump": "/path/to/MyClass.cls", + "encoding": "UTF-8" +} +``` + +**Java Command**: +```bash +java -cp "dist/java-lib/*" \ + com.salesforce.sfca.pmdwrapper.PmdWrapper \ + ast-dump \ + astDumpInput.json \ + astDumpResults.json +``` + +**Output JSON** (`astDumpResults.json`) - Success case: +```json +{ + "file": "/path/to/MyClass.cls", + "ast": "\n\n \n ...", + "error": null +} +``` + +**Output JSON** (`astDumpResults.json`) - Error case: +```json +{ + "file": "/path/to/MyClass.cls", + "ast": null, + "error": { + "file": "/path/to/MyClass.cls", + "message": "ParseException", + "detail": "Unexpected token at line 15, column 8" + } +} +``` + +### Example 2: TypeScript Usage + +```typescript +// In your code analyzer +const pmdWrapperInvoker = new PmdWrapperInvoker( + javaCommandExecutor, + [], + (level, msg) => console.log(msg) +); + +const result = await pmdWrapperInvoker.invokeAstDumpCommand( + 'apex', + '/path/to/MyClass.cls', + '/tmp/workdir', + 'UTF-8', + (progress) => console.log(`Progress: ${progress}%`) +); + +// Check if successful +if (result.ast) { + console.log('AST generated successfully:'); + console.log(result.ast); +} else if (result.error) { + console.error('Error generating AST:', result.error.message); +} +``` + +### Example 3: Processing Multiple Files + +To process multiple files, call the method multiple times: + +```typescript +const files = ['/path/to/File1.cls', '/path/to/File2.cls']; + +for (const file of files) { + const result = await pmdWrapperInvoker.invokeAstDumpCommand( + 'apex', + file, + '/tmp/workdir', + 'UTF-8', + (progress) => console.log(`${file}: ${progress}%`) + ); + + if (result.ast) { + console.log(`AST for ${file}:`, result.ast); + } +} +``` + +--- + +## 8. Error Handling + +### Scenarios Handled: +1. **Invalid Language ID**: Error stored in `error` field +2. 
**File Not Found**: Error stored in `error` field +3. **Parse Errors**: Error stored in `error` field with details +4. **Encoding Issues**: Error stored in `error` field + +### Error Response Examples: + +**Success Response**: +```json +{ + "file": "/path/to/MyClass.cls", + "ast": "...", + "error": null +} +``` + +**Parse Error Response**: +```json +{ + "file": "/path/to/BadFile.cls", + "ast": null, + "error": { + "file": "/path/to/BadFile.cls", + "message": "ParseException: Unexpected token at line 15", + "detail": "net.sourceforge.pmd.lang.apex.ParseException: ..." + } +} +``` + +**File Not Found Response**: +```json +{ + "file": "/path/to/missing.cls", + "ast": null, + "error": { + "file": "/path/to/missing.cls", + "message": "File not found", + "detail": "java.io.FileNotFoundException: /path/to/missing.cls" + } +} +``` + +**Invalid Language Response**: +```json +{ + "file": "/path/to/file.txt", + "ast": null, + "error": { + "file": "/path/to/file.txt", + "message": "Language not supported: unknown", + "detail": "java.lang.RuntimeException: Language not supported: unknown" + } +} +``` + +--- + +## 9. Testing Strategy + +### A. Unit Tests to Create + +#### Java Tests: +**File**: `PmdAstDumperTest.java` + +Test cases: +```java +- testDumpApexClassAsXml() // Successfully dump Apex class +- testDumpApexTriggerAsXml() // Successfully dump Apex trigger +- testDumpVisualforceAsXml() // Successfully dump Visualforce page +- testDumpXmlAsXml() // Successfully dump XML file +- testInvalidLanguage() // Error: language not supported +- testFileNotFound() // Error: file doesn't exist +- testParseError() // Error: syntax error in file +- testDifferentEncodings() // Test UTF-8, ISO-8859-1, etc. 
+- testXmlOutputStructure() // Verify XML format is valid +- testEmptyFile() // Handle empty source files +``` + +**Location**: `/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/test/java/com/salesforce/sfca/pmdwrapper/` + +#### TypeScript Tests: +**File**: `pmd-ast-dump.test.ts` + +Test cases: +```typescript +- testInvokeAstDumpCommandSuccess() // Successful AST generation +- testInvokeAstDumpCommandError() // Error handling +- testXmlFormatValidation() // Verify XML output is well-formed +- testMultipleFilesSequential() // Process multiple files in sequence +- testProgressReporting() // Verify progress callbacks work +- testDifferentLanguages() // Test apex, xml, visualforce, etc. +``` + +**Location**: `/packages/code-analyzer-pmd-engine/test/` + +### B. Integration Tests +- End-to-end test calling from TypeScript through Java +- Test with real Apex, Visualforce, XML files +- Verify AST structure is correct +- Performance testing with large files + +--- + +## 10. Performance Considerations + +### Expected Performance (Single File): +- **Small files** (<1KB): ~50-100ms per file +- **Medium files** (1-10KB): ~100-500ms per file +- **Large files** (>10KB): ~500ms-2s per file + +### Processing Multiple Files: +Since only one file is processed per request, to handle multiple files: +1. **Sequential Processing**: Call the API multiple times (simpler, predictable) +2. 
**Parallel Processing**: Use Promise.all() to process multiple files concurrently (faster) + +Example of parallel processing: +```typescript +const files = ['file1.cls', 'file2.cls', 'file3.cls']; +const results = await Promise.all( + files.map(file => pmdWrapperInvoker.invokeAstDumpCommand( + 'apex', file, '/tmp/workdir', 'UTF-8', (p) => {} + )) +); +``` + +### Memory Considerations: +- Each AST held in memory temporarily +- XML output can be 5-10x larger than source +- Processing one file at a time keeps memory usage predictable +- For parallel processing, limit concurrency to avoid memory issues + +--- + +## 11. Limitations & Known Issues + +### Limitations: +1. **Single File Per Request**: Only one file can be processed per API call (not batch processing) +2. **XML Format Only**: Only XML format is supported (text format not supported in v1) +3. **Language Support**: Limited to languages with PMD modules installed +4. **AST Depth**: Full AST can be very large for complex files +5. **Memory**: Large files may require increased heap size + +### Not Supported (v1): +- Batch processing (multiple files in one request) +- Text format output +- JSON format output +- Custom AST node filtering +- Partial AST extraction +- AST modification or manipulation +- Direct AST querying (use PMD's XPath instead) + +### Future Enhancements (Potential v2): +- Support for text format +- Batch processing for multiple files +- AST node filtering options +- Performance optimizations + +--- + +## 12. 
Alternative Approaches Considered + +### Approach 1: Direct CLI Wrapper +**Pros**: Simpler, no code changes +**Cons**: Requires PMD CLI installation, harder to integrate + +### Approach 2: Separate Microservice +**Pros**: Language-agnostic +**Cons**: Complex deployment, network overhead + +### Approach 3: JavaScript AST Parser +**Pros**: No Java dependency +**Cons**: Would need separate parsers for each language, inconsistent with PMD analysis + +**Chosen Approach**: **Library Integration** (this plan) +**Reason**: Consistent with existing architecture, no external dependencies, reuses existing PMD infrastructure + +--- + +## 13. Migration Path (If Upgrading PMD) + +When upgrading PMD version: +1. Update `gradle/libs.versions.toml` (pmd version) +2. Check for API changes in: + - `LanguageProcessor` API + - `TreeRenderer` API + - `TextDocument` API +3. Update `PmdAstDumper.java` if APIs changed +4. Re-run tests +5. Update documentation + +--- + +## 14. Security Considerations + +### Input Validation: +- ✅ Validate language ID exists +- ✅ Validate file paths (no directory traversal) +- ✅ Output format is fixed to XML in v1, so there is no user-supplied format value to validate +- ✅ Limit file size to prevent DoS (not part of v1; listed under the future-version questions) +- ✅ Sanitize file content before parsing + +### Output Safety: +- ✅ AST output is read-only representation +- ✅ No code execution in AST generation +- ✅ Error messages don't expose sensitive paths + +--- + +## 15. Documentation to Update + +After implementation: +1. **User Documentation**: + - Add AST dump API reference + - Add usage examples + - Add troubleshooting guide + +2. **Developer Documentation**: + - Update architecture diagrams + - Document new Java classes + - Update TypeScript API docs + +3. **README Files**: + - Update feature list + - Add AST dump to capabilities + +--- + +## 16. Summary + +### Files to Create: +1. `PmdAstDumpInputData.java` (Input structure) +2. `PmdAstDumpResults.java` (Output structure) +3. `PmdAstDumper.java` (Core implementation) +4. `PmdAstDumperTest.java` (Unit tests) +5. 
`pmd-ast-dump.test.ts` (Integration tests) + +### Files to Modify: +1. `PmdWrapper.java` (Add ast-dump command) +2. `pmd-wrapper.ts` (Add TypeScript types and method) +3. `pmd-engine.ts` (Optional: Add high-level API) + +### Dependencies: +- ✅ No new dependencies required (all APIs in pmd-core) + +### Estimated Effort: +- **Java Implementation**: 3-4 hours (simplified - single file, XML only) +- **TypeScript Integration**: 1-2 hours (simplified API) +- **Testing**: 3-4 hours +- **Documentation**: 1-2 hours +- **Total**: ~8-12 hours (reduced due to simplified scope) + +### Benefits: +✅ No PMD CLI installation required +✅ Consistent with existing architecture +✅ Full control over AST generation +✅ Easy to extend for new languages +✅ Programmatic access from TypeScript +✅ Simple API - one file in, one AST out +✅ Predictable memory usage (single file processing) +✅ Easy error handling (error embedded in result) + +--- + +## 17. Next Steps + +1. **Review this document** - Confirm approach +2. **Create Java classes** - Implement core functionality +3. **Update PmdWrapper** - Add new command +4. **Add TypeScript types** - Type definitions +5. **Implement TypeScript method** - Integration layer +6. **Write tests** - Unit and integration tests +7. **Test with real files** - Apex, Visualforce, XML +8. **Update documentation** - User and developer docs +9. **Review and iterate** - Code review and refinement + +--- + +## Questions to Consider (For Future Versions) + +### Answered in v1: +✅ **Format support**: XML only (simplifies implementation) +✅ **Batch processing**: Single file only (simplifies API and error handling) +✅ **Error handling**: Errors embedded in result object (no exceptions thrown) + +### For Future Consideration (v2+): +1. **Should we limit file size?** Prevent memory issues with very large files +2. **Should we add batch processing?** Process multiple files in one request +3. **Should we support text format?** In addition to XML +4. 
**Should we cache parsed ASTs?** For repeated operations on same file +5. **Should we support AST filtering?** Extract only specific node types +6. **Should we add XPath support?** Query AST nodes directly +7. **Should we add streaming?** For very large files to reduce memory usage + +--- + +**End of Implementation Plan** diff --git a/packages/code-analyzer-pmd-engine/package.json b/packages/code-analyzer-pmd-engine/package.json index fd3f737f..3688bc34 100644 --- a/packages/code-analyzer-pmd-engine/package.json +++ b/packages/code-analyzer-pmd-engine/package.json @@ -1,7 +1,7 @@ { "name": "@salesforce/code-analyzer-pmd-engine", "description": "Plugin package that adds 'pmd' and 'cpd' as engines into Salesforce Code Analyzer", - "version": "0.36.0", + "version": "0.37.0-SNAPSHOT", "author": "The Salesforce Code Analyzer Team", "license": "BSD-3-Clause", "homepage": "https://developer.salesforce.com/docs/platform/salesforce-code-analyzer/overview", diff --git a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumpInputData.java b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumpInputData.java new file mode 100644 index 00000000..8b55850e --- /dev/null +++ b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumpInputData.java @@ -0,0 +1,22 @@ +package com.salesforce.sfca.pmdwrapper; + +/** + * Input arguments for the "ast-dump" command; PmdWrapper deserializes this from the {argsInputFile} JSON with Gson. + */ +public class PmdAstDumpInputData { + /** + * Required. PMD language id of the file to dump (e.g., "apex", "xml", "visualforce"); PmdAstDumper looks it up via LanguageRegistry.PMD.getLanguageById. + */ + public String language; + + /** + * Required. Path of the single file to generate the AST for. + */ + public String fileToDump; + + /** + * Character encoding for reading the file. + * Defaults to "UTF-8" if not specified; PmdAstDumper also resets a null or blank value to "UTF-8". + */ + public String encoding = "UTF-8"; +} diff --git
a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumpResults.java b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumpResults.java new file mode 100644 index 00000000..89fb62ad --- /dev/null +++ b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumpResults.java @@ -0,0 +1,26 @@ +package com.salesforce.sfca.pmdwrapper; + +import com.salesforce.sfca.shared.ProcessingError; + +/** + * Results structure for the "ast-dump" command; PmdWrapper writes it as JSON to the {resultsOutputFile}. + * Contains either the AST (if successful) or an error (if failed), but never both. + */ +public class PmdAstDumpResults { + /** + * Path of the file that was processed, copied verbatim from PmdAstDumpInputData.fileToDump. + */ + public String file; + + /** + * The AST representation in XML format, as captured from PMD's TreeExporter output. + * This is populated if the AST generation was successful, null otherwise. + */ + public String ast; + + /** + * Error details (file, message, detail) if AST generation failed. + * This is populated if the AST generation failed, null otherwise. + */ + public ProcessingError error; +} diff --git a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumper.java b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumper.java new file mode 100644 index 00000000..77283a16 --- /dev/null +++ b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumper.java @@ -0,0 +1,120 @@ +package com.salesforce.sfca.pmdwrapper; + +import com.salesforce.sfca.shared.ProcessingError; +import net.sourceforge.pmd.lang.Language; +import net.sourceforge.pmd.lang.LanguageRegistry; +import net.sourceforge.pmd.util.treeexport.TreeExportConfiguration; +import net.sourceforge.pmd.util.treeexport.TreeExporter; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.charset.Charset; +import
java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +/** + * Core class that performs AST dumping using PMD's TreeExporter API + */ +public class PmdAstDumper { + + /** + * Dumps the AST for a single file in XML format + * + * @param inputData Input data containing language, file path, and encoding + * @return Results containing either the AST (if successful) or error details (if failed) + */ + public PmdAstDumpResults dump(PmdAstDumpInputData inputData) { + validateInputData(inputData); + + PmdAstDumpResults results = new PmdAstDumpResults(); + results.file = inputData.fileToDump; + + try { + System.out.println("Generating AST for file '" + inputData.fileToDump + "' with language '" + inputData.language + "'"); + + // Verify file exists + Path filePath = Paths.get(inputData.fileToDump); + readFileContent(filePath, inputData.encoding); + + // Get language + Language language = LanguageRegistry.PMD.getLanguageById(inputData.language); + if (language == null) { + throw new RuntimeException("Language not supported: " + inputData.language); + } + + // Create TreeExportConfiguration + TreeExportConfiguration config = new TreeExportConfiguration(); + config.setLanguage(language); + config.setFormat("xml"); // Always XML format for v1 + config.setFile(filePath); + + // Capture output to string + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PrintStream ps = new PrintStream(baos, true, StandardCharsets.UTF_8); + PrintStream originalOut = System.out; + + try { + // Redirect System.out to capture XML output + System.setOut(ps); + + // Create and export AST (TreeExporter writes to System.out) + TreeExporter exporter = new TreeExporter(config); + exporter.export(); + + // Get the XML output + results.ast = baos.toString(StandardCharsets.UTF_8); + + } finally { + // Restore original System.out + System.setOut(originalOut); + ps.close(); + } + + System.out.println("Successfully generated AST for 
file '" + inputData.fileToDump + "'"); + + } catch (Exception e) { + // Store processing error + System.err.println("Error generating AST for file '" + inputData.fileToDump + "': " + e.getMessage()); + ProcessingError error = new ProcessingError(); + error.file = inputData.fileToDump; + error.message = e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName(); + error.detail = e.toString(); + results.error = error; + } + + return results; + } + + /** + * Validates the input data + */ + private void validateInputData(PmdAstDumpInputData inputData) { + if (inputData.language == null || inputData.language.trim().isEmpty()) { + throw new RuntimeException("The 'language' field is required"); + } + if (inputData.fileToDump == null || inputData.fileToDump.trim().isEmpty()) { + throw new RuntimeException("The 'fileToDump' field is required"); + } + if (inputData.encoding == null || inputData.encoding.trim().isEmpty()) { + inputData.encoding = "UTF-8"; + } + } + + /** + * Reads file content using the specified encoding + */ + private String readFileContent(Path filePath, String encoding) throws IOException { + if (!Files.exists(filePath)) { + throw new IOException("File not found: " + filePath); + } + if (!Files.isRegularFile(filePath)) { + throw new IOException("Not a regular file: " + filePath); + } + + Charset charset = Charset.forName(encoding); + return Files.readString(filePath, charset); + } +} diff --git a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdWrapper.java b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdWrapper.java index c00ca280..d22b7fd2 100644 --- a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdWrapper.java +++ b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdWrapper.java @@ -67,6 +67,34 @@ * } * ] * } + * AST-DUMP: + * - Generates 
Abstract Syntax Tree representation of a source file in XML format + * - Invocation: java -cp {classPath} com.salesforce.sfca.pmdwrapper.PmdWrapper ast-dump {argsInputFile} {resultsOutputFile} + * - {classPath} is the list of entries to add to the class path + * - {argsInputFile} is a JSON file containing the input arguments for the ast-dump command. + * Example: + * { + * "language": "apex", + * "fileToDump": "/full/path/to/MyClass.cls", + * "encoding": "UTF-8" + * } + * - {resultsOutputFile} is a file to write the JSON formatted AST dump results to + * Example (success): + * { + * "file": "/full/path/to/MyClass.cls", + * "ast": "\n...", + * "error": null + * } + * Example (error): + * { + * "file": "/full/path/to/MyClass.cls", + * "ast": null, + * "error": { + * "file": "/full/path/to/MyClass.cls", + * "message": "ParseException: Unexpected token", + * "detail": "..." + * } + * } */ public class PmdWrapper { @@ -82,8 +110,10 @@ public static void main(String[] args) { invokeDescribeCommand(Arrays.copyOfRange(args, 1, args.length)); } else if(args[0].equalsIgnoreCase("run")) { invokeRunCommand(Arrays.copyOfRange(args, 1, args.length)); + } else if(args[0].equalsIgnoreCase("ast-dump")) { + invokeAstDumpCommand(Arrays.copyOfRange(args, 1, args.length)); } else { - throw new RuntimeException("Bad first argument to PmdWrapper. Expected \"describe\" or \"run\". Received: \"" + args[0] + "\""); + throw new RuntimeException("Bad first argument to PmdWrapper. Expected \"describe\", \"run\", or \"ast-dump\". Received: \"" + args[0] + "\""); } long endTime = System.currentTimeMillis(); @@ -152,4 +182,38 @@ private static void invokeRunCommand(String[] args) { throw new RuntimeException(e); } } + + private static void invokeAstDumpCommand(String[] args) { + if (args.length != 2) { + throw new RuntimeException("Invalid number of arguments following the \"ast-dump\" command. 
Expected 2 but received: " + args.length); + } + String argsInputFile = args[0]; + String resultsOutputFile = args[1]; + + Gson gson = new Gson(); + + // Read input data + PmdAstDumpInputData inputData; + try (FileReader reader = new FileReader(argsInputFile)) { + inputData = gson.fromJson(reader, PmdAstDumpInputData.class); + } catch (Exception e) { + throw new RuntimeException("Could not read contents from \"" + argsInputFile + "\"", e); + } + + // Execute AST dump + PmdAstDumper astDumper = new PmdAstDumper(); + PmdAstDumpResults results; + try { + results = astDumper.dump(inputData); + } catch (Exception e) { + throw new RuntimeException("Error while attempting to invoke PmdAstDumper.dump: " + e.getMessage(), e); + } + + // Write results + try (FileWriter fileWriter = new FileWriter(resultsOutputFile)) { + gson.toJson(results, fileWriter); + } catch (IOException e) { + throw new RuntimeException(e); + } + } } \ No newline at end of file diff --git a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/test/java/com/salesforce/sfca/pmdwrapper/PmdAstDumpTest.java b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/test/java/com/salesforce/sfca/pmdwrapper/PmdAstDumpTest.java new file mode 100644 index 00000000..325626b8 --- /dev/null +++ b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/test/java/com/salesforce/sfca/pmdwrapper/PmdAstDumpTest.java @@ -0,0 +1,246 @@ +package com.salesforce.sfca.pmdwrapper; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.*; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.google.gson.Gson; +import com.salesforce.sfca.testtools.StdOutCaptor; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.FileNotFoundException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +/** + * Tests for PMD AST Dump functionality + */ +class PmdAstDumpTest { + + @Test + void 
whenCallingMainWithAstDumpAndTooFewArgs_thenError() { + String[] args = {"ast-dump", "notEnough"}; + Exception thrown = assertThrows(Exception.class, () -> callPmdWrapper(args)); + assertThat(thrown.getMessage(), is("Invalid number of arguments following the \"ast-dump\" command. Expected 2 but received: 1")); + } + + @Test + void whenCallingMainWithAstDumpAndTooManyArgs_thenError() { + String[] args = {"ast-dump", "too", "many", "args"}; + Exception thrown = assertThrows(Exception.class, () -> callPmdWrapper(args)); + assertThat(thrown.getMessage(), is("Invalid number of arguments following the \"ast-dump\" command. Expected 2 but received: 3")); + } + + @Test + void whenCallingMainWithAstDumpAndInputFileThatDoesNotExist_thenError() { + String[] args = {"ast-dump", "/does/not/exist.json", "/does/not/matter"}; + RuntimeException thrown = assertThrows(RuntimeException.class, () -> callPmdWrapper(args)); + assertThat(thrown.getMessage(), containsString("Could not read contents from \"/does/not/exist.json\"")); + assertThat(thrown.getCause(), instanceOf(FileNotFoundException.class)); + } + + @Test + void whenCallingAstDumpWithValidApexCode_thenGeneratesNonEmptyXmlAst(@TempDir Path tempDir) throws Exception { + // Create a simple Apex class + String apexCode = "public class TestClass {\n" + + " public String name;\n" + + " \n" + + " public void sayHello() {\n" + + " System.debug('Hello World');\n" + + " }\n" + + "}"; + String apexFile = createTempFile(tempDir, "TestClass.cls", apexCode); + + // Create input JSON for ast-dump command + String inputFileContents = "{\n" + + " \"language\": \"apex\",\n" + + " \"fileToDump\": \"" + makePathJsonSafe(apexFile) + "\",\n" + + " \"encoding\": \"UTF-8\"\n" + + "}"; + String inputFile = createTempFile(tempDir, "astDumpInput.json", inputFileContents); + + String resultsOutputFile = tempDir.resolve("astDumpOutput.json").toAbsolutePath().toString(); + + // Execute ast-dump command + String[] args = {"ast-dump", inputFile, 
resultsOutputFile}; + String stdOut = callPmdWrapper(args); + + // Read and parse the results + String resultsJsonString = new String(Files.readAllBytes(Paths.get(resultsOutputFile))); + Gson gson = new Gson(); + PmdAstDumpResults results = gson.fromJson(resultsJsonString, PmdAstDumpResults.class); + + // Assert the AST was generated successfully + assertThat(results.file, is(apexFile)); + assertThat(results.ast, is(notNullValue())); + assertThat(results.ast.length(), greaterThan(100)); // AST should be substantial + assertThat(results.error, is(nullValue())); + + // Assert the AST contains expected XML structure + assertThat(results.ast, containsString("\n" + + "

Hello World

\n" + + " \n" + + ""; + String vfFile = createTempFile(tempDir, "TestPage.page", vfCode); + + // Create input JSON for ast-dump command + String inputFileContents = "{\n" + + " \"language\": \"visualforce\",\n" + + " \"fileToDump\": \"" + makePathJsonSafe(vfFile) + "\",\n" + + " \"encoding\": \"UTF-8\"\n" + + "}"; + String inputFile = createTempFile(tempDir, "astDumpInput.json", inputFileContents); + + String resultsOutputFile = tempDir.resolve("astDumpOutput.json").toAbsolutePath().toString(); + + // Execute ast-dump command + String[] args = {"ast-dump", inputFile, resultsOutputFile}; + callPmdWrapper(args); + + // Read and parse the results + String resultsJsonString = new String(Files.readAllBytes(Paths.get(resultsOutputFile))); + Gson gson = new Gson(); + PmdAstDumpResults results = gson.fromJson(resultsJsonString, PmdAstDumpResults.class); + + // Assert the AST was generated successfully + assertThat(results.file, is(vfFile)); + assertThat(results.ast, is(notNullValue())); + assertThat(results.ast.length(), greaterThan(50)); + assertThat(results.error, is(nullValue())); + + // Assert the AST contains expected XML structure + assertThat(results.ast, containsString(" callPmdWrapper(args)); - assertThat(thrown.getMessage(), is("Bad first argument to PmdWrapper. Expected \"describe\" or \"run\". Received: \"oops\"")); + assertThat(thrown.getMessage(), is("Bad first argument to PmdWrapper. Expected \"describe\", \"run\", or \"ast-dump\". 
Received: \"oops\"")); } @Test @@ -528,7 +528,6 @@ void whenRunningWithDeprecatedExcessiveClassLengthRule_thenExecutesSuccessfully( assertThat(element.isJsonObject(), is(true)); } - private static String createSampleRulesetFile(Path tempDir) throws Exception { String ruleSetContents = "\n" + " Date: Mon, 2 Mar 2026 12:59:03 +0530 Subject: [PATCH 2/9] NEW @W-21102140@ - pmdwrapper for ast dump typescript pmd wrapper (#424) --- .../src/pmd-wrapper.ts | 68 ++++++ .../test/pmd-ast-dump.test.ts | 197 ++++++++++++++++++ 2 files changed, 265 insertions(+) create mode 100644 packages/code-analyzer-pmd-engine/test/pmd-ast-dump.test.ts diff --git a/packages/code-analyzer-pmd-engine/src/pmd-wrapper.ts b/packages/code-analyzer-pmd-engine/src/pmd-wrapper.ts index a80b2049..13b043cd 100644 --- a/packages/code-analyzer-pmd-engine/src/pmd-wrapper.ts +++ b/packages/code-analyzer-pmd-engine/src/pmd-wrapper.ts @@ -46,6 +46,18 @@ export type PmdProcessingError = { detail: string } +export type PmdAstDumpInputData = { + language: string + fileToDump: string + encoding?: string +} + +export type PmdAstDumpResults = { + file: string + ast: string | null + error: PmdProcessingError | null +} + const STDOUT_PROGRESS_MARKER = '[Progress]'; const STDOUT_ERROR_MARKER = '[Error] '; const STDOUT_WARNING_MARKER = '[Warning] '; @@ -148,6 +160,62 @@ export class PmdWrapperInvoker { throw new Error(getMessageFromCatalog(SHARED_MESSAGE_CATALOG, 'ErrorParsingOutputFile', resultsOutputFile, errMsg), {cause: err}); } } + + async invokeAstDumpCommand( + language: string, + fileToDump: string, + workingFolder: string, + encoding: string = 'UTF-8', + emitProgress: (percComplete: number) => void + ): Promise { + + emitProgress(5); + + // Prepare input data + const inputData: PmdAstDumpInputData = { + language: language, + fileToDump: fileToDump, + encoding: encoding + }; + + const inputFile: string = path.join(workingFolder, 'astDumpInput.json'); + await fs.promises.writeFile(inputFile, 
JSON.stringify(inputData), 'utf-8'); + emitProgress(10); + + const resultsOutputFile: string = path.join(workingFolder, 'astDumpResults.json'); + const javaCmdArgs: string[] = [PMD_WRAPPER_JAVA_CLASS, 'ast-dump', inputFile, resultsOutputFile]; + const javaClassPaths: string[] = [ + path.join(PMD_WRAPPER_LIB_FOLDER, '*'), + ...this.userProvidedJavaClasspathEntries.map(toJavaClasspathEntry) + ]; + + this.emitLogEvent(LogLevel.Fine, `Calling AST dump for file: ${fileToDump}`); + + await this.javaCommandExecutor.exec(javaCmdArgs, javaClassPaths, (stdOutMsg: string) => { + if (stdOutMsg.startsWith(STDOUT_ERROR_MARKER)) { + const errorMessage: string = stdOutMsg.slice(STDOUT_ERROR_MARKER.length).replaceAll('{NEWLINE}','\n'); + throw new Error(errorMessage); + } else if (stdOutMsg.startsWith(STDOUT_WARNING_MARKER)) { + const warningMessage: string = stdOutMsg.slice(STDOUT_WARNING_MARKER.length).replaceAll('{NEWLINE}','\n'); + this.emitLogEvent(LogLevel.Warn, `[JAVA StdOut]: ${warningMessage}`); + } else { + this.emitLogEvent(LogLevel.Fine, `[JAVA StdOut]: ${stdOutMsg}`); + } + }); + + emitProgress(95); + + // Read and parse results + try { + const resultsFileContents: string = await fs.promises.readFile(resultsOutputFile, 'utf-8'); + const results: PmdAstDumpResults = JSON.parse(resultsFileContents); + emitProgress(100); + return results; + } catch (err) /* istanbul ignore next */ { + const errMsg: string = err instanceof Error ? 
err.message : String(err); + throw new Error(getMessageFromCatalog(SHARED_MESSAGE_CATALOG, 'ErrorParsingOutputFile', resultsOutputFile, errMsg), {cause: err}); + } + } } function createRuleSetFileContentsFor(pmdRuleInfoList: PmdRuleInfo[]): string { diff --git a/packages/code-analyzer-pmd-engine/test/pmd-ast-dump.test.ts b/packages/code-analyzer-pmd-engine/test/pmd-ast-dump.test.ts new file mode 100644 index 00000000..278c63a9 --- /dev/null +++ b/packages/code-analyzer-pmd-engine/test/pmd-ast-dump.test.ts @@ -0,0 +1,197 @@ +import {changeWorkingDirectoryToPackageRoot} from "./test-helpers"; +import {LogLevel} from "@salesforce/code-analyzer-engine-api"; +import {JavaCommandExecutor} from "@salesforce/code-analyzer-engine-api/utils"; +import {PmdWrapperInvoker, PmdAstDumpResults} from "../src/pmd-wrapper"; +import path from "node:path"; +import fs from "node:fs"; +import os from "node:os"; + +changeWorkingDirectoryToPackageRoot(); + +const TEST_DATA_FOLDER: string = path.join(__dirname, 'test-data'); + +describe('Tests for invokeAstDumpCommand method of PmdWrapperInvoker', () => { + let javaCommandExecutor: JavaCommandExecutor; + let pmdWrapperInvoker: PmdWrapperInvoker; + let workingFolder: string; + let logEvents: Array<{level: LogLevel, message: string}>; + + beforeEach(() => { + javaCommandExecutor = new JavaCommandExecutor(); + logEvents = []; + pmdWrapperInvoker = new PmdWrapperInvoker( + javaCommandExecutor, + [], + (level: LogLevel, message: string) => { + logEvents.push({level, message}); + } + ); + workingFolder = fs.mkdtempSync(path.join(os.tmpdir(), 'pmd-ast-dump-test-')); + }); + + afterEach(() => { + // Clean up working folder + if (fs.existsSync(workingFolder)) { + fs.rmSync(workingFolder, {recursive: true, force: true}); + } + }); + + it('When calling invokeAstDumpCommand with valid Apex file, then AST is generated successfully', async () => { + const apexFile = path.join(TEST_DATA_FOLDER, 'samplePmdWorkspace', 'sampleViolations', 
'AvoidDebugStatements.cls'); + const progressEvents: number[] = []; + + const results: PmdAstDumpResults = await pmdWrapperInvoker.invokeAstDumpCommand( + 'apex', + apexFile, + workingFolder, + 'UTF-8', + (progress: number) => progressEvents.push(progress) + ); + + // Assert results + expect(results.file).toBe(apexFile); + expect(results.ast).toBeDefined(); + expect(results.ast).not.toBeNull(); + expect(results.ast!).toContain(' e.level === LogLevel.Fine); + expect(fineLogEvents.length).toBeGreaterThan(0); + expect(fineLogEvents.some(e => e.message.includes('Calling AST dump'))).toBe(true); + }); + + it('When calling invokeAstDumpCommand with valid Visualforce file, then AST is generated successfully', async () => { + const vfFile = path.join(TEST_DATA_FOLDER, 'samplePmdWorkspace', 'sampleViolations', 'VfUnescapeEl.page'); + const progressEvents: number[] = []; + + const results: PmdAstDumpResults = await pmdWrapperInvoker.invokeAstDumpCommand( + 'visualforce', + vfFile, + workingFolder, + 'UTF-8', + (progress: number) => progressEvents.push(progress) + ); + + // Assert results + expect(results.file).toBe(vfFile); + expect(results.ast).toBeDefined(); + expect(results.ast).not.toBeNull(); + expect(results.ast!).toContain(' { + const nonExistentFile = path.join(workingFolder, 'DoesNotExist.cls'); + + const results: PmdAstDumpResults = await pmdWrapperInvoker.invokeAstDumpCommand( + 'apex', + nonExistentFile, + workingFolder, + 'UTF-8', + () => {} + ); + + // Assert error is returned + expect(results.file).toBe(nonExistentFile); + expect(results.ast).toBeFalsy(); // null or undefined + expect(results.error).toBeDefined(); + expect(results.error!.file).toBe(nonExistentFile); + expect(results.error!.message).toContain('File not found'); + }); + + it('When calling invokeAstDumpCommand with invalid language, then error is returned', async () => { + const apexFile = path.join(TEST_DATA_FOLDER, 'samplePmdWorkspace', 'sampleViolations', 'AvoidDebugStatements.cls'); + + const 
results: PmdAstDumpResults = await pmdWrapperInvoker.invokeAstDumpCommand( + 'invalid_language', + apexFile, + workingFolder, + 'UTF-8', + () => {} + ); + + // Assert error is returned + expect(results.file).toBe(apexFile); + expect(results.ast).toBeFalsy(); // null or undefined + expect(results.error).toBeDefined(); + expect(results.error!.message).toContain('Language not supported'); + }); + + it('When calling invokeAstDumpCommand with invalid Apex syntax, then error is returned', async () => { + // Create a file with invalid Apex syntax + const invalidApexFile = path.join(workingFolder, 'Invalid.cls'); + const invalidApexCode = 'public class Invalid {\n #### SYNTAX ERROR ####\n}'; + fs.writeFileSync(invalidApexFile, invalidApexCode, 'utf-8'); + + const results: PmdAstDumpResults = await pmdWrapperInvoker.invokeAstDumpCommand( + 'apex', + invalidApexFile, + workingFolder, + 'UTF-8', + () => {} + ); + + // Assert error is returned + expect(results.file).toBe(invalidApexFile); + expect(results.ast).toBeFalsy(); // null or undefined + expect(results.error).toBeDefined(); + expect(results.error!.file).toBe(invalidApexFile); + // Error message should indicate parsing issue + expect(results.error!.message.length).toBeGreaterThan(0); + }); + + it('When calling invokeAstDumpCommand with valid encoding parameter, then AST is generated', async () => { + const apexFile = path.join(TEST_DATA_FOLDER, 'samplePmdWorkspace', 'sampleViolations', 'AvoidDebugStatements.cls'); + + const results: PmdAstDumpResults = await pmdWrapperInvoker.invokeAstDumpCommand( + 'apex', + apexFile, + workingFolder, + 'UTF-8', // Use UTF-8 since the file is actually UTF-8 + () => {} + ); + + // Should succeed + expect(results.file).toBe(apexFile); + expect(results.ast).toBeDefined(); + expect(results.ast).not.toBeNull(); + expect(results.error).toBeUndefined(); + }); + + it('When calling invokeAstDumpCommand, then input and output files are created in working folder', async () => { + const apexFile = 
path.join(TEST_DATA_FOLDER, 'samplePmdWorkspace', 'sampleViolations', 'AvoidDebugStatements.cls'); + + await pmdWrapperInvoker.invokeAstDumpCommand( + 'apex', + apexFile, + workingFolder, + 'UTF-8', + () => {} + ); + + // Verify input file was created + const inputFile = path.join(workingFolder, 'astDumpInput.json'); + expect(fs.existsSync(inputFile)).toBe(true); + + const inputData = JSON.parse(fs.readFileSync(inputFile, 'utf-8')); + expect(inputData.language).toBe('apex'); + expect(inputData.fileToDump).toBe(apexFile); + expect(inputData.encoding).toBe('UTF-8'); + + // Verify output file was created + const outputFile = path.join(workingFolder, 'astDumpResults.json'); + expect(fs.existsSync(outputFile)).toBe(true); + }); +}); From f821c968b68dd8f95c6c84e9bcd41f19b7626151 Mon Sep 17 00:00:00 2001 From: aruntyagiTutu Date: Mon, 2 Mar 2026 13:52:47 +0530 Subject: [PATCH 3/9] New @W-21102140@ - engine api for astdump (#425) --- .../code-analyzer-pmd-engine/src/index.ts | 7 +- .../src/pmd-engine.ts | 42 ++++++++ .../src/pmd-wrapper.ts | 5 + .../test/pmd-engine.test.ts | 102 ++++++++++++++++++ 4 files changed, 155 insertions(+), 1 deletion(-) diff --git a/packages/code-analyzer-pmd-engine/src/index.ts b/packages/code-analyzer-pmd-engine/src/index.ts index c346d540..3518aa06 100644 --- a/packages/code-analyzer-pmd-engine/src/index.ts +++ b/packages/code-analyzer-pmd-engine/src/index.ts @@ -7,4 +7,9 @@ function createEnginePlugin(): EnginePlugin { // Each code analyzer engine plugin module should export its plugin (so that it can be constructed manually) and // a createEnginePlugin function that creates the plugin (so that it can be dynamically loaded). 
-export { createEnginePlugin, PmdCpdEnginesPlugin } \ No newline at end of file +export { createEnginePlugin, PmdCpdEnginesPlugin } + +// Export types for AST dump functionality +export type { PmdAstDumpResults, GenerateAstOptions, PmdProcessingError } from "./pmd-wrapper" +export { PmdEngine } from "./pmd-engine" +export { CpdEngine } from "./cpd-engine" \ No newline at end of file diff --git a/packages/code-analyzer-pmd-engine/src/pmd-engine.ts b/packages/code-analyzer-pmd-engine/src/pmd-engine.ts index 93e31eff..36112d6e 100644 --- a/packages/code-analyzer-pmd-engine/src/pmd-engine.ts +++ b/packages/code-analyzer-pmd-engine/src/pmd-engine.ts @@ -14,6 +14,7 @@ import {indent, JavaCommandExecutor} from '@salesforce/code-analyzer-engine-api/ import {toExtensionsToLanguageMap, WorkspaceLiaison} from "./utils"; import path from "node:path"; import * as fs from 'node:fs/promises'; +import * as os from 'node:os'; import { Language, PMD_ENGINE_NAME, @@ -21,7 +22,9 @@ import { SHARED_RULE_NAMES } from "./constants"; import { + GenerateAstOptions, LanguageSpecificPmdRunData, + PmdAstDumpResults, PmdResults, PmdRuleInfo, PmdViolation, @@ -121,6 +124,45 @@ export class PmdEngine extends Engine { }; } + /** + * Generates Abstract Syntax Tree (AST) representation for a source file + * @param language - Language identifier (apex, visualforce, xml, html, javascript) + * @param file - Absolute path to the file to analyze + * @param options - Optional configuration (encoding, workingFolder) + * @returns PmdAstDumpResults containing AST XML or error information + */ + async generateAst(language: string, file: string, options?: GenerateAstOptions): Promise { + const encoding = options?.encoding || 'UTF-8'; + const workingFolder = options?.workingFolder || await fs.mkdtemp(path.join(os.tmpdir(), 'pmd-ast-dump-')); + + this.emitLogEvent(LogLevel.Fine, `Generating AST for file: ${file} (language: ${language})`); + + try { + const results = await 
this.pmdWrapperInvoker.invokeAstDumpCommand( + language, + file, + workingFolder, + encoding, + () => {} // No progress reporting at engine level + ); + + if (results.error) { + this.emitLogEvent(LogLevel.Error, `Failed to generate AST for ${file}: ${results.error.message}`); + } else { + this.emitLogEvent(LogLevel.Fine, `Successfully generated AST for ${file}`); + } + + return results; + } finally { + // Clean up temporary working folder if we created it + if (!options?.workingFolder) { + await fs.rm(workingFolder, {recursive: true, force: true}).catch(() => { + // Ignore cleanup errors + }); + } + } + } + private async getPmdRuleInfoList(workspaceLiaison: WorkspaceLiaison, workingFolder: string, emitProgress: (percComplete: number) => void): Promise { diff --git a/packages/code-analyzer-pmd-engine/src/pmd-wrapper.ts b/packages/code-analyzer-pmd-engine/src/pmd-wrapper.ts index 13b043cd..ed26a98f 100644 --- a/packages/code-analyzer-pmd-engine/src/pmd-wrapper.ts +++ b/packages/code-analyzer-pmd-engine/src/pmd-wrapper.ts @@ -58,6 +58,11 @@ export type PmdAstDumpResults = { error: PmdProcessingError | null } +export type GenerateAstOptions = { + encoding?: string + workingFolder?: string +} + const STDOUT_PROGRESS_MARKER = '[Progress]'; const STDOUT_ERROR_MARKER = '[Error] '; const STDOUT_WARNING_MARKER = '[Warning] '; diff --git a/packages/code-analyzer-pmd-engine/test/pmd-engine.test.ts b/packages/code-analyzer-pmd-engine/test/pmd-engine.test.ts index ad4e82fd..198de645 100644 --- a/packages/code-analyzer-pmd-engine/test/pmd-engine.test.ts +++ b/packages/code-analyzer-pmd-engine/test/pmd-engine.test.ts @@ -15,6 +15,7 @@ import { } from "@salesforce/code-analyzer-engine-api"; import {PmdEngine} from "../src/pmd-engine"; import fs from "node:fs"; +import * as fsPromises from "node:fs/promises"; import path from "node:path"; import {Language, PMD_VERSION} from "../src/constants"; import {DEFAULT_PMD_ENGINE_CONFIG, PMD_AVAILABLE_LANGUAGES, PmdEngineConfig} from 
"../src/config"; @@ -649,6 +650,107 @@ describe('Tests for the getEngineVersion method of PmdEngine', () => { }); }); +describe('Tests for the generateAst method of PmdEngine', () => { + it('When calling generateAst with valid Apex file, then AST is returned', async () => { + const engine: PmdEngine = new PmdEngine(DEFAULT_PMD_ENGINE_CONFIG); + const apexFile = path.join(TEST_DATA_FOLDER, 'samplePmdWorkspace', 'sampleViolations', 'AvoidDebugStatements.cls'); + + const logEvents: LogEvent[] = []; + engine.onEvent(EventType.LogEvent, (e: LogEvent) => logEvents.push(e)); + + const results = await engine.generateAst('apex', apexFile); + + expect(results.file).toBe(apexFile); + expect(results.ast).toBeDefined(); + expect(results.ast).not.toBeNull(); + expect(results.ast!).toContain(' e.logLevel === LogLevel.Fine); + expect(fineLogEvents.some(e => e.message.includes('Generating AST'))).toBe(true); + expect(fineLogEvents.some(e => e.message.includes('Successfully generated AST'))).toBe(true); + }); + + it('When calling generateAst with valid Visualforce file, then AST is returned', async () => { + const engine: PmdEngine = new PmdEngine(DEFAULT_PMD_ENGINE_CONFIG); + const vfFile = path.join(TEST_DATA_FOLDER, 'samplePmdWorkspace', 'sampleViolations', 'VfUnescapeEl.page'); + + const results = await engine.generateAst('visualforce', vfFile); + + expect(results.file).toBe(vfFile); + expect(results.ast).toBeDefined(); + expect(results.ast).not.toBeNull(); + expect(results.ast!).toContain(' { + const engine: PmdEngine = new PmdEngine(DEFAULT_PMD_ENGINE_CONFIG); + const nonExistentFile = path.join(TEST_DATA_FOLDER, 'DoesNotExist.cls'); + + const logEvents: LogEvent[] = []; + engine.onEvent(EventType.LogEvent, (e: LogEvent) => logEvents.push(e)); + + const results = await engine.generateAst('apex', nonExistentFile); + + expect(results.file).toBe(nonExistentFile); + expect(results.ast).toBeFalsy(); + expect(results.error).toBeDefined(); + 
expect(results.error!.message).toContain('File not found'); + + // Check error log event + const errorLogEvents = logEvents.filter(e => e.logLevel === LogLevel.Error); + expect(errorLogEvents.length).toBeGreaterThan(0); + expect(errorLogEvents.some(e => e.message.includes('Failed to generate AST'))).toBe(true); + }); + + it('When calling generateAst with invalid language, then error is returned', async () => { + const engine: PmdEngine = new PmdEngine(DEFAULT_PMD_ENGINE_CONFIG); + const apexFile = path.join(TEST_DATA_FOLDER, 'samplePmdWorkspace', 'sampleViolations', 'AvoidDebugStatements.cls'); + + const results = await engine.generateAst('invalid_language', apexFile); + + expect(results.file).toBe(apexFile); + expect(results.ast).toBeFalsy(); + expect(results.error).toBeDefined(); + expect(results.error!.message).toContain('Language not supported'); + }); + + it('When calling generateAst with custom encoding, then AST is generated', async () => { + const engine: PmdEngine = new PmdEngine(DEFAULT_PMD_ENGINE_CONFIG); + const apexFile = path.join(TEST_DATA_FOLDER, 'samplePmdWorkspace', 'sampleViolations', 'AvoidDebugStatements.cls'); + + const results = await engine.generateAst('apex', apexFile, { encoding: 'UTF-8' }); + + expect(results.file).toBe(apexFile); + expect(results.ast).toBeDefined(); + expect(results.ast).not.toBeNull(); + expect(results.error).toBeUndefined(); + }); + + it('When calling generateAst with custom workingFolder, then working folder is not cleaned up', async () => { + const engine: PmdEngine = new PmdEngine(DEFAULT_PMD_ENGINE_CONFIG); + const apexFile = path.join(TEST_DATA_FOLDER, 'samplePmdWorkspace', 'sampleViolations', 'AvoidDebugStatements.cls'); + const customWorkingFolder = await fsPromises.mkdtemp(path.join(TEST_DATA_FOLDER, 'temp-ast-')); + + try { + const results = await engine.generateAst('apex', apexFile, { workingFolder: customWorkingFolder }); + + expect(results.ast).toBeDefined(); + // Working folder should still exist since we 
provided it + expect(await fsPromises.access(customWorkingFolder).then(() => true).catch(() => false)).toBe(true); + // Output file should exist + const outputFile = path.join(customWorkingFolder, 'astDumpResults.json'); + expect(await fsPromises.access(outputFile).then(() => true).catch(() => false)).toBe(true); + } finally { + // Clean up + await fsPromises.rm(customWorkingFolder, { recursive: true, force: true }); + } + }); +}); + function expectNoDashesAppearOutsideOfOurLanguageSpecificRules(ruleDescriptions: RuleDescription[]): void { for (const ruleDescription of ruleDescriptions) { From d52a38a2141c43929f10bb71f3f8c132808641ad Mon Sep 17 00:00:00 2001 From: Arun Tyagi Date: Tue, 10 Mar 2026 08:28:31 +0530 Subject: [PATCH 4/9] Add comprehensive tests for PMD AST dump and remove implementation plan - Add 11 new tests covering all validation logic and edge cases - Test null/empty language and fileToDump validation - Test encoding defaults to UTF-8 when null/empty - Test invalid encoding, directory instead of file - Test empty files and different encodings (ISO-8859-1) - All 18 tests passing with 100% code coverage - Remove implementation plan document as feature is complete --- PMD_AST_DUMP_IMPLEMENTATION_PLAN.md | 817 ------------------ .../sfca/pmdwrapper/PmdAstDumpTest.java | 272 ++++++ 2 files changed, 272 insertions(+), 817 deletions(-) delete mode 100644 PMD_AST_DUMP_IMPLEMENTATION_PLAN.md diff --git a/PMD_AST_DUMP_IMPLEMENTATION_PLAN.md b/PMD_AST_DUMP_IMPLEMENTATION_PLAN.md deleted file mode 100644 index 2e2bada5..00000000 --- a/PMD_AST_DUMP_IMPLEMENTATION_PLAN.md +++ /dev/null @@ -1,817 +0,0 @@ -# PMD AST Dump Implementation Plan - -## 1. Overview - -This document outlines the implementation plan for integrating PMD's AST dump functionality directly into the code-analyzer-core PMD engine, eliminating the need for users to install PMD CLI separately. 
- -### Version 1 Scope (Simplified): -This implementation focuses on a **simple, straightforward approach** for v1: - -**Key Design Decisions**: -- ✅ **Single File Processing**: One file per API call (not batch) -- ✅ **XML Format Only**: Text format not supported in v1 -- ✅ **Single Output**: One result object (not array) -- ✅ **Embedded Errors**: Errors returned in result object (not thrown) - -**Rationale**: -- Simpler implementation and testing -- Easier error handling -- Predictable memory usage -- Faster time to market -- Can extend to batch processing in v2 if needed - -## 2. Current Architecture Analysis - -### Existing Components: -- **TypeScript Layer**: `pmd-wrapper.ts` - Handles Java command execution -- **Java Wrapper**: `PmdWrapper.java` - Main entry point with commands: - - `describe` - Lists available PMD rules - - `run` - Executes PMD analysis -- **Supporting Classes**: - - `PmdRunner.java` - Executes PMD analysis using `PmdAnalysis` API - - `PmdRuleDescriber.java` - Describes PMD rules - - Various data classes for input/output - -### Current Flow: -``` -TypeScript → JavaCommandExecutor → PmdWrapper.java → [PmdRunner/PmdRuleDescriber] → Results -``` - -## 3. Proposed Implementation - -### 3.1 New Java Classes to Create - -#### A. `PmdAstDumpInputData.java` -**Purpose**: Input data structure for AST dump command - -**Fields**: -```java -- String language // Language ID (apex, java, xml, etc.) -- String fileToDump // Single file to generate AST for -- String encoding // Character encoding (default: "UTF-8") -``` - -**Note**: -- Only **one file** is supported per request (not multiple files) -- Only **XML format** is supported (text format not supported in v1) - -**Location**: `/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/` - ---- - -#### B. 
`PmdAstDumpResults.java` -**Purpose**: Results structure for AST dump output - -**Fields**: -```java -- String file // Full path to the file -- String ast // AST representation in XML format -- ProcessingError error // Error if processing failed (null if successful) -``` - -**Note**: -- Since only one file is processed, there's only **one output** (not a list) -- The `error` field is populated only if AST generation fails -- If successful, `ast` field contains the XML representation, `error` is null -- If failed, `error` field contains the error details, `ast` is null - -**Location**: Same as above - ---- - -#### C. `PmdAstDumper.java` -**Purpose**: Core class that performs AST dumping using PMD APIs - -**Key Methods**: -```java -public PmdAstDumpResults dump(PmdAstDumpInputData inputData) -``` - -**Internal Implementation Details**: -1. **Language Resolution**: - - Use `LanguageRegistry.PMD.getLanguageById(languageId)` - - Get the language processor - - Validate language is supported - -2. **Single File Processing**: - - Read file content using specified encoding - - Create `TextDocument` from file content - - Parse file to get `RootNode` (AST root) - - Render AST using XML renderer - -3. **AST Rendering**: - - **XML Format Only**: Use `net.sourceforge.pmd.util.treeexport.XmlTreeRenderer` - - Text format is not supported in v1 - -4. 
**Error Handling**: - - Catch parsing errors - - Store error in results object - - Return results with either ast or error populated - -**Key PMD APIs Used**: -```java -- net.sourceforge.pmd.lang.LanguageRegistry -- net.sourceforge.pmd.lang.LanguageProcessor -- net.sourceforge.pmd.lang.document.TextDocument -- net.sourceforge.pmd.lang.ast.RootNode -- net.sourceforge.pmd.util.treeexport.XmlTreeRenderer -- net.sourceforge.pmd.util.treeexport.TreeRenderer -``` - -**Pseudo-code**: -```java -public PmdAstDumpResults dump(PmdAstDumpInputData inputData) { - validate(inputData); - - PmdAstDumpResults results = new PmdAstDumpResults(); - results.file = inputData.fileToDump; - - try { - // Get language and processor - Language language = LanguageRegistry.PMD.getLanguageById(inputData.language); - if (language == null) { - throw new RuntimeException("Language not supported: " + inputData.language); - } - - LanguageProcessor processor = language.createProcessor( - LanguageProcessor.processorConfiguration() - ); - - // Read file - String content = readFile(inputData.fileToDump, inputData.encoding); - - // Create document - TextDocument doc = TextDocument.readOnlyString(content, - Paths.get(inputData.fileToDump).getFileName().toString()); - - // Parse to AST - RootNode ast = processor.parse(doc); - - // Render AST as XML - StringWriter writer = new StringWriter(); - XmlTreeRenderer renderer = new XmlTreeRenderer(); - renderer.renderSubtree(ast, writer); - results.ast = writer.toString(); - - } catch (Exception e) { - // Store processing error - ProcessingError error = new ProcessingError(); - error.file = inputData.fileToDump; - error.message = e.getMessage(); - error.detail = e.toString(); - results.error = error; - } - - return results; -} -``` - -**Location**: `/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/` - ---- - -### 3.2 Changes to Existing Java Classes - -#### A. 
`PmdWrapper.java` Modifications - -**Add new command**: `ast-dump` - -**Changes**: -```java -// In main() method, add new else-if branch: -} else if(args[0].equalsIgnoreCase("ast-dump")) { - invokeAstDumpCommand(Arrays.copyOfRange(args, 1, args.length)); -} - -// Add new method: -private static void invokeAstDumpCommand(String[] args) { - if (args.length != 2) { - throw new RuntimeException("Invalid arguments for ast-dump"); - } - - String argsInputFile = args[0]; - String resultsOutputFile = args[1]; - - Gson gson = new Gson(); - - // Read input - PmdAstDumpInputData inputData; - try (FileReader reader = new FileReader(argsInputFile)) { - inputData = gson.fromJson(reader, PmdAstDumpInputData.class); - } catch (Exception e) { - throw new RuntimeException("Could not read input", e); - } - - // Execute AST dump - PmdAstDumper astDumper = new PmdAstDumper(); - PmdAstDumpResults results = astDumper.dump(inputData); - - // Write results - try (FileWriter fileWriter = new FileWriter(resultsOutputFile)) { - gson.toJson(results, fileWriter); - } catch (IOException e) { - throw new RuntimeException(e); - } -} -``` - -**Update Javadoc** to document the new command: -``` -AST-DUMP: - - Generates Abstract Syntax Tree representation of source files - - Invocation: java -cp {classPath} com.salesforce.sfca.pmdwrapper.PmdWrapper ast-dump {inputFile} {outputFile} - - {inputFile}: JSON file with PmdAstDumpInputData structure - - {outputFile}: JSON file for PmdAstDumpResults -``` - ---- - -### 3.3 TypeScript Integration Layer - -#### A. New TypeScript Types (in `pmd-wrapper.ts`) - -```typescript -export type PmdAstDumpInputData = { - language: string, // Language ID (apex, xml, etc.) 
- fileToDump: string, // Single file to dump AST for - encoding?: string // File encoding (default: 'UTF-8') -} - -export type PmdAstDumpResults = { - file: string, // File path - ast: string | null, // AST representation in XML (null if error) - error: PmdProcessingError | null // Error details (null if successful) -} -``` - -**Note**: Only XML format is supported, so no format parameter is needed. - -#### B. New Method in `PmdWrapperInvoker` Class - -```typescript -async invokeAstDumpCommand( - language: string, - fileToDump: string, - workingFolder: string, - encoding: string = 'UTF-8', - emitProgress: (percComplete: number) => void -): Promise<PmdAstDumpResults> { - - emitProgress(5); - - // Prepare input data - const inputData: PmdAstDumpInputData = { - language: language, - fileToDump: fileToDump, - encoding: encoding - }; - - const inputFile = path.join(workingFolder, 'astDumpInput.json'); - await fs.promises.writeFile(inputFile, JSON.stringify(inputData), 'utf-8'); - emitProgress(10); - - const resultsOutputFile = path.join(workingFolder, 'astDumpResults.json'); - const javaCmdArgs = [PMD_WRAPPER_JAVA_CLASS, 'ast-dump', inputFile, resultsOutputFile]; - const javaClassPaths = [ - path.join(PMD_WRAPPER_LIB_FOLDER, '*'), - ...this.userProvidedJavaClasspathEntries.map(toJavaClasspathEntry) - ]; - - this.emitLogEvent(LogLevel.Fine, `Calling AST dump for file: ${fileToDump}`); - - await this.javaCommandExecutor.exec(javaCmdArgs, javaClassPaths, (stdOutMsg: string) => { - if (stdOutMsg.startsWith(STDOUT_ERROR_MARKER)) { - const errorMessage = stdOutMsg.slice(STDOUT_ERROR_MARKER.length).replaceAll('{NEWLINE}','\n'); - throw new Error(errorMessage); - } else if (stdOutMsg.startsWith(STDOUT_WARNING_MARKER)) { - const warningMessage = stdOutMsg.slice(STDOUT_WARNING_MARKER.length).replaceAll('{NEWLINE}','\n'); - this.emitLogEvent(LogLevel.Warn, `[JAVA StdOut]: ${warningMessage}`); - } else { - this.emitLogEvent(LogLevel.Fine, `[JAVA StdOut]: ${stdOutMsg}`); - } - }); - - 
emitProgress(95); - - // Read and parse results - const resultsFileContents = await fs.promises.readFile(resultsOutputFile, 'utf-8'); - const results: PmdAstDumpResults = JSON.parse(resultsFileContents); - emitProgress(100); - - return results; -} -``` - -**Note**: Simplified signature - only one file, no format parameter (XML only). - ---- - -#### C. New Method in `pmd-engine.ts` (if needed) - -Add high-level API in the PMD engine to expose AST dump functionality to users: - -```typescript -async generateAst( - language: string, - file: string, - options?: { - encoding?: string - } -): Promise { - // Implementation using PmdWrapperInvoker - // Returns XML AST representation for a single file -} -``` - -**Note**: For multiple files, users should call this method multiple times. - ---- - -## 4. Data Flow Diagram - -``` -User Code (TypeScript) - ↓ -pmd-engine.ts (generateAst method) [optional high-level API] - ↓ -pmd-wrapper.ts (PmdWrapperInvoker.invokeAstDumpCommand) - ↓ -[Creates JSON input file] → astDumpInput.json - { - "language": "apex", - "fileToDump": "/path/to/MyClass.cls", - "encoding": "UTF-8" - } - ↓ -JavaCommandExecutor.exec() - ↓ -PmdWrapper.java (main → invokeAstDumpCommand) - ↓ -PmdAstDumper.java (dump method) - ↓ - ├─→ LanguageRegistry.PMD.getLanguageById() - ├─→ Language.createProcessor() - ├─→ Read file content (with encoding) - ├─→ TextDocument.readOnlyString() - ├─→ LanguageProcessor.parse() → RootNode (AST) - └─→ XmlTreeRenderer.renderSubtree() → XML string - ↓ -[Writes JSON output] → astDumpResults.json - { - "file": "/path/to/MyClass.cls", - "ast": "...", - "error": null - } - ↓ -TypeScript reads and parses results - ↓ -User receives PmdAstDumpResults - ↓ -User accesses result.ast (if successful) or result.error (if failed) -``` - -**Key Points**: -- Single file input → Single file output -- XML format only -- Error captured in results (not thrown) - ---- - -## 5. 
Dependencies Required - -### Java Dependencies (Already Available) -All required PMD APIs are already available in your current dependencies: -- ✅ `pmd-core` (contains all AST and rendering APIs) -- ✅ `pmd-apex`, `pmd-java`, `pmd-xml`, etc. (language modules) -- ✅ `gson` (for JSON serialization) - -**No additional dependencies needed!** - -### PMD API Classes Used -From `pmd-core-7.21.0`: -- `net.sourceforge.pmd.lang.LanguageRegistry` - Get language by ID -- `net.sourceforge.pmd.lang.Language` - Language definition -- `net.sourceforge.pmd.lang.LanguageProcessor` - Parse files for specific language -- `net.sourceforge.pmd.lang.document.TextDocument` - Document representation -- `net.sourceforge.pmd.lang.ast.RootNode` - Root of the AST -- `net.sourceforge.pmd.util.treeexport.XmlTreeRenderer` - Render AST as XML -- `net.sourceforge.pmd.util.treeexport.TreeRenderer` - Base renderer interface -- `java.io.StringWriter` - Capture XML output -- `java.nio.file.Files` - File reading -- `java.nio.file.Paths` - Path handling - ---- - -## 6. Supported Languages - -Based on your current PMD language modules, AST dump will support: -- ✅ **Apex** (pmd-apex-7.21.0.jar) -- ✅ **Visualforce** (pmd-visualforce-7.21.0.jar) -- ✅ **HTML** (pmd-html-7.21.0.jar) -- ✅ **JavaScript** (pmd-javascript-7.21.0.jar) -- ✅ **XML** (pmd-xml-7.21.0.jar) - -Language IDs to use: -- `apex` - Apex classes and triggers -- `visualforce` - Visualforce pages -- `html` - HTML files -- `javascript` - JavaScript files -- `xml` - XML files -- `xsl` - XSL stylesheets - ---- - -## 7. 
Usage Examples - -### Example 1: Dump Apex Class AST (XML format) - -**Input JSON** (`astDumpInput.json`): -```json -{ - "language": "apex", - "fileToDump": "/path/to/MyClass.cls", - "encoding": "UTF-8" -} -``` - -**Java Command**: -```bash -java -cp "dist/java-lib/*" \ - com.salesforce.sfca.pmdwrapper.PmdWrapper \ - ast-dump \ - astDumpInput.json \ - astDumpResults.json -``` - -**Output JSON** (`astDumpResults.json`) - Success case: -```json -{ - "file": "/path/to/MyClass.cls", - "ast": "\n\n \n ...", - "error": null -} -``` - -**Output JSON** (`astDumpResults.json`) - Error case: -```json -{ - "file": "/path/to/MyClass.cls", - "ast": null, - "error": { - "file": "/path/to/MyClass.cls", - "message": "ParseException", - "detail": "Unexpected token at line 15, column 8" - } -} -``` - -### Example 2: TypeScript Usage - -```typescript -// In your code analyzer -const pmdWrapperInvoker = new PmdWrapperInvoker( - javaCommandExecutor, - [], - (level, msg) => console.log(msg) -); - -const result = await pmdWrapperInvoker.invokeAstDumpCommand( - 'apex', - '/path/to/MyClass.cls', - '/tmp/workdir', - 'UTF-8', - (progress) => console.log(`Progress: ${progress}%`) -); - -// Check if successful -if (result.ast) { - console.log('AST generated successfully:'); - console.log(result.ast); -} else if (result.error) { - console.error('Error generating AST:', result.error.message); -} -``` - -### Example 3: Processing Multiple Files - -To process multiple files, call the method multiple times: - -```typescript -const files = ['/path/to/File1.cls', '/path/to/File2.cls']; - -for (const file of files) { - const result = await pmdWrapperInvoker.invokeAstDumpCommand( - 'apex', - file, - '/tmp/workdir', - 'UTF-8', - (progress) => console.log(`${file}: ${progress}%`) - ); - - if (result.ast) { - console.log(`AST for ${file}:`, result.ast); - } -} -``` - ---- - -## 8. Error Handling - -### Scenarios Handled: -1. **Invalid Language ID**: Error stored in `error` field -2. 
**File Not Found**: Error stored in `error` field -3. **Parse Errors**: Error stored in `error` field with details -4. **Encoding Issues**: Error stored in `error` field - -### Error Response Examples: - -**Success Response**: -```json -{ - "file": "/path/to/MyClass.cls", - "ast": "...", - "error": null -} -``` - -**Parse Error Response**: -```json -{ - "file": "/path/to/BadFile.cls", - "ast": null, - "error": { - "file": "/path/to/BadFile.cls", - "message": "ParseException: Unexpected token at line 15", - "detail": "net.sourceforge.pmd.lang.apex.ParseException: ..." - } -} -``` - -**File Not Found Response**: -```json -{ - "file": "/path/to/missing.cls", - "ast": null, - "error": { - "file": "/path/to/missing.cls", - "message": "File not found", - "detail": "java.io.FileNotFoundException: /path/to/missing.cls" - } -} -``` - -**Invalid Language Response**: -```json -{ - "file": "/path/to/file.txt", - "ast": null, - "error": { - "file": "/path/to/file.txt", - "message": "Language not supported: unknown", - "detail": "java.lang.RuntimeException: Language not supported: unknown" - } -} -``` - ---- - -## 9. Testing Strategy - -### A. Unit Tests to Create - -#### Java Tests: -**File**: `PmdAstDumperTest.java` - -Test cases: -```java -- testDumpApexClassAsXml() // Successfully dump Apex class -- testDumpApexTriggerAsXml() // Successfully dump Apex trigger -- testDumpVisualforceAsXml() // Successfully dump Visualforce page -- testDumpXmlAsXml() // Successfully dump XML file -- testInvalidLanguage() // Error: language not supported -- testFileNotFound() // Error: file doesn't exist -- testParseError() // Error: syntax error in file -- testDifferentEncodings() // Test UTF-8, ISO-8859-1, etc. 
-- testXmlOutputStructure() // Verify XML format is valid -- testEmptyFile() // Handle empty source files -``` - -**Location**: `/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/test/java/com/salesforce/sfca/pmdwrapper/` - -#### TypeScript Tests: -**File**: `pmd-ast-dump.test.ts` - -Test cases: -```typescript -- testInvokeAstDumpCommandSuccess() // Successful AST generation -- testInvokeAstDumpCommandError() // Error handling -- testXmlFormatValidation() // Verify XML output is well-formed -- testMultipleFilesSequential() // Process multiple files in sequence -- testProgressReporting() // Verify progress callbacks work -- testDifferentLanguages() // Test apex, xml, visualforce, etc. -``` - -**Location**: `/packages/code-analyzer-pmd-engine/test/` - -### B. Integration Tests -- End-to-end test calling from TypeScript through Java -- Test with real Apex, Visualforce, XML files -- Verify AST structure is correct -- Performance testing with large files - ---- - -## 10. Performance Considerations - -### Expected Performance (Single File): -- **Small files** (<1KB): ~50-100ms per file -- **Medium files** (1-10KB): ~100-500ms per file -- **Large files** (>10KB): ~500ms-2s per file - -### Processing Multiple Files: -Since only one file is processed per request, to handle multiple files: -1. **Sequential Processing**: Call the API multiple times (simpler, predictable) -2. 
**Parallel Processing**: Use Promise.all() to process multiple files concurrently (faster) - -Example of parallel processing: -```typescript -const files = ['file1.cls', 'file2.cls', 'file3.cls']; -const results = await Promise.all( - files.map(file => pmdWrapperInvoker.invokeAstDumpCommand( - 'apex', file, '/tmp/workdir', 'UTF-8', (p) => {} - )) -); -``` - -### Memory Considerations: -- Each AST held in memory temporarily -- XML output can be 5-10x larger than source -- Processing one file at a time keeps memory usage predictable -- For parallel processing, limit concurrency to avoid memory issues - ---- - -## 11. Limitations & Known Issues - -### Limitations: -1. **Single File Per Request**: Only one file can be processed per API call (not batch processing) -2. **XML Format Only**: Only XML format is supported (text format not supported in v1) -3. **Language Support**: Limited to languages with PMD modules installed -4. **AST Depth**: Full AST can be very large for complex files -5. **Memory**: Large files may require increased heap size - -### Not Supported (v1): -- Batch processing (multiple files in one request) -- Text format output -- JSON format output -- Custom AST node filtering -- Partial AST extraction -- AST modification or manipulation -- Direct AST querying (use PMD's XPath instead) - -### Future Enhancements (Potential v2): -- Support for text format -- Batch processing for multiple files -- AST node filtering options -- Performance optimizations - ---- - -## 12. 
Alternative Approaches Considered - -### Approach 1: Direct CLI Wrapper -**Pros**: Simpler, no code changes -**Cons**: Requires PMD CLI installation, harder to integrate - -### Approach 2: Separate Microservice -**Pros**: Language-agnostic -**Cons**: Complex deployment, network overhead - -### Approach 3: JavaScript AST Parser -**Pros**: No Java dependency -**Cons**: Would need separate parsers for each language, inconsistent with PMD analysis - -**Chosen Approach**: **Library Integration** (this plan) -**Reason**: Consistent with existing architecture, no external dependencies, reuses existing PMD infrastructure - ---- - -## 13. Migration Path (If Upgrading PMD) - -When upgrading PMD version: -1. Update `gradle/libs.versions.toml` (pmd version) -2. Check for API changes in: - - `LanguageProcessor` API - - `TreeRenderer` API - - `TextDocument` API -3. Update `PmdAstDumper.java` if APIs changed -4. Re-run tests -5. Update documentation - ---- - -## 14. Security Considerations - -### Input Validation: -- ✅ Validate language ID exists -- ✅ Validate file paths (no directory traversal) -- ✅ Validate encoding is a supported charset (v1 has no format parameter; output is always XML) -- ✅ Limit file size to prevent DoS -- ✅ Sanitize file content before parsing - -### Output Safety: -- ✅ AST output is read-only representation -- ✅ No code execution in AST generation -- ✅ Error messages don't expose sensitive paths - ---- - -## 15. Documentation to Update - -After implementation: -1. **User Documentation**: - - Add AST dump API reference - - Add usage examples - - Add troubleshooting guide - -2. **Developer Documentation**: - - Update architecture diagrams - - Document new Java classes - - Update TypeScript API docs - -3. **README Files**: - - Update feature list - - Add AST dump to capabilities - ---- - -## 16. Summary - -### Files to Create: -1. `PmdAstDumpInputData.java` (Input structure) -2. `PmdAstDumpResults.java` (Output structure) -3. `PmdAstDumper.java` (Core implementation) -4. `PmdAstDumperTest.java` (Unit tests) -5. 
`pmd-ast-dump.test.ts` (Integration tests) - -### Files to Modify: -1. `PmdWrapper.java` (Add ast-dump command) -2. `pmd-wrapper.ts` (Add TypeScript types and method) -3. `pmd-engine.ts` (Optional: Add high-level API) - -### Dependencies: -- ✅ No new dependencies required (all APIs in pmd-core) - -### Estimated Effort: -- **Java Implementation**: 3-4 hours (simplified - single file, XML only) -- **TypeScript Integration**: 1-2 hours (simplified API) -- **Testing**: 3-4 hours -- **Documentation**: 1-2 hours -- **Total**: ~8-12 hours (reduced due to simplified scope) - -### Benefits: -✅ No PMD CLI installation required -✅ Consistent with existing architecture -✅ Full control over AST generation -✅ Easy to extend for new languages -✅ Programmatic access from TypeScript -✅ Simple API - one file in, one AST out -✅ Predictable memory usage (single file processing) -✅ Easy error handling (error embedded in result) - ---- - -## 17. Next Steps - -1. **Review this document** - Confirm approach -2. **Create Java classes** - Implement core functionality -3. **Update PmdWrapper** - Add new command -4. **Add TypeScript types** - Type definitions -5. **Implement TypeScript method** - Integration layer -6. **Write tests** - Unit and integration tests -7. **Test with real files** - Apex, Visualforce, XML -8. **Update documentation** - User and developer docs -9. **Review and iterate** - Code review and refinement - ---- - -## Questions to Consider (For Future Versions) - -### Answered in v1: -✅ **Format support**: XML only (simplifies implementation) -✅ **Batch processing**: Single file only (simplifies API and error handling) -✅ **Error handling**: Errors embedded in result object (no exceptions thrown) - -### For Future Consideration (v2+): -1. **Should we limit file size?** Prevent memory issues with very large files -2. **Should we add batch processing?** Process multiple files in one request -3. **Should we support text format?** In addition to XML -4. 
**Should we cache parsed ASTs?** For repeated operations on same file -5. **Should we support AST filtering?** Extract only specific node types -6. **Should we add XPath support?** Query AST nodes directly -7. **Should we add streaming?** For very large files to reduce memory usage - ---- - -**End of Implementation Plan** diff --git a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/test/java/com/salesforce/sfca/pmdwrapper/PmdAstDumpTest.java b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/test/java/com/salesforce/sfca/pmdwrapper/PmdAstDumpTest.java index 325626b8..5bdbd51d 100644 --- a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/test/java/com/salesforce/sfca/pmdwrapper/PmdAstDumpTest.java +++ b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/test/java/com/salesforce/sfca/pmdwrapper/PmdAstDumpTest.java @@ -224,6 +224,278 @@ void whenCallingAstDumpWithInvalidApexSyntax_thenReturnsError(@TempDir Path temp assertThat(results.error, is(notNullValue())); } + @Test + void whenCallingAstDumpWithNullLanguage_thenThrowsException(@TempDir Path tempDir) throws Exception { + // Create a test file + String testFile = createTempFile(tempDir, "test.txt", "some content"); + + // Create input JSON with null language + String inputFileContents = "{\n" + + " \"language\": null,\n" + + " \"fileToDump\": \"" + makePathJsonSafe(testFile) + "\",\n" + + " \"encoding\": \"UTF-8\"\n" + + "}"; + String inputFile = createTempFile(tempDir, "astDumpInput.json", inputFileContents); + + String resultsOutputFile = tempDir.resolve("astDumpOutput.json").toAbsolutePath().toString(); + + // Execute ast-dump command - should throw exception + String[] args = {"ast-dump", inputFile, resultsOutputFile}; + RuntimeException thrown = assertThrows(RuntimeException.class, () -> callPmdWrapper(args)); + assertThat(thrown.getMessage(), containsString("'language' field is required")); + } + + @Test + void whenCallingAstDumpWithEmptyLanguage_thenThrowsException(@TempDir Path 
tempDir) throws Exception { + // Create a test file + String testFile = createTempFile(tempDir, "test.txt", "some content"); + + // Create input JSON with empty language + String inputFileContents = "{\n" + + " \"language\": \" \",\n" + + " \"fileToDump\": \"" + makePathJsonSafe(testFile) + "\",\n" + + " \"encoding\": \"UTF-8\"\n" + + "}"; + String inputFile = createTempFile(tempDir, "astDumpInput.json", inputFileContents); + + String resultsOutputFile = tempDir.resolve("astDumpOutput.json").toAbsolutePath().toString(); + + // Execute ast-dump command - should throw exception + String[] args = {"ast-dump", inputFile, resultsOutputFile}; + RuntimeException thrown = assertThrows(RuntimeException.class, () -> callPmdWrapper(args)); + assertThat(thrown.getMessage(), containsString("'language' field is required")); + } + + @Test + void whenCallingAstDumpWithNullFileToDump_thenThrowsException(@TempDir Path tempDir) throws Exception { + // Create input JSON with null fileToDump + String inputFileContents = "{\n" + + " \"language\": \"apex\",\n" + + " \"fileToDump\": null,\n" + + " \"encoding\": \"UTF-8\"\n" + + "}"; + String inputFile = createTempFile(tempDir, "astDumpInput.json", inputFileContents); + + String resultsOutputFile = tempDir.resolve("astDumpOutput.json").toAbsolutePath().toString(); + + // Execute ast-dump command - should throw exception + String[] args = {"ast-dump", inputFile, resultsOutputFile}; + RuntimeException thrown = assertThrows(RuntimeException.class, () -> callPmdWrapper(args)); + assertThat(thrown.getMessage(), containsString("'fileToDump' field is required")); + } + + @Test + void whenCallingAstDumpWithEmptyFileToDump_thenThrowsException(@TempDir Path tempDir) throws Exception { + // Create input JSON with empty fileToDump + String inputFileContents = "{\n" + + " \"language\": \"apex\",\n" + + " \"fileToDump\": \" \",\n" + + " \"encoding\": \"UTF-8\"\n" + + "}"; + String inputFile = createTempFile(tempDir, "astDumpInput.json", 
inputFileContents); + + String resultsOutputFile = tempDir.resolve("astDumpOutput.json").toAbsolutePath().toString(); + + // Execute ast-dump command - should throw exception + String[] args = {"ast-dump", inputFile, resultsOutputFile}; + RuntimeException thrown = assertThrows(RuntimeException.class, () -> callPmdWrapper(args)); + assertThat(thrown.getMessage(), containsString("'fileToDump' field is required")); + } + + @Test + void whenCallingAstDumpWithNullEncoding_thenDefaultsToUtf8(@TempDir Path tempDir) throws Exception { + // Create a simple Apex class + String apexCode = "public class TestClass { }"; + String apexFile = createTempFile(tempDir, "TestClass.cls", apexCode); + + // Create input JSON with null encoding + String inputFileContents = "{\n" + + " \"language\": \"apex\",\n" + + " \"fileToDump\": \"" + makePathJsonSafe(apexFile) + "\",\n" + + " \"encoding\": null\n" + + "}"; + String inputFile = createTempFile(tempDir, "astDumpInput.json", inputFileContents); + + String resultsOutputFile = tempDir.resolve("astDumpOutput.json").toAbsolutePath().toString(); + + // Execute ast-dump command + String[] args = {"ast-dump", inputFile, resultsOutputFile}; + callPmdWrapper(args); + + // Read and parse the results + String resultsJsonString = new String(Files.readAllBytes(Paths.get(resultsOutputFile))); + Gson gson = new Gson(); + PmdAstDumpResults results = gson.fromJson(resultsJsonString, PmdAstDumpResults.class); + + // Assert the AST was generated successfully (encoding defaulted to UTF-8) + assertThat(results.file, is(apexFile)); + assertThat(results.ast, is(notNullValue())); + assertThat(results.error, is(nullValue())); + } + + @Test + void whenCallingAstDumpWithEmptyEncoding_thenDefaultsToUtf8(@TempDir Path tempDir) throws Exception { + // Create a simple Apex class + String apexCode = "public class TestClass { }"; + String apexFile = createTempFile(tempDir, "TestClass.cls", apexCode); + + // Create input JSON with empty encoding + String 
inputFileContents = "{\n" + + " \"language\": \"apex\",\n" + + " \"fileToDump\": \"" + makePathJsonSafe(apexFile) + "\",\n" + + " \"encoding\": \" \"\n" + + "}"; + String inputFile = createTempFile(tempDir, "astDumpInput.json", inputFileContents); + + String resultsOutputFile = tempDir.resolve("astDumpOutput.json").toAbsolutePath().toString(); + + // Execute ast-dump command + String[] args = {"ast-dump", inputFile, resultsOutputFile}; + callPmdWrapper(args); + + // Read and parse the results + String resultsJsonString = new String(Files.readAllBytes(Paths.get(resultsOutputFile))); + Gson gson = new Gson(); + PmdAstDumpResults results = gson.fromJson(resultsJsonString, PmdAstDumpResults.class); + + // Assert the AST was generated successfully (encoding defaulted to UTF-8) + assertThat(results.file, is(apexFile)); + assertThat(results.ast, is(notNullValue())); + assertThat(results.error, is(nullValue())); + } + + @Test + void whenCallingAstDumpWithInvalidEncoding_thenReturnsError(@TempDir Path tempDir) throws Exception { + // Create a simple Apex class + String apexCode = "public class TestClass { }"; + String apexFile = createTempFile(tempDir, "TestClass.cls", apexCode); + + // Create input JSON with invalid encoding + String inputFileContents = "{\n" + + " \"language\": \"apex\",\n" + + " \"fileToDump\": \"" + makePathJsonSafe(apexFile) + "\",\n" + + " \"encoding\": \"INVALID-ENCODING-NAME\"\n" + + "}"; + String inputFile = createTempFile(tempDir, "astDumpInput.json", inputFileContents); + + String resultsOutputFile = tempDir.resolve("astDumpOutput.json").toAbsolutePath().toString(); + + // Execute ast-dump command + String[] args = {"ast-dump", inputFile, resultsOutputFile}; + callPmdWrapper(args); + + // Read and parse the results + String resultsJsonString = new String(Files.readAllBytes(Paths.get(resultsOutputFile))); + Gson gson = new Gson(); + PmdAstDumpResults results = gson.fromJson(resultsJsonString, PmdAstDumpResults.class); + + // Assert error is 
returned + assertThat(results.file, is(apexFile)); + assertThat(results.ast, is(nullValue())); + assertThat(results.error, is(notNullValue())); + assertThat(results.error.message, anyOf( + containsString("INVALID-ENCODING-NAME"), + containsString("Charset"), + containsString("encoding"))); + } + + @Test + void whenCallingAstDumpWithDirectory_thenReturnsError(@TempDir Path tempDir) throws Exception { + // Use the temp directory itself as the file to dump + String dirPath = tempDir.toAbsolutePath().toString(); + + // Create input JSON pointing to a directory + String inputFileContents = "{\n" + + " \"language\": \"apex\",\n" + + " \"fileToDump\": \"" + makePathJsonSafe(dirPath) + "\",\n" + + " \"encoding\": \"UTF-8\"\n" + + "}"; + String inputFile = createTempFile(tempDir, "astDumpInput.json", inputFileContents); + + String resultsOutputFile = tempDir.resolve("astDumpOutput.json").toAbsolutePath().toString(); + + // Execute ast-dump command + String[] args = {"ast-dump", inputFile, resultsOutputFile}; + callPmdWrapper(args); + + // Read and parse the results + String resultsJsonString = new String(Files.readAllBytes(Paths.get(resultsOutputFile))); + Gson gson = new Gson(); + PmdAstDumpResults results = gson.fromJson(resultsJsonString, PmdAstDumpResults.class); + + // Assert error is returned + assertThat(results.file, is(dirPath)); + assertThat(results.ast, is(nullValue())); + assertThat(results.error, is(notNullValue())); + assertThat(results.error.message, containsString("Not a regular file")); + } + + @Test + void whenCallingAstDumpWithEmptyFile_thenGeneratesAst(@TempDir Path tempDir) throws Exception { + // Create an empty Apex file + String apexFile = createTempFile(tempDir, "Empty.cls", ""); + + // Create input JSON + String inputFileContents = "{\n" + + " \"language\": \"apex\",\n" + + " \"fileToDump\": \"" + makePathJsonSafe(apexFile) + "\",\n" + + " \"encoding\": \"UTF-8\"\n" + + "}"; + String inputFile = createTempFile(tempDir, "astDumpInput.json", 
inputFileContents); + + String resultsOutputFile = tempDir.resolve("astDumpOutput.json").toAbsolutePath().toString(); + + // Execute ast-dump command + String[] args = {"ast-dump", inputFile, resultsOutputFile}; + callPmdWrapper(args); + + // Read and parse the results + String resultsJsonString = new String(Files.readAllBytes(Paths.get(resultsOutputFile))); + Gson gson = new Gson(); + PmdAstDumpResults results = gson.fromJson(resultsJsonString, PmdAstDumpResults.class); + + // Empty files may generate an AST or return an error depending on PMD behavior + // Either outcome is acceptable - we're verifying no exception is thrown + assertThat(results.file, is(apexFile)); + // Don't assert on ast or error - PMD behavior may vary for empty files + } + + @Test + void whenCallingAstDumpWithIso88591Encoding_thenGeneratesAst(@TempDir Path tempDir) throws Exception { + // Create an Apex file with special characters in ISO-8859-1 encoding + String apexCode = "public class TestClass {\n" + + " // Comment with special char: \u00E9\n" + // é in ISO-8859-1 + " public String name;\n" + + "}"; + Path apexPath = tempDir.resolve("TestClass.cls"); + Files.write(apexPath, apexCode.getBytes("ISO-8859-1")); + String apexFile = apexPath.toAbsolutePath().toString(); + + // Create input JSON with ISO-8859-1 encoding + String inputFileContents = "{\n" + + " \"language\": \"apex\",\n" + + " \"fileToDump\": \"" + makePathJsonSafe(apexFile) + "\",\n" + + " \"encoding\": \"ISO-8859-1\"\n" + + "}"; + String inputFile = createTempFile(tempDir, "astDumpInput.json", inputFileContents); + + String resultsOutputFile = tempDir.resolve("astDumpOutput.json").toAbsolutePath().toString(); + + // Execute ast-dump command + String[] args = {"ast-dump", inputFile, resultsOutputFile}; + callPmdWrapper(args); + + // Read and parse the results + String resultsJsonString = new String(Files.readAllBytes(Paths.get(resultsOutputFile))); + Gson gson = new Gson(); + PmdAstDumpResults results = 
gson.fromJson(resultsJsonString, PmdAstDumpResults.class); + + // Assert the AST was generated successfully with correct encoding + assertThat(results.file, is(apexFile)); + assertThat(results.ast, is(notNullValue())); + assertThat(results.error, is(nullValue())); + } + // ===================== HELPER METHODS ===================== private static String createTempFile(Path tempDir, String fileName, String fileContents) throws Exception { From ce4978ea3164c699dee7df340c0f66916d08c5f7 Mon Sep 17 00:00:00 2001 From: Arun Tyagi Date: Tue, 10 Mar 2026 09:58:38 +0530 Subject: [PATCH 5/9] Fix OOM risk: Remove unnecessary file content loading in AST dump PROBLEM: - PmdAstDumper.readFileContent() loaded entire file into memory - Return value was discarded - only used for validation - Could cause OutOfMemoryError with large files (>100MB) - PMD's TreeExporter already reads the file internally SOLUTION: - Replace readFileContent() with validateFilePath() - Only check file exists, is regular file, and encoding is valid - No longer loads file content into memory - Lightweight validation without memory overhead IMPACT: - Prevents OOM errors with large Apex/Visualforce files - Same validation behavior, better performance - All 69 tests pass (18 AST dump + 51 other tests) --- .../salesforce/sfca/pmdwrapper/PmdAstDumper.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumper.java b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumper.java index 77283a16..02eb4e12 100644 --- a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumper.java +++ b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumper.java @@ -35,9 +35,9 @@ public PmdAstDumpResults dump(PmdAstDumpInputData inputData) { try { 
System.out.println("Generating AST for file '" + inputData.fileToDump + "' with language '" + inputData.language + "'"); - // Verify file exists + // Verify file exists and is valid (lightweight validation without reading content) Path filePath = Paths.get(inputData.fileToDump); - readFileContent(filePath, inputData.encoding); + validateFilePath(filePath, inputData.encoding); // Get language Language language = LanguageRegistry.PMD.getLanguageById(inputData.language); @@ -104,9 +104,11 @@ private void validateInputData(PmdAstDumpInputData inputData) { } /** - * Reads file content using the specified encoding + * Validates file path and encoding without reading file content. + * This avoids loading large files into memory unnecessarily. + * PMD's TreeExporter will handle reading the file content. */ - private String readFileContent(Path filePath, String encoding) throws IOException { + private void validateFilePath(Path filePath, String encoding) throws IOException { if (!Files.exists(filePath)) { throw new IOException("File not found: " + filePath); } @@ -114,7 +116,7 @@ private String readFileContent(Path filePath, String encoding) throws IOExceptio throw new IOException("Not a regular file: " + filePath); } - Charset charset = Charset.forName(encoding); - return Files.readString(filePath, charset); + // Validate encoding by attempting to get the Charset (throws if invalid) + Charset.forName(encoding); } } From 61cf998b1e8f668aabd7acdf6ab8c08a1b85a0ef Mon Sep 17 00:00:00 2001 From: Arun Tyagi Date: Tue, 10 Mar 2026 10:02:02 +0530 Subject: [PATCH 6/9] Keep PmdEngine and CpdEngine internal - don't export classes PROBLEM: - PmdEngine and CpdEngine were exported as public API in index.ts - These are internal implementation classes that should not be directly accessed - Exposing them creates unwanted API surface and support burden - Users could bypass the plugin system by directly instantiating engines - Future internal changes would become breaking changes SOLUTION: - 
Remove class exports from index.ts (lines 14-15) - Keep only type exports: PmdAstDumpResults, GenerateAstOptions, PmdProcessingError - Users access engines through PmdCpdEnginesPlugin (correct pattern) - Engines remain accessible internally for testing IMPACT: - Cleaner public API with minimal surface area - Users must use plugin system (intended design pattern) - Internal implementation can evolve without breaking changes - All 127 TypeScript tests pass with 98.84% coverage - Type exports still available for consumers of AST dump functionality --- packages/code-analyzer-pmd-engine/src/index.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/packages/code-analyzer-pmd-engine/src/index.ts b/packages/code-analyzer-pmd-engine/src/index.ts index 3518aa06..9a27a671 100644 --- a/packages/code-analyzer-pmd-engine/src/index.ts +++ b/packages/code-analyzer-pmd-engine/src/index.ts @@ -9,7 +9,5 @@ function createEnginePlugin(): EnginePlugin { // a createEnginePlugin function that creates the plugin (so that it can be dynamically loaded). 
export { createEnginePlugin, PmdCpdEnginesPlugin } -// Export types for AST dump functionality -export type { PmdAstDumpResults, GenerateAstOptions, PmdProcessingError } from "./pmd-wrapper" -export { PmdEngine } from "./pmd-engine" -export { CpdEngine } from "./cpd-engine" \ No newline at end of file +// Export types for AST dump functionality (types only, engines remain internal) +export type { PmdAstDumpResults, GenerateAstOptions, PmdProcessingError } from "./pmd-wrapper" \ No newline at end of file From 73b8cf3564f249c730fe8769c0a732f6b2c661ee Mon Sep 17 00:00:00 2001 From: Arun Tyagi Date: Tue, 10 Mar 2026 10:26:28 +0530 Subject: [PATCH 7/9] Re-export PmdEngine for AST generation API access CONTEXT: - Previous commit removed PmdEngine export to keep it internal - MCP provider (internal Salesforce tool) needs direct access to PmdEngine - They use it to call generateAst() API for AST XML generation PROBLEM: - MCP provider code broke: "Module has no exported member 'PmdEngine'" - They instantiate PmdEngine directly: new PmdEngine(config) - Then call generateAst() method for on-demand AST generation SOLUTION: - Re-export PmdEngine from index.ts - Add clear documentation: use for AST generation, prefer plugin for normal usage - Keep CpdEngine internal (not needed by consumers) - Document that direct instantiation is for specialized use cases RATIONALE: - generateAst() is a valid public API use case - MCP provider is internal Salesforce code, not third-party - Alternative (factory function) adds unnecessary complexity - Clear documentation guides proper usage IMPACT: - MCP provider builds successfully - All 127 tests pass with 98.84% coverage - API surface: minimal (only PmdEngine + types, not CpdEngine) --- packages/code-analyzer-pmd-engine/src/index.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/code-analyzer-pmd-engine/src/index.ts b/packages/code-analyzer-pmd-engine/src/index.ts index 9a27a671..a3fe0c42 100644 --- 
a/packages/code-analyzer-pmd-engine/src/index.ts +++ b/packages/code-analyzer-pmd-engine/src/index.ts @@ -9,5 +9,10 @@ function createEnginePlugin(): EnginePlugin { // a createEnginePlugin function that creates the plugin (so that it can be dynamically loaded). export { createEnginePlugin, PmdCpdEnginesPlugin } -// Export types for AST dump functionality (types only, engines remain internal) -export type { PmdAstDumpResults, GenerateAstOptions, PmdProcessingError } from "./pmd-wrapper" \ No newline at end of file +// Export types for AST dump functionality +export type { PmdAstDumpResults, GenerateAstOptions, PmdProcessingError } from "./pmd-wrapper" + +// Export PmdEngine for direct access to generateAst() API +// NOTE: For normal engine usage, prefer accessing through PmdCpdEnginesPlugin. +// Direct instantiation is primarily for specialized use cases like AST generation tools. +export { PmdEngine } from "./pmd-engine" \ No newline at end of file From e439beabf332360adb975c42481c97ab3e10be59 Mon Sep 17 00:00:00 2001 From: Arun Tyagi Date: Wed, 11 Mar 2026 10:52:54 +0530 Subject: [PATCH 8/9] updated vars name --- .../com/salesforce/sfca/pmdwrapper/PmdAstDumper.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumper.java b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumper.java index 02eb4e12..d08fcf94 100644 --- a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumper.java +++ b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdAstDumper.java @@ -52,25 +52,25 @@ public PmdAstDumpResults dump(PmdAstDumpInputData inputData) { config.setFile(filePath); // Capture output to string - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - PrintStream ps = new 
PrintStream(baos, true, StandardCharsets.UTF_8); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrintStream capturedPrintStream = new PrintStream(outputStream, true, StandardCharsets.UTF_8); PrintStream originalOut = System.out; try { // Redirect System.out to capture XML output - System.setOut(ps); + System.setOut(capturedPrintStream); // Create and export AST (TreeExporter writes to System.out) TreeExporter exporter = new TreeExporter(config); exporter.export(); // Get the XML output - results.ast = baos.toString(StandardCharsets.UTF_8); + results.ast = outputStream.toString(StandardCharsets.UTF_8); } finally { // Restore original System.out System.setOut(originalOut); - ps.close(); + capturedPrintStream.close(); } System.out.println("Successfully generated AST for file '" + inputData.fileToDump + "'"); From 5596c58007a865dc69129587e67ca4191d452f33 Mon Sep 17 00:00:00 2001 From: Arun Tyagi Date: Wed, 11 Mar 2026 11:04:01 +0530 Subject: [PATCH 9/9] return e.message in place of full trace --- .../main/java/com/salesforce/sfca/pmdwrapper/PmdWrapper.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdWrapper.java b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdWrapper.java index d22b7fd2..33257701 100644 --- a/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdWrapper.java +++ b/packages/code-analyzer-pmd-engine/pmd-cpd-wrappers/src/main/java/com/salesforce/sfca/pmdwrapper/PmdWrapper.java @@ -165,7 +165,7 @@ private static void invokeRunCommand(String[] args) { try (FileReader reader = new FileReader(argsInputFile)) { inputData = gson.fromJson(reader, PmdRunInputData.class); } catch (Exception e) { - throw new RuntimeException("Could not read contents from \"" + argsInputFile + "\"", e); + throw new RuntimeException("Could 
not read contents from \"" + argsInputFile + "\": " + e.getMessage(), e); } PmdRunner pmdRunner = new PmdRunner(); @@ -197,7 +197,7 @@ private static void invokeAstDumpCommand(String[] args) { try (FileReader reader = new FileReader(argsInputFile)) { inputData = gson.fromJson(reader, PmdAstDumpInputData.class); } catch (Exception e) { - throw new RuntimeException("Could not read contents from \"" + argsInputFile + "\"", e); + throw new RuntimeException("Could not read contents from \"" + argsInputFile + "\": " + e.getMessage(), e); } // Execute AST dump