Skip to content

Commit 486f2f3

Browse files
committed
File block v5
1 parent 6d047b5 commit 486f2f3

23 files changed

Lines changed: 700 additions & 58 deletions

File tree

apps/docs/components/ui/icon-mapping.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
264264
file: DocumentIcon,
265265
file_v2: DocumentIcon,
266266
file_v4: DocumentIcon,
267+
file_v5: DocumentIcon,
267268
findymail: FindymailIcon,
268269
firecrawl: FirecrawlIcon,
269270
fireflies: FirefliesIcon,

apps/docs/content/docs/de/tools/file.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ description: Mehrere Dateien lesen und parsen
66
import { BlockInfoCard } from "@/components/ui/block-info-card"
77

88
<BlockInfoCard
9-
type="file_v4"
9+
type="file_v5"
1010
color="#40916C"
1111
/>
1212

apps/docs/content/docs/en/execution/files.mdx

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -25,37 +25,39 @@ You can access any of these properties when referencing files from previous bloc
2525

2626
## The File Block
2727

28-
The **File block** is the universal entry point for files in your workflows. It accepts files from any source and outputs standardized file objects that work with all integrations.
28+
The **File block** is the universal entry point for files in your workflows. It has several operations:
2929

30-
**Inputs:**
31-
- **Uploaded files** - Drag and drop or select files directly
32-
- **External URLs** - Any publicly accessible file URL
33-
- **Files from other blocks** - Pass files from Gmail attachments, Slack downloads, etc.
30+
| Operation | What it does | Key output |
31+
| --- | --- | --- |
32+
| **Read** | Returns workspace file objects you select or reference. Does **not** load file text. | `files` (`UserFile[]`) |
33+
| **Get Content** | Extracts the text of one or more files (PDF, DOCX, CSV, etc. are parsed automatically). | `contents` (`string[]`, one entry per file) |
34+
| **Fetch** | Downloads and parses a file from an external URL, with optional request headers. | `files`, `combinedContent` |
35+
| **Write** | Creates a new workspace file. | `id`, `name`, `size`, `url` |
36+
| **Append** | Appends content to an existing workspace file. | `id`, `name`, `size`, `url` |
3437

35-
**Outputs:**
36-
- A list of `UserFile` objects with consistent structure (`name`, `url`, `base64`, `type`, `size`)
37-
- `combinedContent` - Extracted text content from all files (for documents)
38+
`UserFile` objects share a consistent structure (`name`, `url`, `base64`, `type`, `size`), so they work with every integration.
3839

39-
**Example usage:**
40+
### Reading file text: chain Read → Get Content
4041

41-
```
42-
// Get all files from the File block
43-
<file.files>
42+
**Read intentionally returns only file objects, not their text.** To read the text of files, pass the file objects into a **Get Content** operation:
4443

45-
// Get the first file
46-
<file.files[0]>
44+
```
45+
// Block 1 — File (Read): returns the selected workspace files
46+
<file1.files>
4747
48-
// Get combined text content from parsed documents
49-
<file.combinedContent>
48+
// Block 2 — File (Get Content): set its file input to <file1.files>
49+
// → returns an array of extracted text, one entry per file
50+
<file2.contents> // all file contents
51+
<file2.contents[0]> // text of the first file
5052
```
5153

52-
The File block automatically:
53-
- Detects file types from URLs and extensions
54-
- Extracts text from PDFs, CSVs, and documents
55-
- Generates base64 encoding for binary files
56-
- Creates presigned URLs for secure access
54+
The same pattern works for files produced anywhere — feed `<gmail.attachments>`, `<agent.files>`, or `<start.files>` straight into **Get Content**. You never need to extract `base64` or `url` yourself to get text.
55+
56+
<Callout type="info">
57+
Extracted `contents` can be large. The File block stores them through the execution large-value system automatically, so big documents don't bloat your run payloads.
58+
</Callout>
5759

58-
Use the File block when you need to normalize files from different sources before passing them to other blocks like Vision, STT, or email integrations.
60+
Use the File block when you need to normalize files from different sources, read their text, or hand standardized file objects to other blocks like Vision, STT, or email integrations.
5961

6062
## Passing Files Between Blocks
6163

@@ -138,7 +140,7 @@ Use `url` for direct downloads or `base64` for inline processing.
138140
## Blocks That Work with Files
139141

140142
**File inputs:**
141-
- **File** - Parse documents, images, and text files
143+
- **File** - Read file objects, extract text with Get Content, fetch files from URLs, and write or append to workspace files
142144
- **Vision** - Analyze images with AI models
143145
- **Mistral Parser** - Extract text from PDFs
144146

apps/docs/content/docs/es/tools/file.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ description: Leer y analizar múltiples archivos
66
import { BlockInfoCard } from "@/components/ui/block-info-card"
77

88
<BlockInfoCard
9-
type="file_v4"
9+
type="file_v5"
1010
color="#40916C"
1111
/>
1212

apps/docs/content/docs/fr/tools/file.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ description: Lire et analyser plusieurs fichiers
66
import { BlockInfoCard } from "@/components/ui/block-info-card"
77

88
<BlockInfoCard
9-
type="file_v4"
9+
type="file_v5"
1010
color="#40916C"
1111
/>
1212

apps/docs/content/docs/ja/tools/file.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ description: 複数のファイルを読み込んで解析する
66
import { BlockInfoCard } from "@/components/ui/block-info-card"
77

88
<BlockInfoCard
9-
type="file_v4"
9+
type="file_v5"
1010
color="#40916C"
1111
/>
1212

apps/docs/content/docs/zh/tools/file.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ description: 读取并解析多个文件
66
import { BlockInfoCard } from "@/components/ui/block-info-card"
77

88
<BlockInfoCard
9-
type="file_v4"
9+
type="file_v5"
1010
color="#40916C"
1111
/>
1212

apps/sim/app/api/tools/file/manage/route.ts

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { Buffer, isUtf8 } from 'buffer'
12
import { createLogger } from '@sim/logger'
23
import { getErrorMessage } from '@sim/utils/errors'
34
import { generateShortId } from '@sim/utils/id'
@@ -7,8 +8,10 @@ import { parseRequest } from '@/lib/api/server'
78
import { checkInternalAuth } from '@/lib/auth/hybrid'
89
import { splitWorkspaceFilePath } from '@/lib/copilot/tools/server/files/workspace-file'
910
import { acquireLock, releaseLock } from '@/lib/core/config/redis'
11+
import { generateRequestId } from '@/lib/core/utils/request'
1012
import { ensureAbsoluteUrl } from '@/lib/core/utils/urls'
1113
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
14+
import { isSupportedFileType, parseBuffer } from '@/lib/file-parsers'
1215
import { ensureWorkspaceFileFolderPath } from '@/lib/uploads/contexts/workspace/workspace-file-folder-manager'
1316
import {
1417
fetchWorkspaceFileBuffer,
@@ -18,11 +21,14 @@ import {
1821
uploadWorkspaceFile,
1922
} from '@/lib/uploads/contexts/workspace/workspace-file-manager'
2023
import { getFileExtension, getMimeTypeFromExtension } from '@/lib/uploads/utils/file-utils'
24+
import { downloadFileFromStorage } from '@/lib/uploads/utils/file-utils.server'
2125
import { performMoveWorkspaceFileItems } from '@/lib/workspace-files/orchestration'
2226
import {
2327
assertActiveWorkspaceAccess,
2428
isWorkspaceAccessDeniedError,
2529
} from '@/lib/workspaces/permissions/utils'
30+
import { assertToolFileAccess } from '@/app/api/files/authorization'
31+
import type { UserFile } from '@/executor/types'
2632

2733
export const dynamic = 'force-dynamic'
2834

@@ -122,6 +128,46 @@ const extractFileIdsFromInput = (fileInput: unknown): string[] => {
122128
.filter((id) => id.length > 0)
123129
}
124130

131+
/**
 * Per-file download cap (64 MiB) for the content operation, passed as `maxBytes` to the
 * storage download. Aligned with the durable large-value ceiling.
 */
132+
const MAX_GET_CONTENT_FILE_BYTES = 64 * 1024 * 1024
133+
/**
 * Combined extracted-text cap (64 MiB, measured in UTF-8 bytes across all files) so the
 * content array stays within the large-value-ref ceiling.
 */
134+
const MAX_GET_CONTENT_TOTAL_BYTES = 64 * 1024 * 1024
135+
136+
/**
 * Heuristic text check: a buffer counts as text only when it contains no NUL byte
 * and its bytes are valid UTF-8.
 */
const isLikelyTextBuffer = (buffer: Buffer): boolean => {
  if (buffer.includes(0)) {
    return false
  }
  return isUtf8(buffer)
}
137+
138+
/**
139+
* Download a stored file and extract its text content. Parseable types (PDF, DOCX,
140+
* CSV, etc.) go through the shared file-parsers; other UTF-8 files are returned as
141+
* raw text; binary files yield a short placeholder rather than corrupt bytes.
*
* A parser failure is logged as a warning and is not rethrown; the function then
* continues with the raw-text / placeholder handling for the same buffer.
*
* @param userFile - File to download; its `name` determines the extension used to pick a parser.
* @param requestId - Identifier forwarded to the storage download call.
* @returns The parsed text, the raw UTF-8 text, or a bracketed placeholder string for binary data.
142+
*/
143+
const extractUserFileTextContent = async (
144+
userFile: UserFile,
145+
requestId: string
146+
): Promise<string> => {
// The download is bounded by MAX_GET_CONTENT_FILE_BYTES via `maxBytes`.
// NOTE(review): presumably larger files are rejected by downloadFileFromStorage — confirm.
147+
const buffer = await downloadFileFromStorage(userFile, requestId, logger, {
148+
maxBytes: MAX_GET_CONTENT_FILE_BYTES,
149+
})
150+
151+
const extension = getFileExtension(userFile.name)
152+
if (extension && isSupportedFileType(extension)) {
153+
try {
154+
const result = await parseBuffer(buffer, extension)
// A parse result without content is normalized to an empty string.
155+
return result.content ?? ''
156+
} catch (error) {
// Deliberately swallowed after logging: fall through to the checks below.
157+
logger.warn('Falling back to raw text after parser failure', {
158+
name: userFile.name,
159+
error: getErrorMessage(error, 'Unknown error'),
160+
})
161+
}
162+
}
163+
// Valid UTF-8 without NUL bytes is returned verbatim as text.
164+
if (isLikelyTextBuffer(buffer)) {
165+
return buffer.toString('utf-8')
166+
}
167+
// Anything else is treated as binary: describe it instead of returning its bytes.
168+
return `[Binary file: ${userFile.name} (${userFile.type || 'application/octet-stream'}, ${buffer.length} bytes). Cannot extract text content.]`
169+
}
170+
125171
export const POST = withRouteHandler(async (request: NextRequest) => {
126172
const auth = await checkInternalAuth(request, { requireWorkflowId: false })
127173
if (!auth.success) {
@@ -231,6 +277,69 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
231277
})
232278
}
233279

280+
case 'content': {
281+
const { fileId, fileInput } = body
282+
const requestId = generateRequestId()
283+
284+
const selectedFileIds = Array.isArray(fileId)
285+
? fileId.map((id) => id.trim()).filter(Boolean)
286+
: fileId
287+
? normalizeFileIdList(fileId)
288+
: extractFileIdsFromInput(fileInput)
289+
const selectedInputFiles = fileId ? [] : extractUserFilesFromInput(fileInput)
290+
291+
if (selectedFileIds.length === 0 && selectedInputFiles.length === 0) {
292+
return NextResponse.json({ success: false, error: 'File is required' }, { status: 400 })
293+
}
294+
295+
const workspaceFiles = await Promise.all(
296+
selectedFileIds.map((id) => getWorkspaceFile(workspaceId, id))
297+
)
298+
const missingFileId = selectedFileIds.find((_, index) => !workspaceFiles[index])
299+
if (missingFileId) {
300+
return NextResponse.json(
301+
{ success: false, error: `File not found: "${missingFileId}"` },
302+
{ status: 404 }
303+
)
304+
}
305+
306+
const userFiles: UserFile[] = workspaceFiles
307+
.map((file) => workspaceFileToUserFile(file))
308+
.filter((file): file is NonNullable<ReturnType<typeof workspaceFileToUserFile>> =>
309+
Boolean(file)
310+
)
311+
.concat(selectedInputFiles)
312+
313+
const contents: string[] = []
314+
let totalBytes = 0
315+
for (const userFile of userFiles) {
316+
const denied = await assertToolFileAccess(userFile.key, userId, requestId, logger)
317+
if (denied) return denied
318+
319+
const content = await extractUserFileTextContent(userFile, requestId)
320+
totalBytes += Buffer.byteLength(content, 'utf8')
321+
if (totalBytes > MAX_GET_CONTENT_TOTAL_BYTES) {
322+
return NextResponse.json(
323+
{
324+
success: false,
325+
error: `Combined file content is too large to return safely. Maximum is ${
326+
MAX_GET_CONTENT_TOTAL_BYTES / (1024 * 1024)
327+
} MB.`,
328+
},
329+
{ status: 413 }
330+
)
331+
}
332+
contents.push(content)
333+
}
334+
335+
logger.info('File content extracted', { count: contents.length })
336+
337+
return NextResponse.json({
338+
success: true,
339+
data: { contents },
340+
})
341+
}
342+
234343
case 'write': {
235344
const { fileName, content, contentType } = body
236345
const { folderSegments, leafName } = splitWorkspaceFilePath(fileName)

apps/sim/blocks/blocks.test.ts

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,11 @@ describe.concurrent('Blocks Module', () => {
109109
expect(block?.tools.config?.tool({ operation: 'file_get' })).toBe('file_get')
110110
})
111111

112-
it('should expose v4 with read and fetch routed to the expected tools', () => {
112+
it('should keep v4 read and fetch routed to the expected tools', () => {
113113
const block = getBlock('file_v4')
114114

115115
expect(block).toBeDefined()
116-
expect(block?.hideFromToolbar).toBe(false)
116+
expect(block?.hideFromToolbar).toBe(true)
117117
expect(block?.subBlocks[0].options?.map((option) => option.id)).toEqual([
118118
'file_read',
119119
'file_fetch',
@@ -160,6 +160,70 @@ describe.concurrent('Blocks Module', () => {
160160
workspaceId: 'workspace-1',
161161
})
162162
})
163+
164+
// Contract test for the `file_v5` block definition: toolbar visibility, operation list,
// tool routing, declared outputs, and how Get Content maps its input to tool params.
it('should expose v5 with read (files only) and a get content operation', () => {
165+
const block = getBlock('file_v5')
166+
167+
expect(block).toBeDefined()
168+
expect(block?.hideFromToolbar).toBe(false)
// The option ids of the first sub-block are asserted in exact order
// (presumably the operation selector — confirm against the block definition).
169+
expect(block?.subBlocks[0].options?.map((option) => option.id)).toEqual([
170+
'file_read',
171+
'file_get_content',
172+
'file_fetch',
173+
'file_write',
174+
'file_append',
175+
])
176+
expect(block?.subBlocks.find((subBlock) => subBlock.id === 'readFile')?.multiple).toBe(true)
// Each operation id is expected to route to the tool of the same name.
177+
expect(block?.tools.config?.tool({ operation: 'file_read' })).toBe('file_read')
178+
expect(block?.tools.config?.tool({ operation: 'file_get_content' })).toBe('file_get_content')
179+
expect(block?.tools.config?.tool({ operation: 'file_fetch' })).toBe('file_fetch')
180+
181+
// Read exposes files only; the redundant single-file output is gone
182+
expect(block?.outputs.files).toBeDefined()
183+
expect(block?.outputs.file).toBeUndefined()
184+
// Get content exposes a contents array
185+
expect(block?.outputs.contents).toBeDefined()
186+
187+
// Get content resolves canonical IDs
// (a JSON-encoded string of IDs is expected to become the `fileId` array)
188+
expect(
189+
block?.tools.config?.params?.({
190+
operation: 'file_get_content',
191+
getContentInput: '["file-1","file-2"]',
192+
_context: { workspaceId: 'workspace-1' },
193+
})
194+
).toEqual({
195+
fileId: ['file-1', 'file-2'],
196+
workspaceId: 'workspace-1',
197+
})
198+
199+
// Get content resolves selected file objects
// (file objects are expected to be passed through unchanged as `fileInput`)
200+
expect(
201+
block?.tools.config?.params?.({
202+
operation: 'file_get_content',
203+
getContentInput: [
204+
{
205+
key: 'workspace/workspace-1/example.md',
206+
name: 'example.md',
207+
path: '/api/files/serve/workspace%2Fworkspace-1%2Fexample.md?context=workspace',
208+
size: 123,
209+
type: 'text/markdown',
210+
},
211+
],
212+
_context: { workspaceId: 'workspace-1' },
213+
})
214+
).toEqual({
215+
fileInput: [
216+
{
217+
key: 'workspace/workspace-1/example.md',
218+
name: 'example.md',
219+
path: '/api/files/serve/workspace%2Fworkspace-1%2Fexample.md?context=workspace',
220+
size: 123,
221+
type: 'text/markdown',
222+
},
223+
],
224+
workspaceId: 'workspace-1',
225+
})
226+
})
163227
})
164228

165229
describe('Agent block', () => {

0 commit comments

Comments
 (0)