-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy patherrors.go
More file actions
263 lines (236 loc) · 9.51 KB
/
errors.go
File metadata and controls
263 lines (236 loc) · 9.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
package agentcore
import (
"context"
"errors"
"fmt"
"sort"
"strings"
"github.com/voocel/litellm"
)
// Errors produced by agentcore fall into two layers:
//
// 1. Agent layer — loop control, agent state machine, context management.
// Surfaced as sentinel errors (Err*) or typed errors (*Error).
// Match with errors.Is(err, ErrXxx), or with errors.As and a pointer-typed
// target variable: var e *SomeError; errors.As(err, &e).
//
// 2. Model layer — LLM provider errors flow through Unwrap to
// *litellm.LiteLLMError. Match with errors.As and a pointer-typed target
// variable (var le *litellm.LiteLLMError; errors.As(err, &le)), then
// inspect le.Type for the category (rate_limit, quota, auth, ...).
//
// User cancellation surfaces as context.Canceled, not a custom sentinel —
// use errors.Is(err, context.Canceled) for abort detection.
// Agent-layer sentinel errors, listed alphabetically. Compare with errors.Is
// rather than ==: some of them are also reported through typed errors whose
// Is method matches the sentinel.
var (
	ErrAlreadyRunning   = errors.New("agent is already running")
	ErrBadContinuation  = errors.New("cannot continue from this message role without queued messages")
	ErrContextOverflow  = errors.New("context window overflow") // also matched by *ContextOverflowError
	ErrInjectNilMessage = errors.New("inject message is nil")
	ErrMaxTurns         = errors.New("max turns reached") // also matched by *MaxTurnsError
	ErrNoMessages       = errors.New("cannot continue: no messages in context")
	ErrNoModel          = errors.New("no model configured")
	ErrStopGuard        = errors.New("stop guard escalated: run terminated")
	ErrStreamPartial    = errors.New("stream closed without done event") // also matched by *PartialStreamError
	ErrToolValidation   = errors.New("tool argument validation failed")  // also matched by *ToolValidationError
)
// Provider runtime sentinels, listed alphabetically. Each one names a category
// of failure reported by the LLM provider at call time (litellm errors,
// network failures, server responses). Match them directly with errors.Is, or
// call ClassifyProvider to obtain the most specific sentinel for an error
// chain.
var (
	ErrProviderAuth       = errors.New("provider auth")
	ErrProviderNetwork    = errors.New("provider network")
	ErrProviderRateLimit  = errors.New("provider rate limit")
	ErrProviderStreamIdle = errors.New("provider stream idle")
	ErrProviderTimeout    = errors.New("provider timeout")
)
// MaxTurnsError is the typed form of ErrMaxTurns. It records the turn limit so
// the rendered message can include it; errors.Is(err, ErrMaxTurns) still
// matches.
type MaxTurnsError struct {
	Limit int // turn limit embedded in the error message
}

// Error renders the message with the limit in parentheses.
func (e *MaxTurnsError) Error() string {
	return fmt.Sprintf("max turns (%d) reached", e.Limit)
}

// Is reports whether target is the ErrMaxTurns sentinel.
func (e *MaxTurnsError) Is(target error) bool {
	return ErrMaxTurns == target
}
// PartialStreamError reports a stream that closed before a terminal done
// event arrived. errors.Is matches ErrStreamPartial.
//
// Partial holds whatever content was received before truncation. It is there
// for diagnostics only: callers MUST NOT persist it as a completed message,
// because the stream did not finish cleanly (missing StopReason, possibly
// truncated tool_call args, unclosed thinking blocks).
type PartialStreamError struct {
	Partial Message
}

// Error returns a fixed message; the partial content is not included.
func (e *PartialStreamError) Error() string {
	return "stream closed without done event"
}

// Is reports whether target is the ErrStreamPartial sentinel.
func (e *PartialStreamError) Is(target error) bool {
	return ErrStreamPartial == target
}
// ContextOverflowError is the typed form of ErrContextOverflow. It wraps the
// underlying cause (typically a litellm error): errors.Is matches
// ErrContextOverflow, and Unwrap exposes the raw cause so callers can extract
// provider-specific details if needed.
type ContextOverflowError struct {
	Cause error
}

// Error returns the base overflow message, followed by the cause's message
// when a cause is set.
func (e *ContextOverflowError) Error() string {
	const base = "context window overflow"
	if cause := e.Cause; cause != nil {
		return base + ": " + cause.Error()
	}
	return base
}

// Unwrap returns the wrapped cause, which may be nil.
func (e *ContextOverflowError) Unwrap() error {
	return e.Cause
}

// Is reports whether target is the ErrContextOverflow sentinel.
func (e *ContextOverflowError) Is(target error) bool {
	return ErrContextOverflow == target
}
// ToolValidationError reports that a tool call's arguments failed schema
// validation. errors.Is matches ErrToolValidation.
//
// The agent loop surfaces it as a tool_result with IsError=true, not as a
// fatal loop error, so the model can self-correct on the next turn.
type ToolValidationError struct {
	ToolName string            // tool named in the rendered message header
	Issues   []ValidationIssue // individual schema mismatches
}

// Error renders every issue through formatValidationIssues.
func (e *ToolValidationError) Error() string {
	return formatValidationIssues(e.ToolName, e.Issues)
}

// Is reports whether target is the ErrToolValidation sentinel.
func (e *ToolValidationError) Is(target error) bool {
	return ErrToolValidation == target
}
// Values for ValidationIssue.Kind.
const (
	IssueMissing = "missing"
	IssueType    = "type"
)

// ValidationIssue is one schema mismatch found while validating tool
// arguments.
type ValidationIssue struct {
	// Kind is IssueMissing or IssueType.
	Kind string
	// Path identifies the offending parameter.
	Path string
	// Expected is the expected type; used for IssueType only.
	Expected string
	// Received is the type actually provided; used for IssueType only.
	Received string
	// Hint is an optional fix hint, appended to the rendered message.
	Hint string
}
// IsContextOverflow reports whether err signals a context-overflow condition
// at either layer: the agentcore sentinel/wrapper (checked first, via
// errors.Is) or a raw litellm provider error. It is a convenience for callers
// that want to detect "request too big" without caring where it surfaced.
func IsContextOverflow(err error) bool {
	return errors.Is(err, ErrContextOverflow) || litellm.IsContextOverflowError(err)
}
// streamIdleMsgPattern is the lowercase marker found in the rendered message
// of a stream-idle abort.
const streamIdleMsgPattern = "stream idle timeout"

// IsStreamIdleMessage reports whether s contains the stream idle-timeout
// marker, ignoring case. It is meant for situations where only the error
// string survives (sub-agent JSON results, structured event payloads that
// flatten the chain).
func IsStreamIdleMessage(s string) bool {
	lowered := strings.ToLower(s)
	return strings.Contains(lowered, streamIdleMsgPattern)
}
// ClassifyProvider maps an LLM/provider error to the most specific matching
// Err* sentinel of this package.
//
// Results:
//   - nil when err is nil;
//   - the matching sentinel when one applies;
//   - err itself, unchanged, when nothing matches, so callers can wrap it with
//     their own context.
//
// Stream-idle is tested before the generic timeout: it is a stuck connection
// that failover can typically rescue, whereas a generic timeout may just be a
// slow model. Both error-chain matching (via litellm.IsStreamIdleError) and
// message pattern matching are supported, because sub-agent JSON results
// flatten the original error to a plain string.
//
// Context overflow is deliberately never returned here — use
// IsContextOverflow, which already handles both the agentcore and litellm
// layers.
func ClassifyProvider(err error) error {
	if err == nil {
		return nil
	}
	sentinel := classifyProviderSentinel(err)
	if sentinel == nil {
		// No category applies: hand the original error back untouched.
		return err
	}
	return sentinel
}
// classifyProviderSentinel returns the provider sentinel that best describes
// err, or nil when no category applies. err must be non-nil.
//
// Checks run from most to least specific:
//  1. error-chain checks: litellm.IsStreamIdleError, then
//     context.DeadlineExceeded;
//  2. substring checks on the lowercased message, in the order of the switch
//     below. The first matching case wins.
func classifyProviderSentinel(err error) error {
	if litellm.IsStreamIdleError(err) {
		return ErrProviderStreamIdle
	}
	if errors.Is(err, context.DeadlineExceeded) {
		return ErrProviderTimeout
	}

	msg := strings.ToLower(err.Error())
	switch {
	case strings.Contains(msg, streamIdleMsgPattern):
		return ErrProviderStreamIdle
	case containsAny(msg, "rate limit", "too many requests", "429"):
		return ErrProviderRateLimit
	case strings.Contains(msg, "tls handshake timeout"):
		// Must precede the generic timeout case: this message also contains
		// "timeout", so when it was listed only among the network patterns
		// below it could never be reached and was reported as a timeout.
		return ErrProviderNetwork
	case containsAny(msg, "deadline exceeded", "timeout", "timed out"):
		return ErrProviderTimeout
	case containsAny(msg, "invalid api key", "incorrect api key", "unauthorized", "authentication failed", "forbidden", "401", "403"):
		return ErrProviderAuth
	case containsAny(msg, "connection refused", "connection reset", "no such host", "dial tcp", "server misbehaving", "broken pipe", "eof"):
		return ErrProviderNetwork
	}
	return nil
}
// IsFailoverEligible reports whether err is a transient provider error that
// cross-provider failover may recover from: rate_limit, timeout, network, or
// stream_idle.
//
// It returns false for a nil error, user cancellation (context.Canceled
// anywhere in the chain), auth errors, context_overflow, and anything
// ClassifyProvider leaves unclassified.
func IsFailoverEligible(err error) bool {
	if err == nil {
		return false
	}
	if errors.Is(err, context.Canceled) {
		return false
	}

	classified := ClassifyProvider(err)
	transient := []error{
		ErrProviderRateLimit,
		ErrProviderTimeout,
		ErrProviderNetwork,
		ErrProviderStreamIdle,
	}
	for _, sentinel := range transient {
		if errors.Is(classified, sentinel) {
			return true
		}
	}
	return false
}
// FailoverReason returns a stable short label ("rate_limit" / "timeout" /
// "stream_idle" / "network") suitable for structured logging. It returns ""
// when err is not failover-eligible: nil, user cancellation
// (context.Canceled anywhere in the chain), auth errors, or unclassified
// errors.
//
// The cancellation guard mirrors IsFailoverEligible, so a non-empty label is
// only produced for errors that IsFailoverEligible accepts.
func FailoverReason(err error) string {
	if err == nil || errors.Is(err, context.Canceled) {
		return ""
	}
	classified := ClassifyProvider(err)
	// Stream-idle is tested first so it wins over the generic timeout label
	// when an unclassified chain happens to match more than one sentinel.
	switch {
	case errors.Is(classified, ErrProviderStreamIdle):
		return "stream_idle"
	case errors.Is(classified, ErrProviderRateLimit):
		return "rate_limit"
	case errors.Is(classified, ErrProviderTimeout):
		return "timeout"
	case errors.Is(classified, ErrProviderNetwork):
		return "network"
	}
	return ""
}
// containsAny reports whether msg contains at least one of patterns as a
// substring. Matching is case-sensitive; with no patterns it returns false.
func containsAny(msg string, patterns ...string) bool {
	for i := range patterns {
		if strings.Contains(msg, patterns[i]) {
			return true
		}
	}
	return false
}
// formatValidationIssues renders issues as a single multi-line block: an
// "InputValidationError" header naming toolName, followed by one line per
// issue.
//
// Missing params come first (most fundamental error), then type mismatches;
// within each group, paths sort alphabetically for stable output. Issues whose
// Kind is neither IssueMissing nor IssueType are not rendered. A non-empty
// Hint is appended to its line after ". ".
//
// The caller's slice is never reordered. Renderable issues are copied into a
// private slice before sorting, so ToolValidationError.Error() does not mutate
// the error's Issues field as a side effect. Filtering before sorting also
// keeps the comparator a consistent ordering, since it only ever sees the two
// known kinds.
func formatValidationIssues(toolName string, issues []ValidationIssue) string {
	renderable := make([]ValidationIssue, 0, len(issues))
	for _, it := range issues {
		if it.Kind == IssueMissing || it.Kind == IssueType {
			renderable = append(renderable, it)
		}
	}

	sort.SliceStable(renderable, func(i, j int) bool {
		if renderable[i].Kind != renderable[j].Kind {
			return renderable[i].Kind == IssueMissing
		}
		return renderable[i].Path < renderable[j].Path
	})

	lines := make([]string, 0, len(renderable))
	for _, it := range renderable {
		var line string
		switch it.Kind {
		case IssueMissing:
			line = fmt.Sprintf("The required parameter `%s` is missing", it.Path)
		case IssueType:
			line = fmt.Sprintf(
				"The parameter `%s` type is expected as `%s` but provided as `%s`",
				it.Path, it.Expected, it.Received,
			)
		}
		if it.Hint != "" {
			line += ". " + it.Hint
		}
		lines = append(lines, line)
	}

	noun := "issue"
	if len(lines) > 1 {
		noun = "issues"
	}
	header := fmt.Sprintf("InputValidationError: %s failed due to the following %s:", toolName, noun)
	return header + "\n" + strings.Join(lines, "\n")
}