Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 38 additions & 20 deletions Package.resolved

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@ let package = Package(
.package(url: "https://github.com/mattt/JSONSchema", from: "1.3.0"),
.package(url: "https://github.com/mattt/llama.swift", .upToNextMajor(from: "2.7484.0")),
.package(url: "https://github.com/mattt/PartialJSONDecoder", from: "1.0.0"),
// mlx-swift-lm must be >= 2.25.5 for ToolSpec/tool calls and UserInput(chat:processing:tools:).
.package(url: "https://github.com/ml-explore/mlx-swift-lm", from: "2.25.5"),
// mlx-swift-lm must be >= 2.30.3 for fast SDPA, Gemma3n per-layer intermediate_size,
// the cache race fix, the Memory API, and chat rehydration. (The earlier floor of
// >= 2.25.5 was needed for ToolSpec/tool calls.)
.package(url: "https://github.com/ml-explore/mlx-swift-lm", from: "2.30.3"),
.package(url: "https://github.com/swiftlang/swift-syntax", from: "600.0.0"),
],
targets: [
Expand Down
30 changes: 29 additions & 1 deletion Sources/AnyLanguageModel/GenerationOptions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,25 @@ public struct GenerationOptions: Sendable, Equatable, Codable {
/// an error will be thrown.
public var maximumResponseTokens: Int?

/// The maximum number of tokens to retain in the key-value (KV) cache.
///
/// When set, uses a rotating cache that evicts the oldest tokens beyond this limit.
/// When `nil` (the default), the cache grows unbounded.
///
/// Recommended values: 2048–4096 for iPhone, `nil` for Mac.
public var maxKVSize: Int?

/// The bit width used for KV cache quantization (for example, 4 or 8).
///
/// Reduces cache memory usage at a slight cost in quality.
/// When `nil` (the default), the cache uses full precision.
public var kvBits: Int?

/// The group size used for KV cache quantization.
///
/// Only meaningful when ``kvBits`` is set. The initializer defaults this to 64.
// NOTE(review): this property is non-optional and has no default value at the
// declaration. If the type relies on synthesized `Decodable`, payloads encoded
// before this field existed would presumably fail to decode — confirm against
// the type's Codable implementation.
public var kvGroupSize: Int

/// Storage for model-specific custom options.
private var customOptionsStorage: CustomOptionsStorage = .init()

Expand Down Expand Up @@ -157,14 +176,23 @@ public struct GenerationOptions: Sendable, Equatable, Codable {
/// responses. Must be between `0` and `1`, inclusive.
/// - maximumResponseTokens: The maximum number of tokens the model is allowed
/// to produce before being artificially halted. Must be positive.
/// - maxKVSize: Maximum tokens in the KV cache. When set, enables a rotating cache.
/// - kvBits: Bit width for KV cache quantization.
/// - kvGroupSize: Group size for KV cache quantization. Default is 64.
public init(
    sampling: SamplingMode? = nil,
    temperature: Double? = nil,
    maximumResponseTokens: Int? = nil,
    maxKVSize: Int? = nil,
    kvBits: Int? = nil,
    kvGroupSize: Int = 64
) {
    // Sampling and response-length controls.
    self.sampling = sampling
    self.temperature = temperature
    self.maximumResponseTokens = maximumResponseTokens

    // KV cache configuration. Values are stored exactly as given; no range
    // validation is performed in this initializer.
    self.maxKVSize = maxKVSize
    self.kvBits = kvBits
    self.kvGroupSize = kvGroupSize
}
}

Expand Down
Loading